Example #1
def test_multihop_requests_created(rse_factory, did_factory, root_account,
                                   core_config_mock, caches_mock):
    """
    Ensure that multihop transfers are handled and that the intermediate request is correctly created
    """
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, intermediate_rse_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(intermediate_rse_id, 'available_for_multihop',
                               True)

    add_distance(src_rse_id, intermediate_rse_id, ranking=10)
    add_distance(intermediate_rse_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did],
                       account=root_account,
                       copies=1,
                       rse_expression=dst_rse_name,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)

    [[_, [transfer]]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()
    # the intermediate request was correctly created
    assert request_core.get_request_by_did(rse_id=intermediate_rse_id, **did)
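
Note: the path returned above is a list of hops. As a hedged continuation of this test (not part of the original; the src/dst attribute names are the ones asserted in Example #5 below), the two hops could be checked individually:

    # Sketch only: unpack the two hops of the multihop path.
    hop1, hop2 = transfer
    assert hop1.src.rse.id == src_rse_id
    assert hop1.dst.rse.id == intermediate_rse_id
    assert hop2.dst.rse.id == dst_rse_id
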
Example #2
File: test_s3.py Project: rak108/rucio
    def test_s3s_fts_src(self):
        """ S3: TPC a file from S3 to storage """

        expected_src_url = 's3s://fake-rucio.s3-eu-south-8.amazonaws.com:443/mock/69/3b/file-on-aws'
        expected_dst_url = 'https://somestorage.ch:1094/my/prefix/mock/69/3b/file-on-aws'

        rule_id = add_rule(dids=self.files3,
                           account=self.root,
                           copies=1,
                           rse_expression=self.rsenons3,
                           grouping='NONE',
                           weight=None,
                           lifetime=None,
                           locked=False,
                           subscription_id=None)
        [[_, [transfer_path]]] = next_transfers_to_submit(rses=[self.rsenons3_id]).items()
        assert transfer_path[0].rws.rule_id == rule_id[0]
        assert transfer_path[0].legacy_sources[0][1] == expected_src_url
        assert transfer_path[0].dest_url == expected_dst_url
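
Note: the unpacking idiom `[[_, [transfer_path]]] = ...items()` works because next_transfers_to_submit returns a mapping from transfertool builders to lists of transfer paths, each path being a list of hops. A hedged sketch of the same traversal written out explicitly (rse_ids is a hypothetical list of RSE ids):

# Sketch only: iterate the structure instead of destructuring it.
transfers = next_transfers_to_submit(rses=rse_ids)
for builder, transfer_paths in transfers.items():
    for path in transfer_paths:    # one path per transfer request
        for hop in path:           # a single-hop path has one element
            print(hop.rws.rule_id, hop.dest_url)
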
Example #3
def run_once(bulk, group_bulk, rse_ids, scheme, failover_scheme, transfertool_kwargs, heartbeat_handler, activity):
    worker_number, total_workers, logger = heartbeat_handler.live()

    start_time = time.time()
    transfers = next_transfers_to_submit(
        total_workers=total_workers,
        worker_number=worker_number,
        failover_schemes=failover_scheme,
        limit=bulk,
        activity=activity,
        rses=rse_ids,
        schemes=scheme,
        transfertools_by_name={'fts3': FTS3Transfertool},
        older_than=None,
        request_type=RequestType.STAGEIN,
        logger=logger,
    )
    # Count every hop across all returned paths.
    total_transfers = len(list(hop for paths in transfers.values() for path in paths for hop in path))
    record_timer('daemons.conveyor.stager.get_stagein_transfers.per_transfer', (time.time() - start_time) * 1000 / (total_transfers or 1))
    record_counter('daemons.conveyor.stager.get_stagein_transfers', total_transfers)
    record_timer('daemons.conveyor.stager.get_stagein_transfers.transfers', total_transfers)
    logger(logging.INFO, 'Got %s stagein transfers for %s' % (total_transfers, activity))

    for builder, transfer_paths in transfers.items():
        transfertool_obj = builder.make_transfertool(logger=logger, **transfertool_kwargs.get(builder.transfertool_class, {}))
        logger(logging.INFO, 'Starting to group transfers for %s (%s)' % (activity, transfertool_obj))
        start_time = time.time()
        grouped_jobs = transfertool_obj.group_into_submit_jobs(transfer_paths)
        record_timer('daemons.conveyor.stager.bulk_group_transfer', (time.time() - start_time) * 1000 / (len(transfer_paths) or 1))

        logger(logging.INFO, 'Starting to submit transfers for %s (%s)' % (activity, transfertool_obj))
        for job in grouped_jobs:
            worker_number, total_workers, logger = heartbeat_handler.live()
            submit_transfer(transfertool_obj=transfertool_obj, transfers=job['transfers'], job_params=job['job_params'], submitter='transfer_submitter', logger=logger)

    queue_empty = False
    if total_transfers < group_bulk:
        queue_empty = True
        logger(logging.INFO, 'Only %s transfers for %s which is less than group bulk %s' % (total_transfers, activity, group_bulk))
    return queue_empty
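
Note: run_once only relies on heartbeat_handler.live() returning the worker slot, the worker total, and a logger callable. A minimal stand-in satisfying that contract (an assumption for illustration, not the real Rucio heartbeat class):

import logging

class FakeHeartbeatHandler:
    # Minimal stand-in for the heartbeat handler used by run_once above.
    def live(self):
        def logger(level, msg, *args):
            logging.log(level, msg, *args)
        return 0, 1, logger  # worker_number, total_workers, logger
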
Example #4
def run_once(bulk, group_bulk, filter_transfertool, transfertools,
             ignore_availability, rse_ids, scheme, failover_scheme,
             partition_hash_var, timeout, transfertool_kwargs,
             heartbeat_handler, activity):
    worker_number, total_workers, logger = heartbeat_handler.live()

    start_time = time.time()
    transfers = next_transfers_to_submit(
        total_workers=total_workers,
        worker_number=worker_number,
        partition_hash_var=partition_hash_var,
        failover_schemes=failover_scheme,
        limit=bulk,
        activity=activity,
        rses=rse_ids,
        schemes=scheme,
        filter_transfertool=filter_transfertool,
        transfertool_classes=[
            TRANSFERTOOL_CLASSES_BY_NAME[transfertool]
            for transfertool in transfertools
        ],
        older_than=None,
        request_type=RequestType.TRANSFER,
        ignore_availability=ignore_availability,
        logger=logger,
    )
    total_transfers = len(
        list(hop for paths in transfers.values() for path in paths
             for hop in path))

    record_timer(
        'daemons.conveyor.transfer_submitter.get_transfers.per_transfer',
        (time.time() - start_time) * 1000 / (total_transfers or 1))
    GET_TRANSFERS_COUNTER.inc(total_transfers)
    record_timer('daemons.conveyor.transfer_submitter.get_transfers.transfers',
                 total_transfers)
    logger(logging.INFO, 'Got %s transfers for %s in %s seconds',
           total_transfers, activity,
           time.time() - start_time)

    for builder, transfer_paths in transfers.items():
        # Globus Transfertool is not yet production-ready, but we need to partially activate it
        # in all submitters if we want to enable native multi-hopping between transfertools.
        # This "if" can be triggered in a FTS submitter if it tries to multi-hop from
        # a globus-only RSE via a dual-stack RSE towards an FTS-only RSE.
        #
        # Just ignore this transfer and keep it in a queued state, so that it's picked up
        # later by the special submitter instance dedicated to globus transfers.
        #
        # TODO: remove this "if"
        if transfertools[0] != GlobusTransferTool.external_name \
                and builder.transfertool_class == GlobusTransferTool:
            logger(logging.INFO,
                   'Skipping submission of following transfers: %s',
                   [transfer_path_str(p) for p in transfer_paths])
            continue

        transfertool_obj = builder.make_transfertool(
            logger=logger,
            **transfertool_kwargs.get(builder.transfertool_class, {}))
        start_time = time.time()
        logger(logging.DEBUG, 'Starting to group transfers for %s (%s)',
               activity, transfertool_obj)
        grouped_jobs = transfertool_obj.group_into_submit_jobs(transfer_paths)
        record_timer('daemons.conveyor.transfer_submitter.bulk_group_transfer',
                     (time.time() - start_time) * 1000 /
                     (len(transfer_paths) or 1))

        logger(logging.DEBUG, 'Starting to submit transfers for %s (%s)',
               activity, transfertool_obj)
        for job in grouped_jobs:
            worker_number, total_workers, logger = heartbeat_handler.live()
            logger(
                logging.DEBUG, 'submitjob: transfers=%s, job_params=%s' %
                ([str(t) for t in job['transfers']], job['job_params']))
            submit_transfer(transfertool_obj=transfertool_obj,
                            transfers=job['transfers'],
                            job_params=job['job_params'],
                            submitter='transfer_submitter',
                            timeout=timeout,
                            logger=logger)

    queue_empty = False
    if total_transfers < group_bulk:
        queue_empty = True
        logger(logging.DEBUG,
               'Only %s transfers for %s which is less than group bulk %s',
               total_transfers, activity, group_bulk)
    return queue_empty
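
Note: TRANSFERTOOL_CLASSES_BY_NAME maps external transfertool names to transfertool classes. Judging only from what this listing references, the registry plausibly contains entries along these lines (a sketch, not the authoritative definition):

TRANSFERTOOL_CLASSES_BY_NAME = {
    'fts3': FTS3Transfertool,  # the 'fts3' key is attested in Example #3
    GlobusTransferTool.external_name: GlobusTransferTool,  # assumed entry
}
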
Example #5
def test_singlehop_vs_multihop_priority(rse_factory, root_account, mock_scope,
                                        core_config_mock, caches_mock):
    """
    When the distance difference is small, the singlehop path is prioritized
    over the multihop one due to HOP_PENALTY; when the difference is big,
    the multihop path is prioritized.
    """
    # +------+    +------+
    # |      | 10 |      |
    # | RSE0 +--->| RSE1 |
    # |      |    |      +-+ 10
    # +------+    +------+ |  +------+       +------+
    #                      +->|      |  200  |      |
    # +------+                | RSE3 |<------| RSE4 |
    # |      |   30      +--->|      |       |      |
    # | RSE2 +-----------+    +------+       +------+
    # |      |
    # +------+
    _, rse0_id = rse_factory.make_posix_rse()
    _, rse1_id = rse_factory.make_posix_rse()
    _, rse2_id = rse_factory.make_posix_rse()
    rse3_name, rse3_id = rse_factory.make_posix_rse()
    _, rse4_id = rse_factory.make_posix_rse()

    add_distance(rse0_id, rse1_id, ranking=10)
    add_distance(rse1_id, rse3_id, ranking=10)
    add_distance(rse2_id, rse3_id, ranking=30)
    add_distance(rse4_id, rse3_id, ranking=200)
    rse_core.add_rse_attribute(rse1_id, 'available_for_multihop', True)

    # add same file to two source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse2_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did],
                       account=root_account,
                       copies=1,
                       rse_expression=rse3_name,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)

    # The singlehop must be prioritized
    [[_, [transfer]]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()
    assert len(transfer) == 1
    assert transfer[0].src.rse.id == rse2_id
    assert transfer[0].dst.rse.id == rse3_id

    # add same file to two source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse4_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did],
                       account=root_account,
                       copies=1,
                       rse_expression=rse3_name,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)

    # The multihop must be prioritized
    [[_, transfers]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()
    transfer = next(t for t in transfers if t[0].rws.name == file['name'])
    assert len(transfer) == 2
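
Note: the prioritization asserted above follows from additive path costs with a per-hop penalty. The exact HOP_PENALTY value is configuration; 10 is assumed here purely for illustration:

HOP_PENALTY = 10                        # assumed value, for illustration only
multihop_cost = 10 + 10 + HOP_PENALTY   # RSE0 -> RSE1 -> RSE3
assert multihop_cost >= 30              # no better than RSE2 -> RSE3 (30): singlehop wins
assert multihop_cost < 200              # far better than RSE4 -> RSE3 (200): multihop wins
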
Example #6
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(
        rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(
        rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(
        rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(
        rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)

    # add same file to all source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did],
                       account=root_account,
                       copies=1,
                       rse_expression=dst_rse_name,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)

    @transactional_session
    def __fake_source_ranking(source_rse_id, new_ranking, session=None):
        rowcount = session.query(models.Source).filter(
            models.Source.rse_id == source_rse_id).update(
                {'ranking': new_ranking})
        if not rowcount:
            models.Source(request_id=request['id'],
                          scope=request['scope'],
                          name=request['name'],
                          rse_id=source_rse_id,
                          dest_rse_id=request['dest_rse_id'],
                          ranking=new_ranking,
                          bytes=request['bytes'],
                          url=None,
                          is_using=False). \
                save(session=session, flush=False)

    # On equal priority and distance, disk should be preferred over tape. Both disk sources will be returned
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs. Disk still preferred, because it must fail twice before tape is tried
    __fake_source_ranking(disk1_rse_id, -1)
    __fake_source_ranking(disk2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs again. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(disk1_rse_id, -2)
    __fake_source_ranking(disk2_rse_id, -2)
    [[_, transfers]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfers) == 1
    transfer = transfers[0]
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] in (tape1_rse_name, tape2_rse_name)

    # On equal source ranking but different distance, the smaller distance is preferred
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape2_rse_name

    # On different source ranking, the higher ranking is preferred
    __fake_source_ranking(tape2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape1_rse_name
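
Note: taken together, the assertions above imply a source selection policy: disk sources are used until their ranking drops below -1, then the code falls back to a single tape source, ordering candidates by higher ranking first and smaller distance second. A hedged sketch of that policy (illustrative only, not Rucio internals):

def pick_sources(disks, tapes):
    # Disks must "fail twice" (ranking dropping below -1) before tape is tried.
    usable_disks = [s for s in disks if s['ranking'] > -2]
    pool = sorted(usable_disks or tapes,
                  key=lambda s: (-s['ranking'], s['distance']))
    # Multiple tape sources are not allowed: keep only the best one.
    return pool if usable_disks else pool[:1]
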
Example #7
def test_tpc(containerized_rses, root_account, test_scope, did_factory, rse_client, rule_client, artifact):
    if len(containerized_rses) < 2:
        pytest.skip("TPC tests need at least 2 containerized rse's for execution}")
    rse1_name, rse1_id = containerized_rses[0]
    rse2_name, rse2_id = containerized_rses[1]

    base_file_name = generate_uuid()
    test_file = did_factory.upload_test_file(rse1_name, name=base_file_name + '.000', return_full_item=True)
    test_file_did_str = '%s:%s' % (test_file['did_scope'], test_file['did_name'])
    test_file_did = {
        'scope': test_scope,
        'name': test_file['did_name']
    }
    test_file_name_hash = hashlib.md5(test_file_did_str.encode('utf-8')).hexdigest()
    test_file_expected_pfn = '%s/%s/%s/%s' % (test_file_did['scope'], test_file_name_hash[0:2], test_file_name_hash[2:4], test_file_did['name'])

    rse1_hostname = rse_client.get_protocols(rse1_name)[0]['hostname']
    rse2_hostname = rse_client.get_protocols(rse2_name)[0]['hostname']

    rule_id = add_rule(dids=[test_file_did], account=root_account, copies=1, rse_expression=rse2_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)
    rule = rule_client.get_replication_rule(rule_id[0])

    re_evaluator(once=True)

    assert rule['locks_ok_cnt'] == 0
    assert rule['locks_replicating_cnt'] == 1

    [[_, [transfer_path]]] = next_transfers_to_submit(rses=[rse1_id, rse2_id]).items()
    assert transfer_path[0].rws.rule_id == rule_id[0]
    src_url = transfer_path[0].legacy_sources[0][1]
    dest_url = transfer_path[0].dest_url
    check_url(src_url, rse1_hostname, test_file_expected_pfn)
    check_url(dest_url, rse2_hostname, test_file_expected_pfn)

    # Run Submitter
    submitter.submitter(once=True)

    # Get FTS transfer job id
    request = get_request_by_did(rse_id=rse2_id, **test_file_did)
    fts_transfer_id = request['external_id']

    # Check FTS transfer job
    assert fts_transfer_id is not None

    # Wait for the FTS transfer to finish
    fts_transfer_status = None
    for _ in range(MAX_POLL_WAIT_SECONDS):
        fts_transfer_status = poll_fts_transfer_status(fts_transfer_id)
        if fts_transfer_status not in ['SUBMITTED', 'ACTIVE']:
            break
        time.sleep(1)
    assert fts_transfer_status == 'FINISHED'

    poller.run(once=True, older_than=0)
    finisher.run(once=True)
    rule = rule_client.get_replication_rule(rule_id[0])
    assert rule['locks_ok_cnt'] == 1
    assert rule['locks_replicating_cnt'] == 0

    if artifact is not None:
        date = datetime.date.today().strftime("%Y-%m-%d")
        with open(artifact, 'w') as artifact_file:
            artifact_file.write(
                f"/var/log/fts3/{date}/{rse1_name.lower()}__{rse2_name.lower()}/*__{fts_transfer_id}"
            )
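
Note: check_url itself is not shown in this listing. Given how it is called above (URL, expected hostname, expected PFN suffix), a plausible, purely hypothetical implementation would be:

from urllib.parse import urlparse

def check_url(url, hostname, pfn):
    # Hypothetical helper: assert the URL points at the expected host
    # and ends with the expected physical file name.
    parsed = urlparse(url)
    assert parsed.hostname == hostname
    assert parsed.path.endswith(pfn)
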
Example #8
def run_once(bulk, group_bulk, filter_transfertool, transfertool,
             ignore_availability, rse_ids, scheme, failover_scheme,
             partition_hash_var, timeout, transfertool_kwargs,
             heartbeat_handler, activity):
    worker_number, total_workers, logger = heartbeat_handler.live()

    start_time = time.time()
    transfers = next_transfers_to_submit(
        total_workers=total_workers,
        worker_number=worker_number,
        partition_hash_var=partition_hash_var,
        failover_schemes=failover_scheme,
        limit=bulk,
        activity=activity,
        rses=rse_ids,
        schemes=scheme,
        filter_transfertool=filter_transfertool,
        transfertools_by_name={
            transfertool: TRANSFERTOOL_CLASSES_BY_NAME[transfertool]
        },
        older_than=None,
        request_type=RequestType.TRANSFER,
        ignore_availability=ignore_availability,
        logger=logger,
    )
    total_transfers = len(
        list(hop for paths in transfers.values() for path in paths
             for hop in path))

    record_timer(
        'daemons.conveyor.transfer_submitter.get_transfers.per_transfer',
        (time.time() - start_time) * 1000 / (total_transfers or 1))
    GET_TRANSFERS_COUNTER.inc(total_transfers)
    record_timer('daemons.conveyor.transfer_submitter.get_transfers.transfers',
                 total_transfers)
    logger(logging.INFO, 'Got %s transfers for %s in %s seconds',
           total_transfers, activity,
           time.time() - start_time)

    for builder, transfer_paths in transfers.items():
        transfertool_obj = builder.make_transfertool(
            logger=logger,
            **transfertool_kwargs.get(builder.transfertool_class, {}))
        start_time = time.time()
        logger(logging.DEBUG, 'Starting to group transfers for %s (%s)',
               activity, transfertool_obj)
        grouped_jobs = transfertool_obj.group_into_submit_jobs(transfer_paths)
        record_timer('daemons.conveyor.transfer_submitter.bulk_group_transfer',
                     (time.time() - start_time) * 1000 /
                     (len(transfer_paths) or 1))

        logger(logging.DEBUG, 'Starting to submit transfers for %s (%s)',
               activity, transfertool_obj)
        for job in grouped_jobs:
            worker_number, total_workers, logger = heartbeat_handler.live()
            logger(
                logging.DEBUG, 'submitjob: transfers=%s, job_params=%s' %
                ([str(t) for t in job['transfers']], job['job_params']))
            submit_transfer(transfertool_obj=transfertool_obj,
                            transfers=job['transfers'],
                            job_params=job['job_params'],
                            submitter='transfer_submitter',
                            timeout=timeout,
                            logger=logger)

    queue_empty = False
    if total_transfers < group_bulk:
        queue_empty = True
        logger(logging.DEBUG,
               'Only %s transfers for %s which is less than group bulk %s',
               total_transfers, activity, group_bulk)
    return queue_empty
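
Note: all three run_once variants (Examples #3, #4 and #8) return queue_empty when fewer than group_bulk transfers were found, so the calling daemon can back off instead of polling again immediately. A hedged sketch of such a driver loop (the real daemon wiring is not shown in this listing; the idle interval is an assumption):

import threading

def daemon_loop(run_once_fn, graceful_stop, idle_sleep=60):
    # run_once_fn: zero-argument closure wrapping run_once with its parameters.
    # graceful_stop: threading.Event used to terminate the loop.
    while not graceful_stop.is_set():
        if run_once_fn():
            # Little or nothing left to submit: wait before polling again.
            graceful_stop.wait(timeout=idle_sleep)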