Beispiel #1
0
def test_multihop_receiver_on_success(vo, did_factory, root_account,
                                      core_config_mock, caches_mock):
    """
    Verify that the receiver correctly handles successful multihop jobs
    """
    receiver_thread = threading.Thread(target=receiver,
                                       kwargs={
                                           'id': 0,
                                           'full_mode': True,
                                           'all_vos': True,
                                           'total_threads': 1
                                       })
    receiver_thread.start()

    try:
        src_rse = 'XRD1'
        src_rse_id = rse_core.get_rse_id(rse=src_rse, vo=vo)
        jump_rse = 'XRD3'
        jump_rse_id = rse_core.get_rse_id(rse=jump_rse, vo=vo)
        dst_rse = 'XRD4'
        dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)

        all_rses = [src_rse_id, jump_rse_id, dst_rse_id]

        did = did_factory.upload_test_file(src_rse)
        rule_core.add_rule(dids=[did],
                           account=root_account,
                           copies=1,
                           rse_expression=dst_rse,
                           grouping='ALL',
                           weight=None,
                           lifetime=None,
                           locked=False,
                           subscription_id=None)
        submitter(once=True,
                  rses=[{
                      'id': rse_id
                  } for rse_id in all_rses],
                  group_bulk=2,
                  partition_wait_time=None,
                  transfertype='single',
                  filter_transfertool=None)

        request = __wait_for_request_state(dst_rse_id=jump_rse_id,
                                           state=RequestState.DONE,
                                           run_poller=False,
                                           **did)
        assert request['state'] == RequestState.DONE
        request = __wait_for_request_state(dst_rse_id=dst_rse_id,
                                           state=RequestState.DONE,
                                           run_poller=False,
                                           **did)
        assert request['state'] == RequestState.DONE
    finally:
        receiver_graceful_stop.set()
        receiver_thread.join(timeout=5)
        receiver_graceful_stop.clear()
Beispiel #2
0
def test_multihop_receiver_on_failure(vo, did_factory, replica_client,
                                      root_account, core_config_mock,
                                      caches_mock):
    """
    Verify that the receiver correctly handles multihop jobs which fail
    """
    receiver_thread = threading.Thread(target=receiver,
                                       kwargs={
                                           'id': 0,
                                           'full_mode': True,
                                           'all_vos': True,
                                           'total_threads': 1
                                       })
    receiver_thread.start()

    try:
        src_rse = 'XRD1'
        src_rse_id = rse_core.get_rse_id(rse=src_rse, vo=vo)
        jump_rse = 'XRD3'
        jump_rse_id = rse_core.get_rse_id(rse=jump_rse, vo=vo)
        dst_rse = 'XRD4'
        dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)

        all_rses = [src_rse_id, jump_rse_id, dst_rse_id]

        # Register a did which doesn't exist. It will trigger a failure error during the FTS transfer.
        did = did_factory.random_did()
        replica_client.add_replicas(rse=src_rse,
                                    files=[{
                                        'scope': did['scope'].external,
                                        'name': did['name'],
                                        'bytes': 1,
                                        'adler32': 'aaaaaaaa'
                                    }])

        rule_core.add_rule(dids=[did],
                           account=root_account,
                           copies=1,
                           rse_expression=dst_rse,
                           grouping='ALL',
                           weight=None,
                           lifetime=None,
                           locked=False,
                           subscription_id=None)
        submitter(once=True,
                  rses=[{
                      'id': rse_id
                  } for rse_id in all_rses],
                  group_bulk=2,
                  partition_wait_time=None,
                  transfertype='single',
                  filter_transfertool=None)

        request = __wait_for_request_state(dst_rse_id=jump_rse_id,
                                           state=RequestState.FAILED,
                                           run_poller=False,
                                           **did)
        assert request['state'] == RequestState.FAILED
        # We use FTS "Completion" messages in receiver. In case of multi-hops transfer failures, FTS doesn't start
        # next transfers; so it never sends a "completion" message for some hops. Rely on poller in such cases.
        # TODO: set the run_poller argument to False if we ever manage to make the receiver correctly handle multi-hop failures.
        request = __wait_for_request_state(dst_rse_id=dst_rse_id,
                                           state=RequestState.FAILED,
                                           run_poller=True,
                                           **did)
        assert request['state'] == RequestState.FAILED
    finally:
        receiver_graceful_stop.set()
        receiver_thread.join(timeout=5)
        receiver_graceful_stop.clear()
Beispiel #3
0
def test_multisource_receiver(vo, did_factory, replica_client, root_account):
    """
    Run receiver as a background thread to automatically handle fts notifications.
    Ensure that a multi-source job in which the first source fails is correctly handled by receiver.
    """
    receiver_thread = threading.Thread(target=receiver,
                                       kwargs={
                                           'id': 0,
                                           'full_mode': True,
                                           'all_vos': True,
                                           'total_threads': 1
                                       })
    receiver_thread.start()

    try:
        src_rse1 = 'XRD4'
        src_rse1_id = rse_core.get_rse_id(rse=src_rse1, vo=vo)
        src_rse2 = 'XRD1'
        src_rse2_id = rse_core.get_rse_id(rse=src_rse2, vo=vo)
        dst_rse = 'XRD3'
        dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)

        all_rses = [src_rse1_id, src_rse2_id, dst_rse_id]

        # Add a good replica on the RSE which has a higher distance ranking
        did = did_factory.upload_test_file(src_rse1)
        # Add non-existing replica which will fail during multisource transfers on the RSE with lower cost (will be the preferred source)
        replica_client.add_replicas(rse=src_rse2,
                                    files=[{
                                        'scope': did['scope'].external,
                                        'name': did['name'],
                                        'bytes': 1,
                                        'adler32': 'aaaaaaaa'
                                    }])

        rule_core.add_rule(dids=[did],
                           account=root_account,
                           copies=1,
                           rse_expression=dst_rse,
                           grouping='ALL',
                           weight=None,
                           lifetime=None,
                           locked=False,
                           subscription_id=None)
        submitter(once=True,
                  rses=[{
                      'id': rse_id
                  } for rse_id in all_rses],
                  group_bulk=2,
                  partition_wait_time=None,
                  transfertype='single',
                  filter_transfertool=None)

        request = None
        for _ in range(MAX_POLL_WAIT_SECONDS):
            request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)
            # The request must not be marked as failed. Not even temporarily. It is a multi-source transfer and the
            # the first, failed, source must not change the replica state. We must wait for all sources to be tried.
            assert request['state'] != RequestState.FAILED
            if request['state'] == RequestState.DONE:
                break
            time.sleep(1)
        assert request['state'] == RequestState.DONE
    finally:
        receiver_graceful_stop.set()
        receiver_thread.join(timeout=5)
        receiver_graceful_stop.clear()