Esempio n. 1
0
def get_stagein_requests_and_source_replicas(total_workers=0, worker_number=0, failover_schemes=None, limit=None, activity=None, older_than=None,
                                             rses=None, mock=False, schemes=None, bring_online=43200, retry_other_fts=False, logging_prepend_str='', session=None):
    """
    Get staging requests and the associated source replicas

    :param total_workers:         Number of total workers.
    :param worker_number:         Id of the executing worker.
    :param failover_schemes:      Failover schemes.
    :param limit:                 Limit.
    :param activity:              Activity.
    :param older_than:            Get transfers older than.
    :param rses:                  Include RSES.
    :param mock:                  Mock testing.
    :param schemes:               Include schemes.
    :param bring_online:          Bring online timeout.
    :parm retry_other_fts:        Retry other fts servers.
    :param logging_prepend_str:   String to prepend to the logging
    :session:                     The database session in use.
    :returns:                     transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source
    """

    prepend_str = ''
    if logging_prepend_str:
        prepend_str = logging_prepend_str

    req_sources = request.list_stagein_requests_and_source_replicas(total_workers=total_workers,
                                                                    worker_number=worker_number,
                                                                    limit=limit,
                                                                    activity=activity,
                                                                    older_than=older_than,
                                                                    rses=rses,
                                                                    session=session)

    transfers, rses_info, protocols, rse_attrs, reqs_no_source = {}, {}, {}, {}, []
    for req_id, rule_id, scope, name, md5, adler32, bytes, activity, attributes, dest_rse_id, source_rse_id, rse, deterministic, rse_type, path, staging_buffer, retry_count, previous_attempt_id, src_url, ranking in req_sources:
        try:
            if rses and dest_rse_id not in rses:
                continue

            current_schemes = schemes
            if previous_attempt_id and failover_schemes:
                current_schemes = failover_schemes

            if req_id not in transfers:
                if req_id not in reqs_no_source:
                    reqs_no_source.append(req_id)

                if not src_url:
                    # source_rse_id will be None if no source replicas
                    # rse will be None if rse is staging area
                    # staging_buffer will be None if rse has no key 'staging_buffer'
                    if source_rse_id is None or rse is None or staging_buffer is None:
                        continue

                    # Get destination rse information and protocol
                    dest_rse_name = get_rse_name(rse_id=dest_rse_id, session=session)
                    if dest_rse_id not in rses_info:
                        rses_info[dest_rse_id] = rsemgr.get_rse_info(rse_id=dest_rse_id, session=session)

                    if staging_buffer != dest_rse_id:
                        continue

                    attr = None
                    if attributes:
                        if isinstance(attributes, dict):
                            attr = json.loads(json.dumps(attributes))
                        else:
                            attr = json.loads(str(attributes))

                    source_replica_expression = attr["source_replica_expression"] if "source_replica_expression" in attr else None
                    if source_replica_expression:
                        try:
                            parsed_rses = parse_expression(source_replica_expression, filter={'vo': scope.vo}, session=session)
                        except InvalidRSEExpression as error:
                            logging.error(prepend_str + "Invalid RSE exception %s: %s" % (source_replica_expression, error))
                            continue
                        else:
                            allowed_rses = [x['id'] for x in parsed_rses]
                            if source_rse_id not in allowed_rses:
                                continue

                    source_rse_name = get_rse_name(rse_id=source_rse_id, session=session)
                    if source_rse_id not in rses_info:
                        rses_info[source_rse_id] = rsemgr.get_rse_info(rse_id=source_rse_id, session=session)
                    if source_rse_id not in rse_attrs:
                        rse_attrs[source_rse_id] = get_rse_attributes(rse_id=source_rse_id, session=session)

                    if source_rse_id not in protocols:
                        protocols[source_rse_id] = rsemgr.create_protocol(rses_info[source_rse_id], 'write', current_schemes)

                    # we need to set the spacetoken if we use SRM
                    dest_spacetoken = None
                    if protocols[source_rse_id].attributes and \
                       'extended_attributes' in protocols[source_rse_id].attributes and \
                       protocols[source_rse_id].attributes['extended_attributes'] and \
                       'space_token' in protocols[source_rse_id].attributes['extended_attributes']:
                        dest_spacetoken = protocols[source_rse_id].attributes['extended_attributes']['space_token']

                    source_url = protocols[source_rse_id].lfns2pfns(lfns={'scope': scope.external, 'name': name, 'path': path}).values()[0]
                else:
                    # source_rse_id will be None if no source replicas
                    # rse will be None if rse is staging area
                    # staging_buffer will be None if rse has no key 'staging_buffer'
                    if source_rse_id is None or rse is None or staging_buffer is None:
                        continue

                    attr = None
                    if attributes:
                        if type(attributes) is dict:
                            attr = json.loads(json.dumps(attributes))
                        else:
                            attr = json.loads(str(attributes))

                    # to get space token and fts attribute
                    source_rse_name = get_rse_name(rse_id=source_rse_id, session=session)
                    if source_rse_id not in rses_info:
                        rses_info[source_rse_id] = rsemgr.get_rse_info(rse_id=source_rse_id, session=session)
                    if source_rse_id not in rse_attrs:
                        rse_attrs[source_rse_id] = get_rse_attributes(rse_id=source_rse_id, session=session)

                    if source_rse_id not in protocols:
                        protocols[source_rse_id] = rsemgr.create_protocol(rses_info[source_rse_id], 'write', current_schemes)

                    # we need to set the spacetoken if we use SRM
                    dest_spacetoken = None
                    if protocols[source_rse_id].attributes and \
                       'extended_attributes' in protocols[source_rse_id].attributes and \
                       protocols[source_rse_id].attributes['extended_attributes'] and \
                       'space_token' in protocols[source_rse_id].attributes['extended_attributes']:
                        dest_spacetoken = protocols[source_rse_id].attributes['extended_attributes']['space_token']
                    source_url = src_url

                fts_hosts = rse_attrs[source_rse_id].get('fts', None)
                if not fts_hosts:
                    logging.error(prepend_str + 'Source RSE %s FTS attribute not defined - SKIP REQUEST %s' % (rse, req_id))
                    continue
                if not retry_count:
                    retry_count = 0
                fts_list = fts_hosts.split(",")

                external_host = fts_list[0]
                if retry_other_fts:
                    external_host = fts_list[retry_count % len(fts_list)]

                if req_id in reqs_no_source:
                    reqs_no_source.remove(req_id)

                file_metadata = {'request_id': req_id,
                                 'scope': scope,
                                 'name': name,
                                 'activity': activity,
                                 'request_type': str(RequestType.STAGEIN).lower(),
                                 'src_type': "TAPE",
                                 'dst_type': "DISK",
                                 'src_rse': source_rse_name,
                                 'dst_rse': dest_rse_name,
                                 'src_rse_id': source_rse_id,
                                 'dest_rse_id': dest_rse_id,
                                 'filesize': bytes,
                                 'md5': md5,
                                 'adler32': adler32}
                if previous_attempt_id:
                    file_metadata['previous_attempt_id'] = previous_attempt_id

                transfers[req_id] = {'request_id': req_id,
                                     # 'src_urls': [source_url],
                                     'sources': [(rse, source_url, source_rse_id, ranking)],
                                     'dest_urls': [source_url],
                                     'src_spacetoken': None,
                                     'dest_spacetoken': dest_spacetoken,
                                     'overwrite': False,
                                     'bring_online': bring_online,
                                     'copy_pin_lifetime': attr.get('lifetime', -1) if attr else -1,
                                     'external_host': external_host,
                                     'selection_strategy': 'auto',
                                     'rule_id': rule_id,
                                     'file_metadata': file_metadata}
                logging.debug(prepend_str + "Transfer for request(%s): %s" % (req_id, transfers[req_id]))
        except Exception:
            logging.critical(prepend_str + "Exception happened when trying to get transfer for request %s: %s" % (req_id, traceback.format_exc()))
            break

    return transfers, reqs_no_source
Esempio n. 2
0
def get_transfer_requests_and_source_replicas(total_workers=0, worker_number=0, limit=None, activity=None, older_than=None, rses=None, schemes=None,
                                              bring_online=43200, retry_other_fts=False, failover_schemes=None, session=None):
    """
    Get transfer requests and the associated source replicas

    :param total_workers:         Number of total workers.
    :param worker_number:         Id of the executing worker.
    :param limit:                 Limit.
    :param activity:              Activity.
    :param older_than:            Get transfers older than.
    :param rses:                  Include RSES.
    :param schemes:               Include schemes.
    :param bring_online:          Bring online timeout.
    :parm retry_other_fts:        Retry other fts servers.
    :param failover_schemes:      Failover schemes.
    :session:                     The database session in use.
    :returns:                     transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source
    """

    req_sources = __list_transfer_requests_and_source_replicas(total_workers=total_workers,
                                                               worker_number=worker_number,
                                                               limit=limit,
                                                               activity=activity,
                                                               older_than=older_than,
                                                               rses=rses,
                                                               session=session)

    unavailable_read_rse_ids = __get_unavailable_read_rse_ids(session=session)

    bring_online_local = bring_online
    transfers, rses_info, protocols, rse_attrs, reqs_no_source, reqs_only_tape_source, reqs_scheme_mismatch = {}, {}, {}, {}, [], [], []
    for req_id, rule_id, scope, name, md5, adler32, bytes, activity, attributes, previous_attempt_id, dest_rse_id, source_rse_id, rse, deterministic, rse_type, path, retry_count, src_url, ranking, link_ranking in req_sources:
        transfer_src_type = "DISK"
        transfer_dst_type = "DISK"
        allow_tape_source = True
        try:
            if rses and dest_rse_id not in rses:
                continue

            current_schemes = schemes
            if previous_attempt_id and failover_schemes:
                current_schemes = failover_schemes

            if req_id not in transfers:
                if req_id not in reqs_no_source:
                    reqs_no_source.append(req_id)

                # source_rse_id will be None if no source replicas
                # rse will be None if rse is staging area
                if source_rse_id is None or rse is None:
                    continue

                if link_ranking is None:
                    logging.debug("Request %s: no link from %s to %s" % (req_id, source_rse_id, dest_rse_id))
                    continue

                if source_rse_id in unavailable_read_rse_ids:
                    continue

                # Get destination rse information
                if dest_rse_id not in rses_info:
                    dest_rse = get_rse_name(rse_id=dest_rse_id, session=session)
                    rses_info[dest_rse_id] = rsemgr.get_rse_info(dest_rse, session=session)
                if dest_rse_id not in rse_attrs:
                    rse_attrs[dest_rse_id] = get_rse_attributes(dest_rse_id, session=session)

                # Get the source rse information
                if source_rse_id not in rses_info:
                    source_rse = get_rse_name(rse_id=source_rse_id, session=session)
                    rses_info[source_rse_id] = rsemgr.get_rse_info(source_rse, session=session)
                if source_rse_id not in rse_attrs:
                    rse_attrs[source_rse_id] = get_rse_attributes(source_rse_id, session=session)

                attr = None
                if attributes:
                    if type(attributes) is dict:
                        attr = json.loads(json.dumps(attributes))
                    else:
                        attr = json.loads(str(attributes))

                # parse source expression
                source_replica_expression = attr["source_replica_expression"] if (attr and "source_replica_expression" in attr) else None
                if source_replica_expression:
                    try:
                        parsed_rses = parse_expression(source_replica_expression, session=session)
                    except InvalidRSEExpression as error:
                        logging.error("Invalid RSE exception %s: %s" % (source_replica_expression, error))
                        continue
                    else:
                        allowed_rses = [x['rse'] for x in parsed_rses]
                        if rse not in allowed_rses:
                            continue

                # parse allow tape source expression, not finally version.
                # allow_tape_source = attr["allow_tape_source"] if (attr and "allow_tape_source" in attr) else True
                allow_tape_source = True

                # Find matching scheme between destination and source
                try:
                    matching_scheme = rsemgr.find_matching_scheme(rse_settings_dest=rses_info[dest_rse_id],
                                                                  rse_settings_src=rses_info[source_rse_id],
                                                                  operation_src='third_party_copy',
                                                                  operation_dest='third_party_copy',
                                                                  domain='wan',
                                                                  scheme=current_schemes)
                except RSEProtocolNotSupported:
                    logging.error('No matching schemes in %s for operation "third_party_copy" between %s and %s' % (current_schemes, rses_info[source_rse_id]['rse'], rses_info[dest_rse_id]['rse']))
                    if req_id in reqs_no_source:
                        reqs_no_source.remove(req_id)
                    if req_id not in reqs_scheme_mismatch:
                        reqs_scheme_mismatch.append(req_id)
                    continue

                # Get destination protocol
                dest_rse_id_key = '%s_%s' % (dest_rse_id, matching_scheme[0])
                if dest_rse_id_key not in protocols:
                    try:
                        protocols[dest_rse_id_key] = rsemgr.create_protocol(rses_info[dest_rse_id], 'third_party_copy', matching_scheme[0])
                    except RSEProtocolNotSupported:
                        logging.error('Operation "third_party_copy" not supported by dest_rse %s with schemes %s' % (rses_info[dest_rse_id]['rse'], current_schemes))
                        if req_id in reqs_no_source:
                            reqs_no_source.remove(req_id)
                        if req_id not in reqs_scheme_mismatch:
                            reqs_scheme_mismatch.append(req_id)
                        continue

                # get dest space token
                dest_spacetoken = None
                if protocols[dest_rse_id_key].attributes and \
                   'extended_attributes' in protocols[dest_rse_id_key].attributes and \
                   protocols[dest_rse_id_key].attributes['extended_attributes'] and \
                   'space_token' in protocols[dest_rse_id_key].attributes['extended_attributes']:
                    dest_spacetoken = protocols[dest_rse_id_key].attributes['extended_attributes']['space_token']

                # Compute the destination url
                if rses_info[dest_rse_id]['deterministic']:
                    dest_url = list(protocols[dest_rse_id_key].lfns2pfns(lfns={'scope': scope, 'name': name}).values())[0]
                else:
                    # compute dest url in case of non deterministic
                    # naming convention, etc.
                    dsn = 'other'
                    if attr and 'ds_name' in attr:
                        dsn = attr["ds_name"]

                    else:
                        # select a containing dataset
                        for parent in did.list_parent_dids(scope, name):
                            if parent['type'] == DIDType.DATASET:
                                dsn = parent['name']
                                break
                    # DQ2 path always starts with /, but prefix might not end with /
                    naming_convention = rse_attrs[dest_rse_id].get('naming_convention', None)
                    dest_path = construct_surl(dsn, name, naming_convention)
                    if rses_info[dest_rse_id]['rse_type'] == RSEType.TAPE or rses_info[dest_rse_id]['rse_type'] == 'TAPE':
                        if retry_count or activity == 'Recovery':
                            dest_path = '%s_%i' % (dest_path, int(time.time()))

                    dest_url = list(protocols[dest_rse_id_key].lfns2pfns(lfns={'scope': scope, 'name': name, 'path': dest_path}).values())[0]

                # Get source protocol
                source_rse_id_key = '%s_%s' % (source_rse_id, '_'.join([matching_scheme[0], matching_scheme[1]]))
                if source_rse_id_key not in protocols:
                    try:
                        protocols[source_rse_id_key] = rsemgr.create_protocol(rses_info[source_rse_id], 'third_party_copy', matching_scheme[1])
                    except RSEProtocolNotSupported:
                        logging.error('Operation "third_party_copy" not supported by source_rse %s with schemes %s' % (rses_info[source_rse_id]['rse'], matching_scheme[1]))
                        if req_id in reqs_no_source:
                            reqs_no_source.remove(req_id)
                        if req_id not in reqs_scheme_mismatch:
                            reqs_scheme_mismatch.append(req_id)
                        continue

                source_url = list(protocols[source_rse_id_key].lfns2pfns(lfns={'scope': scope, 'name': name, 'path': path}).values())[0]

                # Extend the metadata dictionary with request attributes
                overwrite, bring_online = True, None
                if rses_info[source_rse_id]['rse_type'] == RSEType.TAPE or rses_info[source_rse_id]['rse_type'] == 'TAPE':
                    bring_online = bring_online_local
                    transfer_src_type = "TAPE"
                    if not allow_tape_source:
                        if req_id not in reqs_only_tape_source:
                            reqs_only_tape_source.append(req_id)
                        if req_id in reqs_no_source:
                            reqs_no_source.remove(req_id)
                        continue

                if rses_info[dest_rse_id]['rse_type'] == RSEType.TAPE or rses_info[dest_rse_id]['rse_type'] == 'TAPE':
                    overwrite = False
                    transfer_dst_type = "TAPE"

                # get external_host
                fts_hosts = rse_attrs[dest_rse_id].get('fts', None)
                if not fts_hosts:
                    logging.error('Destination RSE %s FTS attribute not defined - SKIP REQUEST %s' % (dest_rse, req_id))
                    continue
                if retry_count is None:
                    retry_count = 0
                fts_list = fts_hosts.split(",")

                verify_checksum = 'both'
                if not rse_attrs[dest_rse_id].get('verify_checksum', True):
                    if not rse_attrs[source_rse_id].get('verify_checksum', True):
                        verify_checksum = 'none'
                    else:
                        verify_checksum = 'source'
                else:
                    if not rse_attrs[source_rse_id].get('verify_checksum', True):
                        verify_checksum = 'destination'
                    else:
                        verify_checksum = 'both'

                external_host = fts_list[0]
                if retry_other_fts:
                    external_host = fts_list[retry_count % len(fts_list)]

                file_metadata = {'request_id': req_id,
                                 'scope': scope,
                                 'name': name,
                                 'activity': activity,
                                 'request_type': str(RequestType.TRANSFER).lower(),
                                 'src_type': transfer_src_type,
                                 'dst_type': transfer_dst_type,
                                 'src_rse': rse,
                                 'dst_rse': rses_info[dest_rse_id]['rse'],
                                 'src_rse_id': source_rse_id,
                                 'dest_rse_id': dest_rse_id,
                                 'filesize': bytes,
                                 'md5': md5,
                                 'adler32': adler32,
                                 'verify_checksum': verify_checksum}

                if previous_attempt_id:
                    file_metadata['previous_attempt_id'] = previous_attempt_id

                transfers[req_id] = {'request_id': req_id,
                                     'schemes': __add_compatible_schemes(schemes=[matching_scheme[0]], allowed_schemes=current_schemes),
                                     # 'src_urls': [source_url],
                                     'sources': [(rse, source_url, source_rse_id, ranking if ranking is not None else 0, link_ranking)],
                                     'dest_urls': [dest_url],
                                     'src_spacetoken': None,
                                     'dest_spacetoken': dest_spacetoken,
                                     'overwrite': overwrite,
                                     'bring_online': bring_online,
                                     'copy_pin_lifetime': attr.get('lifetime', -1),
                                     'external_host': external_host,
                                     'selection_strategy': 'auto',
                                     'rule_id': rule_id,
                                     'file_metadata': file_metadata}
            else:
                current_schemes = transfers[req_id]['schemes']

                # source_rse_id will be None if no source replicas
                # rse will be None if rse is staging area
                if source_rse_id is None or rse is None:
                    continue

                if link_ranking is None:
                    logging.debug("Request %s: no link from %s to %s" % (req_id, source_rse_id, dest_rse_id))
                    continue

                if source_rse_id in unavailable_read_rse_ids:
                    continue

                attr = None
                if attributes:
                    if type(attributes) is dict:
                        attr = json.loads(json.dumps(attributes))
                    else:
                        attr = json.loads(str(attributes))

                # parse source expression
                source_replica_expression = attr["source_replica_expression"] if (attr and "source_replica_expression" in attr) else None
                if source_replica_expression:
                    try:
                        parsed_rses = parse_expression(source_replica_expression, session=session)
                    except InvalidRSEExpression as error:
                        logging.error("Invalid RSE exception %s: %s" % (source_replica_expression, error))
                        continue
                    else:
                        allowed_rses = [x['rse'] for x in parsed_rses]
                        if rse not in allowed_rses:
                            continue

                # parse allow tape source expression, not finally version.
                allow_tape_source = attr["allow_tape_source"] if (attr and "allow_tape_source" in attr) else True

                # Compute the source rse information
                if source_rse_id not in rses_info:
                    source_rse = get_rse_name(rse_id=source_rse_id, session=session)
                    rses_info[source_rse_id] = rsemgr.get_rse_info(source_rse, session=session)

                # Get protocol
                source_rse_id_key = '%s_%s' % (source_rse_id, '_'.join(current_schemes))
                if source_rse_id_key not in protocols:
                    try:
                        protocols[source_rse_id_key] = rsemgr.create_protocol(rses_info[source_rse_id], 'third_party_copy', current_schemes)
                    except RSEProtocolNotSupported:
                        logging.error('Operation "third_party_copy" not supported by %s with schemes %s' % (rses_info[source_rse_id]['rse'], current_schemes))
                        continue
                source_url = list(protocols[source_rse_id_key].lfns2pfns(lfns={'scope': scope, 'name': name, 'path': path}).values())[0]

                if ranking is None:
                    ranking = 0
                # TAPE should not mixed with Disk and should not use as first try
                # If there is a source whose ranking is no less than the Tape ranking, Tape will not be used.
                if rses_info[source_rse_id]['rse_type'] == RSEType.TAPE or rses_info[source_rse_id]['rse_type'] == 'TAPE':
                    # current src_rse is Tape
                    if not allow_tape_source:
                        continue
                    if not transfers[req_id]['bring_online']:
                        # the sources already founded are disks.

                        avail_top_ranking = None
                        founded_sources = transfers[req_id]['sources']
                        for founded_source in founded_sources:
                            if avail_top_ranking is None:
                                avail_top_ranking = founded_source[3]
                                continue
                            if founded_source[3] is not None and founded_source[3] > avail_top_ranking:
                                avail_top_ranking = founded_source[3]

                        if avail_top_ranking >= ranking:
                            # current Tape source is not the highest ranking, will use disk sources
                            continue
                        else:
                            transfers[req_id]['sources'] = []
                            transfers[req_id]['bring_online'] = bring_online_local
                            transfer_src_type = "TAPE"
                            transfers[req_id]['file_metadata']['src_type'] = transfer_src_type
                            transfers[req_id]['file_metadata']['src_rse'] = rse
                    else:
                        # the sources already founded is Tape too.
                        # multiple Tape source replicas are not allowed in FTS3.
                        if transfers[req_id]['sources'][0][3] > ranking or (transfers[req_id]['sources'][0][3] == ranking and transfers[req_id]['sources'][0][4] <= link_ranking):
                            continue
                        else:
                            transfers[req_id]['sources'] = []
                            transfers[req_id]['bring_online'] = bring_online_local
                            transfers[req_id]['file_metadata']['src_rse'] = rse
                else:
                    # current src_rse is Disk
                    if transfers[req_id]['bring_online']:
                        # the founded sources are Tape

                        avail_top_ranking = None
                        founded_sources = transfers[req_id]['sources']
                        for founded_source in founded_sources:
                            if avail_top_ranking is None:
                                avail_top_ranking = founded_source[3]
                                continue
                            if founded_source[3] is not None and founded_source[3] > avail_top_ranking:
                                avail_top_ranking = founded_source[3]

                        if ranking >= avail_top_ranking:
                            # current disk replica has higher ranking than founded sources
                            # remove founded Tape sources
                            transfers[req_id]['sources'] = []
                            transfers[req_id]['bring_online'] = None
                            transfer_src_type = "DISK"
                            transfers[req_id]['file_metadata']['src_type'] = transfer_src_type
                            transfers[req_id]['file_metadata']['src_rse'] = rse
                        else:
                            continue

                # transfers[id]['src_urls'].append((source_rse_id, source_url))
                transfers[req_id]['sources'].append((rse, source_url, source_rse_id, ranking, link_ranking))

        except Exception:
            logging.critical("Exception happened when trying to get transfer for request %s: %s" % (req_id, traceback.format_exc()))
            break

    for req_id in transfers:
        if req_id in reqs_no_source:
            reqs_no_source.remove(req_id)
        if req_id in reqs_only_tape_source:
            reqs_only_tape_source.remove(req_id)
        if req_id in reqs_scheme_mismatch:
            reqs_scheme_mismatch.remove(req_id)

    return transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source