Esempio n. 1
0
def __handle_requests(reqs, suspicious_patterns, prepend_str=''):
    """
    Used by finisher to handle terminated requests,

    :param reqs: List of requests.
    :param suspicious_patterns: List of suspicious patterns.
    :param prepend_str: String to prepend to logging.
    """

    undeterministic_rses = __get_undeterministic_rses()
    rses_info, protocols = {}, {}
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'], 'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}

            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (RequestType.TRANSFER, RequestType.STAGEIN) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        dest_rse = get_rse_name(rse_id=req['dest_rse_id'])
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(dest_rse)
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req, suspicious_patterns)
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        logging.warn(prepend_str + 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'],
                                                                                                   req['name'],
                                                                                                   req['request_id'],
                                                                                                   new_req['request_id'],
                                                                                                   new_req['retry_count']))
                else:
                    logging.warn(prepend_str + 'EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)
            elif req['state'] == RequestState.SUBMITTING or req['state'] == RequestState.SUBMISSION_FAILED or req['state'] == RequestState.LOST:
                if req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    continue

                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        logging.warn(prepend_str + 'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'],
                                                                                                              req['name'],
                                                                                                              req['request_id'],
                                                                                                              new_req['request_id'],
                                                                                                              new_req['retry_count']))
                else:
                    logging.warn(prepend_str + 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)
            elif req['state'] == RequestState.NO_SOURCES or req['state'] == RequestState.ONLY_TAPE_SOURCES or req['state'] == RequestState.MISMATCH_SCHEME:
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        logging.warn(prepend_str + 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'],
                                                                                                   req['name'],
                                                                                                   req['request_id'],
                                                                                                   new_req['request_id'],
                                                                                                   new_req['retry_count']))
                else:
                    logging.warn(prepend_str + 'EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE  # should be broken here
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)

        except Exception as error:
            logging.error(prepend_str + "Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'],
                                                                                                                     req['scope'],
                                                                                                                     req['name'],
                                                                                                                     req['dest_rse_id'],
                                                                                                                     str(error)))

    __handle_terminated_replicas(replicas, prepend_str)
Esempio n. 2
0
def __handle_requests(reqs,
                      suspicious_patterns,
                      retry_protocol_mismatches,
                      logger=logging.log):
    """
    Used by finisher to handle terminated requests,

    :param reqs:                         List of requests.
    :param suspicious_patterns:          List of suspicious patterns.
    :param retry_protocol_mismatches:    Boolean to retry the transfer in case of protocol mismatch.
    :param prepend_str: String to prepend to logging.
    """

    failed_during_submission = [
        RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED,
        RequestState.LOST
    ]
    failed_no_submission_attempts = [
        RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES,
        RequestState.MISMATCH_SCHEME
    ]
    undeterministic_rses = __get_undeterministic_rses(logger=logger)
    rses_info, protocols = {}, {}
    replicas = {}
    for req in reqs:
        try:
            replica = {
                'scope': req['scope'],
                'name': req['name'],
                'rse_id': req['dest_rse_id'],
                'bytes': req['bytes'],
                'adler32': req['adler32'],
                'request_id': req['request_id']
            }

            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (
                        RequestType.TRANSFER, RequestType.STAGEIN
                ) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        rses_info[
                            req['dest_rse_id']] = rsemanager.get_rse_info(
                                rse_id=req['dest_rse_id'])
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[
                            dest_rse_id_scheme] = rsemanager.create_protocol(
                                rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns(
                        [pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            # Standard failure from the transfer tool
            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req,
                                         suspicious_patterns,
                                         logger=logger)
                tss = time.time()
                try:
                    if request_core.should_retry_request(
                            req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(
                            req,
                            source_ranking_update=True,
                            retry_protocol_mismatches=retry_protocol_mismatches,
                            logger=logger)
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        record_timer(
                            'daemons.conveyor.common.update_request_state.request_requeue_and_archive',
                            (time.time() - tss) * 1000)
                        logger(
                            logging.WARNING,
                            'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' %
                            (req['scope'], req['name'], req['request_id'],
                             new_req['request_id'], new_req['retry_count']))
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(
                            logging.WARNING,
                            'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s',
                            req['scope'], req['name'], req['request_id'],
                            req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req[
                            'err_msg'] else request_core.get_transfer_error(
                                req['state'])
                        replicas[req['request_type']][req['rule_id']].append(
                            replica)
                except RequestNotFound:
                    logger(logging.WARNING, 'Cannot find request %s anymore',
                           req['request_id'])

            # All other failures
            elif req['state'] in failed_during_submission or req[
                    'state'] in failed_no_submission_attempts:
                if req['state'] in failed_during_submission and req[
                        'updated_at'] > (datetime.datetime.utcnow() -
                                         datetime.timedelta(minutes=120)):
                    # To prevent race conditions
                    continue
                try:
                    tss = time.time()
                    if request_core.should_retry_request(
                            req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(
                            req,
                            source_ranking_update=False,
                            retry_protocol_mismatches=retry_protocol_mismatches,
                            logger=logger)
                        record_timer(
                            'daemons.conveyor.common.update_request_state.request_requeue_and_archive',
                            (time.time() - tss) * 1000)
                        logger(
                            logging.WARNING,
                            'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s'
                            % (req['scope'], req['name'], req['request_id'],
                               new_req['request_id'], new_req['retry_count']))
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(
                            logging.WARNING,
                            'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s',
                            req['scope'], req['name'], req['request_id'],
                            req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req[
                            'err_msg'] else request_core.get_transfer_error(
                                req['state'])
                        replicas[req['request_type']][req['rule_id']].append(
                            replica)
                except RequestNotFound:
                    logger(logging.WARNING, 'Cannot find request %s anymore',
                           req['request_id'])

        except Exception as error:
            logger(
                logging.ERROR,
                "Something unexpected happened when handling request %s(%s:%s) at %s: %s"
                % (req['request_id'], req['scope'], req['name'],
                   req['dest_rse_id'], str(error)))

    __handle_terminated_replicas(replicas, logger=logger)
Esempio n. 3
0
def update_transfer_state(external_host, transfer_id, state, logging_prepend_str=None, session=None):
    """
    Used by poller to update the internal state of transfer,
    after the response by the external transfertool.

    :param request_host:          Name of the external host.
    :param transfer_id:           External transfer job id as a string.
    :param state:                 Request state as a string.
    :param logging_prepend_str:   String to prepend to the logging
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """

    prepend_str = ''
    if logging_prepend_str:
        prepend_str = logging_prepend_str
    try:
        if state == RequestState.LOST:
            reqs = request_core.get_requests_by_transfer(external_host, transfer_id, session=session)
            for req in reqs:
                logging.info(prepend_str + 'REQUEST %s OF TRANSFER %s ON %s STATE %s' % (str(req['request_id']), external_host, transfer_id, str(state)))
                src_rse_id = req.get('source_rse_id', None)
                dst_rse_id = req.get('dest_rse_id', None)
                src_rse = None
                dst_rse = None
                if src_rse_id:
                    src_rse = get_rse_name(src_rse_id, session=session)
                if dst_rse_id:
                    dst_rse = get_rse_name(dst_rse_id, session=session)
                response = {'new_state': state,
                            'transfer_id': transfer_id,
                            'job_state': state,
                            'src_url': None,
                            'dst_url': req['dest_url'],
                            'duration': 0,
                            'reason': "The FTS job lost",
                            'scope': req.get('scope', None),
                            'name': req.get('name', None),
                            'src_rse': src_rse,
                            'dst_rse': dst_rse,
                            'request_id': req.get('request_id', None),
                            'activity': req.get('activity', None),
                            'src_rse_id': req.get('source_rse_id', None),
                            'dst_rse_id': req.get('dest_rse_id', None),
                            'previous_attempt_id': req.get('previous_attempt_id', None),
                            'adler32': req.get('adler32', None),
                            'md5': req.get('md5', None),
                            'filesize': req.get('filesize', None),
                            'external_host': external_host,
                            'job_m_replica': None,
                            'created_at': req.get('created_at', None),
                            'submitted_at': req.get('submitted_at', None),
                            'details': None,
                            'account': req.get('account', None)}

                err_msg = request_core.get_transfer_error(response['new_state'], response['reason'] if 'reason' in response else None)
                request_core.set_request_state(req['request_id'],
                                               response['new_state'],
                                               transfer_id=transfer_id,
                                               src_rse_id=src_rse_id,
                                               err_msg=err_msg,
                                               session=session)

                request_core.add_monitor_message(req, response, session=session)
        else:
            __set_transfer_state(external_host, transfer_id, state, session=session)
        return True
    except UnsupportedOperation as error:
        logging.warning(prepend_str + "Transfer %s on %s doesn't exist - Error: %s" % (transfer_id, external_host, str(error).replace('\n', '')))
        return False