def __handle_requests(reqs, suspicious_patterns, prepend_str=''):
    """
    Used by finisher to handle terminated requests.

    For each request a replica descriptor is built and dispatched on the
    request state:
      - DONE: replica marked AVAILABLE (with physical path resolved for
        non-deterministic RSEs, as needed for TAPE).
      - FAILED / stuck in submission / no usable sources: the request is
        requeued for another attempt when retries remain, otherwise the
        replica is marked UNAVAILABLE with an error message.
    The collected replicas are finally handed to __handle_terminated_replicas.

    NOTE(review): a second, newer definition of __handle_requests appears
    later in this file and shadows this one at import time — confirm whether
    this older version should be removed.

    :param reqs: List of requests (dict-like rows).
    :param suspicious_patterns: List of suspicious patterns.
    :param prepend_str: String to prepend to logging.
    """
    undeterministic_rses = __get_undeterministic_rses()
    # Caches: rses_info keyed by RSE id, protocols keyed by '<rse_id>_<scheme>'.
    rses_info, protocols = {}, {}
    # Nested accumulator: {request_type: {rule_id: [replica, ...]}}
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'],
                       'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}
            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (RequestType.TRANSFER, RequestType.STAGEIN) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        dest_rse = get_rse_name(rse_id=req['dest_rse_id'])
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(dest_rse)
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req, suspicious_patterns)
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        # FIX: logging.warn is a deprecated alias (removed in Python 3.13);
                        # use logging.warning instead.
                        logging.warning(prepend_str + 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'], new_req['request_id'], new_req['retry_count']))
                else:
                    # Retries exhausted: surface the failure on the replica.
                    logging.warning(prepend_str + 'EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.SUBMITTING or req['state'] == RequestState.SUBMISSION_FAILED or req['state'] == RequestState.LOST:
                # Skip requests updated within the last two hours to avoid
                # racing with an in-flight submission.
                if req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    continue

                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        logging.warning(prepend_str + 'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'], new_req['request_id'], new_req['retry_count']))
                else:
                    logging.warning(prepend_str + 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.NO_SOURCES or req['state'] == RequestState.ONLY_TAPE_SOURCES or req['state'] == RequestState.MISMATCH_SCHEME:
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        logging.warning(prepend_str + 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'], new_req['request_id'], new_req['retry_count']))
                else:
                    logging.warning(prepend_str + 'EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                    replica['state'] = ReplicaState.UNAVAILABLE  # should be broken here
                    replica['archived'] = False
                    replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                    replicas[req['request_type']][req['rule_id']].append(replica)
        except Exception as error:
            # Per-request guard: one bad row must not abort the whole batch.
            logging.error(prepend_str + "Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'], req['scope'], req['name'], req['dest_rse_id'], str(error)))

    __handle_terminated_replicas(replicas, prepend_str)
def __handle_requests(reqs, suspicious_patterns, retry_protocol_mismatches, logger=logging.log):
    """
    Used by finisher to handle terminated requests,

    Builds a replica descriptor per request and dispatches on the request
    state: DONE replicas become AVAILABLE (resolving the physical path for
    non-deterministic RSEs), failed requests are requeued while retries
    remain, otherwise the replica is marked UNAVAILABLE.  All collected
    replicas are finally passed to __handle_terminated_replicas.

    :param reqs: List of requests.
    :param suspicious_patterns: List of suspicious patterns.
    :param retry_protocol_mismatches: Boolean to retry the transfer in case of protocol mismatch.
    :param logger: Callable with the logging.log signature used for all messages.
    """
    # States reached after a submission attempt was made but did not complete.
    failed_during_submission = [RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED, RequestState.LOST]
    # States where no submission attempt was possible at all.
    failed_no_submission_attempts = [RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES, RequestState.MISMATCH_SCHEME]

    undeterministic_rses = __get_undeterministic_rses(logger=logger)
    # Caches: rses_info keyed by RSE id, protocols keyed by '<rse_id>_<scheme>'.
    rses_info, protocols = {}, {}
    # Nested accumulator: {request_type: {rule_id: [replica, ...]}}
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'],
                       'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}
            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (RequestType.TRANSFER, RequestType.STAGEIN) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(rse_id=req['dest_rse_id'])
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            # Standard failure from the transfer tool
            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req, suspicious_patterns, logger=logger)
                tss = time.time()
                try:
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, source_ranking_update=True, retry_protocol_mismatches=retry_protocol_mismatches, logger=logger)
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        record_timer('daemons.conveyor.common.update_request_state.request_requeue_and_archive', (time.time() - tss) * 1000)
                        logger(logging.WARNING, 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'], new_req['request_id'], new_req['retry_count']))
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(logging.WARNING, 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    # The request may have been archived by a concurrent worker.
                    logger(logging.WARNING, 'Cannot find request %s anymore', req['request_id'])

            # All other failures
            elif req['state'] in failed_during_submission or req['state'] in failed_no_submission_attempts:
                if req['state'] in failed_during_submission and req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    # To prevent race conditions
                    continue
                try:
                    tss = time.time()
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, source_ranking_update=False, retry_protocol_mismatches=retry_protocol_mismatches, logger=logger)
                        record_timer('daemons.conveyor.common.update_request_state.request_requeue_and_archive', (time.time() - tss) * 1000)
                        logger(logging.WARNING, 'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'], new_req['request_id'], new_req['retry_count']))
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(logging.WARNING, 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    # The request may have been archived by a concurrent worker.
                    logger(logging.WARNING, 'Cannot find request %s anymore', req['request_id'])
        except Exception as error:
            # Per-request guard: one bad row must not abort the whole batch.
            logger(logging.ERROR, "Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'], req['scope'], req['name'], req['dest_rse_id'], str(error)))

    __handle_terminated_replicas(replicas, logger=logger)
def update_transfer_state(external_host, transfer_id, state, logging_prepend_str=None, session=None):
    """
    Used by poller to update the internal state of transfer,
    after the response by the external transfertool.

    For a LOST job, every request attached to the transfer is failed
    individually (request state update plus monitoring message); any other
    state is applied to the transfer as a whole via __set_transfer_state.

    :param external_host: Name of the external host.
    :param transfer_id: External transfer job id as a string.
    :param state: Request state as a string.
    :param logging_prepend_str: String to prepend to the logging
    :param session: The database session to use.
    :returns commit_or_rollback: Boolean, True on success and False when the
        transfer is unknown to the transfertool.
    """
    prepend_str = logging_prepend_str or ''
    try:
        if state != RequestState.LOST:
            # Non-LOST states can be applied to the transfer in one call.
            __set_transfer_state(external_host, transfer_id, state, session=session)
            return True

        # A LOST job: fail each attached request individually.
        for req in request_core.get_requests_by_transfer(external_host, transfer_id, session=session):
            logging.info(prepend_str + 'REQUEST %s OF TRANSFER %s ON %s STATE %s' % (str(req['request_id']), external_host, transfer_id, str(state)))
            src_rse_id = req.get('source_rse_id', None)
            dst_rse_id = req.get('dest_rse_id', None)
            src_rse = get_rse_name(src_rse_id, session=session) if src_rse_id else None
            dst_rse = get_rse_name(dst_rse_id, session=session) if dst_rse_id else None
            # Synthetic transfertool response describing the lost job,
            # consumed by the monitoring message below.
            response = {
                'new_state': state,
                'transfer_id': transfer_id,
                'job_state': state,
                'src_url': None,
                'dst_url': req['dest_url'],
                'duration': 0,
                'reason': "The FTS job lost",
                'scope': req.get('scope', None),
                'name': req.get('name', None),
                'src_rse': src_rse,
                'dst_rse': dst_rse,
                'request_id': req.get('request_id', None),
                'activity': req.get('activity', None),
                'src_rse_id': req.get('source_rse_id', None),
                'dst_rse_id': req.get('dest_rse_id', None),
                'previous_attempt_id': req.get('previous_attempt_id', None),
                'adler32': req.get('adler32', None),
                'md5': req.get('md5', None),
                'filesize': req.get('filesize', None),
                'external_host': external_host,
                'job_m_replica': None,
                'created_at': req.get('created_at', None),
                'submitted_at': req.get('submitted_at', None),
                'details': None,
                'account': req.get('account', None),
            }
            err_msg = request_core.get_transfer_error(response['new_state'], response.get('reason'))
            request_core.set_request_state(req['request_id'], response['new_state'], transfer_id=transfer_id,
                                           src_rse_id=src_rse_id, err_msg=err_msg, session=session)
            request_core.add_monitor_message(req, response, session=session)
        return True
    except UnsupportedOperation as error:
        logging.warning(prepend_str + "Transfer %s on %s doesn't exist - Error: %s" % (transfer_id, external_host, str(error).replace('\n', '')))
        return False