def handle_submitting_requests(older_than=1800, process=None, total_processes=None, thread=None, total_threads=None, session=None):
    """
    Requeue and archive requests stuck in the SUBMITTING state.

    Used by the finisher daemon: selects TRANSFER/STAGEIN/STAGEOUT requests
    that have been SUBMITTING for longer than *older_than* seconds and pushes
    each one back through requeue_and_archive.

    :param older_than: Only select requests older than this DateTime.
    :param process: Identifier of the caller process as an integer.
    :param total_processes: Maximum number of processes as an integer.
    :param thread: Identifier of the caller thread as an integer.
    :param total_threads: Maximum number of threads as an integer.
    :param session: The database session to use.
    """
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(seconds=older_than)
    stuck_requests = request_core.get_next(request_type=[RequestType.TRANSFER,
                                                         RequestType.STAGEIN,
                                                         RequestType.STAGEOUT],
                                           state=RequestState.SUBMITTING,
                                           older_than=cutoff,
                                           process=process,
                                           total_processes=total_processes,
                                           thread=thread,
                                           total_threads=total_threads,
                                           session=session)
    for stuck in stuck_requests:
        # Lazy %-args defer formatting to the logging framework; the emitted
        # message is identical to the eager '%' version.
        logging.info("Requeue SUBMITTING request %s", stuck['request_id'])
        request_core.requeue_and_archive(stuck['request_id'], session=session)
def __handle_requests(reqs, suspicious_patterns, retry_protocol_mismatches, logger=logging.log):
    """
    Used by finisher to handle terminated requests.

    DONE requests become AVAILABLE replica updates (resolving the physical
    path for non-deterministic destination RSEs); failed requests are either
    requeued for another attempt or become UNAVAILABLE replica updates
    carrying the transfer error. All accumulated updates are flushed through
    __handle_terminated_replicas at the end.

    :param reqs: List of requests.
    :param suspicious_patterns: List of suspicious patterns.
    :param retry_protocol_mismatches: Boolean to retry the transfer in case of protocol mismatch.
    :param logger: Logging callable with the signature of logging.log(level, msg, *args).
    """
    failed_during_submission = [RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED, RequestState.LOST]
    failed_no_submission_attempts = [RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES, RequestState.MISMATCH_SCHEME]
    undeterministic_rses = __get_undeterministic_rses(logger=logger)
    rses_info, protocols = {}, {}
    # replicas is keyed by request_type, then rule_id -> list of replica dicts.
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'],
                       'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}
            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (RequestType.TRANSFER, RequestType.STAGEIN) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(rse_id=req['dest_rse_id'])
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        # Protocol objects are cached per (RSE, scheme) pair.
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            # Standard failure from the transfer tool
            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req, suspicious_patterns, logger=logger)
                tss = time.time()
                try:
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, source_ranking_update=True, retry_protocol_mismatches=retry_protocol_mismatches, logger=logger)
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        record_timer('daemons.conveyor.common.update_request_state.request_requeue_and_archive', (time.time() - tss) * 1000)
                        if new_req:
                            logger(logging.WARNING, 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'],
                                                                                                    new_req['request_id'], new_req['retry_count']))
                        else:
                            # Fix: guard the race documented above; previously this
                            # raised TypeError on new_req['request_id'] when None.
                            logger(logging.WARNING, 'Request %s was requeued by another process', req['request_id'])
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(logging.WARNING, 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    logger(logging.WARNING, 'Cannot find request %s anymore', req['request_id'])

            # All other failures
            elif req['state'] in failed_during_submission or req['state'] in failed_no_submission_attempts:
                if req['state'] in failed_during_submission and req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    # To prevent race conditions
                    continue
                try:
                    tss = time.time()
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, source_ranking_update=False, retry_protocol_mismatches=retry_protocol_mismatches, logger=logger)
                        record_timer('daemons.conveyor.common.update_request_state.request_requeue_and_archive', (time.time() - tss) * 1000)
                        if new_req:
                            logger(logging.WARNING, 'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'],
                                                                                                               new_req['request_id'], new_req['retry_count']))
                        else:
                            # Fix: same None-race guard as in the FAILED branch.
                            logger(logging.WARNING, 'Request %s was requeued by another process', req['request_id'])
                    else:
                        # No new_req is return if should_retry_request returns False
                        logger(logging.WARNING, 'EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    logger(logging.WARNING, 'Cannot find request %s anymore', req['request_id'])
        except Exception as error:
            logger(logging.ERROR, "Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'], req['scope'], req['name'],
                                                                                                               req['dest_rse_id'], str(error)))

    __handle_terminated_replicas(replicas, logger=logger)
def handle_requests(reqs, suspicious_patterns):
    """
    Used by finisher to handle terminated requests.

    DONE requests become AVAILABLE replica updates (resolving the physical
    path for non-deterministic destination RSEs); failed/lost requests are
    either requeued or become UNAVAILABLE replica updates. Accumulated
    updates are flushed through handle_terminated_replicas at the end.

    :param reqs: List of requests.
    :param suspicious_patterns: List of suspicious patterns checked against failed requests.
    """
    undeterministic_rses = get_undeterministic_rses()
    rses_info, protocols = {}, {}
    # replicas is keyed by request_type, then rule_id -> list of replica dicts.
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'],
                       'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}
            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] == RequestType.TRANSFER and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        dest_rse = rse_core.get_rse_name(rse_id=req['dest_rse_id'])
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(dest_rse)
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.FAILED:
                check_suspicious_files(req, suspicious_patterns)
                # NOTE(review): when should_retry_request() is False nothing is
                # recorded for this request here — confirm it is terminated elsewhere.
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        logging.warning('REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'],
                                                                                        new_req['request_id'], new_req['retry_count']))
                    else:
                        logging.warning('EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.SUBMITTING or req['state'] == RequestState.SUBMISSION_FAILED or req['state'] == RequestState.LOST:
                # Skip recently-touched requests to avoid racing the submitter.
                if req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    continue

                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        logging.warning('REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'],
                                                                                                   new_req['request_id'], new_req['retry_count']))
                    else:
                        logging.warning('EXCEEDED SUBMITTING DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)

            elif req['state'] == RequestState.NO_SOURCES or req['state'] == RequestState.ONLY_TAPE_SOURCES or req['state'] == RequestState.MISMATCH_SCHEME:
                if request_core.should_retry_request(req):
                    tss = time.time()
                    new_req = request_core.requeue_and_archive(req['request_id'])
                    record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                    if new_req:
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        logging.warning('REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'], req['request_id'],
                                                                                        new_req['request_id'], new_req['retry_count']))
                    else:
                        logging.warning('EXCEEDED DID %s:%s REQUEST %s' % (req['scope'], req['name'], req['request_id']))
                        replica['state'] = ReplicaState.UNAVAILABLE  # should be broken here
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
        except Exception:
            # Fix: narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # can still stop the daemon; unexpected errors are logged per request.
            logging.error("Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'], req['scope'], req['name'],
                                                                                                       req['dest_rse_id'], traceback.format_exc()))

    handle_terminated_replicas(replicas)
rse_name, traceback.format_exc())) raise elif response['new_state'] == RequestState.FAILED: rse_name = response['dst_rse'] rse_update_name = rse_name req = request.get_request(response['request_id'], session=session) if req['request_type'] == RequestType.STAGEIN: rse_update_name = rse_core.list_rse_attributes(response['dst_rse'], session=session)['staging_buffer'] logging.debug('OVERRIDE REPLICA DID %s:%s RSE %s TO %s' % (response['scope'], response['name'], response['dst_rse'], rse_update_name)) tss = time.time() new_req = request.requeue_and_archive(response['request_id'], session=session) record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time()-tss)*1000) add_monitor_message(response, session=session) tss = time.time() if new_req is None: logging.warn('EXCEEDED DID %s:%s REQUEST %s' % (response['scope'], response['name'], response['request_id'])) try: replica.update_replicas_states([{'rse': rse_update_name, 'scope': response['scope'], 'name': response['name'], 'state': ReplicaState.UNAVAILABLE}], session=session)