def poll_transfers(external_host, xfers, prepend_str='', request_ids=None, timeout=None):
    """
    Poll a list of transfers from an FTS server and update their state.

    Every failure is logged rather than raised; the function always returns None.

    :param external_host: The FTS server to query from.
    :param xfers: List of transfers to poll. An empty list is a no-op.
    :param prepend_str: String prepended to the log messages emitted here.
    :param request_ids: Collection of request ids whose state may be updated.
                        Requests reported by the server that are not in this
                        collection are skipped. None is treated as an empty
                        collection (no request state is updated).
    :param timeout: Timeout handed to the bulk query.
    """
    try:
        if not xfers:
            # Nothing to do. This also protects the per-transfer timer below
            # from dividing by len(xfers) == 0, which used to be misreported
            # as a failure to query the server.
            logging.debug(prepend_str + 'No transfers to poll')
            return

        if TRANSFER_TOOL == 'mock':
            # The mock tool never contacts a server: every transfer is marked DONE.
            logging.debug(prepend_str + 'Setting %s transfer requests status to DONE per mock tool' % (len(xfers)))
            for task_id in xfers:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=RequestState.DONE)
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
            return

        try:
            tss = time.time()
            logging.info(prepend_str + 'Polling %i transfers against %s with timeout %s' % (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers, TRANSFER_TOOL, timeout)
            # Recorded value is the average time per polled transfer, in milliseconds.
            record_timer('daemons.conveyor.poller.bulk_query_transfers', (time.time() - tss) * 1000 / len(xfers))
        except TransferToolTimeout as error:
            logging.error(prepend_str + str(error))
            return
        except TransferToolWrongAnswer as error:
            logging.error(prepend_str + str(error))
            logging.error(prepend_str + 'Problem querying %s on %s. All jobs are being checked individually' % (str(xfers), external_host))
            # Diagnostic pass only: query the transfers one by one to log which
            # of them fails. No state is updated from these answers.
            for xfer in xfers:
                try:
                    logging.debug(prepend_str + 'Checking %s on %s' % (xfer, external_host))
                    status = transfer_core.bulk_query_transfers(external_host, [xfer, ], TRANSFER_TOOL, timeout)
                    if xfer in status and isinstance(status[xfer], Exception):
                        logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(status[xfer])))
                except Exception as err:
                    logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(err)))
                    # Stop at the first transfer whose individual query raises.
                    break
            return
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" % (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" % (traceback.format_exc()))
            return

        logging.debug(prepend_str + 'Polled %s transfer requests status in %s seconds' % (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' % (len(xfers)))
        cnt = 0

        if TRANSFER_TOOL == 'globus':
            # For globus the response value is used directly as the new transfer state.
            for task_id in resps:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=resps[task_id])
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
        else:
            # The default request_ids=None would make the membership test below
            # raise TypeError; a set also gives constant-time lookups.
            known_request_ids = frozenset(request_ids) if request_ids is not None else frozenset()

            for transfer_id in resps:
                # Reset for every transfer so the lock warning below can never hit
                # an unbound name or report a request id left over from a
                # previously processed transfer.
                request_id = None
                try:
                    transf_resp = resps[transfer_id]
                    # transf_resp is None: Lost.
                    #             is Exception: Failed to get fts job status.
                    #             is {}: No terminated jobs.
                    #             is {request_id: {file_status}}: terminated jobs.
                    if transf_resp is None:
                        transfer_core.update_transfer_state(external_host, transfer_id, RequestState.LOST, logging_prepend_str=prepend_str)
                        record_counter('daemons.conveyor.poller.transfer_lost')
                    elif isinstance(transf_resp, Exception):
                        logging.warning(prepend_str + "Failed to poll FTS(%s) job (%s): %s" % (external_host, transfer_id, transf_resp))
                        record_counter('daemons.conveyor.poller.query_transfer_exception')
                    else:
                        for request_id in transf_resp:
                            if request_id in known_request_ids:
                                ret = request_core.update_request_state(transf_resp[request_id], logging_prepend_str=prepend_str)
                                # if True, really update request content; if False, only touch request
                                if ret:
                                    cnt += 1
                                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)

                    # should touch transfers.
                    # Otherwise if one bulk transfer includes many requests and one is not terminated,
                    # the transfer will be polled again.
                    transfer_core.touch_transfer(external_host, transfer_id)
                except (DatabaseException, DatabaseError) as error:
                    detail = error.args[0]
                    # ORA-00054 / ORA-00060 / MySQL 1205 are treated as lock
                    # contention: warn and move on to the next transfer.
                    if re.match('.*ORA-00054.*', detail) or re.match('.*ORA-00060.*', detail) or 'ERROR 1205 (HY000)' in detail:
                        if request_id is None:
                            logging.warning(prepend_str + "Lock detected when handling transfer %s - skipping" % transfer_id)
                        else:
                            logging.warning(prepend_str + "Lock detected when handling request %s - skipping" % request_id)
                    else:
                        logging.error(traceback.format_exc())

        logging.debug(prepend_str + 'Finished updating %s transfer requests status (%i requests state changed) in %s seconds' % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        # Last-resort boundary: the poller must never propagate an exception.
        logging.error(traceback.format_exc())
def poll_transfers(external_host, xfers, prepend_str='', request_ids=None, timeout=None):
    """
    Poll a list of transfers from an FTS server and update their state.

    Every failure is logged rather than raised; the function always returns None.

    :param external_host: The FTS server to query from.
    :param xfers: List of transfers to poll. An empty list is a no-op.
    :param prepend_str: String prepended to the log messages emitted here.
    :param request_ids: Collection of request ids whose state may be updated.
                        Requests reported by the server that are not in this
                        collection are skipped. None is treated as an empty
                        collection (no request state is updated).
    :param timeout: Timeout handed to the bulk query.
    """
    try:
        if not xfers:
            # Nothing to do. This also protects the per-transfer timer below
            # from dividing by len(xfers) == 0, which used to be misreported
            # as a failure to query the server.
            logging.debug(prepend_str + 'No transfers to poll')
            return

        try:
            tss = time.time()
            logging.info(prepend_str + 'Polling %i transfers against %s with timeout %s' % (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers, 'fts3', timeout)
            # Recorded value is the average time per polled transfer, in milliseconds.
            record_timer('daemons.conveyor.poller.bulk_query_transfers', (time.time() - tss) * 1000 / len(xfers))
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" % (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" % (traceback.format_exc()))
            return

        logging.debug(prepend_str + 'Polled %s transfer requests status in %s seconds' % (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' % (len(xfers)))
        cnt = 0

        # The default request_ids=None would make the membership test below
        # raise TypeError; a set also gives constant-time lookups.
        known_request_ids = frozenset(request_ids) if request_ids is not None else frozenset()

        for transfer_id in resps:
            # Reset for every transfer so the lock warning below can never hit
            # an unbound name or report a request id left over from a
            # previously processed transfer.
            request_id = None
            try:
                transf_resp = resps[transfer_id]
                # transf_resp is None: Lost.
                #             is Exception: Failed to get fts job status.
                #             is {}: No terminated jobs.
                #             is {request_id: {file_status}}: terminated jobs.
                if transf_resp is None:
                    transfer_core.update_transfer_state(external_host, transfer_id, RequestState.LOST, logging_prepend_str=prepend_str)
                    record_counter('daemons.conveyor.poller.transfer_lost')
                elif isinstance(transf_resp, Exception):
                    logging.warning(prepend_str + "Failed to poll FTS(%s) job (%s): %s" % (external_host, transfer_id, transf_resp))
                    record_counter('daemons.conveyor.poller.query_transfer_exception')
                else:
                    for request_id in transf_resp:
                        if request_id in known_request_ids:
                            ret = request_core.update_request_state(transf_resp[request_id], logging_prepend_str=prepend_str)
                            # if True, really update request content; if False, only touch request
                            if ret:
                                cnt += 1
                            record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)

                # should touch transfers.
                # Otherwise if one bulk transfer includes many requests and one is not terminated,
                # the transfer will be polled again.
                transfer_core.touch_transfer(external_host, transfer_id)
            except (DatabaseException, DatabaseError) as error:
                detail = error.args[0]
                # Only a tuple-shaped first argument is inspected; its first item
                # is searched for ORA-00054 / ORA-00060 / MySQL 1205, which are
                # treated as lock contention: warn and move on.
                if isinstance(detail, tuple) and (match('.*ORA-00054.*', detail[0])
                                                  or match('.*ORA-00060.*', detail[0])
                                                  or ('ERROR 1205 (HY000)' in detail[0])):
                    if request_id is None:
                        logging.warning(prepend_str + "Lock detected when handling transfer %s - skipping" % transfer_id)
                    else:
                        logging.warning(prepend_str + "Lock detected when handling request %s - skipping" % request_id)
                else:
                    logging.error(traceback.format_exc())

        logging.debug(prepend_str + 'Finished updating %s transfer requests status (%i requests state changed) in %s seconds' % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        # Last-resort boundary: the poller must never propagate an exception.
        logging.error(traceback.format_exc())