コード例 #1
0
def poll_transfers(external_host, xfers, prepend_str='', request_ids=None, timeout=None):
    """
    Poll a list of transfers from an FTS server

    :param external_host:    The FTS server to query from.
    :param xfrs:             List of transfers to poll.
    :param process:          Process number.
    :param thread:           Thread number.
    :param timeout:          Timeout.
    """
    try:
        if TRANSFER_TOOL == 'mock':
            logging.debug(prepend_str + 'Setting %s transfer requests status to DONE per mock tool' % (len(xfers)))
            for task_id in xfers:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=RequestState.DONE)
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
            return
        try:
            tss = time.time()
            logging.info(prepend_str + 'Polling %i transfers against %s with timeout %s' % (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers, TRANSFER_TOOL, timeout)
            record_timer('daemons.conveyor.poller.bulk_query_transfers', (time.time() - tss) * 1000 / len(xfers))
        except TransferToolTimeout as error:
            logging.error(prepend_str + str(error))
            return
        except TransferToolWrongAnswer as error:
            logging.error(prepend_str + str(error))
            logging.error(prepend_str + 'Problem querying %s on %s. All jobs are being checked individually' % (str(xfers), external_host))
            for xfer in xfers:
                try:
                    logging.debug(prepend_str + 'Checking %s on %s' % (xfer, external_host))
                    status = transfer_core.bulk_query_transfers(external_host, [xfer, ], TRANSFER_TOOL, timeout)
                    if xfer in status and isinstance(status[xfer], Exception):
                        logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(status[xfer])))
                except Exception as err:
                    logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(err)))
                    break
            return
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" % (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" % (traceback.format_exc()))
            return

        logging.debug(prepend_str + 'Polled %s transfer requests status in %s seconds' % (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' % (len(xfers)))
        cnt = 0

        if TRANSFER_TOOL == 'globus':
            for task_id in resps:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=resps[task_id])
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
        else:
            for transfer_id in resps:
                try:
                    transf_resp = resps[transfer_id]
                    # transf_resp is None: Lost.
                    #             is Exception: Failed to get fts job status.
                    #             is {}: No terminated jobs.
                    #             is {request_id: {file_status}}: terminated jobs.
                    if transf_resp is None:
                        transfer_core.update_transfer_state(external_host, transfer_id, RequestState.LOST, logging_prepend_str=prepend_str)
                        record_counter('daemons.conveyor.poller.transfer_lost')
                    elif isinstance(transf_resp, Exception):
                        logging.warning(prepend_str + "Failed to poll FTS(%s) job (%s): %s" % (external_host, transfer_id, transf_resp))
                        record_counter('daemons.conveyor.poller.query_transfer_exception')
                    else:
                        for request_id in transf_resp:
                            if request_id in request_ids:
                                ret = request_core.update_request_state(transf_resp[request_id], logging_prepend_str=prepend_str)
                                # if True, really update request content; if False, only touch request
                                if ret:
                                    cnt += 1
                                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)

                    # should touch transfers.
                    # Otherwise if one bulk transfer includes many requests and one is not terminated, the transfer will be poll again.
                    transfer_core.touch_transfer(external_host, transfer_id)
                except (DatabaseException, DatabaseError) as error:
                    if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                        logging.warn(prepend_str + "Lock detected when handling request %s - skipping" % request_id)
                    else:
                        logging.error(traceback.format_exc())
            logging.debug(prepend_str + 'Finished updating %s transfer requests status (%i requests state changed) in %s seconds' % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        logging.error(traceback.format_exc())
コード例 #2
0
def poll_transfers(external_host,
                   xfers,
                   prepend_str='',
                   request_ids=None,
                   timeout=None):
    """
    Poll a list of transfers from an FTS server

    :param external_host:    The FTS server to query from.
    :param xfrs:             List of transfers to poll.
    :param process:          Process number.
    :param thread:           Thread number.
    :param timeout:          Timeout.
    """
    try:
        try:
            tss = time.time()
            logging.info(prepend_str +
                         'Polling %i transfers against %s with timeout %s' %
                         (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers,
                                                       'fts3', timeout)
            record_timer('daemons.conveyor.poller.bulk_query_transfers',
                         (time.time() - tss) * 1000 / len(xfers))
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" %
                          (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" %
                          (traceback.format_exc()))
            return

        logging.debug(prepend_str +
                      'Polled %s transfer requests status in %s seconds' %
                      (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' %
                      (len(xfers)))
        cnt = 0
        for transfer_id in resps:
            try:
                transf_resp = resps[transfer_id]
                # transf_resp is None: Lost.
                #             is Exception: Failed to get fts job status.
                #             is {}: No terminated jobs.
                #             is {request_id: {file_status}}: terminated jobs.
                if transf_resp is None:
                    transfer_core.update_transfer_state(
                        external_host,
                        transfer_id,
                        RequestState.LOST,
                        logging_prepend_str=prepend_str)
                    record_counter('daemons.conveyor.poller.transfer_lost')
                elif isinstance(transf_resp, Exception):
                    logging.warning(prepend_str +
                                    "Failed to poll FTS(%s) job (%s): %s" %
                                    (external_host, transfer_id, transf_resp))
                    record_counter(
                        'daemons.conveyor.poller.query_transfer_exception')
                else:
                    for request_id in transf_resp:
                        if request_id in request_ids:
                            ret = request_core.update_request_state(
                                transf_resp[request_id],
                                logging_prepend_str=prepend_str)
                            # if True, really update request content; if False, only touch request
                            if ret:
                                cnt += 1
                            record_counter(
                                'daemons.conveyor.poller.update_request_state.%s'
                                % ret)

                # should touch transfers.
                # Otherwise if one bulk transfer includes many requests and one is not terminated, the transfer will be poll again.
                transfer_core.touch_transfer(external_host, transfer_id)
            except (DatabaseException, DatabaseError) as error:
                if isinstance(error.args[0], tuple) and (
                        match('.*ORA-00054.*', error.args[0][0])
                        or match('.*ORA-00060.*', error.args[0][0]) or
                    ('ERROR 1205 (HY000)' in error.args[0][0])):
                    logging.warn(
                        prepend_str +
                        "Lock detected when handling request %s - skipping" %
                        request_id)
                else:
                    logging.error(traceback.format_exc())
        logging.debug(
            prepend_str +
            'Finished updating %s transfer requests status (%i requests state changed) in %s seconds'
            % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        logging.error(traceback.format_exc())