예제 #1
0
def _poll_transfers(external_host, transfers_by_eid, vo, timeout, logger):
    """
    Helper function for poll_transfers which performs the actual polling and database update.
    """
    is_bulk = len(transfers_by_eid) > 1
    try:
        tss = time.time()
        logger(
            logging.INFO, 'Polling %i transfers against %s with timeout %s' %
            (len(transfers_by_eid), external_host, timeout))
        resps = transfer_core.bulk_query_transfers(external_host,
                                                   transfers_by_eid,
                                                   TRANSFER_TOOL, vo, timeout,
                                                   logger)
        duration = time.time() - tss
        record_timer('daemons.conveyor.poller.bulk_query_transfers',
                     duration * 1000 / len(transfers_by_eid))
        logger(
            logging.DEBUG, 'Polled %s transfer requests status in %s seconds' %
            (len(transfers_by_eid), duration))
    except TransferToolTimeout as error:
        logger(logging.ERROR, str(error))
        return
    except TransferToolWrongAnswer as error:
        logger(logging.ERROR, str(error))
        if is_bulk:
            raise  # The calling context will retry transfers one-by-one
        else:
            return
    except RequestException as error:
        logger(logging.ERROR,
               "Failed to contact FTS server: %s" % (str(error)))
        return
    except Exception:
        logger(logging.ERROR, "Failed to query FTS info", exc_info=True)
        return

    tss = time.time()
    logger(logging.DEBUG,
           'Updating %s transfer requests status' % (len(transfers_by_eid)))
    cnt = 0

    request_ids = {
        t['request_id']
        for t in itertools.chain.from_iterable(transfers_by_eid.values())
    }
    for transfer_id in resps:
        try:
            transf_resp = resps[transfer_id]
            # transf_resp is None: Lost.
            #             is Exception: Failed to get fts job status.
            #             is {}: No terminated jobs.
            #             is {request_id: {file_status}}: terminated jobs.
            if transf_resp is None:
                for transfer in transfers_by_eid[transfer_id]:
                    resp = transfer_core.fake_transfertool_response(
                        transfer,
                        new_state=RequestState.LOST,
                        reason="The FTS job lost")
                    request_core.update_request_state(resp, logger=logger)
                record_counter('daemons.conveyor.poller.transfer_lost')
            elif isinstance(transf_resp, Exception):
                logger(
                    logging.WARNING, "Failed to poll FTS(%s) job (%s): %s" %
                    (external_host, transfer_id, transf_resp))
                record_counter(
                    'daemons.conveyor.poller.query_transfer_exception')
            else:
                for request_id in request_ids.intersection(transf_resp):
                    ret = request_core.update_request_state(
                        transf_resp[request_id], logger=logger)
                    # if True, really update request content; if False, only touch request
                    if ret:
                        cnt += 1
                    record_counter(
                        'daemons.conveyor.poller.update_request_state.{updated}',
                        labels={'updated': ret})

            # should touch transfers.
            # Otherwise if one bulk transfer includes many requests and one is not terminated, the transfer will be poll again.
            transfer_core.touch_transfer(external_host, transfer_id)
        except (DatabaseException, DatabaseError) as error:
            if re.match('.*ORA-00054.*', error.args[0]) or re.match(
                    '.*ORA-00060.*',
                    error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                logger(
                    logging.WARNING,
                    "Lock detected when handling request %s - skipping" %
                    transfer_id)
            else:
                logger(logging.ERROR, 'Exception', exc_info=True)
    logger(
        logging.DEBUG,
        'Finished updating %s transfer requests status (%i requests state changed) in %s seconds'
        % (len(transfers_by_eid), cnt, (time.time() - tss)))
예제 #2
0
def poll_transfers(external_host, xfers, prepend_str='', request_ids=None, timeout=None):
    """
    Poll a list of transfers from an FTS server

    :param external_host:    The FTS server to query from.
    :param xfrs:             List of transfers to poll.
    :param process:          Process number.
    :param thread:           Thread number.
    :param timeout:          Timeout.
    """
    try:
        if TRANSFER_TOOL == 'mock':
            logging.debug(prepend_str + 'Setting %s transfer requests status to DONE per mock tool' % (len(xfers)))
            for task_id in xfers:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=RequestState.DONE)
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
            return
        try:
            tss = time.time()
            logging.info(prepend_str + 'Polling %i transfers against %s with timeout %s' % (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers, TRANSFER_TOOL, timeout)
            record_timer('daemons.conveyor.poller.bulk_query_transfers', (time.time() - tss) * 1000 / len(xfers))
        except TransferToolTimeout as error:
            logging.error(prepend_str + str(error))
            return
        except TransferToolWrongAnswer as error:
            logging.error(prepend_str + str(error))
            logging.error(prepend_str + 'Problem querying %s on %s. All jobs are being checked individually' % (str(xfers), external_host))
            for xfer in xfers:
                try:
                    logging.debug(prepend_str + 'Checking %s on %s' % (xfer, external_host))
                    status = transfer_core.bulk_query_transfers(external_host, [xfer, ], TRANSFER_TOOL, timeout)
                    if xfer in status and isinstance(status[xfer], Exception):
                        logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(status[xfer])))
                except Exception as err:
                    logging.error(prepend_str + 'Problem querying %s on %s . Error returned : %s' % (xfer, external_host, str(err)))
                    break
            return
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" % (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" % (traceback.format_exc()))
            return

        logging.debug(prepend_str + 'Polled %s transfer requests status in %s seconds' % (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' % (len(xfers)))
        cnt = 0

        if TRANSFER_TOOL == 'globus':
            for task_id in resps:
                ret = transfer_core.update_transfer_state(external_host=None, transfer_id=task_id, state=resps[task_id])
                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)
        else:
            for transfer_id in resps:
                try:
                    transf_resp = resps[transfer_id]
                    # transf_resp is None: Lost.
                    #             is Exception: Failed to get fts job status.
                    #             is {}: No terminated jobs.
                    #             is {request_id: {file_status}}: terminated jobs.
                    if transf_resp is None:
                        transfer_core.update_transfer_state(external_host, transfer_id, RequestState.LOST, logging_prepend_str=prepend_str)
                        record_counter('daemons.conveyor.poller.transfer_lost')
                    elif isinstance(transf_resp, Exception):
                        logging.warning(prepend_str + "Failed to poll FTS(%s) job (%s): %s" % (external_host, transfer_id, transf_resp))
                        record_counter('daemons.conveyor.poller.query_transfer_exception')
                    else:
                        for request_id in transf_resp:
                            if request_id in request_ids:
                                ret = request_core.update_request_state(transf_resp[request_id], logging_prepend_str=prepend_str)
                                # if True, really update request content; if False, only touch request
                                if ret:
                                    cnt += 1
                                record_counter('daemons.conveyor.poller.update_request_state.%s' % ret)

                    # should touch transfers.
                    # Otherwise if one bulk transfer includes many requests and one is not terminated, the transfer will be poll again.
                    transfer_core.touch_transfer(external_host, transfer_id)
                except (DatabaseException, DatabaseError) as error:
                    if re.match('.*ORA-00054.*', error.args[0]) or re.match('.*ORA-00060.*', error.args[0]) or 'ERROR 1205 (HY000)' in error.args[0]:
                        logging.warn(prepend_str + "Lock detected when handling request %s - skipping" % request_id)
                    else:
                        logging.error(traceback.format_exc())
            logging.debug(prepend_str + 'Finished updating %s transfer requests status (%i requests state changed) in %s seconds' % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        logging.error(traceback.format_exc())
예제 #3
0
def poll_transfers(external_host,
                   xfers,
                   prepend_str='',
                   request_ids=None,
                   timeout=None):
    """
    Poll a list of transfers from an FTS server

    :param external_host:    The FTS server to query from.
    :param xfrs:             List of transfers to poll.
    :param process:          Process number.
    :param thread:           Thread number.
    :param timeout:          Timeout.
    """
    try:
        try:
            tss = time.time()
            logging.info(prepend_str +
                         'Polling %i transfers against %s with timeout %s' %
                         (len(xfers), external_host, timeout))
            resps = transfer_core.bulk_query_transfers(external_host, xfers,
                                                       'fts3', timeout)
            record_timer('daemons.conveyor.poller.bulk_query_transfers',
                         (time.time() - tss) * 1000 / len(xfers))
        except RequestException as error:
            logging.error(prepend_str + "Failed to contact FTS server: %s" %
                          (str(error)))
            return
        except Exception:
            logging.error(prepend_str + "Failed to query FTS info: %s" %
                          (traceback.format_exc()))
            return

        logging.debug(prepend_str +
                      'Polled %s transfer requests status in %s seconds' %
                      (len(xfers), (time.time() - tss)))
        tss = time.time()
        logging.debug(prepend_str + 'Updating %s transfer requests status' %
                      (len(xfers)))
        cnt = 0
        for transfer_id in resps:
            try:
                transf_resp = resps[transfer_id]
                # transf_resp is None: Lost.
                #             is Exception: Failed to get fts job status.
                #             is {}: No terminated jobs.
                #             is {request_id: {file_status}}: terminated jobs.
                if transf_resp is None:
                    transfer_core.update_transfer_state(
                        external_host,
                        transfer_id,
                        RequestState.LOST,
                        logging_prepend_str=prepend_str)
                    record_counter('daemons.conveyor.poller.transfer_lost')
                elif isinstance(transf_resp, Exception):
                    logging.warning(prepend_str +
                                    "Failed to poll FTS(%s) job (%s): %s" %
                                    (external_host, transfer_id, transf_resp))
                    record_counter(
                        'daemons.conveyor.poller.query_transfer_exception')
                else:
                    for request_id in transf_resp:
                        if request_id in request_ids:
                            ret = request_core.update_request_state(
                                transf_resp[request_id],
                                logging_prepend_str=prepend_str)
                            # if True, really update request content; if False, only touch request
                            if ret:
                                cnt += 1
                            record_counter(
                                'daemons.conveyor.poller.update_request_state.%s'
                                % ret)

                # should touch transfers.
                # Otherwise if one bulk transfer includes many requests and one is not terminated, the transfer will be poll again.
                transfer_core.touch_transfer(external_host, transfer_id)
            except (DatabaseException, DatabaseError) as error:
                if isinstance(error.args[0], tuple) and (
                        match('.*ORA-00054.*', error.args[0][0])
                        or match('.*ORA-00060.*', error.args[0][0]) or
                    ('ERROR 1205 (HY000)' in error.args[0][0])):
                    logging.warn(
                        prepend_str +
                        "Lock detected when handling request %s - skipping" %
                        request_id)
                else:
                    logging.error(traceback.format_exc())
        logging.debug(
            prepend_str +
            'Finished updating %s transfer requests status (%i requests state changed) in %s seconds'
            % (len(xfers), cnt, (time.time() - tss)))
    except Exception:
        logging.error(traceback.format_exc())