Ejemplo n.º 1
0
def control(queues, traces, args):
    """
    Start and monitor the data-handling threads (copytool in/out, queue monitor).

    One ExcThread is launched per target; each thread reports any internal
    exception through its bucket queue, which is polled here until
    args.graceful_stop is set (the ExcThread run() method sets graceful_stop
    when a thread raises).

    :param queues: internal queues for job handling.
    :param traces: tuple containing internal pilot states (traces.pilot dict is read/updated).
    :param args: pilot arguments object (graceful_stop and abort_job events are read).
    :return: None.
    """

    targets = {
        'copytool_in': copytool_in,
        'copytool_out': copytool_out,
        'queue_monitoring': queue_monitoring
    }
    threads = [
        ExcThread(bucket=queue.Queue(),
                  target=target,
                  kwargs={
                      'queues': queues,
                      'traces': traces,
                      'args': args
                  },
                  name=name) for name, target in targets.items()
    ]

    # plain loop instead of a side-effect comprehension
    for thread in threads:
        thread.start()

    # if an exception is thrown, the graceful_stop will be set by the ExcThread class run() function
    while not args.graceful_stop.is_set():
        for thread in threads:
            bucket = thread.get_bucket()
            try:
                exc = bucket.get(block=False)
            except queue.Empty:
                pass
            else:
                # only the exception object is needed for reporting
                _, exc_obj, _ = exc
                # lazy %-args: message is only built if the record is emitted
                logger.warning("thread '%s' received an exception from bucket: %s",
                               thread.name, exc_obj)

                # deal with the exception
                # ..

            thread.join(0.1)
            time.sleep(0.1)

        time.sleep(0.5)

    logger.debug('data control ending since graceful_stop has been set')
    if args.abort_job.is_set():
        if traces.pilot['command'] == 'aborting':
            logger.warning('jobs are aborting')
        elif traces.pilot['command'] == 'abort':
            logger.warning('data control detected a set abort_job (due to a kill signal)')
            traces.pilot['command'] = 'aborting'

            # find all running jobs and stop them, find all jobs in queues relevant to this module
            #abort_jobs_in_queues(queues, args.signal)

    logger.debug('[data] control thread has finished')
Ejemplo n.º 2
0
def run(args):
    """
    Main execution function for the interceptor communication layer.

    Starts the receive/send threads and polls each thread's exception bucket
    until args.graceful_stop is set (set by ExcThread.run() on any thread
    exception). Finally sets args.job_aborted once all threads have ended.

    :param args: pilot arguments.
    :returns: None.
    """

    targets = {'receive': receive, 'send': send}
    threads = [
        ExcThread(bucket=queue.Queue(),
                  target=target,
                  kwargs={'args': args},
                  name=name) for name, target in targets.items()
    ]

    # plain loop instead of a side-effect comprehension
    for thread in threads:
        thread.start()

    # if an exception is thrown, the graceful_stop will be set by the ExcThread class run() function
    while not args.graceful_stop.is_set():
        for thread in threads:
            bucket = thread.get_bucket()
            try:
                exc = bucket.get(block=False)
            except queue.Empty:
                pass
            else:
                # only the exception object is needed for reporting
                _, exc_obj, _ = exc
                # lazy %-args: message is only built if the record is emitted
                logger.warning("thread '%s' received an exception from bucket: %s",
                               thread.name, exc_obj)

                # deal with the exception
                # ..

            thread.join(0.1)
            time.sleep(0.1)

        time.sleep(0.5)

    # proceed to set the job_aborted flag?
    if threads_aborted():
        logger.debug('will proceed to set job_aborted')
        args.job_aborted.set()
    else:
        logger.debug('will not set job_aborted yet')

    logger.debug('[interceptor] run thread has finished')
Ejemplo n.º 3
0
def control(queues, traces, args):
    """
    Start and monitor the payload threads (validate_pre, execute_payloads,
    validate_post, failed_post).

    One ExcThread is launched per target; each thread reports any internal
    exception through its bucket queue, which is polled here until
    args.graceful_stop is set (the ExcThread run() method sets graceful_stop
    when a thread raises). Finally sets args.job_aborted once all threads
    have ended.

    :param queues: internal queues for job handling.
    :param traces: tuple containing internal pilot states (traces.pilot dict is read/updated).
    :param args: pilot arguments object (graceful_stop, abort_job, job_aborted events).
    :return: None.
    """

    targets = {
        'validate_pre': validate_pre,
        'execute_payloads': execute_payloads,
        'validate_post': validate_post,
        'failed_post': failed_post
    }
    threads = [
        ExcThread(bucket=queue.Queue(),
                  target=target,
                  kwargs={
                      'queues': queues,
                      'traces': traces,
                      'args': args
                  },
                  name=name) for name, target in targets.items()
    ]

    # plain loop instead of a side-effect comprehension
    for thread in threads:
        thread.start()

    # if an exception is thrown, the graceful_stop will be set by the ExcThread class run() function
    while not args.graceful_stop.is_set():
        for thread in threads:
            bucket = thread.get_bucket()
            try:
                exc = bucket.get(block=False)
            except queue.Empty:
                pass
            else:
                # only the exception object is needed for reporting
                _, exc_obj, _ = exc
                # lazy %-args: message is only built if the record is emitted
                logger.warning("thread '%s' received an exception from bucket: %s",
                               thread.name, exc_obj)

                # deal with the exception
                # ..

            thread.join(0.1)
            time.sleep(0.1)

        time.sleep(0.5)

    logger.debug('payload control ending since graceful_stop has been set')
    if args.abort_job.is_set():
        if traces.pilot['command'] == 'aborting':
            logger.warning('jobs are aborting')
        elif traces.pilot['command'] == 'abort':
            # fixed copy-paste: this is the payload control thread, not data control
            logger.warning('payload control detected a set abort_job (due to a kill signal)')
            traces.pilot['command'] = 'aborting'

            # find all running jobs and stop them, find all jobs in queues relevant to this module
            #abort_jobs_in_queues(queues, args.signal)

    # proceed to set the job_aborted flag?
    if threads_aborted():
        logger.debug('will proceed to set job_aborted')
        args.job_aborted.set()
    else:
        logger.debug('will not set job_aborted yet')

    logger.debug('[payload] control thread has finished')
Ejemplo n.º 4
0
def run(args):
    """
    Main execution function for the stage-in workflow.

    The function sets up signal handling and the internal queues which handle
    the flow of jobs, starts the job/data/monitor control threads, and waits
    for them to finish while draining their exception buckets.

    :param args: pilot arguments.
    :returns: traces object (traces.pilot carries state/error_code).
    """

    logger.info('setting up signal handling')
    # identical handler for every signal of interest, so register in a loop
    for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT, signal.SIGSEGV,
                signal.SIGXCPU, signal.SIGUSR1, signal.SIGBUS):
        signal.signal(sig, functools.partial(interrupt, args))

    logger.info('setting up queues')
    # NOTE: attributes are set on the namedtuple *class*, matching the
    # module's established pattern of using it as a simple namespace
    queue_names = ['jobs', 'data_in', 'data_out', 'current_data_in',
                   'validated_jobs', 'finished_jobs', 'finished_data_in',
                   'finished_data_out', 'failed_jobs', 'failed_data_in',
                   'failed_data_out', 'completed_jobs']
    queues = namedtuple('queues', queue_names)
    for name in queue_names:
        setattr(queues, name, queue.Queue())

    logger.info('setting up tracing')
    traces = namedtuple('traces', ['pilot'])
    traces.pilot = {'state': SUCCESS,
                    'nr_jobs': 0,
                    'error_code': 0,
                    'command': None}

    # define the threads
    targets = {'job': job.control, 'data': data.control, 'monitor': monitor.control}
    threads = [ExcThread(bucket=queue.Queue(),
                         target=target,
                         kwargs={'queues': queues, 'traces': traces, 'args': args},
                         name=name) for name, target in targets.items()]

    logger.info('starting threads')
    for thread in threads:
        thread.start()

    logger.info('waiting for interrupts')

    # loop until only the main thread remains
    while threading.active_count() > 1:
        for thread in threads:
            bucket = thread.get_bucket()
            try:
                exc = bucket.get(block=False)
            except queue.Empty:
                pass
            else:
                # only the exception object is needed for reporting
                _, exc_obj, _ = exc
                # deal with the exception
                print('received exception from bucket queue in generic workflow: %s' % exc_obj, file=stderr)
                # logger.fatal('caught exception: %s' % exc_obj)

            thread.join(0.1)

    logger.info('end of stage-in workflow (traces error code: %d)', traces.pilot['error_code'])

    return traces
Ejemplo n.º 5
0
def run(args):
    """
    Main execution function for the generic workflow.

    The function sets up signal handling and the internal queues which handle
    the flow of jobs, runs the pilot-user sanity check, starts the
    job/payload/data/monitor control threads, and waits for them to finish
    while draining their exception buckets. The wait loop aborts early when
    only daemon threads (besides the main thread) remain.

    :param args: pilot arguments.
    :returns: traces object (traces.pilot carries state/error_code).
    """

    logger.info('setting up signal handling')
    # identical handler for every signal of interest, so register in a loop
    for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT, signal.SIGSEGV,
                signal.SIGXCPU, signal.SIGUSR1, signal.SIGBUS):
        signal.signal(sig, functools.partial(interrupt, args))

    logger.info('setting up queues')
    # NOTE: attributes are set on the namedtuple *class*, matching the
    # module's established pattern of using it as a simple namespace
    queue_names = [
        'jobs', 'payloads', 'data_in', 'data_out', 'current_data_in',
        'validated_jobs', 'validated_payloads', 'monitored_payloads',
        'finished_jobs', 'finished_payloads', 'finished_data_in',
        'finished_data_out', 'failed_jobs', 'failed_payloads',
        'failed_data_in', 'failed_data_out', 'completed_jobs',
        'completed_jobids'
    ]
    queues = namedtuple('queues', queue_names)
    for name in queue_names:
        setattr(queues, name, queue.Queue())

    logger.info('setting up tracing')
    traces = namedtuple('traces', ['pilot'])
    traces.pilot = {
        'state': SUCCESS,
        'nr_jobs': 0,
        'error_code': 0,
        'command': None
    }

    # initial sanity check defined by pilot user
    try:
        user = __import__('pilot.user.%s.common' % args.pilot_user.lower(),
                          globals(), locals(), [args.pilot_user.lower()],
                          0)
        exit_code = user.sanity_check()
    except Exception as e:
        logger.info('skipping sanity check since: %s', e)
    else:
        if exit_code != 0:
            logger.info('aborting workflow since sanity check failed')
            traces.pilot['error_code'] = exit_code
            return traces
        else:
            logger.info('passed sanity check')

    # define the threads
    targets = {
        'job': job.control,
        'payload': payload.control,
        'data': data.control,
        'monitor': monitor.control
    }
    threads = [
        ExcThread(bucket=queue.Queue(),
                  target=target,
                  kwargs={
                      'queues': queues,
                      'traces': traces,
                      'args': args
                  },
                  name=name) for name, target in targets.items()
    ]

    logger.info('starting threads')
    for thread in threads:
        thread.start()

    logger.info('waiting for interrupts')

    thread_count = threading.active_count()
    while threading.active_count() > 1:
        for thread in threads:
            bucket = thread.get_bucket()
            try:
                exc = bucket.get(block=False)
            except queue.Empty:
                pass
            else:
                # only the exception object is needed for reporting
                _, exc_obj, _ = exc
                # deal with the exception
                print(
                    'received exception from bucket queue in generic workflow: %s'
                    % exc_obj,
                    file=stderr)
                # logger.fatal('caught exception: %s' % exc_obj)

            thread.join(0.1)

        abort = False
        if thread_count != threading.active_count():
            thread_count = threading.active_count()
            logger.debug('thread count now at %d threads', thread_count)
            logger.debug('enumerate: %s', str(threading.enumerate()))
            # count daemon threads; they will be aborted when python ends,
            # so they can be ignored when deciding whether only the main
            # thread remains
            daemon_threads = sum(1 for t in threading.enumerate() if t.daemon)
            if thread_count - daemon_threads == 1:
                logger.debug(
                    'aborting since there is[are] %d daemon thread[s] which can be ignored',
                    daemon_threads)
                abort = True

        if abort:
            break

        sleep(1)

    logger.info('end of generic workflow (traces error code: %d)',
                traces.pilot['error_code'])

    return traces