Esempio n. 1
0
def task_kill_job(node, transport_queue, cancel_flag):
    """
    Transport task that will attempt to kill a job calculation

    If the calculation is still in the NEW or TOSUBMIT state, it is marked as FAILED directly and the task finishes
    without requesting a transport.

    Otherwise the task will first request a transport from the queue. Once the transport is yielded, the relevant
    execmanager function is called, wrapped in the exponential_backoff_retry coroutine, which, in case of a caught
    exception, will retry after an interval that increases exponentially with the number of retries, for a maximum
    number of retries. If all retries fail, the calculation state is set to FAILED and the task will raise a
    TransportTaskException.

    If the task is cancelled while waiting for the transport, the resulting plumpy.CancelledError is swallowed and
    the task ends without raising Return.

    :param node: the node that represents the job calculation
    :param transport_queue: the TransportQueue from which to request a Transport
    :param cancel_flag: the cancelled flag that will be queried to determine whether the task was cancelled
    :raises: Return if the task was successfully completed; the value is True for the early-exit case, otherwise
        whatever execmanager.kill_calculation returned
    :raises: TransportTaskException if after the maximum number of retries the transport task still excepted
    """
    initial_interval = 1
    max_attempts = 5

    # Capture the state *before* overwriting it, so that the log message reports the state the calculation was
    # actually in and not the FAILED state that is set just below.
    previous_state = node.get_state()

    if previous_state in [calc_states.NEW, calc_states.TOSUBMIT]:
        node._set_state(calc_states.FAILED)
        logger.warning('calculation<{}> killed, it was in the {} state'.format(node.pk, previous_state))
        raise Return(True)

    authinfo = node.get_computer().get_authinfo(node.get_user())

    @coroutine
    def do_kill():
        """Request a transport and use it to kill the calculation; a single attempt of the retried operation."""
        with transport_queue.request_transport(authinfo) as request:
            transport = yield request

            # It may have taken time to get the transport, check if we've been cancelled
            if cancel_flag.is_cancelled:
                raise plumpy.CancelledError('task_kill_job for calculation<{}> cancelled'.format(node.pk))

            logger.info('killing calculation<{}>'.format(node.pk))

            raise Return(execmanager.kill_calculation(node, transport))

    try:
        result = yield exponential_backoff_retry(do_kill, initial_interval, max_attempts, logger=node.logger)
    except plumpy.CancelledError:
        # Cancellation is not an error: leave the node state untouched and end the task quietly.
        pass
    except Exception:
        logger.warning('killing calculation<{}> failed:\n{}'.format(node.pk, traceback.format_exc()))
        node._set_state(calc_states.FAILED)
        raise TransportTaskException('kill_calculation failed {} times consecutively'.format(max_attempts))
    else:
        logger.info('killing calculation<{}> successful'.format(node.pk))
        raise Return(result)
Esempio n. 2
0
    def test_exponential_backoff_success(self):
        """Test that exponential backoff will successfully catch exceptions as long as max_attempts is not exceeded."""
        # Per-test call counter kept in a mutable closure (a dict rather than `nonlocal`, to stay Python 2
        # compatible). A module-level counter would leak its value between tests and make the outcome depend on
        # the order in which the tests run.
        calls = {'count': 0}
        loop = IOLoop()

        @coroutine
        def coro():
            """Raise RuntimeError until this coroutine has been invoked MAX_ITERATIONS times."""
            calls['count'] += 1
            if calls['count'] < MAX_ITERATIONS:
                raise RuntimeError

        # One more attempt than coro needs before it stops raising, so the retry wrapper must not raise.
        max_attempts = MAX_ITERATIONS + 1
        loop.run_sync(lambda: exponential_backoff_retry(
            coro, initial_interval=0.1, max_attempts=max_attempts))
Esempio n. 3
0
    def test_exponential_backoff_max_attempts_exceeded(self):
        """Test that exponential backoff will finally raise if max_attempts is exceeded"""
        # Per-test call counter kept in a mutable closure (a dict rather than `nonlocal`, to stay Python 2
        # compatible). A module-level counter would leak its value between tests: if it had already reached
        # MAX_ITERATIONS, coro would never raise and this test would fail spuriously.
        calls = {'count': 0}
        loop = IOLoop()

        @coroutine
        def coro():
            """Raise RuntimeError until this coroutine has been invoked MAX_ITERATIONS times."""
            calls['count'] += 1
            if calls['count'] < MAX_ITERATIONS:
                raise RuntimeError

        # One attempt fewer than coro needs before it stops raising, so every attempt fails and the
        # RuntimeError is expected to propagate out of the retry wrapper.
        max_attempts = MAX_ITERATIONS - 1
        with self.assertRaises(RuntimeError):
            loop.run_sync(lambda: exponential_backoff_retry(
                coro, initial_interval=0.1, max_attempts=max_attempts))