Example 1
    def test_ignores_transient_failures_and_retries(self):
        """
        Retries after interval if the ``do_work`` function errbacks with an
        error that is ignored by the ``can_retry`` function.  The error is
        not propagated.
        """
        wrapped_retry = mock.MagicMock(wraps=self.retry_function, spec=[])
        d = retry(self.work_function, wrapped_retry,
                  self.interval_function, self.clock)

        self.assertNoResult(d)
        self.assertEqual(len(self.retries), 1)

        # no result on errback
        self.retries[-1].errback(DummyException('hey!'))
        self.assertIsNone(self.successResultOf(self.retries[-1]))
        self.assertNoResult(d)
        wrapped_retry.assert_called_once_with(CheckFailure(DummyException))

        self.clock.advance(self.interval)

        # since it was an errback, loop retries the function again
        self.assertNoResult(d)
        self.assertEqual(len(self.retries), 2)

        # stop loop
        self.retries[-1].callback('result!')
        self.assertEqual(self.successResultOf(d), 'result!')
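The ``CheckFailure(DummyException)`` argument in the assertion above is an equality matcher passed to mock: it should compare equal to any Failure wrapping the given exception type. A minimal sketch of such a matcher (assumed; the test suite's own helper may differ):

from twisted.python.failure import Failure


class CheckFailure(object):
    """Matches any Failure whose wrapped exception is an instance of exc_type."""

    def __init__(self, exc_type):
        self.exc_type = exc_type

    def __eq__(self, other):
        # Failure.check returns the matching class, or None if it does not match.
        return isinstance(other, Failure) and other.check(self.exc_type) is not None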
Example 2
def retry_and_timeout(do_work, timeout, can_retry=None, next_interval=None,
                      clock=None, deferred_description=None):
    """
    Retry a function until it succeeds or the timeout has been reached.
    This is just a composition of :func:`timeout_deferred` and :func:`retry`
    for convenience.  Please see their respective arguments.

    :param callable do_work: Takes no arguments.  Implements the work you want
        to perform on a periodic basis.
    :param number timeout: The number of seconds after which the attempt is
        considered to have timed out.
    :param callable can_retry: Takes a Twisted Failure instance as a
        parameter, and decides whether or not the work should be retried
        (returns True if a retry is desired, False otherwise).
    :param callable next_interval: Takes a Twisted Failure instance as a
        parameter, and decides how long to wait based on the error received.
        Returns a number.
    :param IReactorTime clock: The clock authority; if left unspecified, the
        normal Twisted reactor will be used.
    :param str deferred_description: A textual description of what timed out.
    :return: A Deferred that fires with the output of do_work if do_work
        succeeds, and with a Failure otherwise.  The Failure can be a timeout
        error or the exception that prevented further retrying.
    """
    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    d = retry(do_work, can_retry=can_retry, next_interval=next_interval,
              clock=clock)
    timeout_deferred(d, timeout, clock=clock,
                     deferred_description=deferred_description)
    return d
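A minimal usage sketch (not from the source) showing how the arguments documented above could be wired together; ``poll_remote_service`` and the import location of ``retry_and_timeout`` are assumptions, so the actual call is left commented out:

from twisted.internet.task import Clock

# from otter.util.deferredutils import retry_and_timeout  # import path assumed


class TransientServiceError(Exception):
    """Stand-in for whatever error the polled service raises transiently."""


def do_work():
    # Takes no arguments; returns a Deferred (or a plain value, or raises).
    return poll_remote_service()  # hypothetical helper, not defined here


def can_retry(failure):
    # Retry only the transient error above; propagate everything else.
    return failure.check(TransientServiceError) is not None


def next_interval(failure):
    # Wait a flat 5 seconds between attempts, whatever the error was.
    return 5


clock = Clock()  # a fake IReactorTime, convenient in tests
# d = retry_and_timeout(do_work, timeout=60, can_retry=can_retry,
#                       next_interval=next_interval, clock=clock,
#                       deferred_description='polling the remote service')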
Example 3
    def authenticate_tenant(self, tenant_id, log=None):
        """
        see :meth:`IAuthenticator.authenticate_tenant`
        """
        return retry(
            partial(self._authenticator.authenticate_tenant, tenant_id, log=log),
            can_retry=retry_times(self._max_retries),
            next_interval=repeating_interval(self._retry_interval),
            clock=self._reactor)
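The example above passes ``retry_times`` and ``repeating_interval`` as the ``can_retry`` and ``next_interval`` arguments. Given the signatures documented in Example 2 (``can_retry`` takes a Failure and returns a bool, ``next_interval`` takes a Failure and returns a number of seconds), a rough sketch of how such helpers could be written (the project's real implementations may differ):

def retry_times(max_retries):
    """Return a can_retry function that allows up to max_retries retries."""
    state = {'count': 0}

    def can_retry(failure):
        state['count'] += 1
        return state['count'] <= max_retries

    return can_retry


def repeating_interval(interval):
    """Return a next_interval function that always waits `interval` seconds."""
    def next_interval(failure):
        return interval

    return next_interval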
Example 4
def remove_from_load_balancer(log, endpoint, auth_token, loadbalancer_id,
                              node_id, clock=None):
    """
    Remove a node from a load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param str loadbalancer_id: The ID for a cloud load balancer.
    :param str node_id: The ID for a node in that cloud load balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Will remove this once LB ERROR state is fixed and it is working fine
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id), 'nodes', str(node_id))

    def check_422_deleted(failure):
        # A LB being deleted sometimes results in a 422.  This function
        # unfortunately has to parse the body of the message to see if this is an
        # acceptable 422 (if the LB has been deleted or the node has already been
        # removed, then 'removing from load balancer' as a task should be
        # successful - if the LB is in ERROR, then nothing more can be done to
        # it except resetting it - may as well remove the server.)
        failure.trap(APIError)
        error = failure.value
        if error.code == 422:
            message = json.loads(error.body)['message']
            if ('load balancer is deleted' not in message and
                    'PENDING_DELETE' not in message):
                return failure
            lb_log.msg(message)
        else:
            return failure

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)

        # Success is 200/202.  An LB not being found is 404.  A node not being
        # found is a 404.  But a deleted LB sometimes results in a 422.
        d.addCallback(log_on_response_code, lb_log, 'Node to delete does not exist', 404)
        d.addCallback(check_success, [200, 202, 404])
        d.addCallback(treq.content)  # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addErrback(check_422_deleted)
        d.addErrback(log_lb_unexpected_errors, path, lb_log, 'remove_node')
        return d

    d = retry(
        remove,
        can_retry=retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
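``random_interval`` above is fed the configured ``worker.lb_retry_interval_range``. A plausible sketch (assumed, not the project's implementation) is a ``next_interval`` factory that picks a random delay within that range on every retry:

import random


def random_interval(lower, upper):
    """Return a next_interval function yielding a uniform delay in [lower, upper]."""
    def next_interval(failure):
        return random.uniform(lower, upper)

    return next_interval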
Example 5
def add_to_load_balancer(log, endpoint, auth_token, lb_config, ip_address, undo, clock=None):
    """
    Add an IP address to a load balancer based on the lb_config.

    TODO: Handle load balancer node metadata.

    :param log: A bound logger
    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param dict lb_config: An lb_config dictionary.
    :param str ip_address: The IP Address of the node to add to the load
        balancer.
    :param IUndoStack undo: An IUndoStack to push any reversible operations onto.

    :return: Deferred that fires with the Add Node to load balancer response
        as a dict.
    """
    lb_id = lb_config['loadBalancerId']
    port = lb_config['port']
    path = append_segments(endpoint, 'loadbalancers', str(lb_id), 'nodes')
    lb_log = log.bind(loadbalancer_id=lb_id, ip_address=ip_address)

    def add():
        d = treq.post(path, headers=headers(auth_token),
                      data=json.dumps({"nodes": [{"address": ip_address,
                                                  "port": port,
                                                  "condition": "ENABLED",
                                                  "type": "PRIMARY"}]}),
                      log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addErrback(log_lb_unexpected_errors, lb_log, 'add_node')
        d.addErrback(wrap_request_error, path, 'add_node')
        d.addErrback(check_deleted_clb, lb_id)
        return d

    d = retry(
        add,
        can_retry=compose_retries(
            transient_errors_except(CLBOrNodeDeleted),
            retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES)),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)

    def when_done(result):
        lb_log.msg('Added to load balancer', node_id=result['nodes'][0]['id'])
        undo.push(remove_from_load_balancer,
                  lb_log,
                  endpoint,
                  auth_token,
                  lb_id,
                  result['nodes'][0]['id'])
        return result

    return d.addCallback(treq.json_content).addCallback(when_done)
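``compose_retries`` combines several ``can_retry`` predicates: here ``transient_errors_except(CLBOrNodeDeleted)`` decides which failures are retryable at all, while ``retry_times(...)`` caps how many retries happen. One plausible composition (a sketch; the project's helper may evaluate the predicates differently, for instance without short-circuiting):

def compose_retries(*can_retry_functions):
    """Return a can_retry that retries only when every composed predicate agrees."""
    def can_retry(failure):
        return all(fn(failure) for fn in can_retry_functions)

    return can_retry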
Example 6
    def test_already_callbacked_deferred_not_canceled(self):
        """
        If ``do_work``'s deferred has already fired, ``retry``
        callbacks correctly without canceling the fired deferred.
        """
        r = succeed('result!')
        wrapped = mock.MagicMock(spec=r, wraps=r)
        work_function = mock.MagicMock(spec=[], return_value=wrapped)

        d = retry(work_function, self.retry_function, self.interval_function,
                  self.clock)
        self.assertEqual(self.successResultOf(d), 'result!')

        self.assertEqual(wrapped.cancel.call_count, 0)
Example 7
    def test_sync_propagates_result_and_stops_retries_on_callback(self):
        """
        The deferred callbacks with the result as soon as the synchronous
        ``do_work`` function succeeds.  No retries happen
        """
        self.work_function = mock.MagicMock(spec=[], return_value='result!')
        d = retry(self.work_function, self.retry_function,
                  self.interval_function, self.clock)
        self.assertEqual(self.successResultOf(d), 'result!')
        self.work_function.assert_called_once_with()

        # work_function not called again.
        self.clock.advance(self.interval)
        self.work_function.assert_called_once_with()
Example 8
    def test_cancelling_deferred_stops_retries(self):
        """
        Cancelling the deferred prevents ``retry`` from retrying ``do_work``
        again.
        """
        d = retry(self.work_function, self.retry_function,
                  self.interval_function, self.clock)
        self.assertEqual(len(self.retries), 1)

        d.cancel()
        self.failureResultOf(d, CancelledError)

        self.clock.advance(self.interval)
        self.assertEqual(len(self.retries), 1)
Example 9
def _remove_from_clb(log,
                     endpoint,
                     auth_token,
                     loadbalancer_id,
                     node_id,
                     clock=None):
    """
    Remove a node from a CLB load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone authentication token.
    :param str loadbalancer_id: The ID for a Cloud Load Balancer.
    :param str node_id: The ID for a node in that Cloud Load Balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Will remove this once LB ERROR state is fixed and it is working fine
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id),
                           'nodes', str(node_id))

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addCallback(treq.content)  # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addErrback(log_lb_unexpected_errors, lb_log, 'remove_node')
        d.addErrback(wrap_request_error, path, 'remove_node')
        d.addErrback(check_deleted_clb, loadbalancer_id, node_id)
        return d

    d = retry(remove,
              can_retry=compose_retries(
                  transient_errors_except(CLBOrNodeDeleted),
                  retry_times(
                      config_value('worker.lb_max_retries')
                      or LB_MAX_RETRIES)),
              next_interval=random_interval(
                  *(config_value('worker.lb_retry_interval_range')
                    or LB_RETRY_INTERVAL_RANGE)),
              clock=clock)

    # A node or CLB deleted is considered successful removal
    d.addErrback(
        lambda f: f.trap(CLBOrNodeDeleted) and lb_log.msg(f.value.message))
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
Example 10
def retry_and_timeout(do_work, timeout, can_retry=None, next_interval=None,
                      clock=None):
    """
    Retry a function until it succeeds or the timeout has been reached.
    This is just a composition of :func:`timeout_deferred` and :func:`retry`
    for convenience.  Please see their respective arguments.
    """
    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    d = retry(do_work, can_retry=can_retry, next_interval=next_interval,
              clock=clock)
    timeout_deferred(d, timeout, clock=clock)
    return d
Example 11
    def test_default_can_retry_function(self):
        """
        If no ``can_retry`` function is provided, a default function treats
        any failure as transient
        """
        d = retry(self.work_function, None, self.interval_function, self.clock)

        self.assertEqual(len(self.retries), 1)
        self.retries[-1].errback(DummyException('temp'))

        self.clock.advance(self.interval)

        self.assertEqual(len(self.retries), 2)
        self.retries[-1].errback(NotImplementedError())

        self.assertNoResult(d)
Example 12
    def test_stops_on_non_transient_error(self):
        """
        If ``do_work`` errbacks with something the ``can_retry`` function does
        not ignore, the error is propagated up.  ``do_work`` is not retried.
        """
        d = retry(self.work_function, lambda *args: False,
                  self.interval_function, self.clock)

        self.assertNoResult(d)
        self.assertEqual(len(self.retries), 1)

        self.retries[-1].errback(DummyException('fail!'))
        self.failureResultOf(d, DummyException)

        # work_function not called again
        self.clock.advance(self.interval)
        self.assertEqual(len(self.retries), 1)
Example 13
    def test_async_propagates_result_and_stops_retries_on_callback(self):
        """
        The deferred callbacks with the result as soon as the asynchronous
        ``do_work`` function succeeds.  No retries happen
        """
        d = retry(self.work_function, self.retry_function,
                  self.interval_function, self.clock)

        # no result until the work_function's deferred fires
        self.assertNoResult(d)
        self.assertEqual(len(self.retries), 1)

        self.retries[-1].callback('result!')
        self.assertEqual(self.successResultOf(d), 'result!')

        # work_function not called again.
        self.clock.advance(self.interval)
        self.assertEqual(len(self.retries), 1)
Example 14
    def test_cancelling_deferred_does_not_cancel_completed_work(self):
        """
        Cancelling the deferred does not attempt to cancel previously
        callbacked results from ``do_work``
        """
        d = retry(self.work_function, self.retry_function,
                  self.interval_function, self.clock)

        self.assertEqual(len(self.retries), 1)
        self.retries[-1].errback(DummyException('temp'))

        # cancel main deferred
        d.cancel()
        self.failureResultOf(d, CancelledError)

        # work_function's deferred is not cancelled
        self.assertEqual(self.retries[-1].cancel.call_count, 0)
        self.assertIsNone(self.successResultOf(self.retries[-1]))
Example 15
def verified_delete(log,
                    server_endpoint,
                    request_bag,
                    server_id,
                    exp_start=2,
                    max_retries=10,
                    clock=None):
    """
    Attempt to delete a server from the server endpoint, and ensure that it is
    deleted by trying again until deleting/getting the server results in a 404
    or until ``OS-EXT-STS:task_state`` in server details is 'deleting',
    indicating that Nova has acknowledged that the server is to be deleted
    as soon as possible.

    Time out attempting to verify deletes after a period of time and log an
    error.

    :param log: A bound logger.
    :param str server_endpoint: Server endpoint URI.
    :param request_bag: An object with a bunch of useful data on it, including
        a callable to re-auth and get a new token.
    :param str server_id: Opaque nova server id.
    :param int exp_start: Exponential backoff interval start seconds. Default 2
    :param int max_retries: Maximum number of retry attempts

    :return: Deferred that fires when the expected status has been seen.
    """
    serv_log = log.bind(server_id=server_id)
    serv_log.msg('Deleting server')

    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    d = retry(
        partial(delete_and_verify, serv_log, server_endpoint, request_bag,
                server_id, clock),
        can_retry=retry_times(max_retries),
        next_interval=exponential_backoff_interval(exp_start),
        clock=clock)

    d.addCallback(log_with_time, clock, serv_log, clock.seconds(),
                  ('Server deleted successfully (or acknowledged by Nova as '
                   'to-be-deleted) : {time_delete} seconds.'), 'time_delete')
    return d
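``exponential_backoff_interval(exp_start)`` supplies the ``next_interval`` function above. A sketch of the usual idea (assumed, not the project's code): start at ``exp_start`` seconds and double the delay after each failure:

def exponential_backoff_interval(start):
    """Return a next_interval function that doubles the delay on each failure."""
    state = {'delay': start}

    def next_interval(failure):
        current = state['delay']
        state['delay'] *= 2
        return current

    return next_interval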
Example 16
    def test_default_next_interval_function(self):
        """
        If no ``next_interval`` function is provided, a default function returns
        5 no matter what the failure.
        """
        d = retry(self.work_function, self.retry_function, None, self.clock)

        self.assertEqual(len(self.retries), 1)
        self.retries[-1].errback(DummyException('temp'))

        self.clock.advance(5)

        self.assertEqual(len(self.retries), 2)
        self.retries[-1].errback(NotImplementedError())

        self.clock.advance(5)

        self.assertEqual(len(self.retries), 3)
        self.assertNoResult(d)
Example 17
    def test_cancelling_deferred_cancels_work_in_progress(self):
        """
        Cancelling the deferred cancels the deferred returned by
        ``do_work`` if it is still in progress, but eats the
        :class:`CancelledError` (but the overall retry deferred still
        errbacks with a :class:`CancelledError`)
        """
        d = retry(self.work_function, self.retry_function,
                  self.interval_function, self.clock)
        self.assertEqual(len(self.retries), 1)
        self.assertNoResult(self.retries[-1])

        # cancel main deferred
        d.cancel()
        self.failureResultOf(d, CancelledError)

        # work_function's deferred is cancelled, and error eaten
        self.retries[-1].cancel.assert_called_once_with()
        self.assertIsNone(self.successResultOf(self.retries[-1]))
Example 18
def remove_from_load_balancer(log, endpoint, auth_token, loadbalancer_id,
                              node_id, clock=None):
    """
    Remove a node from a load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param str loadbalancer_id: The ID for a cloud load balancer.
    :param str node_id: The ID for a node in that cloud load balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Will remove this once LB ERROR state is fixed and it is working fine
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id), 'nodes', str(node_id))

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addCallback(treq.content)  # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addErrback(log_lb_unexpected_errors, lb_log, 'remove_node')
        d.addErrback(wrap_request_error, path, 'remove_node')
        d.addErrback(check_deleted_clb, loadbalancer_id, node_id)
        return d

    d = retry(
        remove,
        can_retry=compose_retries(
            transient_errors_except(CLBOrNodeDeleted),
            retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES)),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)

    # A node or CLB deleted is considered successful removal
    d.addErrback(lambda f: f.trap(CLBOrNodeDeleted) and lb_log.msg(f.value.message))
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
Example 19
    def test_handles_synchronous_do_work_function_errors(self):
        """
        Transient/terminal error handling works the same with a synchronous
        ``do_work`` function that raises instead of errbacks.
        """
        self.work_function = mock.MagicMock(spec=[])
        self.work_function.side_effect = DummyException

        # DummyExceptions are transient, all else are terminal
        d = retry(self.work_function, (lambda f: f.check(DummyException)),
                  self.interval_function, self.clock)

        # no result
        self.assertNoResult(d)
        self.work_function.assert_called_once_with()

        self.work_function.side_effect = NotImplementedError
        self.clock.advance(self.interval)

        # terminal error
        self.failureResultOf(d, NotImplementedError)
        self.assertEqual(self.work_function.call_count, 2)
Example 20
@defer.inlineCallbacks  # required for the yield / defer.returnValue usage below
def get_all_server_details(tenant_id, authenticator, service_name, region,
                           limit=100, clock=None, _treq=None):
    """
    Return all servers of a tenant
    TODO: service_name is possibly internal to this function but I don't want to pass config here?
    NOTE: This really screams to be an independent txcloud-type API
    """
    token, catalog = yield authenticator.authenticate_tenant(tenant_id, log=default_log)
    endpoint = public_endpoint_url(catalog, service_name, region)
    url = append_segments(endpoint, 'servers', 'detail')
    query = {'limit': limit}
    all_servers = []

    if clock is None:  # pragma: no cover
        from twisted.internet import reactor as clock

    if _treq is None:  # pragma: no cover
        _treq = treq

    def fetch(url, headers):
        d = _treq.get(url, headers=headers)
        d.addCallback(check_success, [200], _treq=_treq)
        d.addCallback(_treq.json_content)
        return d

    while True:
        # sort based on query name to make the tests predictable
        urlparams = sorted(query.items(), key=lambda e: e[0])
        d = retry(partial(fetch, '{}?{}'.format(url, urlencode(urlparams)), headers(token)),
                  can_retry=retry_times(5),
                  next_interval=exponential_backoff_interval(2), clock=clock)
        servers = (yield d)['servers']
        all_servers.extend(servers)
        if len(servers) < limit:
            break
        query.update({'marker': servers[-1]['id']})

    defer.returnValue(all_servers)
Example 21
    def test_retries_at_intervals_specified_by_interval_function(self):
        """
        ``do_work``, if it experiences transient failures, will be retried at
        intervals returned by the ``next_interval`` function
        """
        changing_interval = mock.MagicMock(spec=[])
        d = retry(self.work_function, self.retry_function, changing_interval,
                  self.clock)

        changing_interval.return_value = 1
        self.assertEqual(len(self.retries), 1)
        self.retries[-1].errback(DummyException('hey!'))
        self.assertNoResult(d)
        changing_interval.assert_called_once_with(CheckFailure(DummyException))

        self.clock.advance(1)
        changing_interval.return_value = 2
        self.assertEqual(len(self.retries), 2)
        self.retries[-1].errback(DummyException('hey!'))
        self.assertNoResult(d)
        changing_interval.assert_has_calls(
            [mock.call(CheckFailure(DummyException))] * 2)

        # the next interval has changed - after 1 second, it is still not
        # retried
        self.clock.advance(1)
        self.assertEqual(len(self.retries), 2)
        self.assertNoResult(d)
        changing_interval.assert_has_calls(
            [mock.call(CheckFailure(DummyException))] * 2)

        # after 2 seconds, the function is retried
        self.clock.advance(1)
        self.assertEqual(len(self.retries), 3)

        # stop retrying
        self.retries[-1].callback('hey')
Example 22
def launch_server(log,
                  request_bag,
                  scaling_group,
                  launch_config,
                  undo,
                  clock=None):
    """
    Launch a new server given the launch config, auth tokens and service
    catalog. Possibly adding the newly launched server to a load balancer.

    :param BoundLog log: A bound logger.
    :param request_bag: An object with a bunch of useful data on it, including
        a callable to re-auth and get a new token.
    :param IScalingGroup scaling_group: The scaling group to add the launched
        server to.
    :param dict launch_config: A launch_config args structure as defined for
        the launch_server_v1 type.
    :param IUndoStack undo: The stack that will be rewound if undo fails.

    :return: Deferred that fires with a 2-tuple of server details and the
        list of load balancer responses from add_to_load_balancers.
    """
    launch_config = prepare_launch_config(scaling_group.uuid, launch_config)

    cloudServersOpenStack = config_value('cloudServersOpenStack')
    server_endpoint = public_endpoint_url(request_bag.service_catalog,
                                          cloudServersOpenStack,
                                          request_bag.region)

    lb_config = launch_config.get('loadBalancers', [])
    server_config = launch_config['server']

    log = log.bind(server_name=server_config['name'])
    ilog = [None]

    def check_metadata(server):
        # sanity check to make sure the metadata didn't change - can probably
        # be removed after a while if we do not see any log messages from this
        # function
        expected = launch_config['server']['metadata']
        result = server['server'].get('metadata')
        if result != expected:
            ilog[0].msg('Server metadata has changed.',
                        sanity_check=True,
                        expected_metadata=expected,
                        nova_metadata=result)
        return server

    def wait_for_server(server, new_request_bag):
        server_id = server['server']['id']

        # NOTE: If server create is retried, each server delete will be pushed
        # to undo stack even after it will be deleted in check_error which is
        # fine since verified_delete succeeds on deleted server
        undo.push(verified_delete, log, server_endpoint, new_request_bag,
                  server_id)

        ilog[0] = log.bind(server_id=server_id)
        return wait_for_active(ilog[0], server_endpoint,
                               new_request_bag.auth_token,
                               server_id).addCallback(check_metadata)

    def add_lb(server, new_request_bag):
        if lb_config:
            lbd = add_to_load_balancers(ilog[0], new_request_bag, lb_config,
                                        server, undo)
            lbd.addCallback(lambda lb_response: (server, lb_response))
            return lbd

        return (server, [])

    def _real_create_server(new_request_bag):
        auth_token = new_request_bag.auth_token
        d = create_server(server_endpoint, auth_token, server_config, log=log)
        d.addCallback(wait_for_server, new_request_bag)
        d.addCallback(add_lb, new_request_bag)
        return d

    def _create_server():
        return request_bag.re_auth().addCallback(_real_create_server)

    def check_error(f):
        f.trap(UnexpectedServerStatus)
        if f.value.status == 'ERROR':
            log.msg(
                '{server_id} errored, deleting and creating new '
                'server instead',
                server_id=f.value.server_id)
            # trigger server delete and return True to allow retry
            verified_delete(log, server_endpoint, request_bag,
                            f.value.server_id)
            return True
        else:
            return False

    d = retry(_create_server,
              can_retry=compose_retries(retry_times(3), check_error),
              next_interval=repeating_interval(15),
              clock=clock)

    return d
Example 23
def add_to_clb(log,
               endpoint,
               auth_token,
               lb_config,
               ip_address,
               undo,
               clock=None):
    """
    Add an IP address to a Cloud Load Balancer based on the ``lb_config``.

    TODO: Handle load balancer node metadata.

    :param log: A bound logger
    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone auth token.
    :param dict lb_config: An ``lb_config`` dictionary.
    :param str ip_address: The IP address of the node to add to the load
        balancer.
    :param IUndoStack undo: An IUndoStack to push any reversible operations
        onto.

    :return: Deferred that fires with the load balancer response.
    """
    lb_id = lb_config['loadBalancerId']
    port = lb_config['port']
    path = append_segments(endpoint, 'loadbalancers', str(lb_id), 'nodes')
    lb_log = log.bind(loadbalancer_id=lb_id, ip_address=ip_address)

    def add():
        d = treq.post(path,
                      headers=headers(auth_token),
                      data=json.dumps({
                          "nodes": [{
                              "address": ip_address,
                              "port": port,
                              "condition": "ENABLED",
                              "type": "PRIMARY"
                          }]
                      }),
                      log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addErrback(log_lb_unexpected_errors, lb_log, 'add_node')
        d.addErrback(wrap_request_error, path, 'add_node')
        d.addErrback(check_deleted_clb, lb_id)
        return d

    d = retry(add,
              can_retry=compose_retries(
                  transient_errors_except(CLBOrNodeDeleted),
                  retry_times(
                      config_value('worker.lb_max_retries')
                      or LB_MAX_RETRIES)),
              next_interval=random_interval(
                  *(config_value('worker.lb_retry_interval_range')
                    or LB_RETRY_INTERVAL_RANGE)),
              clock=clock)

    def when_done(result):
        node_id = result['nodes'][0]['id']
        lb_log.msg('Added to load balancer', node_id=node_id)
        undo.push(_remove_from_clb, lb_log, endpoint, auth_token, lb_id,
                  node_id)
        return result

    return d.addCallback(treq.json_content).addCallback(when_done)
Example 24
def create_server(server_endpoint,
                  auth_token,
                  server_config,
                  log=None,
                  clock=None,
                  retries=3,
                  create_failure_delay=5,
                  _treq=None):
    """
    Create a new server.  If there is an error from Nova from this call,
    checks to see if the server was created anyway.  If not, will retry the
    create ``retries`` times (checking each time whether a server was created).

    If the error from Nova is a 400, does not retry, because that implies that
    retrying will just result in another 400 (bad args).

    If checking to see if the server is created also results in a failure,
    does not retry because there might just be something wrong with Nova.

    :param str server_endpoint: Server endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param dict server_config: Nova server config.
    :param int retries: Number of tries to retry the create.
    :param int create_failure_delay: how much time in seconds to wait after
        a create server failure before checking Nova to see if a server
        was created

    :param log: logger
    :type log: :class:`otter.log.bound.BoundLog`

    :param _treq: To be used for testing - what treq object to use
    :type _treq: something with the same API as :obj:`treq`

    :return: Deferred that fires with the CreateServer response as a dict.
    """
    path = append_segments(server_endpoint, 'servers')

    if _treq is None:  # pragma: no cover
        _treq = treq
    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    def _check_results(result, propagated_f):
        """
        Return the original failure, if checking a server resulted in a
        failure too.  Returns a wrapped propagated failure, if there were no
        servers created, so that the retry utility knows that server creation
        can be retried.
        """
        if isinstance(result, Failure):
            log.msg(
                "Attempt to find a created server in nova resulted in "
                "{failure}. Propagating the original create error instead.",
                failure=result)
            return propagated_f

        if result is None:
            raise _NoCreatedServerFound(propagated_f)

        return result

    def _check_server_created(f):
        """
        If creating a server failed with anything other than a 400, see if
        Nova created a server anyway (a 400 means that the server creation args
        were bad, and there is no point in retrying).

        If Nova created a server, just return it and pretend that the error
        never happened.  If it didn't, or if checking resulted in another
        failure response, return a failure of some type.
        """
        f.trap(APIError)
        if f.value.code == 400:
            return f

        d = deferLater(clock,
                       create_failure_delay,
                       find_server,
                       server_endpoint,
                       auth_token,
                       server_config,
                       log=log)
        d.addBoth(_check_results, f)
        return d

    def _create_with_delay(to_delay):
        d = _treq.post(path,
                       headers=headers(auth_token),
                       data=json.dumps({'server': server_config}),
                       log=log)
        if to_delay:
            # Add 1 second delay to space 1 second between server creations
            d.addCallback(delay, clock, 1)
        return d

    def _create_server():
        """
        Attempt to create a server, handling spurious non-400 errors from Nova
        by seeing if Nova created a server anyway in spite of the error.  If so
        then create server succeeded.

        If not, and if no further errors occur, server creation can be retried.
        """
        sem = get_sempahore("create_server", "worker.create_server_limit")
        if sem is not None:
            d = sem.run(_create_with_delay, True)
        else:
            d = _create_with_delay(False)
        d.addCallback(check_success, [202], _treq=_treq)
        d.addCallback(_treq.json_content)
        d.addErrback(_check_server_created)
        return d

    def _unwrap_NoCreatedServerFound(f):
        """
        The original failure was wrapped in a :class:`_NoCreatedServerFound`
        for ease of retry, but that should not be the final error propagated up
        by :func:`create_server`.

        This errback unwraps the :class:`_NoCreatedServerFound` error and
        returns the original failure.
        """
        f.trap(_NoCreatedServerFound)
        return f.value.original

    d = retry(_create_server,
              can_retry=compose_retries(
                  retry_times(retries),
                  terminal_errors_except(_NoCreatedServerFound)),
              next_interval=repeating_interval(15),
              clock=clock)

    d.addErrback(_unwrap_NoCreatedServerFound)
    d.addErrback(wrap_request_error, path, 'server_create')

    return d
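``terminal_errors_except(_NoCreatedServerFound)`` reads as the mirror image of ``transient_errors_except``: only the listed exception types are considered retryable and any other failure is terminal. A sketch of that reading (assumed, not the project's implementation):

def terminal_errors_except(*exc_types):
    """Return a can_retry function that retries only the given exception types."""
    def can_retry(failure):
        # Failure.check returns the matching class, or None if none match.
        return failure.check(*exc_types) is not None

    return can_retry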
Example 25
def launch_server(log, region, scaling_group, service_catalog, auth_token,
                  launch_config, undo, clock=None):
    """
    Launch a new server given the launch config, auth tokens and service catalog.
    Possibly adding the newly launched server to a load balancer.

    :param BoundLog log: A bound logger.
    :param str region: A rackspace region as found in the service catalog.
    :param IScalingGroup scaling_group: The scaling group to add the launched
        server to.
    :param list service_catalog: A list of services as returned by the auth apis.
    :param str auth_token: The user's auth token.
    :param dict launch_config: A launch_config args structure as defined for
        the launch_server_v1 type.
    :param IUndoStack undo: The stack that will be rewound if undo fails.

    :return: Deferred that fires with a 2-tuple of server details and the
        list of load balancer responses from add_to_load_balancers.
    """
    launch_config = prepare_launch_config(scaling_group.uuid, launch_config)

    lb_region = config_value('regionOverrides.cloudLoadBalancers') or region
    cloudLoadBalancers = config_value('cloudLoadBalancers')
    cloudServersOpenStack = config_value('cloudServersOpenStack')

    lb_endpoint = public_endpoint_url(service_catalog,
                                      cloudLoadBalancers,
                                      lb_region)

    server_endpoint = public_endpoint_url(service_catalog,
                                          cloudServersOpenStack,
                                          region)

    lb_config = launch_config.get('loadBalancers', [])

    server_config = launch_config['server']

    log = log.bind(server_name=server_config['name'])
    ilog = [None]

    def wait_for_server(server):
        server_id = server['server']['id']

        # NOTE: If server create is retried, each server delete will be pushed
        # to undo stack even after it will be deleted in check_error which is fine
        # since verified_delete succeeds on deleted server
        undo.push(
            verified_delete, log, server_endpoint, auth_token, server_id)

        ilog[0] = log.bind(server_id=server_id)
        return wait_for_active(
            ilog[0],
            server_endpoint,
            auth_token,
            server_id)

    def add_lb(server):
        ip_address = private_ip_addresses(server)[0]
        lbd = add_to_load_balancers(
            ilog[0], lb_endpoint, auth_token, lb_config, ip_address, undo)
        lbd.addCallback(lambda lb_response: (server, lb_response))
        return lbd

    def _create_server():
        d = create_server(server_endpoint, auth_token, server_config, log=log)
        d.addCallback(wait_for_server)
        d.addCallback(add_lb)
        return d

    def check_error(f):
        f.trap(UnexpectedServerStatus)
        if f.value.status == 'ERROR':
            log.msg('{server_id} errored, deleting and creating new server instead',
                    server_id=f.value.server_id)
            # trigger server delete and return True to allow retry
            verified_delete(log, server_endpoint, auth_token, f.value.server_id)
            return True
        else:
            return False

    d = retry(_create_server, can_retry=compose_retries(retry_times(3), check_error),
              next_interval=repeating_interval(15), clock=clock)

    return d