Example 1
def node_feed_req(lb_id, node_id, response):
    """
    Return an (intent, performer) tuple for getting a CLB node's feed,
    wrapped with a retry intent.

    :param lb_id: Load balancer ID
    :param node_id: LB node ID
    :param response: The response returned when getting the CLB node feed.
        It is either a string containing the feed or an Exception object
        that will be raised when getting the feed.

    :return: (intent, performer) tuple
    """
    if isinstance(response, Exception):

        def handler(i):
            raise response
    else:

        def handler(i):
            return response

    return (Retry(effect=mock.ANY,
                  should_retry=ShouldDelayAndRetry(
                      can_retry=retry_times(5),
                      next_interval=exponential_backoff_interval(2))),
            nested_sequence([(("gcnf", lb_id, node_id), handler)]))
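How such a tuple is consumed is easiest to see at a call site. A minimal sketch, assuming the `effect` library's test helper and a hypothetical `get_clb_node_feed` effect under test (both the effect function and the feed value are illustrative, not taken from the examples here):

from effect.testing import perform_sequence

def test_node_feed(self):
    # node_feed_req yields one (intent, performer) pair; perform_sequence
    # checks that the intents of the performed effect match the sequence
    # in order and feeds each intent to its paired performer.
    seq = [node_feed_req('lb1', 'node1', '<feed/>')]
    eff = get_clb_node_feed('lb1', 'node1')  # hypothetical effect under test
    self.assertEqual(perform_sequence(seq, eff), '<feed/>')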
Example 2
def lb_req(url, json_response, response):
    """
    Return a SequenceDispatcher two-tuple that matches a service request to a
    particular load balancer endpoint (using GET), and returns the given
    ``response`` as the content in an HTTP 200 ``StubResponse``.
    """
    if isinstance(response, Exception):
        def handler(i): raise response
        log_seq = []
    else:
        def handler(i): return (StubResponse(200, {}), response)
        log_seq = [(Log(mock.ANY, mock.ANY), lambda i: None)]
    return (
        Retry(
            effect=mock.ANY,
            should_retry=ShouldDelayAndRetry(
                can_retry=retry_times(5),
                next_interval=exponential_backoff_interval(2))
        ),
        nested_sequence([
            (service_request(
                ServiceType.CLOUD_LOAD_BALANCERS,
                'GET', url, json_response=json_response).intent,
             handler)
        ] + log_seq)
    )
Example 3
def node_feed_req(lb_id, node_id, response):
    """
    Return an (intent, performer) tuple for getting a CLB node's feed,
    wrapped with a retry intent.

    :param lb_id: Load balancer ID
    :param node_id: LB node ID
    :param response: The response returned when getting the CLB node feed.
        It is either a string containing the feed or an Exception object
        that will be raised when getting the feed.

    :return: (intent, performer) tuple
    """
    if isinstance(response, Exception):
        def handler(i): raise response
    else:
        def handler(i): return response
    return (
        Retry(
            effect=mock.ANY,
            should_retry=ShouldDelayAndRetry(
                can_retry=retry_times(5),
                next_interval=exponential_backoff_interval(2))
        ),
        nested_sequence([(("gcnf", lb_id, node_id), handler)])
    )
Example 4
 def test_retry_times(self):
     """
     `retry_times` returns a function that will retry the given number
     of times.
     """
     can_retry = retry_times(3)
     for exception in (DummyException(), NotImplementedError(), ValueError()):
         self.assertTrue(can_retry(Failure(exception)))
     self.assertFalse(can_retry(Failure(DummyException())))
Example 5
 def test_retry_times(self):
     """
     `retry_times` returns a function that will retry the given number
     of times.
     """
     can_retry = retry_times(3)
     for exception in (DummyException(), NotImplementedError(),
                       ValueError()):
         self.assertTrue(can_retry(Failure(exception)))
     self.assertFalse(can_retry(Failure(DummyException())))
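The two tests above pin down the contract of `retry_times`: the returned predicate answers True for the first N failures and False afterwards. A minimal sketch with those semantics (otter's actual implementation may differ in detail):

def retry_times(max_retries):
    """Return a can_retry function that allows up to max_retries retries."""
    attempts = [0]  # mutable cell so the closure can count calls

    def can_retry(failure):
        attempts[0] += 1
        return attempts[0] <= max_retries

    return can_retry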
Example 6
 def authenticate_tenant(self, tenant_id, log=None):
     """
     see :meth:`IAuthenticator.authenticate_tenant`
     """
     return retry(
         partial(self._authenticator.authenticate_tenant, tenant_id, log=log),
         can_retry=retry_times(self._max_retries),
         next_interval=repeating_interval(self._retry_interval),
         clock=self._reactor)
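`repeating_interval`, going by its name and its usage here, yields the same delay before every retry. A hedged sketch of that behaviour (not otter's actual source):

def repeating_interval(interval):
    """Return a next_interval function that always waits `interval` seconds."""
    def next_interval(failure):
        return interval
    return next_interval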
Example 7
def convergence_remove_server_from_group(log, transaction_id, server_id,
                                         replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot
        be deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(
        TenantScope(
            retry_effect(eff, retry_times(3), exponential_backoff_interval(2)),
            group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
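`parallel_all_errors` runs the check effects concurrently and, instead of failing fast, returns one `(is_error, result)` pair per effect, where `result` is an `exc_info` tuple when `is_error` is true; that is why the loop can `reraise(*result)` in declaration order, giving `ServerNotFoundError` precedence over `CannotDeleteServerBelowMinError`.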
Example 8
def remove_from_load_balancer(log, endpoint, auth_token, loadbalancer_id,
                              node_id, clock=None):
    """
    Remove a node from a load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param str loadbalancer_id: The ID for a cloud load balancer.
    :param str node_id: The ID for a node in that cloud load balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Remove this once the LB ERROR state is fixed and working.
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id), 'nodes', str(node_id))

    def check_422_deleted(failure):
        # A LB being deleted sometimes results in a 422.  This function
        # unfortunately has to parse the body of the message to see if this is an
        # acceptable 422 (if the LB has been deleted or the node has already been
        # removed, then 'removing from load balancer' as a task should be
        # successful - if the LB is in ERROR, then nothing more can be done to
        # it except resetting it - may as well remove the server.)
        failure.trap(APIError)
        error = failure.value
        if error.code == 422:
            message = json.loads(error.body)['message']
            if ('load balancer is deleted' not in message and
                    'PENDING_DELETE' not in message):
                return failure
            lb_log.msg(message)
        else:
            return failure

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)

        # Success is 200/202.  An LB not being found is 404.  A node not being
        # found is a 404.  But a deleted LB sometimes results in a 422.
        d.addCallback(log_on_response_code, lb_log, 'Node to delete does not exist', 404)
        d.addCallback(check_success, [200, 202, 404])
        d.addCallback(treq.content)  # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addErrback(check_422_deleted)
        d.addErrback(log_lb_unexpected_errors, path, lb_log, 'remove_node')
        return d

    d = retry(
        remove,
        can_retry=retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
Example 9
 def authenticate_tenant(self, tenant_id, log=None):
     """
     see :meth:`IAuthenticator.authenticate_tenant`
     """
     return retry(partial(self._authenticator.authenticate_tenant,
                          tenant_id,
                          log=log),
                  can_retry=retry_times(self._max_retries),
                  next_interval=repeating_interval(self._retry_interval),
                  clock=self._reactor)
Example 10
def add_to_load_balancer(log, endpoint, auth_token, lb_config, ip_address, undo, clock=None):
    """
    Add an IP address to a load balancer based on the lb_config.

    TODO: Handle load balancer node metadata.

    :param log: A bound logger
    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param dict lb_config: An lb_config dictionary.
    :param str ip_address: The IP Address of the node to add to the load
        balancer.
    :param IUndoStack undo: An IUndoStack to push any reversible operations onto.

    :return: Deferred that fires with the Add Node to load balancer response
        as a dict.
    """
    lb_id = lb_config['loadBalancerId']
    port = lb_config['port']
    path = append_segments(endpoint, 'loadbalancers', str(lb_id), 'nodes')
    lb_log = log.bind(loadbalancer_id=lb_id, ip_address=ip_address)

    def add():
        d = treq.post(path, headers=headers(auth_token),
                      data=json.dumps({"nodes": [{"address": ip_address,
                                                  "port": port,
                                                  "condition": "ENABLED",
                                                  "type": "PRIMARY"}]}),
                      log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addErrback(log_lb_unexpected_errors, lb_log, 'add_node')
        d.addErrback(wrap_request_error, path, 'add_node')
        d.addErrback(check_deleted_clb, lb_id)
        return d

    d = retry(
        add,
        can_retry=compose_retries(
            transient_errors_except(CLBOrNodeDeleted),
            retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES)),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)

    def when_done(result):
        lb_log.msg('Added to load balancer', node_id=result['nodes'][0]['id'])
        undo.push(remove_from_load_balancer,
                  lb_log,
                  endpoint,
                  auth_token,
                  lb_id,
                  result['nodes'][0]['id'])
        return result

    return d.addCallback(treq.json_content).addCallback(when_done)
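Two helpers carry the retry policy above: `compose_retries` combines several can_retry predicates, retrying only when all of them agree, and `transient_errors_except` treats every failure as retryable unless it is one of the listed exception types. A sketch under those assumed semantics:

def compose_retries(*can_retry_fns):
    """Retry only if every composed predicate considers the failure retryable."""
    def can_retry(failure):
        return all(fn(failure) for fn in can_retry_fns)
    return can_retry

def transient_errors_except(*terminal_types):
    """Treat all errors as transient except the given terminal types."""
    def can_retry(failure):
        # failure.check returns the matched class or None
        return not failure.check(*terminal_types)
    return can_retry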
Example 11
def convergence_remove_server_from_group(
        log, transaction_id, server_id, replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot
        be deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(TenantScope(
        retry_effect(eff, retry_times(3), exponential_backoff_interval(2)),
        group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
Example 12
    def as_effect(self):
        """Produce a :obj:`Effect` to delete a server."""

        eff = retry_effect(
            delete_and_verify(self.server_id), can_retry=retry_times(3),
            next_interval=exponential_backoff_interval(2))

        def report_success(result):
            return StepResult.RETRY, [
                ErrorReason.String(
                    'must re-gather after deletion in order to update the '
                    'active cache')]

        return eff.on(success=report_success)
Example 13
def _remove_from_clb(log,
                     endpoint,
                     auth_token,
                     loadbalancer_id,
                     node_id,
                     clock=None):
    """
    Remove a node from a CLB load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone authentication token.
    :param str loadbalancer_id: The ID for a Cloud Load Balancer.
    :param str node_id: The ID for a node in that Cloud Load Balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Remove this once the LB ERROR state is fixed and working.
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id),
                           'nodes', str(node_id))

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)
        d.addCallback(check_success, [200, 202])
        # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addCallback(treq.content)
        d.addErrback(log_lb_unexpected_errors, lb_log, 'remove_node')
        d.addErrback(wrap_request_error, path, 'remove_node')
        d.addErrback(check_deleted_clb, loadbalancer_id, node_id)
        return d

    d = retry(remove,
              can_retry=compose_retries(
                  transient_errors_except(CLBOrNodeDeleted),
                  retry_times(
                      config_value('worker.lb_max_retries')
                      or LB_MAX_RETRIES)),
              next_interval=random_interval(
                  *(config_value('worker.lb_retry_interval_range')
                    or LB_RETRY_INTERVAL_RANGE)),
              clock=clock)

    # A node or CLB deleted is considered successful removal
    d.addErrback(
        lambda f: f.trap(CLBOrNodeDeleted) and lb_log.msg(f.value.message))
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
Example 14
 def test_retry_sequence_fails_if_mismatch_sequence(self):
     """
     Fail if the wrong number of performers is given.
     """
     r = Retry(
         effect=Effect(1),
         should_retry=ShouldDelayAndRetry(
             can_retry=retry_times(5),
             next_interval=repeating_interval(10)))
     seq = [
         retry_sequence(r, [lambda _: raise_(Exception()),
                            lambda _: raise_(Exception())])
     ]
     self.assertRaises(AssertionError,
                       perform_sequence, seq, Effect(r))
Example 15
 def test_retry_sequence_retries_without_delays(self):
     """
     Perform the wrapped effect with the performers given,
     without any delay even if the original intent had a delay.
     """
     r = Retry(
         effect=Effect(1),
         should_retry=ShouldDelayAndRetry(
             can_retry=retry_times(5),
             next_interval=repeating_interval(10)))
     seq = [
         retry_sequence(r, [lambda _: raise_(Exception()),
                            lambda _: raise_(Exception()),
                            lambda _: "yay done"])
     ]
     self.assertEqual(perform_sequence(seq, Effect(r)), "yay done")
Example 16
def add_event(event, admin_tenant_id, region, log):
    """
    Add event to cloud feeds
    """
    event, error, timestamp, event_tenant_id, event_id = sanitize_event(event)
    req = prepare_request(request_format, event, error, timestamp, region,
                          event_tenant_id, event_id)

    eff = retry_effect(
        publish_to_cloudfeeds(req, log=log),
        compose_retries(
            lambda f: (not f.check(APIError) or
                       f.value.code < 400 or
                       f.value.code >= 500),
            retry_times(5)),
        exponential_backoff_interval(2))
    return Effect(TenantScope(tenant_id=admin_tenant_id, effect=eff))
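The inline lambda above encodes "retry unless the failure is an APIError with a 4xx status": non-APIError failures and 5xx responses are treated as transient, while client errors are permanent. Restated as a named predicate for readability (equivalent to the lambda, not a new API):

def _should_retry_publish(failure):
    # Retry anything that is not an APIError, and any APIError whose
    # status code falls outside the 4xx range.
    if not failure.check(APIError):
        return True
    return failure.value.code < 400 or failure.value.code >= 500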
Example 17
def add_event(event, admin_tenant_id, region, log):
    """
    Add event to cloud feeds
    """
    event, error, timestamp, event_tenant_id, event_id = sanitize_event(event)
    req = prepare_request(request_format, event, error, timestamp, region,
                          event_tenant_id, event_id)

    eff = retry_effect(
        publish_autoscale_event(req, log=log),
        compose_retries(
            lambda f: (not f.check(APIError) or
                       f.value.code < 400 or
                       f.value.code >= 500),
            retry_times(5)),
        exponential_backoff_interval(2))
    return Effect(TenantScope(tenant_id=admin_tenant_id, effect=eff))
Example 18
def verified_delete(log,
                    server_endpoint,
                    request_bag,
                    server_id,
                    exp_start=2,
                    max_retries=10,
                    clock=None):
    """
    Attempt to delete a server from the server endpoint, and ensure that it is
    deleted by trying again until deleting/getting the server results in a 404
    or until ``OS-EXT-STS:task_state`` in server details is 'deleting',
    indicating that Nova has acknowledged that the server is to be deleted
    as soon as possible.

    Time out attempting to verify deletes after a period of time and log an
    error.

    :param log: A bound logger.
    :param str server_endpoint: Server endpoint URI.
    :param request_bag: An object with a bunch of useful data on it,
        including a callable to re-auth and get a new token.
    :param str server_id: Opaque nova server id.
    :param int exp_start: Exponential backoff interval start seconds. Default 2
    :param int max_retries: Maximum number of retry attempts

    :return: Deferred that fires when the expected status has been seen.
    """
    serv_log = log.bind(server_id=server_id)
    serv_log.msg('Deleting server')

    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    d = retry(
        partial(delete_and_verify, serv_log, server_endpoint, request_bag,
                server_id, clock),
        can_retry=retry_times(max_retries),
        next_interval=exponential_backoff_interval(exp_start),
        clock=clock)

    d.addCallback(log_with_time, clock, serv_log, clock.seconds(),
                  ('Server deleted successfully (or acknowledged by Nova as '
                   'to-be-deleted) : {time_delete} seconds.'), 'time_delete')
    return d
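`exponential_backoff_interval(exp_start)` presumably produces delays that grow exponentially from `exp_start`. One plausible reading, doubling the wait on each failed attempt (the exact growth rule is an assumption, not something these examples confirm):

def exponential_backoff_interval(start):
    """Return a next_interval function whose delay doubles per attempt."""
    attempts = [0]

    def next_interval(failure):
        attempts[0] += 1
        return start * 2 ** (attempts[0] - 1)  # start, 2*start, 4*start, ...

    return next_interval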
Example 19
def verified_delete(log,
                    server_endpoint,
                    request_bag,
                    server_id,
                    exp_start=2,
                    max_retries=10,
                    clock=None):
    """
    Attempt to delete a server from the server endpoint, and ensure that it is
    deleted by trying again until deleting/getting the server results in a 404
    or until ``OS-EXT-STS:task_state`` in server details is 'deleting',
    indicating that Nova has acknowledged that the server is to be deleted
    as soon as possible.

    Time out attempting to verify deletes after a period of time and log an
    error.

    :param log: A bound logger.
    :param str server_endpoint: Server endpoint URI.
    :param request_bag: An object with a bunch of useful data on it,
        including a callable to re-auth and get a new token.
    :param str server_id: Opaque nova server id.
    :param int exp_start: Exponential backoff interval start seconds. Default 2
    :param int max_retries: Maximum number of retry attempts

    :return: Deferred that fires when the expected status has been seen.
    """
    serv_log = log.bind(server_id=server_id)
    serv_log.msg('Deleting server')

    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    d = retry(partial(delete_and_verify, serv_log, server_endpoint,
                      request_bag, server_id, clock),
              can_retry=retry_times(max_retries),
              next_interval=exponential_backoff_interval(exp_start),
              clock=clock)

    d.addCallback(log_with_time, clock, serv_log, clock.seconds(),
                  ('Server deleted successfully (or acknowledged by Nova as '
                   'to-be-deleted) : {time_delete} seconds.'), 'time_delete')
    return d
Example 20
def _is_server_in_group(group, server_id):
    """
    Given a group and server ID, determines if the server is a member of
    the group.  If it isn't, it raises a :class:`ServerNotFoundError`.
    """
    try:
        response, server_info = yield Effect(
            TenantScope(
                retry_effect(get_server_details(server_id), retry_times(3),
                             exponential_backoff_interval(2)),
                group.tenant_id))
    except NoSuchServerError:
        raise ServerNotFoundError(group.tenant_id, group.uuid, server_id)

    group_id = group_id_from_metadata(
        get_in(('server', 'metadata'), server_info, {}))

    if group_id != group.uuid:
        raise ServerNotFoundError(group.tenant_id, group.uuid, server_id)
Example 21
def _is_server_in_group(group, server_id):
    """
    Given a group and server ID, determines if the server is a member of
    the group.  If it isn't, it raises a :class:`ServerNotFoundError`.
    """
    try:
        response, server_info = yield Effect(
            TenantScope(
                retry_effect(get_server_details(server_id), retry_times(3), exponential_backoff_interval(2)),
                group.tenant_id,
            )
        )
    except NoSuchServerError:
        raise ServerNotFoundError(group.tenant_id, group.uuid, server_id)

    group_id = group_id_from_metadata(get_in(("server", "metadata"), server_info, {}))

    if group_id != group.uuid:
        raise ServerNotFoundError(group.tenant_id, group.uuid, server_id)
Example 22
    def test_delete_server(self, mock_dav):
        """
        :obj:`DeleteServer.as_effect` calls `delete_and_verify` with
        retries. It returns RETRY on success so that the active cache is
        re-gathered after deletion.
        """
        mock_dav.side_effect = lambda sid: Effect(sid)
        eff = DeleteServer(server_id='abc123').as_effect()
        self.assertIsInstance(eff.intent, Retry)
        self.assertEqual(
            eff.intent.should_retry,
            ShouldDelayAndRetry(can_retry=retry_times(3),
                                next_interval=exponential_backoff_interval(2)))
        self.assertEqual(eff.intent.effect.intent, 'abc123')

        self.assertEqual(
            resolve_effect(eff, (None, {})),
            (StepResult.RETRY,
             [ErrorReason.String('must re-gather after deletion in order to '
                                 'update the active cache')]))
Example 23
def remove_from_load_balancer(log, endpoint, auth_token, loadbalancer_id,
                              node_id, clock=None):
    """
    Remove a node from a load balancer.

    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param str loadbalancer_id: The ID for a cloud load balancer.
    :param str node_id: The ID for a node in that cloud load balancer.

    :returns: A Deferred that fires with None if the operation completed successfully,
        or errbacks with a RequestError.
    """
    lb_log = log.bind(loadbalancer_id=loadbalancer_id, node_id=node_id)
    # TODO: Remove this once the LB ERROR state is fixed and working.
    lb_log.msg('Removing from load balancer')
    path = append_segments(endpoint, 'loadbalancers', str(loadbalancer_id), 'nodes', str(node_id))

    def remove():
        d = treq.delete(path, headers=headers(auth_token), log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addCallback(treq.content)  # To avoid https://twistedmatrix.com/trac/ticket/6751
        d.addErrback(log_lb_unexpected_errors, lb_log, 'remove_node')
        d.addErrback(wrap_request_error, path, 'remove_node')
        d.addErrback(check_deleted_clb, loadbalancer_id, node_id)
        return d

    d = retry(
        remove,
        can_retry=compose_retries(
            transient_errors_except(CLBOrNodeDeleted),
            retry_times(config_value('worker.lb_max_retries') or LB_MAX_RETRIES)),
        next_interval=random_interval(
            *(config_value('worker.lb_retry_interval_range') or LB_RETRY_INTERVAL_RANGE)),
        clock=clock)

    # A node or CLB deleted is considered successful removal
    d.addErrback(lambda f: f.trap(CLBOrNodeDeleted) and lb_log.msg(f.value.message))
    d.addCallback(lambda _: lb_log.msg('Removed from load balancer'))
    return d
Example 24
    def test_fallback(self):
        """
        Accept a ``fallback`` dispatcher that will be used if a performer
        returns an effect for an intent that is not covered by the base
        dispatcher.
        """
        def dispatch_2(intent):
            if intent == 2:
                return sync_performer(lambda d, i: "yay done")

        r = Retry(
            effect=Effect(1),
            should_retry=ShouldDelayAndRetry(
                can_retry=retry_times(5),
                next_interval=repeating_interval(10)))

        seq = [
            retry_sequence(r, [lambda _: Effect(2)],
                           fallback_dispatcher=ComposedDispatcher(
                               [dispatch_2, base_dispatcher]))
        ]
        self.assertEqual(perform_sequence(seq, Effect(r)), "yay done")
Example 25
def get_all_server_details(tenant_id, authenticator, service_name, region,
                           limit=100, clock=None, _treq=None):
    """
    Return all servers of a tenant.

    TODO: service_name is possibly internal to this function, but I don't
        want to pass config here.
    NOTE: This really screams to be an independent txcloud-type API.
    """
    token, catalog = yield authenticator.authenticate_tenant(tenant_id, log=default_log)
    endpoint = public_endpoint_url(catalog, service_name, region)
    url = append_segments(endpoint, 'servers', 'detail')
    query = {'limit': limit}
    all_servers = []

    if clock is None:  # pragma: no cover
        from twisted.internet import reactor as clock

    if _treq is None:  # pragma: no cover
        _treq = treq

    def fetch(url, headers):
        d = _treq.get(url, headers=headers)
        d.addCallback(check_success, [200], _treq=_treq)
        d.addCallback(_treq.json_content)
        return d

    while True:
        # sort based on query name to make the tests predictable
        urlparams = sorted(query.items(), key=lambda e: e[0])
        d = retry(partial(fetch, '{}?{}'.format(url, urlencode(urlparams)), headers(token)),
                  can_retry=retry_times(5),
                  next_interval=exponential_backoff_interval(2), clock=clock)
        servers = (yield d)['servers']
        all_servers.extend(servers)
        if len(servers) < limit:
            break
        query.update({'marker': servers[-1]['id']})

    defer.returnValue(all_servers)
Example 26
def launch_server(log,
                  request_bag,
                  scaling_group,
                  launch_config,
                  undo,
                  clock=None):
    """
    Launch a new server given the launch config, auth tokens, and service
    catalog, possibly adding the newly launched server to a load balancer.

    :param BoundLog log: A bound logger.
    :param request_bag: An object with a bunch of useful data on it, including
        a callable to re-auth and get a new token.
    :param IScalingGroup scaling_group: The scaling group to add the launched
        server to.
    :param dict launch_config: A launch_config args structure as defined for
        the launch_server_v1 type.
    :param IUndoStack undo: The stack that will be rewound if undo fails.

    :return: Deferred that fires with a 2-tuple of server details and the
        list of load balancer responses from add_to_load_balancers.
    """
    launch_config = prepare_launch_config(scaling_group.uuid, launch_config)

    cloudServersOpenStack = config_value('cloudServersOpenStack')
    server_endpoint = public_endpoint_url(request_bag.service_catalog,
                                          cloudServersOpenStack,
                                          request_bag.region)

    lb_config = launch_config.get('loadBalancers', [])
    server_config = launch_config['server']

    log = log.bind(server_name=server_config['name'])
    ilog = [None]

    def check_metadata(server):
        # sanity check to make sure the metadata didn't change - can probably
        # be removed after a while if we do not see any log messages from this
        # function
        expected = launch_config['server']['metadata']
        result = server['server'].get('metadata')
        if result != expected:
            ilog[0].msg('Server metadata has changed.',
                        sanity_check=True,
                        expected_metadata=expected,
                        nova_metadata=result)
        return server

    def wait_for_server(server, new_request_bag):
        server_id = server['server']['id']

        # NOTE: If server create is retried, each server delete will be
        # pushed onto the undo stack even if the server was already deleted
        # in check_error, which is fine since verified_delete succeeds on a
        # deleted server
        undo.push(verified_delete, log, server_endpoint, new_request_bag,
                  server_id)

        ilog[0] = log.bind(server_id=server_id)
        return wait_for_active(ilog[0], server_endpoint,
                               new_request_bag.auth_token,
                               server_id).addCallback(check_metadata)

    def add_lb(server, new_request_bag):
        if lb_config:
            lbd = add_to_load_balancers(ilog[0], new_request_bag, lb_config,
                                        server, undo)
            lbd.addCallback(lambda lb_response: (server, lb_response))
            return lbd

        return (server, [])

    def _real_create_server(new_request_bag):
        auth_token = new_request_bag.auth_token
        d = create_server(server_endpoint, auth_token, server_config, log=log)
        d.addCallback(wait_for_server, new_request_bag)
        d.addCallback(add_lb, new_request_bag)
        return d

    def _create_server():
        return request_bag.re_auth().addCallback(_real_create_server)

    def check_error(f):
        f.trap(UnexpectedServerStatus)
        if f.value.status == 'ERROR':
            log.msg(
                '{server_id} errored, deleting and creating new '
                'server instead',
                server_id=f.value.server_id)
            # trigger server delete and return True to allow retry
            verified_delete(log, server_endpoint, request_bag,
                            f.value.server_id)
            return True
        else:
            return False

    d = retry(_create_server,
              can_retry=compose_retries(retry_times(3), check_error),
              next_interval=repeating_interval(15),
              clock=clock)

    return d
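Note that `check_error` doubles as a retry predicate: composed with `retry_times(3)`, a create is retried only while fewer than three attempts have been made and `check_error` returns True, which it does only for a server that landed in ERROR (after scheduling that server's deletion).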
Example 27
def add_to_clb(log,
               endpoint,
               auth_token,
               lb_config,
               ip_address,
               undo,
               clock=None):
    """
    Add an IP address to a Cloud Load Balancer based on the ``lb_config``.

    TODO: Handle load balancer node metadata.

    :param log: A bound logger
    :param str endpoint: Load balancer endpoint URI.
    :param str auth_token: Keystone auth token.
    :param dict lb_config: An ``lb_config`` dictionary.
    :param str ip_address: The IP address of the node to add to the load
        balancer.
    :param IUndoStack undo: An IUndoStack to push any reversible operations
        onto.

    :return: Deferred that fires with the load balancer response.
    """
    lb_id = lb_config['loadBalancerId']
    port = lb_config['port']
    path = append_segments(endpoint, 'loadbalancers', str(lb_id), 'nodes')
    lb_log = log.bind(loadbalancer_id=lb_id, ip_address=ip_address)

    def add():
        d = treq.post(path,
                      headers=headers(auth_token),
                      data=json.dumps({
                          "nodes": [{
                              "address": ip_address,
                              "port": port,
                              "condition": "ENABLED",
                              "type": "PRIMARY"
                          }]
                      }),
                      log=lb_log)
        d.addCallback(check_success, [200, 202])
        d.addErrback(log_lb_unexpected_errors, lb_log, 'add_node')
        d.addErrback(wrap_request_error, path, 'add_node')
        d.addErrback(check_deleted_clb, lb_id)
        return d

    d = retry(add,
              can_retry=compose_retries(
                  transient_errors_except(CLBOrNodeDeleted),
                  retry_times(
                      config_value('worker.lb_max_retries')
                      or LB_MAX_RETRIES)),
              next_interval=random_interval(
                  *(config_value('worker.lb_retry_interval_range')
                    or LB_RETRY_INTERVAL_RANGE)),
              clock=clock)

    def when_done(result):
        node_id = result['nodes'][0]['id']
        lb_log.msg('Added to load balancer', node_id=node_id)
        undo.push(_remove_from_clb, lb_log, endpoint, auth_token, lb_id,
                  node_id)
        return result

    return d.addCallback(treq.json_content).addCallback(when_done)
Example 28
def create_server(server_endpoint,
                  auth_token,
                  server_config,
                  log=None,
                  clock=None,
                  retries=3,
                  create_failure_delay=5,
                  _treq=None):
    """
    Create a new server.  If there is an error from Nova from this call,
    checks to see if the server was created anyway.  If not, will retry the
    create ``retries`` times (checking each time if a server was created).

    If the error from Nova is a 400, does not retry, because that implies that
    retrying will just result in another 400 (bad args).

    If checking to see if the server is created also results in a failure,
    does not retry because there might just be something wrong with Nova.

    :param str server_endpoint: Server endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param dict server_config: Nova server config.
    :param int retries: Number of times to retry the create.
    :param int create_failure_delay: How much time in seconds to wait after
        a create server failure before checking Nova to see if a server
        was created

    :param log: logger
    :type log: :class:`otter.log.bound.BoundLog`

    :param _treq: To be used for testing - what treq object to use
    :type _treq: something with the same API as :obj:`treq`

    :return: Deferred that fires with the CreateServer response as a dict.
    """
    path = append_segments(server_endpoint, 'servers')

    if _treq is None:  # pragma: no cover
        _treq = treq
    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    def _check_results(result, propagated_f):
        """
        Return the original failure, if checking a server resulted in a
        failure too.  Returns a wrapped propagated failure, if there were no
        servers created, so that the retry utility knows that server creation
        can be retried.
        """
        if isinstance(result, Failure):
            log.msg(
                "Attempt to find a created server in nova resulted in "
                "{failure}. Propagating the original create error instead.",
                failure=result)
            return propagated_f

        if result is None:
            raise _NoCreatedServerFound(propagated_f)

        return result

    def _check_server_created(f):
        """
        If creating a server failed with anything other than a 400, see if
        Nova created a server anyway (a 400 means that the server creation args
        were bad, and there is no point in retrying).

        If Nova created a server, just return it and pretend that the error
        never happened.  If it didn't, or if checking resulted in another
        failure response, return a failure of some type.
        """
        f.trap(APIError)
        if f.value.code == 400:
            return f

        d = deferLater(clock,
                       create_failure_delay,
                       find_server,
                       server_endpoint,
                       auth_token,
                       server_config,
                       log=log)
        d.addBoth(_check_results, f)
        return d

    def _create_with_delay(to_delay):
        d = _treq.post(path,
                       headers=headers(auth_token),
                       data=json.dumps({'server': server_config}),
                       log=log)
        if to_delay:
            # Add 1 second delay to space 1 second between server creations
            d.addCallback(delay, clock, 1)
        return d

    def _create_server():
        """
        Attempt to create a server, handling spurious non-400 errors from Nova
        by seeing if Nova created a server anyway in spite of the error.  If so
        then create server succeeded.

        If not, and if no further errors occur, server creation can be retried.
        """
        sem = get_sempahore("create_server", "worker.create_server_limit")
        if sem is not None:
            d = sem.run(_create_with_delay, True)
        else:
            d = _create_with_delay(False)
        d.addCallback(check_success, [202], _treq=_treq)
        d.addCallback(_treq.json_content)
        d.addErrback(_check_server_created)
        return d

    def _unwrap_NoCreatedServerFound(f):
        """
        The original failure was wrapped in a :class:`_NoCreatedServerFound`
        for ease of retry, but that should not be the final error propagated up
        by :func:`create_server`.

        This errback unwraps the :class:`_NoCreatedServerFound` error and
        returns the original failure.
        """
        f.trap(_NoCreatedServerFound)
        return f.value.original

    d = retry(_create_server,
              can_retry=compose_retries(
                  retry_times(retries),
                  terminal_errors_except(_NoCreatedServerFound)),
              next_interval=repeating_interval(15),
              clock=clock)

    d.addErrback(_unwrap_NoCreatedServerFound)
    d.addErrback(wrap_request_error, path, 'server_create')

    return d
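`terminal_errors_except` reads as the mirror image of `transient_errors_except`: every failure is terminal (no retry) unless it is one of the given types, so here only the synthetic `_NoCreatedServerFound` triggers another create attempt. A sketch under that assumed contract:

def terminal_errors_except(*transient_types):
    """Treat all errors as terminal except the given transient types."""
    def can_retry(failure):
        return failure.check(*transient_types) is not None
    return can_retry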
Example 29
def launch_server(log, request_bag, scaling_group, launch_config, undo,
                  clock=None):
    """
    Launch a new server given the launch config, auth tokens, and service
    catalog, possibly adding the newly launched server to a load balancer.

    :param BoundLog log: A bound logger.
    :param request_bag: An object with a bunch of useful data on it, including
        a callable to re-auth and get a new token.
    :param IScalingGroup scaling_group: The scaling group to add the launched
        server to.
    :param dict launch_config: A launch_config args structure as defined for
        the launch_server_v1 type.
    :param IUndoStack undo: The stack that will be rewound if undo fails.

    :return: Deferred that fires with a 2-tuple of server details and the
        list of load balancer responses from add_to_load_balancers.
    """
    launch_config = prepare_launch_config(scaling_group.uuid, launch_config)

    cloudServersOpenStack = config_value('cloudServersOpenStack')
    server_endpoint = public_endpoint_url(request_bag.service_catalog,
                                          cloudServersOpenStack,
                                          request_bag.region)

    lb_config = launch_config.get('loadBalancers', [])
    server_config = launch_config['server']

    log = log.bind(server_name=server_config['name'])
    ilog = [None]

    def check_metadata(server):
        # sanity check to make sure the metadata didn't change - can probably
        # be removed after a while if we do not see any log messages from this
        # function
        expected = launch_config['server']['metadata']
        result = server['server'].get('metadata')
        if result != expected:
            ilog[0].msg('Server metadata has changed.',
                        sanity_check=True,
                        expected_metadata=expected,
                        nova_metadata=result)
        return server

    def wait_for_server(server, new_request_bag):
        server_id = server['server']['id']

        # NOTE: If server create is retried, each server delete will be
        # pushed onto the undo stack even if the server was already deleted
        # in check_error, which is fine since verified_delete succeeds on a
        # deleted server
        undo.push(
            verified_delete, log, server_endpoint, new_request_bag, server_id)

        ilog[0] = log.bind(server_id=server_id)
        return wait_for_active(
            ilog[0],
            server_endpoint,
            new_request_bag.auth_token,
            server_id).addCallback(check_metadata)

    def add_lb(server, new_request_bag):
        if lb_config:
            lbd = add_to_load_balancers(
                ilog[0], new_request_bag, lb_config, server, undo)
            lbd.addCallback(lambda lb_response: (server, lb_response))
            return lbd

        return (server, [])

    def _real_create_server(new_request_bag):
        auth_token = new_request_bag.auth_token
        d = create_server(server_endpoint, auth_token, server_config, log=log)
        d.addCallback(wait_for_server, new_request_bag)
        d.addCallback(add_lb, new_request_bag)
        return d

    def _create_server():
        return request_bag.re_auth().addCallback(_real_create_server)

    def check_error(f):
        f.trap(UnexpectedServerStatus)
        if f.value.status == 'ERROR':
            log.msg('{server_id} errored, deleting and creating new '
                    'server instead', server_id=f.value.server_id)
            # trigger server delete and return True to allow retry
            verified_delete(log, server_endpoint, request_bag,
                            f.value.server_id)
            return True
        else:
            return False

    d = retry(_create_server,
              can_retry=compose_retries(retry_times(3), check_error),
              next_interval=repeating_interval(15), clock=clock)

    return d
Example 30
def _retry(eff):
    """Retry an effect with a common policy."""
    return retry_effect(
        eff, retry_times(5), exponential_backoff_interval(2))
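Wrapping then becomes a one-liner at each call site, e.g. `eff = _retry(service_request(...))` for any effect that should share the five-attempt, exponential-backoff policy.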
Example 31
def launch_server(log, region, scaling_group, service_catalog, auth_token,
                  launch_config, undo, clock=None):
    """
    Launch a new server given the launch config, auth tokens, and service
    catalog, possibly adding the newly launched server to a load balancer.

    :param BoundLog log: A bound logger.
    :param str region: A rackspace region as found in the service catalog.
    :param IScalingGroup scaling_group: The scaling group to add the launched
        server to.
    :param list service_catalog: A list of services as returned by the auth apis.
    :param str auth_token: The user's auth token.
    :param dict launch_config: A launch_config args structure as defined for
        the launch_server_v1 type.
    :param IUndoStack undo: The stack that will be rewound if undo fails.

    :return: Deferred that fires with a 2-tuple of server details and the
        list of load balancer responses from add_to_load_balancers.
    """
    launch_config = prepare_launch_config(scaling_group.uuid, launch_config)

    lb_region = config_value('regionOverrides.cloudLoadBalancers') or region
    cloudLoadBalancers = config_value('cloudLoadBalancers')
    cloudServersOpenStack = config_value('cloudServersOpenStack')

    lb_endpoint = public_endpoint_url(service_catalog,
                                      cloudLoadBalancers,
                                      lb_region)

    server_endpoint = public_endpoint_url(service_catalog,
                                          cloudServersOpenStack,
                                          region)

    lb_config = launch_config.get('loadBalancers', [])

    server_config = launch_config['server']

    log = log.bind(server_name=server_config['name'])
    ilog = [None]

    def wait_for_server(server):
        server_id = server['server']['id']

        # NOTE: If server create is retried, each server delete will be
        # pushed onto the undo stack even if the server was already deleted
        # in check_error, which is fine since verified_delete succeeds on a
        # deleted server
        undo.push(
            verified_delete, log, server_endpoint, auth_token, server_id)

        ilog[0] = log.bind(server_id=server_id)
        return wait_for_active(
            ilog[0],
            server_endpoint,
            auth_token,
            server_id)

    def add_lb(server):
        ip_address = private_ip_addresses(server)[0]
        lbd = add_to_load_balancers(
            ilog[0], lb_endpoint, auth_token, lb_config, ip_address, undo)
        lbd.addCallback(lambda lb_response: (server, lb_response))
        return lbd

    def _create_server():
        d = create_server(server_endpoint, auth_token, server_config, log=log)
        d.addCallback(wait_for_server)
        d.addCallback(add_lb)
        return d

    def check_error(f):
        f.trap(UnexpectedServerStatus)
        if f.value.status == 'ERROR':
            log.msg('{server_id} errored, deleting and creating new server instead',
                    server_id=f.value.server_id)
            # trigger server delete and return True to allow retry
            verified_delete(log, server_endpoint, auth_token, f.value.server_id)
            return True
        else:
            return False

    d = retry(_create_server, can_retry=compose_retries(retry_times(3), check_error),
              next_interval=repeating_interval(15), clock=clock)

    return d
Example 32
def create_server(server_endpoint, auth_token, server_config, log=None,
                  clock=None, retries=3, create_failure_delay=5, _treq=None):
    """
    Create a new server.  If there is an error from Nova from this call,
    checks to see if the server was created anyway.  If not, will retry the
    create ``retries`` times (checking each time if a server was created).

    If the error from Nova is a 400, does not retry, because that implies that
    retrying will just result in another 400 (bad args).

    If checking to see if the server is created also results in a failure,
    does not retry because there might just be something wrong with Nova.

    :param str server_endpoint: Server endpoint URI.
    :param str auth_token: Keystone Auth Token.
    :param dict server_config: Nova server config.
    :param int retries: Number of times to retry the create.
    :param int create_failure_delay: How much time in seconds to wait after
        a create server failure before checking Nova to see if a server
        was created

    :param log: logger
    :type log: :class:`otter.log.bound.BoundLog`

    :param _treq: To be used for testing - what treq object to use
    :type _treq: something with the same API as :obj:`treq`

    :return: Deferred that fires with the CreateServer response as a dict.
    """
    path = append_segments(server_endpoint, 'servers')

    if _treq is None:  # pragma: no cover
        _treq = treq
    if clock is None:  # pragma: no cover
        from twisted.internet import reactor
        clock = reactor

    def _check_results(result, propagated_f):
        """
        Return the original failure, if checking a server resulted in a
        failure too.  Returns a wrapped propagated failure, if there were no
        servers created, so that the retry utility knows that server creation
        can be retried.
        """
        if isinstance(result, Failure):
            log.msg("Attempt to find a created server in nova resulted in "
                    "{failure}. Propagating the original create error instead.",
                    failure=result)
            return propagated_f

        if result is None:
            raise _NoCreatedServerFound(propagated_f)

        return result

    def _check_server_created(f):
        """
        If creating a server failed with anything other than a 400, see if
        Nova created a server anyway (a 400 means that the server creation args
        were bad, and there is no point in retrying).

        If Nova created a server, just return it and pretend that the error
        never happened.  If it didn't, or if checking resulted in another
        failure response, return a failure of some type.
        """
        f.trap(APIError)
        if f.value.code == 400:
            return f

        d = deferLater(clock, create_failure_delay, find_server,
                       server_endpoint, auth_token, server_config, log=log)
        d.addBoth(_check_results, f)
        return d

    def _create_with_delay(to_delay):
        d = _treq.post(path, headers=headers(auth_token),
                       data=json.dumps({'server': server_config}), log=log)
        if to_delay:
            # Add 1 second delay to space 1 second between server creations
            d.addCallback(delay, clock, 1)
        return d

    def _create_server():
        """
        Attempt to create a server, handling spurious non-400 errors from Nova
        by seeing if Nova created a server anyway in spite of the error.  If so
        then create server succeeded.

        If not, and if no further errors occur, server creation can be retried.
        """
        sem = get_sempahore("create_server", "worker.create_server_limit")
        if sem is not None:
            d = sem.run(_create_with_delay, True)
        else:
            d = _create_with_delay(False)
        d.addCallback(check_success, [202], _treq=_treq)
        d.addCallback(_treq.json_content)
        d.addErrback(_check_server_created)
        return d

    def _unwrap_NoCreatedServerFound(f):
        """
        The original failure was wrapped in a :class:`_NoCreatedServerFound`
        for ease of retry, but that should not be the final error propagated up
        by :func:`create_server`.

        This errback unwraps the :class:`_NoCreatedServerFound` error and
        returns the original failure.
        """
        f.trap(_NoCreatedServerFound)
        return f.value.original

    d = retry(
        _create_server,
        can_retry=compose_retries(
            retry_times(retries),
            terminal_errors_except(_NoCreatedServerFound)),
        next_interval=repeating_interval(15), clock=clock)

    d.addErrback(_unwrap_NoCreatedServerFound)
    d.addErrback(wrap_request_error, path, 'server_create')

    return d
Example 33
def _retry(eff):
    """Retry an effect with a common policy."""
    return retry_effect(eff, retry_times(5), exponential_backoff_interval(2))