Example #1
def collect_metrics(reactor, config, log, client=None, authenticator=None,
                    _print=False):
    """
    Start collecting the metrics

    :param reactor: Twisted reactor
    :param dict config: Configuration loaded from a file, containing all the
        info needed to collect metrics
    :param :class:`silverberg.client.CQLClient` client:
        Optional Cassandra client. A new client will be created
        if this is not given, and disconnected before returning
    :param :class:`otter.auth.IAuthenticator` authenticator:
        Optional authenticator. A new authenticator will be created
        if this is not given
    :param bool _print: Should debug messages be printed to stdout?

    :return: :class:`Deferred` fired with ``list`` of `GroupMetrics`
    """
    _client = client or connect_cass_servers(reactor, config['cassandra'])
    authenticator = authenticator or generate_authenticator(reactor,
                                                            config['identity'])
    store = CassScalingGroupCollection(_client, reactor, 1000)
    dispatcher = get_dispatcher(reactor, authenticator, log,
                                get_service_configs(config), store)

    # calculate metrics on launch_server and non-paused groups
    groups = yield perform(dispatcher, Effect(GetAllValidGroups()))
    groups = [
        g for g in groups
        if json.loads(g["launch_config"]).get("type") == "launch_server" and
        (not g.get("paused", False))]
    tenanted_groups = groupby(lambda g: g["tenantId"], groups)
    group_metrics = yield get_all_metrics(
        dispatcher, tenanted_groups, log, _print=_print)

    # Add to cloud metrics
    metr_conf = config.get("metrics", None)
    if metr_conf is not None:
        eff = add_to_cloud_metrics(
            metr_conf['ttl'], config['region'], group_metrics,
            len(tenanted_groups), config, log, _print)
        eff = Effect(TenantScope(eff, metr_conf['tenant_id']))
        yield perform(dispatcher, eff)
        log.msg('added to cloud metrics')
        if _print:
            print('added to cloud metrics')
    if _print:
        group_metrics.sort(key=lambda g: abs(g.desired - g.actual),
                           reverse=True)
        print('groups sorted as per divergence')
        print('\n'.join(map(str, group_metrics)))

    # Disconnect only if we created the client
    if not client:
        yield _client.disconnect()

    defer.returnValue(group_metrics)
Example #2
def collect_metrics(reactor, config, log, client=None, authenticator=None,
                    _print=False):
    """
    Start collecting the metrics

    :param reactor: Twisted reactor
    :param dict config: Configuration loaded from a file, containing all the
        info needed to collect metrics
    :param :class:`silverberg.client.CQLClient` client:
        Optional Cassandra client. A new client will be created
        if this is not given, and disconnected before returning
    :param :class:`otter.auth.IAuthenticator` authenticator:
        Optional authenticator. A new authenticator will be created
        if this is not given
    :param bool _print: Should debug messages be printed to stdout?

    :return: :class:`Deferred` fired with ``list`` of `GroupMetrics`
    """
    convergence_tids = config.get('convergence-tenants', [])
    _client = client or connect_cass_servers(reactor, config['cassandra'])
    authenticator = authenticator or generate_authenticator(reactor,
                                                            config['identity'])
    store = CassScalingGroupCollection(_client, reactor, 1000)
    dispatcher = get_dispatcher(reactor, authenticator, log,
                                get_service_configs(config), store)

    # calculate metrics
    fpath = get_in(["metrics", "last_tenant_fpath"], config,
                   default="last_tenant.txt")
    tenanted_groups = yield perform(
        dispatcher,
        get_todays_scaling_groups(convergence_tids, fpath))
    group_metrics = yield get_all_metrics(
        dispatcher, tenanted_groups, log, _print=_print)

    # Add to cloud metrics
    metr_conf = config.get("metrics", None)
    if metr_conf is not None:
        eff = add_to_cloud_metrics(
            metr_conf['ttl'], config['region'], group_metrics,
            len(tenanted_groups), log, _print)
        eff = Effect(TenantScope(eff, metr_conf['tenant_id']))
        yield perform(dispatcher, eff)
        log.msg('added to cloud metrics')
        if _print:
            print('added to cloud metrics')
    if _print:
        group_metrics.sort(key=lambda g: abs(g.desired - g.actual),
                           reverse=True)
        print('groups sorted as per divergence', *group_metrics, sep='\n')

    # Disconnect only if we created the client
    if not client:
        yield _client.disconnect()

    defer.returnValue(group_metrics)
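
The two collect_metrics variants above are inlineCallbacks-style generators (yield plus defer.returnValue), so calling them yields a Deferred. Below is a minimal usage sketch, not taken from the source, of driving the function from a one-off Twisted script; the otter.metrics import path, the otter.log logger and the JSON config layout are assumptions based on what the function reads.

import json
import sys

from twisted.internet import task

from otter.log import log  # assumed: otter's module-level bound logger
from otter.metrics import collect_metrics  # assumed module path


def main(reactor, config_path):
    # The config file is assumed to carry the keys read above:
    # 'cassandra', 'identity', 'region' and optionally 'metrics'.
    with open(config_path) as f:
        config = json.load(f)
    # collect_metrics returns a Deferred firing with a list of GroupMetrics.
    return collect_metrics(reactor, config, log, _print=True)


if __name__ == '__main__':
    task.react(main, sys.argv[1:])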
Example #3
def perform_run_remotely(reactor, base_dispatcher, intent):
    connection_helper = get_connection_helper(
        reactor,
        username=intent.username, address=intent.address, port=intent.port)

    context = Message.new(
        username=intent.username, address=intent.address, port=intent.port)

    def connect():
        connection = connection_helper.secureConnection()
        connection.addErrback(write_failure)
        timeout(reactor, connection, 30)
        return connection

    connection = yield loop_until(reactor, connect)

    dispatcher = ComposedDispatcher([
        get_ssh_dispatcher(
            connection=connection,
            context=context,
        ),
        base_dispatcher,
    ])

    yield perform(dispatcher, intent.commands)

    yield connection_helper.cleanupConnection(
        connection, False)
Example #4
def webhook_migrate(reactor, conn, args):
    """
    Migrate webhook indexes to table
    """
    store = CassScalingGroupCollection(None, None, 3)
    eff = store.get_webhook_index_only().on(store.add_webhook_keys)
    return perform(get_working_cql_dispatcher(reactor, conn), eff)
Example #5
def modify_and_trigger(dispatcher, group, logargs, modifier, *args, **kwargs):
    """
    Modify group state and trigger convergence after that if the group is not
    suspended. Otherwise fail with :obj:`TenantSuspendedError`.

    :param IScalingGroup group: Scaling group whose state is getting modified
    :param dict logargs: Log fields to bind when triggering convergence
    :param modifier: Callable as described in IScalingGroup.modify_state

    :return: Deferred with None if modification and convergence succeeded.
        Fails with :obj:`TenantSuspendedError` if group is suspended.
    """
    def modifier_wrapper(_group, state, *_args, **_kwargs):
        # Ideally this will not be allowed by repose middleware, but we add
        # the check here for mimic-based integration tests
        if state.suspended:
            raise TenantSuspendedError(_group.tenant_id)
        return modifier(_group, state, *_args, **_kwargs)

    cannot_exec_pol_err = None
    try:
        yield group.modify_state(modifier_wrapper, *args, **kwargs)
    except CannotExecutePolicyError as ce:
        cannot_exec_pol_err = ce
    if tenant_is_enabled(group.tenant_id, config_value):
        eff = Effect(
            BoundFields(
                trigger_convergence(group.tenant_id, group.uuid), logargs))
        yield perform(dispatcher, eff)
    if cannot_exec_pol_err is not None:
        raise cannot_exec_pol_err
Example #6
def modify_and_trigger(dispatcher, group, logargs, modifier, *args, **kwargs):
    """
    Modify group state and trigger convergence after that if the group is not
    suspended. Otherwise fail with :obj:`TenantSuspendedError`.

    :param IScalingGroup group: Scaling group whose state is getting modified
    :param dict logargs: Log fields to bind when triggering convergence
    :param modifier: Callable as described in IScalingGroup.modify_state

    :return: Deferred with None if modification and convergence succeeded.
        Fails with :obj:`TenantSuspendedError` if group is suspended.
    """
    def modifier_wrapper(_group, state, *_args, **_kwargs):
        # Ideally this will not be allowed by repose middleware, but we add
        # the check here for mimic-based integration tests
        if state.suspended:
            raise TenantSuspendedError(_group.tenant_id)
        return modifier(_group, state, *_args, **_kwargs)

    cannot_exec_pol_err = None
    try:
        yield group.modify_state(modifier_wrapper, *args, **kwargs)
    except CannotExecutePolicyError as ce:
        cannot_exec_pol_err = ce
    if tenant_is_enabled(group.tenant_id, config_value):
        eff = Effect(
            BoundFields(trigger_convergence(group.tenant_id, group.uuid),
                        logargs))
        yield perform(dispatcher, eff)
    if cannot_exec_pol_err is not None:
        raise cannot_exec_pol_err
Example #7
def webhook_migrate(reactor, conn, args):
    """
    Migrate webhook indexes to table
    """
    store = CassScalingGroupCollection(None, None, 3)
    eff = store.get_webhook_index_only().on(store.add_webhook_keys)
    return perform(get_working_cql_dispatcher(reactor, conn), eff)
Example #8
def perform_run_remotely(reactor, base_dispatcher, intent):
    connection_helper = get_connection_helper(reactor,
                                              username=intent.username,
                                              address=intent.address,
                                              port=intent.port)

    context = Message.new(username=intent.username,
                          address=intent.address,
                          port=intent.port)

    def connect():
        connection = connection_helper.secureConnection()
        connection.addErrback(write_failure)
        timeout(reactor, connection, 30)
        return connection

    connection = yield loop_until(reactor, connect)

    dispatcher = ComposedDispatcher([
        get_ssh_dispatcher(
            connection=connection,
            context=context,
        ),
        base_dispatcher,
    ])

    yield perform(dispatcher, intent.commands)

    yield connection_helper.cleanupConnection(connection, False)
Example #9
def webhook_index(reactor, conn, args):
    """
    Show webhook index entries that are not in the webhook keys table
    """
    store = CassScalingGroupCollection(None, None, 3)
    eff = store.get_webhook_index_only()
    return perform(get_working_cql_dispatcher(reactor, conn), eff)
Example #10
def webhook_index(reactor, conn, args):
    """
    Show webhook index entries that are not in the webhook keys table
    """
    store = CassScalingGroupCollection(None, None, 3)
    eff = store.get_webhook_index_only()
    return perform(get_working_cql_dispatcher(reactor, conn), eff)
Example #11
 def acquire(self, blocking=True, timeout=None):
     """
     Same as :meth:`kazoo.recipe.lock.Lock.acquire` except that this can be
     called again on an object that has been released. It will start a fresh
     process to acquire the lock.
     """
     return perform(self.dispatcher, self.acquire_eff(blocking, timeout))
Example #12
    def start_cluster(self, reactor):
        """
        Provision cloud cluster for acceptance tests.

        :return Cluster: The cluster to connect to for acceptance tests.
        """
        metadata = {
            'purpose': 'acceptance-testing',
            'distribution': self.distribution,
        }
        metadata.update(self.metadata)

        for index in range(self.num_nodes):
            name = "acceptance-test-%s-%d" % (self.creator, index)
            try:
                print "Creating node %d: %s" % (index, name)
                node = self.provisioner.create_node(
                    name=name,
                    distribution=self.distribution,
                    metadata=metadata,
                )
            except:
                print "Error creating node %d: %s" % (index, name)
                print "It may have leaked into the cloud."
                raise

            yield remove_known_host(reactor, node.address)
            self.nodes.append(node)
            del node

        commands = parallel([
            node.provision(package_source=self.package_source,
                           variants=self.variants)
            for node in self.nodes
        ])
        if self.dataset_backend == DatasetBackend.zfs:
            zfs_commands = parallel([
                configure_zfs(node, variants=self.variants)
                for node in self.nodes
            ])
            commands = commands.on(success=lambda _: zfs_commands)

        yield perform(make_dispatcher(reactor), commands)

        cluster = yield configured_cluster_for_nodes(
            reactor,
            generate_certificates(
                make_cluster_id(
                    TestTypes.ACCEPTANCE,
                    _provider_for_cluster_id(self.dataset_backend),
                ),
                self.nodes),
            self.nodes,
            self.dataset_backend,
            self.dataset_backend_configuration,
            _save_backend_configuration(self.dataset_backend,
                                        self.dataset_backend_configuration)
        )

        returnValue(cluster)
Example #13
 def test_log_none_effectful_fields(self):
     """
     When log is not passed, but there are log fields from BoundFields,
     the log passed to treq has those fields.
     """
     log = mock_log()
     # we have to include system='otter' in the expected log here because
     # the code falls back to otter.log.log, which has the system key bound.
     expected_log = matches(IsBoundWith(bound='stuff', system='otter'))
     req = ('GET', 'http://google.com/', None, None, None, {
         'log': expected_log
     })
     response = StubResponse(200, {})
     treq = StubTreq(reqs=[(req, response)],
                     contents=[(response, "content")])
     req = Request(method="get", url="http://google.com/")
     req.treq = treq
     req_eff = Effect(req)
     bound_log_eff = with_log(req_eff, bound='stuff')
     dispatcher = ComposedDispatcher(
         [get_simple_dispatcher(None),
          get_log_dispatcher(log, {})])
     self.assertEqual(
         self.successResultOf(perform(dispatcher, bound_log_eff)),
         (response, "content"))
Example #14
    def _upgrade_flocker(self, reactor, nodes, package_source):
        """
        Put the version of Flocker indicated by ``package_source`` onto all of
        the given nodes.

        This takes a primitive approach of uninstalling the software and then
        installing the new version instead of trying to take advantage of any
        OS-level package upgrade support.  Because it's easier.  The package
        removal step is allowed to fail in case the package is not installed
        yet (other failures are not differentiated).  The only action taken on
        failure is that the failure is logged.

        :param pvector nodes: The ``ManagedNode``\ s on which to upgrade the
            software.
        :param PackageSource package_source: The version of the software to
            which to upgrade.

        :return: A ``Deferred`` that fires when the software has been upgraded.
        """
        dispatcher = make_dispatcher(reactor)

        uninstalling = perform(dispatcher, uninstall_flocker(nodes))
        uninstalling.addErrback(write_failure, logger=None)

        def install(ignored):
            return perform(
                dispatcher,
                install_flocker(nodes, package_source),
            )
        installing = uninstalling.addCallback(install)
        return installing
Example #15
 def acquire(self, blocking=True, timeout=None):
     """
     Same as :meth:`kazoo.recipe.lock.Lock.acquire` except that this can be
     called again on an object that has been released. It will start a fresh
     process to acquire the lock.
     """
     return perform(self.dispatcher, self.acquire_eff(blocking, timeout))
Example #16
def configured_cluster_for_nodes(
    reactor, certificates, nodes, dataset_backend,
    dataset_backend_configuration, dataset_backend_config_file,
    provider=None
):
    """
    Get a ``Cluster`` with Flocker services running on the right nodes.

    :param reactor: The reactor.
    :param Certificates certificates: The certificates to install on the
        cluster.
    :param nodes: The ``ManagedNode``s on which to operate.
    :param NamedConstant dataset_backend: The ``DatasetBackend`` constant
        representing the dataset backend that the nodes will be configured to
        use when they are "started".
    :param dict dataset_backend_configuration: The backend-specific
        configuration the nodes will be given for their dataset backend.
    :param FilePath dataset_backend_config_file: A FilePath that has the
        dataset_backend info stored.

    :returns: A ``Deferred`` which fires with ``Cluster`` when it is
        configured.
    """
    # XXX: There is duplication between the values here and those in
    # f.node.agents.test.blockdevicefactory.MINIMUM_ALLOCATABLE_SIZES. We want
    # the default volume size to be greater than or equal to the minimum
    # allocatable size.
    #
    # Ideally, the minimum allocatable size (and perhaps the default volume
    # size) would be something known by an object that represents the dataset
    # backend. Unfortunately:
    #  1. There is no such object
    #  2. There is existing confusion in the code around 'openstack' and
    #     'rackspace'
    #
    # Here, we special-case Rackspace (presumably) because it has a minimum
    # allocatable size that is different from other Openstack backends.
    #
    # FLOC-2584 also discusses this.
    default_volume_size = GiB(1)
    if dataset_backend_configuration.get('auth_plugin') == 'rackspace':
        default_volume_size = RACKSPACE_MINIMUM_VOLUME_SIZE

    cluster = Cluster(
        all_nodes=pvector(nodes),
        control_node=nodes[0],
        agent_nodes=nodes,
        dataset_backend=dataset_backend,
        default_volume_size=int(default_volume_size.to_Byte().value),
        certificates=certificates,
        dataset_backend_config_file=dataset_backend_config_file
    )

    configuring = perform(
        make_dispatcher(reactor),
        configure_cluster(cluster, dataset_backend_configuration, provider)
    )
    configuring.addCallback(lambda ignored: cluster)
    return configuring
Example #17
def configured_cluster_for_nodes(
    reactor, certificates, nodes, dataset_backend,
    dataset_backend_configuration, dataset_backend_config_file,
    provider=None
):
    """
    Get a ``Cluster`` with Flocker services running on the right nodes.

    :param reactor: The reactor.
    :param Certificates certificates: The certificates to install on the
        cluster.
    :param nodes: The ``ManagedNode``s on which to operate.
    :param NamedConstant dataset_backend: The ``DatasetBackend`` constant
        representing the dataset backend that the nodes will be configured to
        use when they are "started".
    :param dict dataset_backend_configuration: The backend-specific
        configuration the nodes will be given for their dataset backend.
    :param FilePath dataset_backend_config_file: A FilePath that has the
        dataset_backend info stored.

    :returns: A ``Deferred`` which fires with ``Cluster`` when it is
        configured.
    """
    # XXX: There is duplication between the values here and those in
    # f.node.agents.test.blockdevicefactory.MINIMUM_ALLOCATABLE_SIZES. We want
    # the default volume size to be greater than or equal to the minimum
    # allocatable size.
    #
    # Ideally, the minimum allocatable size (and perhaps the default volume
    # size) would be something known by an object that represents the dataset
    # backend. Unfortunately:
    #  1. There is no such object
    #  2. There is existing confusion in the code around 'openstack' and
    #     'rackspace'
    #
    # Here, we special-case Rackspace (presumably) because it has a minimum
    # allocatable size that is different from other Openstack backends.
    #
    # FLOC-2584 also discusses this.
    default_volume_size = GiB(1)
    if dataset_backend_configuration.get('auth_plugin') == 'rackspace':
        default_volume_size = RACKSPACE_MINIMUM_VOLUME_SIZE

    cluster = Cluster(
        all_nodes=pvector(nodes),
        control_node=nodes[0],
        agent_nodes=nodes,
        dataset_backend=dataset_backend,
        default_volume_size=int(default_volume_size.to_Byte().value),
        certificates=certificates,
        dataset_backend_config_file=dataset_backend_config_file
    )

    configuring = perform(
        make_dispatcher(reactor),
        configure_cluster(cluster, dataset_backend_configuration, provider)
    )
    configuring.addCallback(lambda ignored: cluster)
    return configuring
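
A small aside on the default_volume_size computation in the two examples above: a sketch of the unit conversion it relies on, assuming the GiB class comes from the bitmath package (which is what its .to_Byte() API suggests).

from bitmath import GiB  # assumed source of the GiB class used above

# 1 GiB in bytes, the integer handed to Cluster(default_volume_size=...)
assert int(GiB(1).to_Byte().value) == 1073741824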
Example #18
    def is_acquired(self):
        """
        Is the lock already acquired? This method does not exist in the kazoo
        lock recipe and is a nice addition to it.

        :return: :obj:`Deferred` of ``bool``
        """
        return perform(self.dispatcher, self.is_acquired_eff())
Example #19
    def is_acquired(self):
        """
        Is the lock already acquired? This method does not exist in the kazoo
        lock recipe and is a nice addition to it.

        :return: :obj:`Deferred` of ``bool``
        """
        return perform(self.dispatcher, self.is_acquired_eff())
Example #20
def get_active_cache(reactor, connection, tenant_id, group_id):
    """
    Get active servers from servers cache table
    """
    eff = CassScalingGroupServersCache(tenant_id, group_id).get_servers(True)
    disp = get_working_cql_dispatcher(reactor, connection)
    d = perform(disp, eff)
    return d.addCallback(lambda (servers, _): {s['id']: s for s in servers})
Example #21
def get_active_cache(reactor, connection, tenant_id, group_id):
    """
    Get active servers from servers cache table
    """
    eff = CassScalingGroupServersCache(tenant_id, group_id).get_servers(True)
    disp = get_working_cql_dispatcher(reactor, connection)
    d = perform(disp, eff)
    return d.addCallback(lambda (servers, _): {s['id']: s for s in servers})
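
The callback in the two get_active_cache examples above relies on Python 2 tuple parameter unpacking, which was removed in Python 3. A sketch of the same callback written without it, assuming get_servers fires with a (servers, cache_time) pair as the lambda's unpacking suggests:

def _index_servers(result):
    # result is assumed to be a (servers, cache_time) pair, matching the
    # `lambda (servers, _):` form used above.
    servers, _cache_time = result
    return {s['id']: s for s in servers}

# d.addCallback(_index_servers) would then replace the lambda.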
Example #22
def groups_steps(groups, reactor, store, cass_client, authenticator, conf):
    """
    Return [(group, steps)] list
    """
    eff = parallel(map(group_steps, groups))
    disp = get_full_dispatcher(
        reactor, authenticator, mock_log(), get_service_configs(conf),
        "kzclient", store, "supervisor", cass_client)
    return perform(disp, eff).addCallback(lambda steps: zip(groups, steps))
Example #23
def groups_steps(groups, reactor, store, cass_client, authenticator, conf):
    """
    Return [(group, steps)] list
    """
    eff = parallel(map(group_steps, groups))
    disp = get_full_dispatcher(reactor, authenticator, mock_log(),
                               get_service_configs(conf), "kzclient", store,
                               "supervisor", cass_client)
    return perform(disp, eff).addCallback(lambda steps: zip(groups, steps))
Example #24
    def check_and_call():
        class DoFunc(object):
            pass

        @deferred_performer
        def func_performer(d, i):
            return maybeDeferred(func, *args, **kwargs)

        comp_dispatcher = ComposedDispatcher([TypeDispatcher({DoFunc: func_performer}), dispatcher])
        return perform(comp_dispatcher, call_if_acquired(lock, Effect(DoFunc())))
Example #25
    def start_convergence(self, log, tenant_id, group_id, perform=perform):
        """Record that a group needs converged by creating a ZooKeeper node."""
        log = log.bind(tenant_id=tenant_id, scaling_group_id=group_id)
        eff = mark_divergent(tenant_id, group_id)
        d = perform(self.dispatcher, eff)

        def success(r):
            log.msg('mark-dirty-success')
            return r  # The result is ignored normally, but return it for tests
        d.addCallbacks(success, log.err, errbackArgs=('mark-dirty-failure',))
        return d
Example #26
    def test_performs_tenant_scope(self, deferred_lock_run):
        """
        :func:`perform_tenant_scope` performs :obj:`TenantScope`, and uses the
        default throttler
        """
        # We want to ensure
        # 1. the TenantScope can be performed
        # 2. the ServiceRequest is run within a lock, since it matches the
        #    default throttling policy

        set_config_data({
            "cloud_client": {
                "throttling": {
                    "create_server_delay": 1,
                    "delete_server_delay": 0.4
                }
            }
        })
        self.addCleanup(set_config_data, {})
        clock = Clock()
        authenticator = object()
        log = object()
        dispatcher = get_cloud_client_dispatcher(clock, authenticator, log,
                                                 make_service_configs())
        svcreq = service_request(ServiceType.CLOUD_SERVERS, 'POST', 'servers')
        tscope = TenantScope(tenant_id='111', effect=svcreq)

        def run(f, *args, **kwargs):
            result = f(*args, **kwargs)
            result.addCallback(lambda x: (x[0], assoc(x[1], 'locked', True)))
            return result

        deferred_lock_run.side_effect = run

        response = stub_pure_response({}, 200)
        seq = SequenceDispatcher([
            (Authenticate(authenticator=authenticator,
                          tenant_id='111',
                          log=log), lambda i: ('token', fake_service_catalog)),
            (Request(method='POST',
                     url='http://dfw.openstack/servers',
                     headers=headers('token'),
                     log=log), lambda i: response),
        ])

        disp = ComposedDispatcher([seq, dispatcher])
        with seq.consume():
            result = perform(disp, Effect(tscope))
            self.assertNoResult(result)
            clock.advance(1)
            self.assertEqual(self.successResultOf(result), (response[0], {
                'locked': True
            }))
Example #27
    def start_convergence(self, log, tenant_id, group_id, perform=perform):
        """Record that a group needs converged by creating a ZooKeeper node."""
        log = log.bind(tenant_id=tenant_id, scaling_group_id=group_id)
        eff = mark_divergent(tenant_id, group_id)
        d = perform(self.dispatcher, eff)

        def success(r):
            log.msg('mark-dirty-success')
            return r  # The result is ignored normally, but return it for tests

        d.addCallbacks(success, log.err, errbackArgs=('mark-dirty-failure', ))
        return d
Example #28
    def check_and_call():
        class DoFunc(object):
            pass

        @deferred_performer
        def func_performer(d, i):
            return maybeDeferred(func, *args, **kwargs)

        comp_dispatcher = ComposedDispatcher(
            [TypeDispatcher({DoFunc: func_performer}), dispatcher])
        return perform(comp_dispatcher,
                       call_if_acquired(lock, Effect(DoFunc())))
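
Both check_and_call variants above use the core pattern of the effect library that every example in this listing relies on: an intent is plain data, a performer registered in a TypeDispatcher does the actual work, and perform runs an Effect against a dispatcher (the Deferred-returning perform used above comes from the Twisted binding). A minimal self-contained sketch of that pattern using the library's synchronous entry points, with a made-up ReadGreeting intent purely for illustration:

from effect import Effect, TypeDispatcher, sync_perform, sync_performer


class ReadGreeting(object):
    """A pure intent: it only describes the work, it does not do it."""


@sync_performer
def perform_read_greeting(dispatcher, intent):
    # The performer is the only place where the side effect would happen.
    return "hello"


greeting_dispatcher = TypeDispatcher({ReadGreeting: perform_read_greeting})
assert sync_perform(greeting_dispatcher, Effect(ReadGreeting())) == "hello"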
Example #29
def resume_scaling_group(log, transaction_id, scaling_group, dispatcher):
    """
    Resumes the scaling group, causing all scaling policy executions to be
    evaluated as normal again.  This is an idempotent change; if the group is
    already resumed, this does not raise an error.

    :raises: :class:`NoSuchScalingGroup` if the scaling group does not exist.

    :return: None
    """
    if not tenant_is_enabled(scaling_group.tenant_id, config_value):
        raise NotImplementedError("Resume is not implemented for legacy groups")
    return perform(dispatcher, conv_resume_group_eff(transaction_id, scaling_group))
Example #30
    def buckets_acquired(self, my_buckets):
        """
        Get dirty flags from zookeeper and run convergence with them.

        This is used as the partitioner callback.
        """
        ceff = Effect(GetChildren(CONVERGENCE_DIRTY_DIR)).on(
            partial(self._converge_all, my_buckets))
        # Return the deferred as a 1-element tuple, for testing only.
        # Returning the deferred itself would block otter from shutting down
        # until it is fired, which we don't need since convergence is
        # idempotent and will be triggered on the next start of otter.
        return (perform(self._dispatcher, self._with_conv_runid(ceff)), )
Example #31
def pause_scaling_group(log, transaction_id, scaling_group, dispatcher):
    """
    Pauses the scaling group, causing all scaling policy executions to be
    rejected until unpaused.  This is an idempotent change; if the group is
    already paused, this does not raise an error.

    :raises: :class:`NoSuchScalingGroup` if the scaling group does not exist.

    :return: None
    """
    if not tenant_is_enabled(scaling_group.tenant_id, config_value):
        raise NotImplementedError("Pause is not implemented for legay groups")
    return perform(dispatcher, conv_pause_group_eff(scaling_group, transaction_id))
Example #32
    def buckets_acquired(self, my_buckets):
        """
        Get dirty flags from zookeeper and run convergence with them.

        This is used as the partitioner callback.
        """
        ceff = Effect(GetChildren(CONVERGENCE_DIRTY_DIR)).on(
            partial(self._converge_all, my_buckets))
        # Return the deferred as a 1-element tuple, for testing only.
        # Returning the deferred itself would block otter from shutting down
        # until it is fired, which we don't need since convergence is
        # idempotent and will be triggered on the next start of otter.
        return (perform(self._dispatcher, self._with_conv_runid(ceff)), )
Example #33
def main(reactor, args, base_path, top_level):
    try:
        options = TestBrewOptions()
        try:
            options.parseOptions(args)
        except UsageError as e:
            sys.stderr.write("Error: {error}.\n".format(error=str(e)))
            sys.exit(1)

        eliot_to_stdout(MESSAGE_FORMATS, {})

        recipe_url = options['recipe_url']
        options['vmpath'] = FilePath(options['vmpath'])
        # Open the recipe URL just to validate and verify that it exists.
        # We do not need to read its content.
        urllib2.urlopen(recipe_url)
        yield run(reactor, [
            "vmrun",
            "revertToSnapshot",
            options['vmpath'].path,
            options['vmsnapshot'],
        ])
        yield run(reactor, [
            "vmrun",
            "start",
            options['vmpath'].path,
            "nogui",
        ])
        yield perform(
            make_dispatcher(reactor),
            run_remotely(
                username=options['vmuser'],
                address=options['vmhost'],
                commands=sequence([
                    task_configure_brew_path(),
                    task_test_homebrew(recipe_url),
                ]),
            ),
        )
        yield run(reactor, [
            "vmrun",
            "stop",
            options['vmpath'].path,
            "hard",
        ])
        print "Done."
    except ProcessTerminated as e:
        sys.stderr.write(
            ("Error: Command terminated with exit status {code}.\n").format(
                code=e.exitCode))
        raise
Example #34
def remove_server_from_group(dispatcher,
                             log,
                             trans_id,
                             server_id,
                             replace,
                             purge,
                             group,
                             state,
                             config_value=config_value):
    """
    Remove a specific server from the group, optionally replacing it
    with a new one, and optionally deleting the old one from Nova.

    If the old server is not deleted from Nova, otter-specific metadata
    is removed: otherwise, a different part of otter may later mistake
    the server for one that *should* still be in the group.

    :param log: A bound logger
    :param bytes trans_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: deferred :class:`~otter.models.interface.GroupState`
    """
    # worker case
    if not tenant_is_enabled(group.tenant_id, config_value):
        return worker_remove_server_from_group(log, trans_id, server_id,
                                               replace, purge, group, state)

    # convergence case - requires that the convergence dispatcher handles
    # EvictServerFromScalingGroup
    eff = convergence_remove_server_from_group(log, trans_id, server_id,
                                               replace, purge, group, state)

    def kick_off_convergence(new_state):
        ceff = trigger_convergence(group.tenant_id, group.uuid)
        return ceff.on(lambda _: new_state)

    return perform(
        dispatcher,
        with_log(eff.on(kick_off_convergence),
                 tenant_id=group.tenant_id,
                 scaling_group_id=group.uuid,
                 server_id=server_id,
                 transaction_id=trans_id))
Example #35
def pause_scaling_group(log, transaction_id, scaling_group, dispatcher):
    """
    Pauses the scaling group, causing all scaling policy executions to be
    rejected until unpaused.  This is an idempotent change; if the group is
    already paused, this does not raise an error.

    :raises: :class:`NoSuchScalingGroup` if the scaling group does not exist.

    :return: None
    """
    if not tenant_is_enabled(scaling_group.tenant_id, config_value):
        raise NotImplementedError("Pause is not implemented for legay groups")
    return perform(dispatcher,
                   conv_pause_group_eff(scaling_group, transaction_id))
Example #36
def _generic_rcv3_request(operation, request_bag, lb_id, server_id):
    """
    Perform a generic RCv3 bulk operation on a single (lb, server) pair.

    :param callable operation: RCv3 function to perform on (lb, server) pair.
    :param request_bag: An object with a bunch of useful data on it.
    :param str lb_id: The id of the RCv3 load balancer to act on.
    :param str server_id: The Nova server id to act on.
    :return: A deferred that will fire when the request has been performed,
        firing with the parsed result of the request, or :data:`None` if the
        request has no body.
    """
    eff = operation(pset([(lb_id, server_id)]))
    scoped = Effect(TenantScope(eff, request_bag.tenant_id))
    return perform(request_bag.dispatcher, scoped)
Example #37
def resume_scaling_group(log, transaction_id, scaling_group, dispatcher):
    """
    Resumes the scaling group, causing all scaling policy executions to be
    evaluated as normal again.  This is an idempotent change; if the group is
    already resumed, this does not raise an error.

    :raises: :class:`NoSuchScalingGroup` if the scaling group does not exist.

    :return: None
    """
    if not tenant_is_enabled(scaling_group.tenant_id, config_value):
        raise NotImplementedError(
            'Resume is not implemented for legacy groups')
    return perform(dispatcher,
                   conv_resume_group_eff(transaction_id, scaling_group))
Example #38
def _generic_rcv3_request(operation, request_bag, lb_id, server_id):
    """
    Perform a generic RCv3 bulk operation on a single (lb, server) pair.

    :param callable operation: RCv3 function to perform on (lb, server) pair.
    :param request_bag: An object with a bunch of useful data on it.
    :param str lb_id: The id of the RCv3 load balancer to act on.
    :param str server_id: The Nova server id to act on.
    :return: A deferred that will fire when the request has been performed,
        firing with the parsed result of the request, or :data:`None` if the
        request has no body.
    """
    eff = operation(pset([(lb_id, server_id)]))
    scoped = Effect(TenantScope(eff, request_bag.tenant_id))
    return perform(request_bag.dispatcher, scoped)
Example #39
 def test_perform(self):
     """
     The Request effect dispatches a request to treq, and returns a
     two-tuple of the Twisted Response object and the content as bytes.
     """
     req = ('GET', 'http://google.com/', None, None, None,
            {'log': default_log})
     response = StubResponse(200, {})
     treq = StubTreq(reqs=[(req, response)],
                     contents=[(response, "content")])
     req = Request(method="get", url="http://google.com/")
     req.treq = treq
     dispatcher = get_simple_dispatcher(None)
     self.assertEqual(
         self.successResultOf(perform(dispatcher, Effect(req))),
         (response, "content"))
Example #40
 def test_log(self):
     """
     The log specified in the Request is passed on to the treq
     implementation.
     """
     log = object()
     req = ('GET', 'http://google.com/', None, None, None, {'log': log})
     response = StubResponse(200, {})
     treq = StubTreq(reqs=[(req, response)],
                     contents=[(response, "content")])
     req = Request(method="get", url="http://google.com/", log=log)
     req.treq = treq
     dispatcher = get_simple_dispatcher(None)
     self.assertEqual(
         self.successResultOf(perform(dispatcher, Effect(req))),
         (response, "content"))
Example #41
    def test_run_logs_stdout(self, logger):
        """
        The ``Run`` intent logs the standard output of the specified command.
        """
        command = run_remotely(
            username="******",
            address=str(self.server.ip),
            port=self.server.port,
            commands=run("echo test_ssh_conch:test_run_logs_stdout 1>&2"),
        )

        d = perform(
            make_dispatcher(reactor),
            command,
        )
        return d
Example #42
    def test_run_logs_stderr(self, logger):
        """
        The ``Run`` intent logs the standard error of the specified command.
        """
        command = run_remotely(
            username="******",
            address=str(self.server.ip),
            port=self.server.port,
            commands=run("echo test_ssh_conch:test_run_logs_stderr 1>&2"),
        )

        d = perform(
            make_dispatcher(self.reactor),
            command,
        )
        return d
Example #43
 def test_log(self):
     """
     The log specified in the Request is passed on to the treq
     implementation.
     """
     log = object()
     req = ('GET', 'http://google.com/', None, None, None, {'log': log})
     response = StubResponse(200, {})
     treq = StubTreq(reqs=[(req, response)],
                     contents=[(response, "content")])
     req = Request(method="get", url="http://google.com/", log=log)
     req.treq = treq
     dispatcher = get_simple_dispatcher(None)
     self.assertEqual(
         self.successResultOf(perform(dispatcher, Effect(req))),
         (response, "content"))
Example #44
    def _setup_control_node(self, reactor, node, index):
        print "Selecting node {} for control service".format(node.name)
        certificates = Certificates.generate(
            directory=self.cert_path,
            control_hostname=node.address,
            num_nodes=0,
            cluster_name=self.identity.name,
            cluster_id=self.identity.id,
        )
        dataset_backend_config_file = save_backend_configuration(
            self.dataset_backend, self.dataset_backend_configuration
        )
        cluster = Cluster(
            all_nodes=[node],
            control_node=node,
            agent_nodes=[],
            dataset_backend=self.dataset_backend,
            default_volume_size=get_default_volume_size(
                self.dataset_backend_configuration
            ),
            certificates=certificates,
            dataset_backend_config_file=dataset_backend_config_file
        )
        commands = configure_control_node(
            cluster,
            'libcloud',
            logging_config=self.config.get('logging'),
        )
        d = perform(make_dispatcher(reactor), commands)

        def configure_failed(failure):
            print "Failed to configure control node"
            write_failure(failure)
            return failure

        # It should be sufficient to configure just the control service here,
        # but there is an assumption that the control node is both a control
        # node and an agent node.
        d.addCallbacks(
            lambda _: self._add_node_to_cluster(
                reactor, cluster, node, index
            ),
            errback=configure_failed,
        )
        # Return the cluster.
        d.addCallback(lambda _: cluster)
        return d
Example #45
    def _setup_control_node(self, reactor, node, index):
        print "Selecting node {} for control service".format(node.name)
        certificates = Certificates.generate(
            directory=self.cert_path,
            control_hostname=node.address,
            num_nodes=0,
            cluster_name=self.identity.name,
            cluster_id=self.identity.id,
        )
        dataset_backend_config_file = save_backend_configuration(
            self.dataset_backend, self.dataset_backend_configuration
        )
        cluster = Cluster(
            all_nodes=[node],
            control_node=node,
            agent_nodes=[],
            dataset_backend=self.dataset_backend,
            default_volume_size=get_default_volume_size(
                self.dataset_backend_configuration
            ),
            certificates=certificates,
            dataset_backend_config_file=dataset_backend_config_file
        )
        commands = configure_control_node(
            cluster,
            'libcloud',
            logging_config=self.config.get('logging'),
        )
        d = perform(make_dispatcher(reactor), commands)

        def configure_failed(failure):
            print "Failed to configure control node"
            write_failure(failure)
            return failure

        # It should be sufficient to configure just the control service here,
        # but there is an assumption that the control node is both a control
        # node and an agent node.
        d.addCallbacks(
            lambda _: self._add_node_to_cluster(
                reactor, cluster, node, index
            ),
            errback=configure_failed,
        )
        # Return the cluster.
        d.addCallback(lambda _: cluster)
        return d
Example #46
 def divergent_changed(self, children):
     """
     ZooKeeper children-watch callback that lets this service know when the
     divergent groups have changed. If any of the divergent flags are for
     tenants associated with this service's buckets, a convergence will be
     triggered.
     """
     if self.partitioner.get_current_state() != PartitionState.ACQUIRED:
         return
     my_buckets = self.partitioner.get_current_buckets()
     changed_buckets = set(
         bucket_of_tenant(parse_dirty_flag(child)[0], len(self._buckets))
         for child in children)
     if set(my_buckets).intersection(changed_buckets):
         # the return value is ignored, but we return this for testing
         eff = self._converge_all(my_buckets, children)
         return perform(self._dispatcher, self._with_conv_runid(eff))
Example #47
 def test_perform(self):
     """
     The Request effect dispatches a request to treq, and returns a
     two-tuple of the Twisted Response object and the content as bytes.
     """
     req = ('GET', 'http://google.com/', None, None, None, {
         'log': default_log
     })
     response = StubResponse(200, {})
     treq = StubTreq(reqs=[(req, response)],
                     contents=[(response, "content")])
     req = Request(method="get", url="http://google.com/")
     req.treq = treq
     dispatcher = get_simple_dispatcher(None)
     self.assertEqual(
         self.successResultOf(perform(dispatcher, Effect(req))),
         (response, "content"))
Example #48
def set_desired_to_actual_group(dispatcher, cass_client, group):
    """
    Set the group's desired capacity to its current number of servers
    """
    actual = None  # so the error message below can't hit a NameError
    try:
        res_eff = get_all_launch_server_data(
            group["tenantId"], group["groupId"], datetime.utcnow())
        eff = Effect(TenantScope(res_eff, group["tenantId"]))
        resources = yield perform(dispatcher, eff)
        actual = active_servers_count(resources["servers"])
        print("group", group, "setting desired to ", actual)
        yield cass_client.execute(
            ('UPDATE scaling_group SET desired=:desired WHERE '
             '"tenantId"=:tenantId AND "groupId"=:groupId'),
            assoc(group, "desired", actual), DEFAULT_CONSISTENCY)
    except Exception as e:
        print("Couldn't set group {} to {} due to {}".format(group, actual, e))
Example #49
    def test_performs_tenant_scope(self, deferred_lock_run):
        """
        :func:`perform_tenant_scope` performs :obj:`TenantScope`, and uses the
        default throttler
        """
        # We want to ensure
        # 1. the TenantScope can be performed
        # 2. the ServiceRequest is run within a lock, since it matches the
        #    default throttling policy

        set_config_data(
            {"cloud_client": {"throttling": {"create_server_delay": 1,
                                             "delete_server_delay": 0.4}}})
        self.addCleanup(set_config_data, {})
        clock = Clock()
        authenticator = object()
        log = object()
        dispatcher = get_cloud_client_dispatcher(clock, authenticator, log,
                                                 make_service_configs())
        svcreq = service_request(ServiceType.CLOUD_SERVERS, 'POST', 'servers')
        tscope = TenantScope(tenant_id='111', effect=svcreq)

        def run(f, *args, **kwargs):
            result = f(*args, **kwargs)
            result.addCallback(
                lambda x: (x[0], assoc(x[1], 'locked', True)))
            return result
        deferred_lock_run.side_effect = run

        response = stub_pure_response({}, 200)
        seq = SequenceDispatcher([
            (Authenticate(authenticator=authenticator,
                          tenant_id='111', log=log),
             lambda i: ('token', fake_service_catalog)),
            (Request(method='POST', url='http://dfw.openstack/servers',
                     headers=headers('token'), log=log),
             lambda i: response),
        ])

        disp = ComposedDispatcher([seq, dispatcher])
        with seq.consume():
            result = perform(disp, Effect(tscope))
            self.assertNoResult(result)
            clock.advance(1)
            self.assertEqual(self.successResultOf(result),
                             (response[0], {'locked': True}))
Example #50
def remove_server_from_group(
    dispatcher, log, trans_id, server_id, replace, purge, group, state, config_value=config_value
):
    """
    Remove a specific server from the group, optionally replacing it
    with a new one, and optionally deleting the old one from Nova.

    If the old server is not deleted from Nova, otter-specific metadata
    is removed: otherwise, a different part of otter may later mistake
    the server for one that *should* still be in the group.

    :param log: A bound logger
    :param bytes trans_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: deferred :class:`~otter.models.interface.GroupState`
    """
    # worker case
    if not tenant_is_enabled(group.tenant_id, config_value):
        return worker_remove_server_from_group(log, trans_id, server_id, replace, purge, group, state)

    # convergence case - requires that the convergence dispatcher handles
    # EvictServerFromScalingGroup
    eff = convergence_remove_server_from_group(log, trans_id, server_id, replace, purge, group, state)

    def kick_off_convergence(new_state):
        ceff = trigger_convergence(group.tenant_id, group.uuid)
        return ceff.on(lambda _: new_state)

    return perform(
        dispatcher,
        with_log(
            eff.on(kick_off_convergence),
            tenant_id=group.tenant_id,
            scaling_group_id=group.uuid,
            server_id=server_id,
            transaction_id=trans_id,
        ),
    )
Example #51
def trigger_convergence_deletion(dispatcher, group, trans_id):
    """
    Trigger deletion of a group that belongs to a convergence tenant

    :param dispatcher: Effect dispatcher
    :param otter.models.interface.IScalingGroup group: the scaling
        group object
    :param trans_id: Transaction ID of the request triggering the deletion
    """
    # Update group status and trigger convergence
    # DELETING status will take precedence over other statuses
    d = group.update_status(ScalingGroupStatus.DELETING)
    eff = with_log(trigger_convergence(group.tenant_id, group.uuid),
                   tenant_id=group.tenant_id,
                   scaling_group_id=group.uuid,
                   transaction_id=trans_id)
    d.addCallback(lambda _: perform(dispatcher, eff))
    return d
Example #52
 def divergent_changed(self, children):
     """
     ZooKeeper children-watch callback that lets this service know when the
     divergent groups have changed. If any of the divergent flags are for
     tenants associated with this service's buckets, a convergence will be
     triggered.
     """
     if self.partitioner.get_current_state() != PartitionState.ACQUIRED:
         return
     my_buckets = self.partitioner.get_current_buckets()
     changed_buckets = set(
         bucket_of_tenant(parse_dirty_flag(child)[0], len(self._buckets))
         for child in children)
     if set(my_buckets).intersection(changed_buckets):
         # the return value is ignored, but we return this for testing
         eff = self._converge_all(my_buckets, children)
         return perform(self._dispatcher, self._with_conv_runid(eff))
Example #53
def trigger_convergence_deletion(dispatcher, group, trans_id):
    """
    Trigger deletion of a group that belongs to a convergence tenant

    :param dispatcher: Effect dispatcher
    :param otter.models.interface.IScalingGroup group: the scaling
        group object
    :param trans_id: Transaction ID of the request triggering the deletion
    """
    # Update group status and trigger convergence
    # DELETING status will take precedence over other statuses
    d = group.update_status(ScalingGroupStatus.DELETING)
    eff = with_log(trigger_convergence(group.tenant_id, group.uuid),
                   tenant_id=group.tenant_id,
                   scaling_group_id=group.uuid,
                   transaction_id=trans_id)
    d.addCallback(lambda _: perform(dispatcher, eff))
    return d
Example #54
def main(reactor, args, base_path, top_level):
    try:
        options = TestBrewOptions()
        try:
            options.parseOptions(args)
        except UsageError as e:
            sys.stderr.write("Error: {error}.\n".format(error=str(e)))
            sys.exit(1)

        add_destination(eliot_output)

        recipe_url = options['recipe_url']
        options['vmpath'] = FilePath(options['vmpath'])
        # Open the recipe URL just to validate and verify that it exists.
        # We do not need to read its content.
        urllib2.urlopen(recipe_url)
        yield run(reactor, [
            "vmrun", "revertToSnapshot",
            options['vmpath'].path, options['vmsnapshot'],
        ])
        yield run(reactor, [
            "vmrun", "start", options['vmpath'].path, "nogui",
        ])
        yield perform(
            make_dispatcher(reactor),
            run_remotely(
                username=options['vmuser'],
                address=options['vmhost'],
                commands=sequence([
                    task_configure_brew_path(),
                    task_test_homebrew(recipe_url),
                ]),
            ),
        )
        yield run(reactor, [
            "vmrun", "stop", options['vmpath'].path, "hard",
        ])
        print "Done."
    except ProcessTerminated as e:
        sys.stderr.write(
            (
                "Error: Command terminated with exit status {code}.\n"
            ).format(code=e.exitCode)
        )
        raise
Example #55
 def __call__(self, event_dict):
     """
     Process event and push it to Cloud feeds
     """
     if not event_dict.get("cloud_feed", False):
         return
     # Do further logging without cloud_feed to avoid coming back here
     # in infinite recursion
     log_keys = keyfilter(lambda k: k not in ("message", "cloud_feed"), event_dict)
     log = self.log.bind(system="otter.cloud_feed", cf_msg=event_dict["message"][0], event_data=log_keys)
     try:
         eff = self.add_event(event_dict, self.tenant_id, self.region, log)
     except UnsuitableMessage as me:
         log.err(None, "cf-unsuitable-message", unsuitable_message=me.unsuitable_message)
     else:
         return perform(self.get_disp(self.reactor, self.authenticator, log, self.service_configs), eff).addErrback(
             log.err, "cf-add-failure"
         )
Example #56
    def test_run(self):
        """
        The ``Run`` intent runs the specified command via ssh.
        """
        command = run_remotely(
            username="******",
            address=str(self.server.ip),
            port=self.server.port,
            commands=run("touch hello"),
        )

        d = perform(
            make_dispatcher(self.reactor),
            command,
        )

        def check(_):
            self.assertEqual(self.server.home.child('hello').getContent(), "")

        d.addCallback(check)
        return d
Example #57
 def _setup_convergences(self):
     """
     Get groups to converge and setup scheduled calls to trigger convergence
     on each of them within time_range.
     """
     groups = yield perform(self.dispatcher,
                            get_groups_to_converge(self.config_func))
     active = self._cancel_scheduled_calls()
     if active:
         # This should never happen
         self.log.err(RuntimeError("selfheal-calls-err"),
                      "selfheal-calls-err",
                      active=active)
     if not groups:
         returnValue(None)
     wait_time = self.time_range / len(groups)
     for i, group in enumerate(groups):
         self._calls.append(
             self.clock.callLater(
                 i * wait_time, perform, self.dispatcher,
                 check_and_trigger(group["tenantId"], group["groupId"])))
Example #58
def modify_and_trigger(dispatcher, group, logargs, modifier, *args, **kwargs):
    """
    Modify group state and trigger convergence after that

    :param IScalingGroup group: Scaling group whose state is getting modified
    :param dict logargs: Log fields to bind when triggering convergence
    :param modifier: Callable as described in IScalingGroup.modify_state

    :return: Deferred with None
    """
    cannot_exec_pol_err = None
    try:
        yield group.modify_state(modifier, *args, **kwargs)
    except CannotExecutePolicyError as ce:
        cannot_exec_pol_err = ce
    if tenant_is_enabled(group.tenant_id, config_value):
        eff = Effect(
            BoundFields(trigger_convergence(group.tenant_id, group.uuid),
                        logargs))
        yield perform(dispatcher, eff)
    if cannot_exec_pol_err is not None:
        raise cannot_exec_pol_err