Example #1
    def start_cluster(self, reactor):
        """
        Provision cloud cluster for acceptance tests.

        :return Cluster: The cluster to connect to for acceptance tests.
        """
        metadata = {
            'purpose': 'acceptance-testing',
            'distribution': self.distribution,
        }
        metadata.update(self.metadata)

        for index in range(self.num_nodes):
            name = "acceptance-test-%s-%d" % (self.creator, index)
            try:
                print "Creating node %d: %s" % (index, name)
                node = self.provisioner.create_node(
                    name=name,
                    distribution=self.distribution,
                    metadata=metadata,
                )
            except:
                print "Error creating node %d: %s" % (index, name)
                print "It may have leaked into the cloud."
                raise

            yield remove_known_host(reactor, node.address)
            self.nodes.append(node)
            del node

        commands = parallel([
            node.provision(package_source=self.package_source,
                           variants=self.variants)
            for node in self.nodes
        ])
        if self.dataset_backend == DatasetBackend.zfs:
            zfs_commands = parallel([
                configure_zfs(node, variants=self.variants)
                for node in self.nodes
            ])
            commands = commands.on(success=lambda _: zfs_commands)

        yield perform(make_dispatcher(reactor), commands)

        cluster = yield configured_cluster_for_nodes(
            reactor,
            generate_certificates(
                make_cluster_id(
                    TestTypes.ACCEPTANCE,
                    _provider_for_cluster_id(self.dataset_backend),
                ),
                self.nodes),
            self.nodes,
            self.dataset_backend,
            self.dataset_backend_configuration,
            _save_backend_configuration(self.dataset_backend,
                                        self.dataset_backend_configuration)
        )

        returnValue(cluster)
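A note on the library these examples share: `parallel` (from the `effect` package) takes a list of Effects and returns one Effect whose result is the list of their results, in the same order they were passed; `perform` and `make_dispatcher` above appear to be the project's Twisted glue for actually running it. A minimal, purely synchronous sketch of the same idea (the sequential ParallelEffects performer below is illustrative only, not the project's code):

from effect import (
    ComposedDispatcher, Constant, Effect, ParallelEffects, TypeDispatcher,
    base_dispatcher, parallel, sync_perform, sync_performer)

@sync_performer
def perform_parallel_sequentially(dispatcher, intent):
    # Illustration only: perform each wrapped effect one after another and
    # collect the results; a real dispatcher would run them concurrently.
    return [sync_perform(dispatcher, eff) for eff in intent.effects]

dispatcher = ComposedDispatcher([
    TypeDispatcher({ParallelEffects: perform_parallel_sequentially}),
    base_dispatcher,
])

result = sync_perform(dispatcher,
                      parallel([Effect(Constant('a')), Effect(Constant('b'))]))
assert result == ['a', 'b']  # results come back in the order passed in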
Example #3
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as a list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects keyed by LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node)
                for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
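The `gone`/`catch` pattern above is how these examples substitute a fallback value when one specific exception is raised while performing an effect: `catch` builds an error handler that swallows only the named exception type and re-raises everything else. A small hedged sketch, with `CLBNotFoundError` stubbed out purely for illustration:

from effect import (
    Constant, Effect, Error, base_dispatcher, catch, sync_perform)

class CLBNotFoundError(Exception):
    """Stand-in for the real exception used in the examples."""

def gone(default):
    # Swallow only CLBNotFoundError, substituting `default` as the result.
    return catch(CLBNotFoundError, lambda exc: default)

failed = Effect(Error(CLBNotFoundError('lb deleted'))).on(error=gone([]))
assert sync_perform(base_dispatcher, failed) == []

succeeded = Effect(Constant(['node-1'])).on(error=gone([]))
assert sync_perform(base_dispatcher, succeeded) == ['node-1']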
Example #4
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""

    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id for (node, feed) in nodes_to_feeds.items()
        if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example #5
def get_all_launch_server_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all launch_server data relevant for convergence w.r.t. the given
    time, in parallel where possible.

    Returns an Effect of {'servers': [NovaServer], 'lb_nodes': [LBNode],
                          'lbs': pmap(LB_ID -> CLB)}.
    """
    return parallel([
        get_scaling_group_servers(tenant_id, group_id, now).on(
            map(NovaServer.from_server_details_json)).on(list),
        get_clb_contents(),
        get_rcv3_contents()
    ]).on(
        lambda (servers, clb_nodes_and_clbs, rcv3_nodes): {
            'servers': servers,
            'lb_nodes': clb_nodes_and_clbs[0] + rcv3_nodes,
            'lbs': clb_nodes_and_clbs[1]
        })
Example #6
def steps_to_effect(steps):
    """Turns a collection of :class:`IStep` providers into an effect."""
    # Treat unknown errors as RETRY.
    return parallel([
        s.as_effect().on(error=lambda e: (StepResult.RETRY,
                                          [ErrorReason.Exception(e)]))
        for s in steps])
Example #7
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [
        cf_msg('convergence-remove-clb-nodes',
               lb_id=lb, nodes=sorted(concat(s.node_ids for s in lbsteps)))
        for lb, lbsteps in sorted(lbs.iteritems())]
    return parallel(effs)
Example #8
def _log_bulk_rcv3(event, steps):
    by_lbs = groupby(lambda s: s[0], concat(s.lb_node_pairs for s in steps))
    effs = [
        cf_msg(event, lb_id=lb_id, servers=sorted(p[1] for p in pairs))
        for lb_id, pairs in sorted(by_lbs.iteritems())
    ]
    return parallel(effs)
Example #9
def configure_cluster(cluster, dataset_backend_configuration):
    """
    Configure flocker-control, flocker-dataset-agent and
    flocker-container-agent on a collection of nodes.

    :param Cluster cluster: Description of the cluster to configure.

    :param dict dataset_backend_configuration: Configuration parameters to
        supply to the dataset backend.
    """
    return sequence([
        run_remotely(
            username='******',
            address=cluster.control_node.address,
            commands=sequence([
                task_install_control_certificates(
                    cluster.certificates.cluster.certificate,
                    cluster.certificates.control.certificate,
                    cluster.certificates.control.key),
                task_enable_flocker_control(cluster.control_node.distribution),
                if_firewall_available(
                    cluster.control_node.distribution,
                    task_open_control_firewall(
                        cluster.control_node.distribution)),
            ]),
        ),
        parallel([
            sequence([
                run_remotely(
                    username='******',
                    address=node.address,
                    commands=sequence([
                        task_install_node_certificates(
                            cluster.certificates.cluster.certificate,
                            certnkey.certificate, certnkey.key),
                        task_install_api_certificates(
                            cluster.certificates.user.certificate,
                            cluster.certificates.user.key),
                        task_enable_docker(node.distribution),
                        if_firewall_available(
                            node.distribution,
                            open_firewall_for_docker_api(node.distribution),
                        ),
                        task_configure_flocker_agent(
                            control_node=cluster.control_node.address,
                            dataset_backend=cluster.dataset_backend,
                            dataset_backend_configuration=(
                                dataset_backend_configuration),
                        ),
                        task_enable_docker_plugin(node.distribution),
                        task_enable_flocker_agent(
                            distribution=node.distribution, ),
                    ]),
                ),
            ]) for certnkey, node in zip(cluster.certificates.nodes,
                                         cluster.agent_nodes)
        ])
    ])
Example #10
    def start_nodes(self, reactor):
        """
        Provision cloud nodes for acceptance tests.

        :return list: List of addresses of nodes to connect to, for acceptance
            tests.
        """
        metadata = {
            'purpose': 'acceptance-testing',
            'distribution': self.distribution,
        }
        metadata.update(self.metadata)

        for index in range(2):
            name = "acceptance-test-%s-%d" % (self.creator, index)
            try:
                print "Creating node %d: %s" % (index, name)
                node = self.provisioner.create_node(
                    name=name,
                    distribution=self.distribution,
                    metadata=metadata,
                )
            except:
                print "Error creating node %d: %s" % (index, name)
                print "It may have leaked into the cloud."
                raise

            yield remove_known_host(reactor, node.address)
            self.nodes.append(node)
            del node

        commands = parallel([
            node.provision(package_source=self.package_source,
                           variants=self.variants)
            for node in self.nodes
        ])
        if self.dataset_backend == DatasetBackend.zfs:
            zfs_commands = parallel([
                configure_zfs(node, variants=self.variants)
                for node in self.nodes
            ])
            commands = commands.on(success=lambda _: zfs_commands)
        yield perform(make_dispatcher(reactor), commands)

        returnValue(self.nodes)
Example #11
def _log_bulk_rcv3(event, steps):
    by_lbs = groupby(lambda s: s[0], concat(s.lb_node_pairs for s in steps))
    effs = [
        cf_msg(event,
               lb_id=lb_id,
               servers=sorted(p[1] for p in pairs))
        for lb_id, pairs in sorted(by_lbs.iteritems())
    ]
    return parallel(effs)
Example #12
def groups_steps(groups, reactor, store, cass_client, authenticator, conf):
    """
    Return a list of ``(group, steps)`` pairs.
    """
    eff = parallel(map(group_steps, groups))
    disp = get_full_dispatcher(
        reactor, authenticator, mock_log(), get_service_configs(conf),
        "kzclient", store, "supervisor", cass_client)
    return perform(disp, eff).addCallback(lambda steps: zip(groups, steps))
Example #13
def groups_steps(groups, reactor, store, cass_client, authenticator, conf):
    """
    Return a list of ``(group, steps)`` pairs.
    """
    eff = parallel(map(group_steps, groups))
    disp = get_full_dispatcher(reactor, authenticator, mock_log(),
                               get_service_configs(conf), "kzclient", store,
                               "supervisor", cass_client)
    return perform(disp, eff).addCallback(lambda steps: zip(groups, steps))
Example #14
def get_orgs_repos(name):
    """
    Fetch ALL of the repos that a user has access to, in any organization.
    """
    req = get_orgs(name)
    req = req.on(
        success=lambda org_names: parallel(map(get_org_repos, org_names)))
    req = req.on(success=lambda repo_lists: reduce(operator.add, repo_lists))
    return req
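`get_orgs_repos` works because a success callback may itself return an Effect: the returned effect is performed and its result is what the next callback in the chain sees. A minimal hedged sketch of that chaining:

from effect import Constant, Effect, base_dispatcher, sync_perform

eff = Effect(Constant(2)).on(
    success=lambda n: Effect(Constant(n * 2))  # returning an Effect chains it
).on(success=lambda n: n + 1)

assert sync_perform(base_dispatcher, eff) == 5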
Example #15
def _log_remove_from_clb(steps):
    lbs = groupby(lambda s: s.lb_id, steps)
    effs = [
        cf_msg('convergence-remove-clb-nodes',
               lb_id=lb,
               nodes=sorted(concat(s.node_ids for s in lbsteps)))
        for lb, lbsteps in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #16
def conv_pause_group_eff(group, transaction_id):
    """
    Pause the scaling group of a convergence-enabled tenant
    """
    eff = parallel([Effect(ModifyGroupStatePaused(group, True)),
                    delete_divergent_flag(group.tenant_id, group.uuid, -1)])
    return with_log(eff, transaction_id=transaction_id,
                    tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)
Example #17
 def test_parallel(self):
     """
     'parallel' results in a list of results of the given effects, in the
     same order that they were passed to parallel.
     """
     d = perform(
         _dispatcher(None),
         parallel([Effect(Constant('a')),
                   Effect(Constant('b'))]))
     self.assertEqual(self.successResultOf(d), ['a', 'b'])
Example #18
 def on_listing_pools(lblist_result):
     _, body = lblist_result
     return parallel([
         service_request(ServiceType.RACKCONNECT_V3, 'GET',
                         append_segments('load_balancer_pools',
                                         lb_pool['id'], 'nodes')).on(
             partial(on_listing_nodes,
                     RCv3Description(lb_id=lb_pool['id'])))
         for lb_pool in body
     ])
Example #19
def conv_resume_group_eff(trans_id, group):
    """
    Resume the scaling group of a convergence-enabled tenant
    """
    eff = parallel([
        Effect(ModifyGroupStatePaused(group, False)),
        mark_divergent(group.tenant_id, group.uuid).on(
            lambda _: msg("mark-dirty-success"))])
    return with_log(eff, transaction_id=trans_id, tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)
Example #20
def get_orgs_repos(name):
    """
    Fetch ALL of the repos that a user has access to, in any organization.

    :return: An Effect resulting in a list of repositories.
    """
    req = get_orgs(name)
    req = req.on(lambda org_names: parallel(map(get_org_repos, org_names)))
    req = req.on(lambda repo_lists: reduce(operator.add, repo_lists))
    return req
Example #21
def _log_set_metadata(steps):
    by_kv = groupby(lambda s: (s.key, s.value), steps)
    effs = [
        cf_msg('convergence-set-server-metadata',
               servers=sorted(s.server_id for s in kvsteps),
               key=key,
               value=value)
        for (key, value), kvsteps in sorted(by_kv.iteritems())
    ]
    return parallel(effs)
Example #22
def _(steps):
    by_cfg = groupby(lambda s: s.server_config, steps)
    effs = [
        cf_msg(
            'convergence-create-servers',
            num_servers=len(cfg_steps),
            server_config=dict(cfg))
        # We sort the items with `thaw` because PMap does not support
        # comparison
        for cfg, cfg_steps in sorted(by_cfg.iteritems(), key=thaw)]
    return parallel(effs)
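The `_log_*` helpers bucket steps with `groupby` (presumably toolz's) and then emit one `cf_msg` per bucket, so a batch of similar steps becomes a single log entry. A tiny hedged sketch of just the grouping:

from toolz import groupby

# groupby buckets a sequence into a plain dict keyed by the key function,
# preserving input order within each bucket.
steps = [('lb-1', 'node-a'), ('lb-2', 'node-b'), ('lb-1', 'node-c')]
by_lb = groupby(lambda s: s[0], steps)
assert by_lb == {'lb-1': [('lb-1', 'node-a'), ('lb-1', 'node-c')],
                 'lb-2': [('lb-2', 'node-b')]}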
Example #23
def _log_add_nodes_clb(steps):
    lbs = defaultdict(list)
    for step in steps:
        for (address, config) in step.address_configs:
            lbs[step.lb_id].append('%s:%s' % (address, config.port))
    effs = [
        cf_msg('convergence-add-clb-nodes',
               lb_id=lb_id, addresses=sorted(addresses))
        for lb_id, addresses in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #24
def get_orgs_repos(name):
    """
    Fetch ALL of the repos that a user has access to, in any organization.
    """
    req = get_orgs(name)
    req = req.on(
        success=lambda org_names:
            parallel(map(get_org_repos, org_names)))
    req = req.on(
        success=lambda repo_lists: reduce(operator.add, repo_lists))
    return req
Example #25
def _log_set_metadata(steps):
    by_kv = groupby(lambda s: (s.key, s.value), steps)
    effs = [
        cf_msg(
            'convergence-set-server-metadata',
            servers=sorted(s.server_id for s in kvsteps),
            key=key, value=value
        )
        for (key, value), kvsteps in sorted(by_kv.iteritems())
    ]
    return parallel(effs)
Example #26
def _(steps):
    by_cfg = groupby(lambda s: s.server_config, steps)
    effs = [
        cf_msg('convergence-create-servers',
               num_servers=len(cfg_steps),
               server_config=dict(cfg))
        # We sort the items with `thaw` because PMap does not support
        # comparison
        for cfg, cfg_steps in sorted(by_cfg.iteritems(), key=thaw)
    ]
    return parallel(effs)
Example #27
def _log_change_clb_node(steps):
    lbs = groupby(lambda s: (s.lb_id, s.condition, s.weight, s.type),
                  steps)
    effs = [
        cf_msg('convergence-change-clb-nodes',
               lb_id=lb,
               nodes=sorted([s.node_id for s in grouped_steps]),
               condition=condition.name, weight=weight, type=node_type.name)
        for (lb, condition, weight, node_type), grouped_steps
        in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #28
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + "/" + p)) for p in children)
    yield do_return([c_and_s for c_and_s in zip(children, stats) if c_and_s[1] is not None])
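Several of these examples (`get_clb_contents`, `perform_get_children_with_stats`, `converge_all_groups`) are generator functions that yield effects and finish with `do_return`; in the original projects they are wrapped with the `@do` decorator from `effect.do`, which turns such a generator into a function returning a single Effect. A minimal hedged sketch:

from effect import Constant, Effect, base_dispatcher, sync_perform
from effect.do import do, do_return

@do
def add_two(x):
    # Each yielded Effect is performed and its result is sent back in.
    a = yield Effect(Constant(1))
    b = yield Effect(Constant(1))
    yield do_return(x + a + b)  # do_return supplies the overall result

assert sync_perform(base_dispatcher, add_two(40)) == 42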
Example #29
def _log_add_nodes_clb(steps):
    lbs = defaultdict(list)
    for step in steps:
        for (address, config) in step.address_configs:
            lbs[step.lb_id].append('%s:%s' % (address, config.port))
    effs = [
        cf_msg('convergence-add-clb-nodes',
               lb_id=lb_id,
               addresses=sorted(addresses))
        for lb_id, addresses in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #30
def conv_pause_group_eff(group, transaction_id):
    """
    Pause the scaling group of a convergence-enabled tenant
    """
    eff = parallel([
        Effect(ModifyGroupStatePaused(group, True)),
        delete_divergent_flag(group.tenant_id, group.uuid, -1)
    ])
    return with_log(eff,
                    transaction_id=transaction_id,
                    tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)
Example #31
 def on_listing_pools(lblist_result):
     _, body = lblist_result
     return parallel([
         service_request(
             ServiceType.RACKCONNECT_V3, 'GET',
             append_segments('load_balancer_pools', lb_pool['id'],
                             'nodes')).on(
                                 partial(
                                     on_listing_nodes,
                                     RCv3Description(lb_id=lb_pool['id'])))
         for lb_pool in body
     ])
Example #32
def configure_cluster(cluster, dataset_backend_configuration):
    """
    Configure flocker-control, flocker-dataset-agent and
    flocker-container-agent on a collection of nodes.

    :param Cluster cluster: Description of the cluster to configure.

    :param dict dataset_backend_configuration: Configuration parameters to
        supply to the dataset backend.
    """
    return sequence([
        run_remotely(
            username='******',
            address=cluster.control_node.address,
            commands=sequence([
                task_install_control_certificates(
                    cluster.certificates.cluster.certificate,
                    cluster.certificates.control.certificate,
                    cluster.certificates.control.key),
                task_enable_flocker_control(cluster.control_node.distribution),
                ]),
        ),
        parallel([
            sequence([
                run_remotely(
                    username='******',
                    address=node.address,
                    commands=sequence([
                        task_install_node_certificates(
                            cluster.certificates.cluster.certificate,
                            certnkey.certificate,
                            certnkey.key),
                        task_install_api_certificates(
                            cluster.certificates.user.certificate,
                            cluster.certificates.user.key),
                        task_enable_docker(node.distribution),
                        task_configure_flocker_agent(
                            control_node=cluster.control_node.address,
                            dataset_backend=cluster.dataset_backend,
                            dataset_backend_configuration=(
                                dataset_backend_configuration
                            ),
                        ),
                        task_enable_docker_plugin(node.distribution),
                        task_enable_flocker_agent(
                            distribution=node.distribution,
                        )]),
                    ),
            ]) for certnkey, node
            in zip(cluster.certificates.nodes, cluster.agent_nodes)
        ])
    ])
Example #33
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)
    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node
    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example #34
def _log_change_clb_node(steps):
    lbs = groupby(lambda s: (s.lb_id, s.condition, s.weight, s.type), steps)
    effs = [
        cf_msg('convergence-change-clb-nodes',
               lb_id=lb,
               nodes=sorted([s.node_id for s in grouped_steps]),
               condition=condition.name,
               weight=weight,
               type=node_type.name)
        for (lb, condition, weight,
             node_type), grouped_steps in sorted(lbs.iteritems())
    ]
    return parallel(effs)
Example #35
def conv_resume_group_eff(trans_id, group):
    """
    Resume the scaling group of a convergence-enabled tenant
    """
    eff = parallel([
        Effect(ModifyGroupStatePaused(group, False)),
        mark_divergent(group.tenant_id,
                       group.uuid).on(lambda _: msg("mark-dirty-success"))
    ])
    return with_log(eff,
                    transaction_id=trans_id,
                    tenant_id=group.tenant_id,
                    scaling_group_id=group.uuid).on(lambda _: None)
Example #36
 async def test_parallel(self, dispatcher):
     """
     'parallel' results in a list of results of the given effects, in the
     same order that they were passed to parallel.
     """
     d = await asyncio_perform(
         dispatcher,
         parallel([
             Effect(Constant('a')),
             Effect(
                 Delay(0.01)).on(success=lambda _: Effect(Constant('...'))),
             Effect(Constant('b'))
         ]))
     assert d == ['a', '...', 'b']
Example #37
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + '/' + p)) for p in children)
    yield do_return([
        c_and_s for c_and_s in zip(children, stats) if c_and_s[1] is not None
    ])
Example #38
def log_steps(steps):
    """
    Log some steps (to cloud feeds).

    In general this tries to reduce the number of Log calls to a reasonable
    minimum, based on how steps are usually used. For example, multiple
    :obj:`SetMetadataItemOnServer` that are setting the same key/value on a
    server will be merged into one Log call that shows all the servers being
    affected.
    """
    steps_by_type = groupby(type, steps)
    effs = []
    for step_type, typed_steps in steps_by_type.iteritems():
        if step_type in _loggers:
            effs.append(_loggers[step_type](typed_steps))
    return parallel(effs)
Example #40
def _run_on_all_nodes(nodes, task):
    """
    Run some commands on some nodes.

    :param nodes: An iterable of ``Node`` instances where the commands should
        be run.
    :param task: A one-argument callable which is called with each ``Node`` and
        should return the ``Effect`` to run on that node.

    :return: An ``Effect`` that runs the commands on a group of nodes.
    """
    return parallel(
        list(
            run_remotely(
                username='******',
                address=node.address,
                commands=task(node),
            ) for node in nodes))
Example #41
    async def test_parallel_with_error(self, dispatcher):
        """
        'parallel' results in a list of results of the given effects, in the
        same order that they were passed to parallel.
        """
        @do
        def fail():
            yield Effect(Delay(0.01))
            raise RuntimeError('My error')

        future = asyncio_perform(
            dispatcher, parallel([
                Effect(Delay(1)),
                Effect(Delay(1)),
                fail(),
            ]))
        with pytest.raises(FirstError):
            await future
Example #42
def _run_on_all_nodes(nodes, task):
    """
    Run some commands on some nodes.

    :param nodes: An iterable of ``Node`` instances where the commands should
        be run.
    :param task: A one-argument callable which is called with each ``Node`` and
        should return the ``Effect`` to run on that node.

    :return: An ``Effect`` that runs the commands on a group of nodes.
    """
    return parallel(list(
        run_remotely(
            username='******',
            address=node.address,
            commands=task(node),
        )
        for node in nodes
    ))
Example #43
def get_all_convergence_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all data relevant for convergence w.r.t. the given time,
    in parallel where possible.

    Returns an Effect of ([NovaServer], [LBNode]).
    """
    eff = parallel(
        [get_scaling_group_servers(tenant_id, group_id, now)
         .on(map(NovaServer.from_server_details_json)).on(list),
         get_clb_contents(),
         get_rcv3_contents()]
    ).on(lambda (servers, clb, rcv3): (servers, list(concat([clb, rcv3]))))
    return eff
Example #44
def get_all_launch_server_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all launch_server data relevant for convergence w.r.t. the given
    time, in parallel where possible.

    Returns an Effect of {'servers': [NovaServer], 'lb_nodes': [LBNode]}.
    """
    eff = parallel([
        get_scaling_group_servers(tenant_id, group_id, now).on(
            map(NovaServer.from_server_details_json)).on(list),
        get_clb_contents(),
        get_rcv3_contents()
    ]).on(lambda (servers, clb, rcv3): {
        'servers': servers,
        'lb_nodes': list(concat([clb, rcv3]))
    })
    return eff
Example #45
def get_all_launch_server_data(
        tenant_id,
        group_id,
        now,
        get_scaling_group_servers=get_scaling_group_servers,
        get_clb_contents=get_clb_contents,
        get_rcv3_contents=get_rcv3_contents):
    """
    Gather all launch_server data relevant for convergence w.r.t. the given
    time, in parallel where possible.

    Returns an Effect of {'servers': [NovaServer], 'lb_nodes': [LBNode],
                          'lbs': pmap(LB_ID -> CLB)}.
    """
    return parallel(
        [get_scaling_group_servers(tenant_id, group_id, now)
         .on(map(NovaServer.from_server_details_json)).on(list),
         get_clb_contents(),
         get_rcv3_contents()]
    ).on(lambda (servers, clb_nodes_and_clbs, rcv3_nodes): {
        'servers': servers,
        'lb_nodes': clb_nodes_and_clbs[0] + rcv3_nodes,
        'lbs': clb_nodes_and_clbs[1]
    })
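The result-shaping callbacks in the last few examples use Python 2 tuple-parameter unpacking (`lambda (servers, clb, rcv3): ...`), which Python 3 removed. If ported, the same shaping could be written as an ordinary (hypothetical) helper that unpacks the list produced by `parallel`, roughly:

def _shape(results):
    # `results` arrives in the order the effects were passed to parallel():
    # scaling-group servers, CLB contents, RCv3 contents.
    servers, clb_nodes_and_clbs, rcv3_nodes = results
    return {
        'servers': servers,
        'lb_nodes': clb_nodes_and_clbs[0] + rcv3_nodes,
        'lbs': clb_nodes_and_clbs[1],
    }

# attached the same way: parallel([...]).on(_shape)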
Example #46
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    try:
        nodes = yield runner.start_nodes(reactor)

        ca_directory = FilePath(mkdtemp())
        print("Generating certificates in: {}".format(ca_directory.path))
        certificates = Certificates.generate(ca_directory, nodes[0].address,
                                             len(nodes))

        yield perform(
            make_dispatcher(reactor),
            parallel([
                run_remotely(
                    username='******',
                    address=node.address,
                    commands=task_pull_docker_images()
                ) for node in nodes
            ]),
        )

        control_node = nodes[0]
        dataset_backend = options.dataset_backend

        yield perform(
            make_dispatcher(reactor),
            configure_cluster(control_node=control_node, agent_nodes=nodes,
                              certificates=certificates,
                              dataset_backend=dataset_backend))

        result = yield run_tests(
            reactor=reactor,
            nodes=nodes,
            control_node=control_node,
            agent_nodes=nodes,
            dataset_backend=dataset_backend,
            trial_args=options['trial-args'],
            certificates_path=ca_directory)
    except:
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not (result != 0 and options['keep']):
            runner.stop_nodes(reactor)
        elif options['keep']:
            print "--keep specified, not destroying nodes."
            print ("To run acceptance tests against these nodes, "
                   "set the following environment variables: ")

            environment_variables = {
                'FLOCKER_ACCEPTANCE_NODES':
                    ':'.join(node.address for node in nodes),
                'FLOCKER_ACCEPTANCE_CONTROL_NODE': control_node.address,
                'FLOCKER_ACCEPTANCE_AGENT_NODES':
                    ':'.join(node.address for node in nodes),
                'FLOCKER_ACCEPTANCE_VOLUME_BACKEND': dataset_backend.name,
                'FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH': ca_directory.path,
            }

            for environment_variable in environment_variables:
                print "export {name}={value};".format(
                    name=environment_variable,
                    value=environment_variables[environment_variable],
                )

    raise SystemExit(result)
Example #47
    def start_cluster(self, reactor):
        """
        Provision cloud cluster for acceptance tests.

        :return Cluster: The cluster to connect to for acceptance tests.
        """
        metadata = {
            'distribution': self.distribution,
        }
        metadata.update(self.identity.metadata)
        metadata.update(self.metadata)

        # Try to make names unique even if the same creator is starting
        # multiple clusters at the same time.  This lets other code use the
        # name as a way to identify nodes.  This is only necessary in one
        # place, the node creation code, to perform cleanup when the create
        # operation fails in a way such that it isn't clear if the instance has
        # been created or not.
        random_tag = os.urandom(8).encode("base64").strip("\n=")
        print "Assigning random tag:", random_tag

        for index in range(self.num_nodes):
            name = "%s-%s-%s-%d" % (
                self.identity.prefix, self.creator, random_tag, index,
            )
            try:
                print "Creating node %d: %s" % (index, name)
                node = self.provisioner.create_node(
                    name=name,
                    distribution=self.distribution,
                    metadata=metadata,
                )
            except:
                print "Error creating node %d: %s" % (index, name)
                print "It may have leaked into the cloud."
                raise

            yield remove_known_host(reactor, node.address)
            self.nodes.append(node)
            del node

        commands = parallel([
            node.provision(package_source=self.package_source,
                           variants=self.variants)
            for node in self.nodes
        ])
        if self.dataset_backend == DatasetBackend.zfs:
            zfs_commands = parallel([
                configure_zfs(node, variants=self.variants)
                for node in self.nodes
            ])
            commands = commands.on(success=lambda _: zfs_commands)

        yield perform(make_dispatcher(reactor), commands)

        cluster = yield configured_cluster_for_nodes(
            reactor,
            generate_certificates(
                self.identity.name,
                self.identity.id,
                self.nodes,
                self.cert_path,
            ),
            self.nodes,
            self.dataset_backend,
            self.dataset_backend_configuration,
            _save_backend_configuration(self.dataset_backend,
                                        self.dataset_backend_configuration),
            logging_config=self.config.get('logging'),
        )

        returnValue(cluster)
Example #48
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    results = []

    setup_succeeded = False
    reached_finally = False

    def cluster_cleanup():
        if not reached_finally:
            print "interrupted..."
        print "stopping cluster"
        return runner.stop_cluster(reactor)

    cleanup_trigger_id = reactor.addSystemEventTrigger('before', 'shutdown',
                                                       cluster_cleanup)

    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)
        if options['distribution'] in ('centos-7',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_journal(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        elif options['distribution'] in ('ubuntu-14.04',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                results.append(capture_upstart(reactor,
                                               node.address,
                                               remote_logs_file)
                               )
        gather_deferreds(results)

        if not options["no-pull"]:
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(
                        username='******',
                        address=node.address,
                        commands=task_pull_docker_images()
                    ) for node in cluster.agent_nodes
                ]),
            )

        setup_succeeded = True
        result = yield run_tests(
            reactor=reactor,
            cluster=cluster,
            trial_args=options['trial-args'])

    finally:
        reached_finally = True
        # We delete the nodes if the user hasn't asked to keep them
        # or if we failed to provision the cluster.
        if not setup_succeeded:
            print "cluster provisioning failed"
        elif not options['keep']:
            print "not keeping cluster"
        else:
            print "--keep specified, not destroying nodes."
            print ("To run acceptance tests against these nodes, "
                   "set the following environment variables: ")

            environment_variables = get_trial_environment(cluster)

            for environment_variable in environment_variables:
                print "export {name}={value};".format(
                    name=environment_variable,
                    value=shell_quote(
                        environment_variables[environment_variable]),
                )
            reactor.removeSystemEventTrigger(cleanup_trigger_id)

    raise SystemExit(result)
Example #49
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_file = open("%s.log" % base_path.basename(), "a")
    log_writer = eliot_logging_service(log_file=log_file,
                                       reactor=reactor,
                                       capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger('before', 'shutdown', log_writer.stopService)

    cluster = None
    try:
        cluster = yield runner.start_cluster(reactor)

        if options['distribution'] in ('centos-7', ):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                capture_journal(reactor, node.address, remote_logs_file)

        if not options["no-pull"]:
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(username='******',
                                 address=node.address,
                                 commands=task_pull_docker_images())
                    for node in cluster.agent_nodes
                ]),
            )

        result = yield run_tests(reactor=reactor,
                                 cluster=cluster,
                                 trial_args=options['trial-args'])
    except:
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not options['keep']:
            runner.stop_cluster(reactor)
        else:
            print "--keep specified, not destroying nodes."
            if cluster is None:
                print("Didn't finish creating the cluster.")
            else:
                print(
                    "To run acceptance tests against these nodes, "
                    "set the following environment variables: ")

                environment_variables = get_trial_environment(cluster)

                for environment_variable in environment_variables:
                    print "export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    )

    raise SystemExit(result)
Example #50
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as a list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects keyed by LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """

    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids
    ]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id for (node, feed) in nodes_to_feeds.items()
        if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((list(filter(bool, nodes)),
                     pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
Example #51
def main(reactor, args, base_path, top_level):
    """
    :param reactor: Reactor to use.
    :param list args: The arguments passed to the script.
    :param FilePath base_path: The executable being run.
    :param FilePath top_level: The top-level of the flocker repository.
    """
    options = RunOptions(top_level=top_level)

    add_destination(eliot_output)
    try:
        options.parseOptions(args)
    except UsageError as e:
        sys.stderr.write("%s: %s\n" % (base_path.basename(), e))
        raise SystemExit(1)

    runner = options.runner

    from flocker.common.script import eliot_logging_service
    log_writer = eliot_logging_service(
        destination=FileDestination(
            file=open("%s.log" % (base_path.basename(),), "a")
        ),
        reactor=reactor,
        capture_stdout=False)
    log_writer.startService()
    reactor.addSystemEventTrigger(
        'before', 'shutdown', log_writer.stopService)

    cluster = None
    try:
        yield runner.ensure_keys(reactor)
        cluster = yield runner.start_cluster(reactor)

        if options['distribution'] in ('centos-7',):
            remote_logs_file = open("remote_logs.log", "a")
            for node in cluster.all_nodes:
                capture_journal(reactor, node.address, remote_logs_file)

        if not options["no-pull"]:
            yield perform(
                make_dispatcher(reactor),
                parallel([
                    run_remotely(
                        username='******',
                        address=node.address,
                        commands=task_pull_docker_images()
                    ) for node in cluster.agent_nodes
                ]),
            )

        result = yield run_tests(
            reactor=reactor,
            cluster=cluster,
            trial_args=options['trial-args'])
    except:
        result = 1
        raise
    finally:
        # Unless the tests failed, and the user asked to keep the nodes, we
        # delete them.
        if not options['keep']:
            runner.stop_cluster(reactor)
        else:
            print "--keep specified, not destroying nodes."
            if cluster is None:
                print ("Didn't finish creating the cluster.")
            else:
                print ("To run acceptance tests against these nodes, "
                       "set the following environment variables: ")

                environment_variables = get_trial_environment(cluster)

                for environment_variable in environment_variables:
                    print "export {name}={value};".format(
                        name=environment_variable,
                        value=shell_quote(
                            environment_variables[environment_variable]),
                    )

    raise SystemExit(result)
Example #52
def converge_all_groups(currently_converging,
                        recently_converged,
                        waiting,
                        my_buckets,
                        all_buckets,
                        divergent_flags,
                        build_timeout,
                        interval,
                        limited_retry_iterations,
                        step_limits,
                        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the
    buckets we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for a server to be
        in building before it is timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        A group will not be converged if less than this amount of time has
        passed since the end of its last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(my_buckets, all_buckets,
                                          divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups',
              group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(
        recently_converged, interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))

    yield do_return(parallel(effs))
Example #53
def converge_all_groups(
        currently_converging, recently_converged, waiting,
        my_buckets, all_buckets,
        divergent_flags, build_timeout, interval,
        limited_retry_iterations, step_limits,
        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the
    buckets we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for a server to be
        in building before it is timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        A group will not be converged if less than this amount of time has
        passed since the end of its last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(
        my_buckets, all_buckets, divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting,
                                     tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(recently_converged,
                                                        interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))

    yield do_return(parallel(effs))