Example #1
    def launch_instances(self, ctx, cluster, target_count):
        # create all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[0]})
        LOG.info(g.format_cluster_status(cluster))

        tmpl = heat.ClusterTemplate(cluster)

        self._configure_template(ctx, tmpl, cluster, target_count)
        stack = tmpl.instantiate(update_existing=self.UPDATE_STACK)
        stack.wait_till_active()

        self.inst_ids = self._populate_cluster(ctx, cluster, stack)

        # wait until all instances are up and networks are ready
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[1]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[2]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
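Every stage above follows the same two-line step: persist the new status through the conductor, then log the formatted result. A minimal sketch of that recurring pattern as a helper (the helper itself is hypothetical; it assumes the same module-level conductor, g, and LOG objects the example imports):

def set_cluster_status(ctx, cluster, status):
    # cluster_update returns a fresh cluster object, so callers must
    # rebind their local variable to the return value.
    cluster = conductor.cluster_update(ctx, cluster, {"status": status})
    LOG.info(g.format_cluster_status(cluster))
    return cluster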
Example #2
    def scale_cluster(self, cluster, node_group_id_map):
        ctx = context.ctx()

        instance_ids = []
        try:
            instance_ids = self._scale_cluster_instances(cluster,
                                                         node_group_id_map)

            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            instances = g.get_instances(cluster, instance_ids)

            self._await_active(cluster, instances)

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            cluster = conductor.cluster_get(ctx, cluster)

            volumes.attach_to_instances(
                g.get_instances(cluster, instance_ids))

        except Exception as ex:
            with excutils.save_and_reraise_exception():
                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_get(ctx, cluster)
                self._rollback_cluster_scaling(
                    cluster, g.get_instances(cluster, instance_ids), ex)
                instance_ids = []

                cluster = conductor.cluster_get(ctx, cluster)
                g.clean_cluster_from_empty_ng(cluster)
                if cluster.status == 'Decommissioning':
                    cluster = conductor.cluster_update(ctx, cluster,
                                                       {"status": "Error"})
                else:
                    cluster = conductor.cluster_update(ctx, cluster,
                                                       {"status": "Active"})

                LOG.info(g.format_cluster_status(cluster))

        # at this point the cluster should be valid: if instance creation
        # failed, all extra instances were removed by the rollback above
        if instance_ids:
            self._configure_instances(cluster)
        return instance_ids
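The rollback runs inside excutils.save_and_reraise_exception(), an oslo helper that lets cleanup code execute and still re-raises the original error to the caller. A simplified, self-contained sketch of that behavior (not the oslo implementation):

import contextlib
import sys

@contextlib.contextmanager
def save_and_reraise():
    # Capture the exception currently being handled, let the with-block
    # run its cleanup, then re-raise the original error afterwards.
    exc_type, exc_value, exc_tb = sys.exc_info()
    yield
    if exc_value is not None:
        raise exc_value.with_traceback(exc_tb)

If the cleanup block itself raises, that new exception propagates instead; the real oslo helper additionally logs the original one in that case.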
Example #3
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        ips_assigned = set()
        while len(ips_assigned) != len(instances):
            if not g.check_cluster_exists(instances[0].node_group.cluster):
                return
            for instance in instances:
                if instance.id not in ips_assigned:
                    if networks.init_instances_ips(instance):
                        ips_assigned.add(instance.id)

            context.sleep(1)

        LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
        instances = g.get_instances(cluster, ips_assigned)

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
Example #4
    def shutdown_cluster(self, cluster):
        """Shutdown specified cluster and all related resources."""
        try:
            heat.client().stacks.delete(cluster.name)
        except heat_exc.HTTPNotFound:
            LOG.warn("Did not found stack for cluster %s" % cluster.name)

        self._clean_job_executions(cluster)

        ctx = context.ctx()
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)
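Deleting the Heat stack tolerates HTTPNotFound, which makes the shutdown idempotent: calling it twice, or after a half-finished creation, is safe. The same pattern in a generic, self-contained sketch (names hypothetical):

def delete_if_exists(delete_fn, name, not_found_exc=KeyError):
    # Treat "already gone" as success during teardown instead of failing.
    try:
        delete_fn(name)
        return True
    except not_found_exc:
        return False

Usage would mirror the example: delete_if_exists(heat.client().stacks.delete, cluster.name, heat_exc.HTTPNotFound).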
Example #5
    def _populate_cluster(self, ctx, cluster, stack):
        old_ids = [i.instance_id for i in g.get_instances(cluster)]

        new_ids = []

        for node_group in cluster.node_groups:
            nova_ids = stack.get_node_group_instances(node_group)
            for name, nova_id in nova_ids:
                if nova_id not in old_ids:
                    instance_id = conductor.instance_add(
                        ctx, node_group, {"instance_id": nova_id, "instance_name": name}
                    )
                    new_ids.append(instance_id)

        return new_ids
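One nit worth knowing: old_ids is a list, so nova_id not in old_ids scans it linearly for every new instance. For large clusters the idiomatic variant builds a set for O(1) membership tests:

old_ids = {i.instance_id for i in g.get_instances(cluster)}

The rest of the method is unchanged; only the lookup cost differs.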
Example #6
def _provision_scaled_cluster(id, node_group_id_map):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # Decommissioning surplus nodes with the plugin

    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "Decommissioning"})
    LOG.info(g.format_cluster_status(cluster))

    instances_to_delete = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[new_count:
                                                        node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Scaling"})
    LOG.info(g.format_cluster_status(cluster))

    instances = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin

    if instances:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Configuring"})
        LOG.info(g.format_cluster_status(cluster))
        try:
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error"})
            LOG.info(g.format_cluster_status(cluster))
            return

    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
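Surplus nodes are chosen by slicing each group's instance list past its new target count; a slice that starts at or beyond the end is simply empty, so groups that grow or stay the same contribute nothing. A self-contained illustration:

def surplus(instances, new_count):
    # Everything past the new target count is scheduled for removal.
    return instances[new_count:]

assert surplus(["a", "b", "c", "d"], 2) == ["c", "d"]
assert surplus(["a", "b"], 3) == []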
Example #7
    def test_delete_floating_ips(self, novaclient):

        nova = _create_nova_mock(novaclient)

        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2,
                                     "pool")]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["datanode"])
        self.engine._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = g.get_instances(cluster)

        self.engine._assign_floating_ips(instances_list)

        self.engine._shutdown_instances(cluster)
        self.assertEqual(nova.floating_ips.delete.call_count, 2,
                         "Unexpected number of floating IPs deleted")
        self.assertEqual(nova.servers.delete.call_count, 2,
                         "Unexpected number of servers deleted")
Example #8
    def test_delete_floating_ips(self):
        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2, 'pool')]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["datanode"])
        self.engine._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = g.get_instances(cluster)

        self.engine._assign_floating_ips(instances_list)

        self.engine._shutdown_instances(cluster)
        self.assertEqual(self.nova.floating_ips.delete.call_count, 2,
                         "Unexpected number of floating IPs deleted")
        self.assertEqual(self.nova.servers.delete.call_count, 2,
                         "Unexpected number of servers deleted")
Example #9
    def create_cluster(self, cluster):
        ctx = context.ctx()
        try:
            # create all instances
            conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
            LOG.info(g.format_cluster_status(cluster))
            self._create_instances(cluster)

            # wait until all instances are up and networks are ready
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Waiting"})
            LOG.info(g.format_cluster_status(cluster))

            instances = g.get_instances(cluster)

            self._await_active(cluster, instances)

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            cluster = conductor.cluster_get(ctx, cluster)

            # attach volumes
            volumes.attach(cluster)

            # prepare all instances
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Preparing"})
            LOG.info(g.format_cluster_status(cluster))

            self._configure_instances(cluster)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_update(
                    ctx, cluster, {
                        "status": "Error",
                        "status_description": str(ex)
                    })
                LOG.info(g.format_cluster_status(cluster))
                self._rollback_cluster_creation(cluster, ex)
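On failure the example records both an "Error" status and the stringified exception in status_description, so the failure reason survives in the cluster record rather than only in the logs. A minimal sketch of that update (the dataclass is hypothetical; the field names come from the example):

import dataclasses

@dataclasses.dataclass
class ClusterRecord:
    status: str = "Spawning"
    status_description: str = ""

def mark_failed(record, exc):
    record.status = "Error"
    record.status_description = str(exc)
    return record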
Example #10
    def test_ip_assignment_use_no_floating(self):
        self.override_config("use_floating_ips", False)

        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2,
                                     'pool'),
                       _make_ng_dict("test_group_2", "test_flavor",
                                     ["name node", "test tracker"], 1)]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["data node"])
        self.engine._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = g.get_instances(cluster)

        self.engine._assign_floating_ips(instances_list)

        self.nova.floating_ips.create.assert_has_calls(
            [mock.call("pool"), mock.call("pool")])

        self.assertEqual(self.nova.floating_ips.create.call_count, 2,
                         "Unexpected number of floating IPs created")
Example #11
    def test_ip_assignment_use_no_floating(self, cfg, novaclient):
        cfg.CONF.set_override("use_floating_ips", False)
        try:
            nova = _create_nova_mock(novaclient)

            node_groups = [
                _make_ng_dict("test_group_1", "test_flavor",
                              ["data node", "test tracker"], 2, "pool"),
                _make_ng_dict("test_group_2", "test_flavor",
                              ["name node", "test tracker"], 1),
            ]

            ctx = context.ctx()
            cluster = _create_cluster_mock(node_groups, ["data node"])
            self.engine._create_instances(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            instances_list = g.get_instances(cluster)

            self.engine._assign_floating_ips(instances_list)

            nova.floating_ips.create.assert_has_calls(
                [mock.call("pool"), mock.call("pool")])

            self.assertEqual(nova.floating_ips.create.call_count, 2,
                             "Unexpected number of floating IPs created")
        finally:
            cfg.CONF.clear_override("use_floating_ips")
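Example #11 undoes its config override in a finally block, while Example #10 gets the same effect from a self.override_config helper in its test base class. With oslo.config the usual self-cleaning form registers the undo up front (a hedged sketch; the helper name is hypothetical, set_override/clear_override are real oslo.config calls):

def override_config(test_case, conf, name, value):
    # Apply the override and schedule its removal when the test ends,
    # replacing the explicit try/finally above.
    conf.set_override(name, value)
    test_case.addCleanup(conf.clear_override, name)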