Example #1
    def create_cluster(self, cluster):
        ctx = context.ctx()
        self._update_rollback_strategy(cluster, shutdown=True)

        # create all instances
        cluster = g.change_cluster_status(cluster, "Spawning")
        self._create_instances(cluster)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, "Waiting")
        instances = g.get_instances(cluster)

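        # wait for every instance to become active, then assign floating IPs
        # and wait for the instance networks to come up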
        self._await_active(cluster, instances)

        self._assign_floating_ips(instances)

        self._await_networks(cluster, instances)

        cluster = conductor.cluster_get(ctx, cluster)

        # attach volumes
        volumes.attach_to_instances(g.get_instances(cluster))

        # prepare all instances
        cluster = g.change_cluster_status(cluster, "Preparing")

        self._configure_instances(cluster)

        self._update_rollback_strategy(cluster)
Example #2
    def _launch_instances(self,
                          cluster,
                          target_count,
                          stages,
                          update_stack=False,
                          disable_rollback=True):
        # create all instances
        cluster = g.change_cluster_status(cluster, stages[0])

        inst_ids = self._create_instances(cluster, target_count, update_stack,
                                          disable_rollback)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, stages[1])

        instances = g.get_instances(cluster, inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, stages[2])

        instances = g.get_instances(cluster, inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)

        return inst_ids
Example #3
    def test_get_instances(self):
        cluster = self._make_sample()
        ctx = context.ctx()
        idx = 0
        ids = []
        for ng in cluster.node_groups:
            for i in range(ng.count):
                idx += 1
                ids.append(self.api.instance_add(context.ctx(), ng, {
                    'instance_id': str(idx),
                    'instance_name': str(idx),
                }))
        cluster = self.api.cluster_get(ctx, cluster)
        instances = general.get_instances(cluster, ids)
        ids = set()
        for inst in instances:
            ids.add(inst.instance_id)
        self.assertEqual(idx, len(ids))
        for i in range(1, idx + 1):
            self.assertIn(str(i), ids)

        instances = general.get_instances(cluster)
        ids = set()
        for inst in instances:
            ids.add(inst.instance_id)
        self.assertEqual(idx, len(ids))
        for i in range(1, idx + 1):
            self.assertIn(str(i), ids)
Example #4
    def launch_instances(self, ctx, cluster, target_count):
        # create all instances
        cluster = conductor.cluster_update(ctx, cluster, {"status": self.STAGES[0]})
        LOG.info(g.format_cluster_status(cluster))

        tmpl = heat.ClusterTemplate(cluster)

        self._configure_template(ctx, tmpl, cluster, target_count)
        stack = tmpl.instantiate(update_existing=self.UPDATE_STACK)
        stack.wait_till_active()

        self.inst_ids = self._populate_cluster(ctx, cluster, stack)

        # wait until all instances are up and networks are ready
        cluster = conductor.cluster_update(ctx, cluster, {"status": self.STAGES[1]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster, {"status": self.STAGES[2]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #5
    def scale_cluster(self, cluster, node_group_id_map):
        ctx = context.ctx()
        cluster = g.change_cluster_status(cluster, "Scaling")

        instance_ids = self._scale_cluster_instances(cluster,
                                                     node_group_id_map)

        self._update_rollback_strategy(cluster, instance_ids=instance_ids)

        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster, instance_ids)

        self._await_active(cluster, instances)

        self._assign_floating_ips(instances)

        self._await_networks(cluster, instances)

        cluster = conductor.cluster_get(ctx, cluster)

        volumes.attach_to_instances(g.get_instances(cluster, instance_ids))

        # we should reach this point with a valid cluster: if instance
        # creation failed, all extra instances were removed above
        if instance_ids:
            self._configure_instances(cluster)

        self._update_rollback_strategy(cluster)

        return instance_ids
Example #6
    def launch_instances(self, cluster, target_count):
        # create all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[0])

        tmpl = heat.ClusterTemplate(cluster)

        self._configure_template(tmpl, cluster, target_count)
        stack = tmpl.instantiate(update_existing=self.UPDATE_STACK,
                                 disable_rollback=self.DISABLE_ROLLBACK)
        heat.wait_stack_completion(stack.heat_stack)

        self.inst_ids = self._populate_cluster(cluster, stack)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, self.STAGES[1])

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[2])

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #7
    def launch_instances(self, cluster, target_count):
        # create all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[0])

        tmpl = heat.ClusterTemplate(cluster)

        self._configure_template(tmpl, cluster, target_count)
        stack = tmpl.instantiate(update_existing=self.UPDATE_STACK,
                                 disable_rollback=self.DISABLE_ROLLBACK)
        heat.wait_stack_completion(stack.heat_stack)

        self.inst_ids = self._populate_cluster(cluster, stack)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, self.STAGES[1])

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[2])

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #8
    def create_cluster(self, cluster):
        ctx = context.ctx()
        self._update_rollback_strategy(cluster, shutdown=True)

        # create all instances
        cluster = g.change_cluster_status(cluster, "Spawning")
        self._create_instances(cluster)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, "Waiting")
        instances = g.get_instances(cluster)

        self._await_active(cluster, instances)

        self._assign_floating_ips(instances)

        self._await_networks(cluster, instances)

        cluster = conductor.cluster_get(ctx, cluster)

        # attach volumes
        volumes.attach_to_instances(g.get_instances(cluster))

        # prepare all instances
        cluster = g.change_cluster_status(cluster, "Preparing")

        self._configure_instances(cluster)

        self._update_rollback_strategy(cluster)
Example #9
    def scale_cluster(self, cluster, node_group_id_map):
        ctx = context.ctx()
        cluster = g.change_cluster_status(cluster, "Scaling")

        instance_ids = self._scale_cluster_instances(cluster,
                                                     node_group_id_map)

        self._update_rollback_strategy(cluster, instance_ids=instance_ids)

        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster, instance_ids)

        self._await_active(cluster, instances)

        self._assign_floating_ips(instances)

        self._await_networks(cluster, instances)

        cluster = conductor.cluster_get(ctx, cluster)

        volumes.attach_to_instances(
            g.get_instances(cluster, instance_ids))

        # we should reach this point with a valid cluster: if instance
        # creation failed, all extra instances were removed above
        if instance_ids:
            self._configure_instances(cluster)

        self._update_rollback_strategy(cluster)

        return instance_ids
Example #10
    def launch_instances(self, ctx, cluster, target_count):
        # create all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[0]})
        LOG.info(g.format_cluster_status(cluster))

        tmpl = heat.ClusterTemplate(cluster)

        self._configure_template(ctx, tmpl, cluster, target_count)
        stack = tmpl.instantiate(update_existing=self.UPDATE_STACK)
        stack.wait_till_active()

        self.inst_ids = self._populate_cluster(ctx, cluster, stack)

        # wait until all instances are up and networks are ready
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[1]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[2]})
        LOG.info(g.format_cluster_status(cluster))

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #11
    def scale_cluster(self, cluster, node_group_id_map):
        ctx = context.ctx()

        instance_ids = []
        try:
            instance_ids = self._scale_cluster_instances(cluster,
                                                         node_group_id_map)

            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            instances = g.get_instances(cluster, instance_ids)

            self._await_active(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return []

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return []

            cluster = conductor.cluster_get(ctx, cluster)

            volumes.attach_to_instances(
                g.get_instances(cluster, instance_ids))

        except Exception as ex:
            with excutils.save_and_reraise_exception():
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return []

                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_get(ctx, cluster)
                self._rollback_cluster_scaling(
                    cluster, g.get_instances(cluster, instance_ids), ex)
                instance_ids = []

                cluster = conductor.cluster_get(ctx, cluster)
                g.clean_cluster_from_empty_ng(cluster)
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Active"})

                LOG.info(g.format_cluster_status(cluster))

        # we should reach this point with a valid cluster: if instance
        # creation failed, all extra instances were removed above
        if instance_ids:
            self._configure_instances(cluster)
        return instance_ids
Example #12
    def create_cluster(self, cluster):
        ctx = context.ctx()
        try:
            # create all instances
            conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
            LOG.info(g.format_cluster_status(cluster))
            self._create_instances(cluster)

            # wait until all instances are up and networks are ready
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Waiting"})
            LOG.info(g.format_cluster_status(cluster))

            instances = g.get_instances(cluster)

            self._await_active(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return

            cluster = conductor.cluster_get(ctx, cluster)

            # attach volumes
            volumes.attach_to_instances(g.get_instances(cluster))

            # prepare all instances
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Preparing"})
            LOG.info(g.format_cluster_status(cluster))

            self._configure_instances(cluster)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return

                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_update(
                    ctx, cluster, {"status": "Error",
                                   "status_description": str(ex)})
                LOG.info(g.format_cluster_status(cluster))
                self._rollback_cluster_creation(cluster, ex)
Example #13
    def scale_cluster(self, cluster, node_group_id_map):
        ctx = context.ctx()

        instance_ids = []
        try:
            instance_ids = self._scale_cluster_instances(cluster,
                                                         node_group_id_map)

            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            instances = g.get_instances(cluster, instance_ids)

            self._await_active(cluster, instances)

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            cluster = conductor.cluster_get(ctx, cluster)

            volumes.attach_to_instances(
                g.get_instances(cluster, instance_ids))

        except Exception as ex:
            with excutils.save_and_reraise_exception():
                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_get(ctx, cluster)
                self._rollback_cluster_scaling(
                    cluster, g.get_instances(cluster, instance_ids), ex)
                instance_ids = []

                cluster = conductor.cluster_get(ctx, cluster)
                g.clean_cluster_from_empty_ng(cluster)
                if cluster.status == 'Decommissioning':
                    cluster = conductor.cluster_update(ctx, cluster,
                                                       {"status": "Error"})
                else:
                    cluster = conductor.cluster_update(ctx, cluster,
                                                       {"status": "Active"})

                LOG.info(g.format_cluster_status(cluster))

        # we should reach this point with a valid cluster: if instance
        # creation failed, all extra instances were removed above
        if instance_ids:
            self._configure_instances(cluster)
        return instance_ids
Example #14
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

        ips_assigned = set()
        self._ips_assign(ips_assigned, cluster, instances)

        LOG.info(_LI("All instances have IPs assigned"))

        cluster = conductor.cluster_get(context.ctx(), cluster)
        instances = g.get_instances(cluster, ips_assigned)

        cpo.add_provisioning_step(cluster.id,
                                  _("Wait for instance accessibility"),
                                  len(instances))

        with context.ThreadGroup() as tg:
            for instance in instances:
                with context.set_current_instance_id(instance.instance_id):
                    tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                             self._wait_until_accessible, instance)

        LOG.info(_LI("All instances are accessible"))
Example #15
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    instances_to_delete = []

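    # any instances beyond the requested count of a node group are queued for
    # deletion before the infrastructure is scaled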
    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[new_count:
                                                        node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")

    instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if instance_ids:
        cluster = g.change_cluster_status(cluster, "Configuring")
        instances = g.get_instances(cluster, instance_ids)
        plugin.scale_cluster(cluster, instances)

    g.change_cluster_status(cluster, "Active")
Example #16
    def test_attach(self, add_step, add_event,
                    p_create_attach_vol, p_await, p_mount):
        p_create_attach_vol.side_effect = ['/dev/vdb', '/dev/vdc'] * 2
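        # two instances with two volumes each: four create/attach calls, four
        # mounts and one await per instance are expected below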
        p_await.return_value = None
        p_mount.return_value = None
        add_event.return_value = None
        add_step.return_value = None

        instance1 = {'id': '1',
                     'instance_id': '123',
                     'instance_name': 'inst_1'}

        instance2 = {'id': '2',
                     'instance_id': '456',
                     'instance_name': 'inst_2'}

        ng = {'volumes_per_node': 2,
              'volumes_size': 2,
              'volumes_availability_zone': None,
              'volume_mount_prefix': '/mnt/vols',
              'volume_type': None,
              'name': 'master',
              'cluster_id': '11',
              'instances': [instance1, instance2],
              'volume_local_to_instance': False}

        cluster = r.ClusterResource({'node_groups': [ng]})

        volumes.attach_to_instances(g.get_instances(cluster))
        self.assertEqual(4, p_create_attach_vol.call_count)
        self.assertEqual(2, p_await.call_count)
        self.assertEqual(4, p_mount.call_count)
Example #17
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    instances_to_delete = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")

    instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if instance_ids:
        cluster = g.change_cluster_status(cluster, "Configuring")
        instances = g.get_instances(cluster, instance_ids)
        plugin.scale_cluster(cluster, instances)

    g.change_cluster_status(cluster, "Active")
Example #18
    def test_attach(self, p_create_attach_vol,
                    p_await, p_mount):
        p_create_attach_vol.side_effect = ['/dev/vdb', '/dev/vdc'] * 2
        p_await.return_value = None
        p_mount.return_value = None

        instance1 = {'id': '1',
                     'instance_id': '123',
                     'instance_name': 'inst_1'}
        instance2 = {'id': '2',
                     'instance_id': '456',
                     'instance_name': 'inst_2'}

        ng = {'volumes_per_node': 2,
              'volumes_size': 2,
              'volume_mount_prefix': '/mnt/vols',
              'name': 'master',
              'instances': [instance1, instance2]}

        cluster = r.ClusterResource({'node_groups': [ng]})

        volumes.attach_to_instances(g.get_instances(cluster))
        self.assertEqual(p_create_attach_vol.call_count, 4)
        self.assertEqual(p_await.call_count, 2)
        self.assertEqual(p_mount.call_count, 4)
Example #19
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        ips_assigned = set()
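        # poll until every instance has its IPs initialized, bailing out if
        # the cluster is deleted while we wait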
        while len(ips_assigned) != len(instances):
            if not g.check_cluster_exists(cluster):
                return
            for instance in instances:
                if instance.id not in ips_assigned:
                    if networks.init_instances_ips(instance):
                        ips_assigned.add(instance.id)

            context.sleep(1)

        LOG.info(
            _LI("Cluster '%s': all instances have IPs assigned"), cluster.id)

        cluster = conductor.cluster_get(context.ctx(), cluster)
        instances = g.get_instances(cluster, ips_assigned)

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info(_LI("Cluster '%s': all instances are accessible"), cluster.id)
Example #20
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

        ips_assigned = set()
        self._ips_assign(ips_assigned, cluster, instances)

        LOG.info(
            _LI("Cluster {cluster_id}: all instances have IPs assigned")
            .format(cluster_id=cluster.id))

        cluster = conductor.cluster_get(context.ctx(), cluster)
        instances = g.get_instances(cluster, ips_assigned)

        cpo.add_provisioning_step(
            cluster.id, _("Wait for instance accessibility"), len(instances))

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info(_LI("Cluster {cluster_id}: all instances are accessible")
                 .format(cluster_id=cluster.id))
Example #21
    def _add_volumes(self, ctx, cluster):
        for instance in g.get_instances(cluster):
            res_names = heat.client().resources.get(
                cluster.name, instance.instance_name).required_by
            for res_name in res_names:
                vol_res = heat.client().resources.get(cluster.name, res_name)
                if vol_res.resource_type == 'OS::Cinder::VolumeAttachment':
                    volume_id = vol_res.physical_resource_id
                    conductor.append_volume(ctx, instance, volume_id)
Example #22
    def _add_volumes(self, ctx, cluster):
        for instance in g.get_instances(cluster):
            res_names = heat.client().resources.get(
                cluster.name, instance.instance_name).required_by
            for res_name in res_names:
                vol_res = heat.client().resources.get(cluster.name, res_name)
                if vol_res.resource_type == 'OS::Cinder::VolumeAttachment':
                    volume_id = vol_res.physical_resource_id
                    conductor.append_volume(ctx, instance, volume_id)
Example #23
    def launch_instances(self, cluster, target_count):
        # create all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[0])

        self.create_instances(cluster, target_count)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, self.STAGES[1])

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[2])

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #24
def configure_ntp(cluster_id):
    cluster = conductor.cluster_get(context.ctx(), cluster_id)
    if not is_ntp_enabled(cluster):
        LOG.debug("Don't configure NTP on cluster")
        return
    instances = g.get_instances(cluster)
    url = retrieve_ntp_server_url(cluster)
    with context.ThreadGroup() as tg:
        for instance in instances:
            tg.spawn("configure-ntp-%s" % instance.instance_name,
                     _configure_ntp_on_instance, instance, url)
Example #25
    def launch_instances(self, cluster, target_count):
        # create all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[0])

        self.create_instances(cluster, target_count)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, self.STAGES[1])

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[2])

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #26
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin

    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "Decommissioning"})
    LOG.info(g.format_cluster_status(cluster))

    instances_to_delete = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[new_count:
                                                        node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Scaling"})
    LOG.info(g.format_cluster_status(cluster))

    instances = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin

    if instances:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Configuring"})
        LOG.info(g.format_cluster_status(cluster))
        try:
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error"})
            LOG.info(g.format_cluster_status(cluster))
            return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
Example #27
    def shutdown_cluster(self, cluster):
        """Shutdown specified cluster and all related resources."""
        try:
            heat.client().stacks.delete(cluster.name)
        except heat_exc.HTTPNotFound:
            LOG.warn("Did not find stack for cluster %s" % cluster.name)

        self._clean_job_executions(cluster)

        ctx = context.ctx()
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)
Example #28
    def shutdown_cluster(self, cluster):
        """Shutdown specified cluster and all related resources."""
        try:
            heat.client().stacks.delete(cluster.name)
        except heat_exc.HTTPNotFound:
            LOG.warn('Did not find stack for cluster %s' % cluster.name)

        self._clean_job_executions(cluster)

        ctx = context.ctx()
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)
Example #29
    def launch_instances(self, cluster, target_count):
        # create all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[0])

        cpo.add_provisioning_step(cluster.id, _("Create Heat stack"), 1)
        with context.InstanceInfoManager([cluster.id, None, None, None]):
            self.create_instances(cluster, target_count)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, self.STAGES[1])

        instances = g.get_instances(cluster, self.inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, self.STAGES[2])

        instances = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)
Example #30
    def _get_instance_if_running(self, job_execution):
        pid, inst_id = self._get_pid_and_inst_id(job_execution.oozie_job_id)
        if not pid or not inst_id or (
                job_execution.info['status'] in edp.JOB_STATUSES_TERMINATED):
            return None, None
        # TODO(tmckay): well, if there is a list index out of range
        # error here it probably means that the instance is gone. If we
        # have a job execution that is not terminated, and the instance
        # is gone, we should probably change the status somehow.
        # For now, do nothing.
        try:
            instance = general.get_instances(self.cluster, [inst_id])[0]
        except Exception:
            instance = None
        return pid, instance
Example #31
    def _get_instance_if_running(self, job_execution):
        pid, inst_id = self._get_pid_and_inst_id(job_execution.oozie_job_id)
        if not pid or not inst_id or (job_execution.info['status']
                                      in edp.JOB_STATUSES_TERMINATED):
            return None, None
        # TODO(tmckay): well, if there is a list index out of range
        # error here it probably means that the instance is gone. If we
        # have a job execution that is not terminated, and the instance
        # is gone, we should probably change the status somehow.
        # For now, do nothing.
        try:
            instance = general.get_instances(self.cluster, [inst_id])[0]
        except Exception:
            instance = None
        return pid, instance
Example #32
    def _populate_cluster(self, cluster, stack):
        ctx = context.ctx()
        old_ids = [i.instance_id for i in g.get_instances(cluster)]
        new_ids = []

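        # register any stack servers the DB does not yet know about and return
        # only the ids of the newly added instances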
        for node_group in cluster.node_groups:
            nova_ids = stack.get_node_group_instances(node_group)
            for name, nova_id in nova_ids:
                if nova_id not in old_ids:
                    instance_id = conductor.instance_add(
                        ctx, node_group, {"instance_id": nova_id,
                                          "instance_name": name})
                    new_ids.append(instance_id)

        return new_ids
Example #33
    def _launch_instances(self, cluster, target_count, stages,
                          update_stack=False, disable_rollback=True):
        # create all instances
        cluster = g.change_cluster_status(cluster, stages[0])

        inst_ids = self._create_instances(
            cluster, target_count, update_stack, disable_rollback)

        # wait until all instances are up and networks are ready
        cluster = g.change_cluster_status(cluster, stages[1])

        instances = g.get_instances(cluster, inst_ids)

        self._await_networks(cluster, instances)

        # prepare all instances
        cluster = g.change_cluster_status(cluster, stages[2])

        instances = g.get_instances(cluster, inst_ids)
        volumes.mount_to_instances(instances)

        self._configure_instances(cluster)

        return inst_ids
Example #34
    def _populate_cluster(self, ctx, cluster, stack):
        old_ids = [i.instance_id for i in g.get_instances(cluster)]

        new_ids = []

        for node_group in cluster.node_groups:
            nova_ids = stack.get_node_group_instances(node_group)
            for name, nova_id in nova_ids:
                if nova_id not in old_ids:
                    instance_id = conductor.instance_add(
                        ctx, node_group, {"instance_id": nova_id, "instance_name": name}
                    )
                    new_ids.append(instance_id)

        return new_ids
Example #35
def decommission_nodes(cluster, instances, configure_sh_string):
    LOG.info(_LI('Start decommission. Cluster = %s'), cluster.name)
    move_node(cluster, instances)
    stop_services(cluster, instances)
    context.sleep(names.WAIT_NODE_ALARM_NO_HEARTBEAT)
    remove_node(cluster, instances)
    remove_services(cluster, instances)
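    # if any removed node ran CLDB or ZooKeeper, re-run configure.sh on the
    # instances that remain in the cluster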
    if check_for_cldb_or_zookeeper_service(instances):
        all_instances = gen.get_instances(cluster)
        current_cluster_instances = [
            x for x in all_instances if x not in instances]
        for inst in current_cluster_instances:
            start_helper.exec_configure_sh_on_instance(
                cluster, inst, configure_sh_string)
    LOG.info(_LI('End decommission. Cluster = %s'), cluster.name)
Example #36
    def shutdown_cluster(self, cluster):
        """Shutdown specified cluster and all related resources."""
        try:
            heat.client().stacks.delete(cluster.name)
            stack = heat.get_stack(cluster.name)
            heat.wait_stack_completion(stack)
        except heat_exc.HTTPNotFound:
            LOG.warning(_LW('Did not find stack for cluster {cluster_name}')
                        .format(cluster_name=cluster.name))

        self._clean_job_executions(cluster)

        ctx = context.ctx()
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)
Example #37
    def rollback_cluster(self, cluster, reason):
        rollback_info = cluster.rollback_info or {}
        self._update_rollback_strategy(cluster)

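        # a 'shutdown' flag in the rollback info means cluster creation
        # failed, so the whole cluster is torn down; a list of instance ids
        # means a scaling operation has to be undone instead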
        if rollback_info.get('shutdown', False):
            self._rollback_cluster_creation(cluster, reason)
            return False

        instance_ids = rollback_info.get('instance_ids', [])
        if instance_ids:
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), reason)

            return True

        return False
Example #38
    def rollback_cluster(self, cluster, reason):
        rollback_info = cluster.rollback_info or {}
        self._update_rollback_strategy(cluster)

        if rollback_info.get('shutdown', False):
            self._rollback_cluster_creation(cluster, reason)
            return False

        instance_ids = rollback_info.get('instance_ids', [])
        if instance_ids:
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), reason)

            return True

        return False
Example #39
def _provision_scaled_cluster(id, node_group_id_map):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # Decommissioning surplus nodes with the plugin

    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "Decommissioning"})
    LOG.info(g.format_cluster_status(cluster))

    instances_to_delete = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Scaling"})
    LOG.info(g.format_cluster_status(cluster))

    instances = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin

    if instances:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Configuring"})
        LOG.info(g.format_cluster_status(cluster))
        try:
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error"})
            LOG.info(g.format_cluster_status(cluster))
            return

    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
Example #40
    def _populate_cluster(self, cluster, stack):
        ctx = context.ctx()
        old_ids = [i.instance_id for i in g.get_instances(cluster)]
        new_ids = []

        for node_group in cluster.node_groups:
            instances = stack.get_node_group_instances(node_group)
            for instance in instances:
                nova_id = instance['physical_id']
                name = instance['name']
                if nova_id not in old_ids:
                    instance_id = conductor.instance_add(
                        ctx, node_group, {"instance_id": nova_id,
                                          "instance_name": name})
                    new_ids.append(instance_id)

        return new_ids
Example #41
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    instances_to_delete = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[new_count:
                                                        node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")

    instances = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin

    if instances:
        cluster = g.change_cluster_status(cluster, "Configuring")
        try:
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            LOG.exception(
                _LE("Can't scale cluster '%(name)s' (reason: %(reason)s)"),
                {'name': cluster.name, 'reason': ex})

            g.change_cluster_status(cluster, "Error")
            return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    g.change_cluster_status(cluster, "Active")
Example #42
    def test_delete_floating_ips(self):
        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2, 'pool')]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["datanode"])
        self.engine._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = g.get_instances(cluster)

        self.engine._assign_floating_ips(instances_list)

        self.engine._shutdown_instances(cluster)
        self.assertEqual(self.nova.floating_ips.delete.call_count, 2,
                         "Unexpected number of floating IPs deleted")
        self.assertEqual(self.nova.servers.delete.call_count, 2,
                         "Unexpected number of servers deleted")
Example #43
    def create_cluster(self, cluster):
        ctx = context.ctx()
        try:
            # create all instances
            conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
            LOG.info(g.format_cluster_status(cluster))
            self._create_instances(cluster)

            # wait until all instances are up and networks are ready
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Waiting"})
            LOG.info(g.format_cluster_status(cluster))

            instances = g.get_instances(cluster)

            self._await_active(cluster, instances)

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            cluster = conductor.cluster_get(ctx, cluster)

            # attach volumes
            volumes.attach(cluster)

            # prepare all instances
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Preparing"})
            LOG.info(g.format_cluster_status(cluster))

            self._configure_instances(cluster)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
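                # record the failure on the cluster, roll back what was
                # created, and let the context manager re-raise the original
                # exception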
                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_update(
                    ctx, cluster, {
                        "status": "Error",
                        "status_description": str(ex)
                    })
                LOG.info(g.format_cluster_status(cluster))
                self._rollback_cluster_creation(cluster, ex)
Example #44
    def rollback_cluster(self, cluster, reason):
        rollback_info = cluster.rollback_info or {}
        self._update_rollback_strategy(cluster)

        if rollback_info.get('shutdown', False):
            self._rollback_cluster_creation(cluster, reason)
            LOG.warning(_LW("Cluster creation rollback "
                            "(reason: {reason})").format(reason=reason))
            return False

        instance_ids = rollback_info.get('instance_ids', [])
        if instance_ids:
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), reason)
            LOG.warning(_LW("Cluster scaling rollback "
                            "(reason: {reason})").format(reason=reason))

            return True

        return False
Example #45
    def rollback_cluster(self, cluster, reason):
        rollback_info = cluster.rollback_info or {}
        self._update_rollback_strategy(cluster)

        if rollback_info.get('shutdown', False):
            self._rollback_cluster_creation(cluster, reason)
            LOG.warning(
                _LW("Cluster creation rollback "
                    "(reason: {reason})").format(reason=reason))
            return False

        instance_ids = rollback_info.get('instance_ids', [])
        if instance_ids:
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), reason)
            LOG.warning(
                _LW("Cluster scaling rollback "
                    "(reason: {reason})").format(reason=reason))

            return True

        return False
Example #46
    def test_ip_assignment_use_no_floating(self):
        self.override_config("use_floating_ips", False)

        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2,
                                     'pool'),
                       _make_ng_dict("test_group_2", "test_flavor",
                                     ["name node", "test tracker"], 1)]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["data node"])
        self.engine._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = g.get_instances(cluster)

        self.engine._assign_floating_ips(instances_list)

        self.nova.floating_ips.create.assert_has_calls(
            [mock.call("pool"), mock.call("pool")])

        self.assertEqual(self.nova.floating_ips.create.call_count, 2,
                         "Unexpected number of floating IPs created.")
Example #47
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    try:
        # Decommissioning surplus nodes with the plugin
        cluster = g.change_cluster_status(cluster, "Decommissioning")

        instances_to_delete = []

        for node_group in cluster.node_groups:
            new_count = node_group_id_map[node_group.id]
            if new_count < node_group.count:
                instances_to_delete += node_group.instances[new_count:
                                                            node_group.count]

        if instances_to_delete:
            context.set_step_type(_("Plugin: decommission cluster"))
            plugin.decommission_nodes(cluster, instances_to_delete)

        # Scaling infrastructure
        cluster = g.change_cluster_status(cluster, "Scaling")
        context.set_step_type(_("Engine: scale cluster"))
        instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

        # Setting up new nodes with the plugin
        if instance_ids:
            ntp_service.configure_ntp(cluster_id)
            cluster = g.change_cluster_status(cluster, "Configuring")
            instances = g.get_instances(cluster, instance_ids)
            context.set_step_type(_("Plugin: scale cluster"))
            plugin.scale_cluster(cluster, instances)

        g.change_cluster_status(cluster, "Active")

    finally:
        if CONF.use_identity_api_v3 and not cluster.is_transient:
            trusts.delete_trust_from_cluster(cluster)
Example #48
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    try:
        # Decommissioning surplus nodes with the plugin
        cluster = g.change_cluster_status(cluster, "Decommissioning")

        instances_to_delete = []

        for node_group in cluster.node_groups:
            new_count = node_group_id_map[node_group.id]
            if new_count < node_group.count:
                instances_to_delete += node_group.instances[
                    new_count:node_group.count]

        if instances_to_delete:
            context.set_step_type(_("Plugin: decommission cluster"))
            plugin.decommission_nodes(cluster, instances_to_delete)

        # Scaling infrastructure
        cluster = g.change_cluster_status(cluster, "Scaling")
        context.set_step_type(_("Engine: scale cluster"))
        instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

        # Setting up new nodes with the plugin
        if instance_ids:
            ntp_service.configure_ntp(cluster_id)
            cluster = g.change_cluster_status(cluster, "Configuring")
            instances = g.get_instances(cluster, instance_ids)
            context.set_step_type(_("Plugin: scale cluster"))
            plugin.scale_cluster(cluster, instances)

        g.change_cluster_status(cluster, "Active")

    finally:
        if CONF.use_identity_api_v3 and not cluster.is_transient:
            trusts.delete_trust_from_cluster(cluster)
Example #49
    def _remove_db_objects(self, cluster):
        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)
Example #50
    def _remove_db_objects(self, cluster):
        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster)
        for inst in instances:
            conductor.instance_remove(ctx, inst)