Beispiel #1
0
def _provision_cluster(cluster_id):
    """Drive a cluster through provisioning until it reaches "Active".

    Stages, in order: optional trust creation, plugin infrastructure
    update ("InfraUpdating"), instance creation through ``INFRA``, plugin
    configuration ("Configuring"), service start ("Starting") and finally
    "Active", after which every job execution returned for the cluster is
    passed to ``job_manager.run_job``.

    After each long-running stage the cluster's existence is re-checked;
    if it is gone, a message is logged and the function returns early.
    A failure in the configure or start stage is logged, the cluster is
    switched to "Error" and the function returns without re-raising.

    :param cluster_id: id of the cluster to provision
    """
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    def _is_gone():
        # Looks up the enclosing ``cluster`` at call time, so it always
        # works on the most recent rebinding.
        if g.check_cluster_exists(cluster):
            return False
        LOG.info(g.format_cluster_deleted_message(cluster))
        return True

    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust_for_cluster(cluster)

    # let the plugin update the cluster infrastructure
    cluster = g.change_cluster_status(cluster, "InfraUpdating")
    plugin.update_infra(cluster)

    # re-read the cluster, then create and configure its instances
    cluster = conductor.cluster_get(ctx, cluster_id)
    INFRA.create_cluster(cluster)
    if _is_gone():
        return

    # plugin-side configuration
    cluster = g.change_cluster_status(cluster, "Configuring")
    try:
        plugin.configure_cluster(cluster)
    except Exception as exc:
        if _is_gone():
            return
        LOG.exception(
            _LE("Can't configure cluster '%(name)s' (reason: %(reason)s)"),
            {'name': cluster.name, 'reason': exc})
        g.change_cluster_status(cluster, "Error")
        return
    if _is_gone():
        return

    # start the services of the prepared and configured cluster
    cluster = g.change_cluster_status(cluster, "Starting")
    try:
        plugin.start_cluster(cluster)
    except Exception as exc:
        if _is_gone():
            return
        LOG.exception(
            _LE("Can't start services for cluster '%(name)s' (reason: "
                "%(reason)s)"), {'name': cluster.name, 'reason': exc})
        g.change_cluster_status(cluster, "Error")
        return
    if _is_gone():
        return

    # the cluster is up and ready
    cluster = g.change_cluster_status(cluster, "Active")

    # kick off the job executions that were waiting for this cluster
    pending_jobs = conductor.job_execution_get_all(ctx, cluster_id=cluster.id)
    for job_execution in pending_jobs:
        job_manager.run_job(job_execution.id)
Beispiel #2
0
    def scale_cluster(self, cluster, node_group_id_map):
        """Scale the cluster's instances according to ``node_group_id_map``.

        New instances are requested through ``_scale_cluster_instances``,
        awaited, given floating IPs and networks, and get volumes attached.
        If everything succeeds and there are new instances, the cluster's
        instances are configured before returning.

        :param cluster: cluster to scale
        :param node_group_id_map: passed through to
            ``_scale_cluster_instances``; presumably maps node group ids
            to target instance counts -- confirm against the caller
        :returns: the value returned by ``_scale_cluster_instances``, or
            an empty list if the cluster disappears while waiting
        """
        ctx = context.ctx()

        # stays empty if the failure happens before any instance id is known
        instance_ids = []
        try:
            instance_ids = self._scale_cluster_instances(cluster,
                                                         node_group_id_map)

            # re-read the cluster before and after cleaning it up
            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            instances = g.get_instances(cluster, instance_ids)

            self._await_active(cluster, instances)

            # the cluster may have been deleted while we were waiting
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return []

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return []

            cluster = conductor.cluster_get(ctx, cluster)

            volumes.attach_to_instances(
                g.get_instances(cluster, instance_ids))

        except Exception as ex:
            # NOTE(review): save_and_reraise_exception presumably re-raises
            # the saved exception when the ``with`` body exits, including
            # via ``return`` -- in that case the ``return []`` below and the
            # code after the try/except are not reached on failure. Confirm
            # against the excutils implementation in use.
            with excutils.save_and_reraise_exception():
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return []

                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

                # roll back the instances added by this scaling attempt
                cluster = conductor.cluster_get(ctx, cluster)
                self._rollback_cluster_scaling(
                    cluster, g.get_instances(cluster, instance_ids), ex)
                instance_ids = []

                # after the rollback the cluster is put back to "Active"
                cluster = conductor.cluster_get(ctx, cluster)
                g.clean_cluster_from_empty_ng(cluster)
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Active"})

                LOG.info(g.format_cluster_status(cluster))

        # we should be here with valid cluster: if instances creation
        # was not successful all extra-instances will be removed above
        if instance_ids:
            self._configure_instances(cluster)
        return instance_ids
Beispiel #3
0
    def create_cluster(self, cluster):
        """Create and prepare all instances of ``cluster``.

        The cluster passes through the "Spawning", "Waiting" and
        "Preparing" statuses while its instances are created, awaited,
        given floating IPs and networks, have volumes attached and are
        configured.

        If the cluster no longer exists after a waiting step, a message is
        logged and the method returns early. On any other failure the
        cluster is set to "Error" (with the exception text as status
        description) and creation is rolled back.

        :param cluster: cluster whose instances should be created
        :raises Exception: NOTE(review): the failure handler runs inside
            ``excutils.save_and_reraise_exception()``, which presumably
            re-raises the original error on exit -- confirm
        """
        ctx = context.ctx()
        try:
            # create all instances; rebind to the object returned by the
            # update (as every other status change here does) so the
            # status logged below is the new one rather than the stale one
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Spawning"})
            LOG.info(g.format_cluster_status(cluster))
            self._create_instances(cluster)

            # wait for all instances are up and networks ready
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Waiting"})
            LOG.info(g.format_cluster_status(cluster))

            instances = g.get_instances(cluster)

            self._await_active(cluster, instances)

            # the cluster may have been deleted while we were waiting
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return

            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return

            cluster = conductor.cluster_get(ctx, cluster)

            # attach volumes
            volumes.attach(cluster)

            # prepare all instances
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Preparing"})
            LOG.info(g.format_cluster_status(cluster))

            self._configure_instances(cluster)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                # nothing to mark or roll back if the cluster is gone
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return

                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_update(
                    ctx, cluster, {"status": "Error",
                                   "status_description": str(ex)})
                LOG.info(g.format_cluster_status(cluster))
                self._rollback_cluster_creation(cluster, ex)
Beispiel #4
0
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    """Scale an existing cluster to the counts in ``node_group_id_map``.

    Steps: decommission surplus instances through the plugin
    ("Decommissioning"), scale the infrastructure through ``INFRA``
    ("Scaling"), let the plugin set up any new instances ("Configuring")
    and finally mark the cluster "Active".

    If the plugin fails while setting up new instances, the error is
    logged and the cluster is set to "Error"; if the cluster no longer
    exists, only a message is logged. In both cases the function returns
    without re-raising.

    :param cluster_id: id of the cluster to scale
    :param node_group_id_map: mapping indexed by node group id that yields
        the new instance count for every node group of the cluster
    """
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    def _set_status(current, status):
        # Persist the new status, log it, and hand back the updated object.
        updated = conductor.cluster_update(ctx, current, {"status": status})
        LOG.info(g.format_cluster_status(updated))
        return updated

    # Decommissioning surplus nodes with the plugin
    cluster = _set_status(cluster, "Decommissioning")

    surplus = []
    for ng in cluster.node_groups:
        target = node_group_id_map[ng.id]
        if target < ng.count:
            # everything beyond the new count has to go
            surplus.extend(ng.instances[target:ng.count])

    if surplus:
        plugin.decommission_nodes(cluster, surplus)

    # Scaling infrastructure
    cluster = _set_status(cluster, "Scaling")
    new_ids = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if new_ids:
        cluster = _set_status(cluster, "Configuring")
        try:
            new_instances = g.get_instances(cluster, new_ids)
            plugin.scale_cluster(cluster, new_instances)
        except Exception as exc:
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, exc)
            _set_status(cluster, "Error")
            return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    _set_status(cluster, "Active")
Beispiel #5
0
    def launch_instances(self, ctx, cluster, target_count):
        """Instantiate the cluster's Heat stack and prepare its instances.

        ``self.STAGES[0]``, ``[1]`` and ``[2]`` are used as the cluster
        status while, respectively, the stack is created, the networks of
        the instances are awaited, and the instances are prepared (volumes
        mounted, instances configured). The ids returned by
        ``_populate_cluster`` are stored in ``self.inst_ids``.

        Returns early (after logging) if the cluster no longer exists once
        the networks have been awaited.

        :param ctx: context passed to the conductor and helper calls
        :param cluster: cluster to launch instances for
        :param target_count: forwarded to ``_configure_template``
        """
        def _enter_stage(current, index):
            # Switch the cluster to the given stage's status and log it.
            refreshed = conductor.cluster_update(
                ctx, current, {"status": self.STAGES[index]})
            LOG.info(g.format_cluster_status(refreshed))
            return refreshed

        # stage 0: create all instances through the stack template
        cluster = _enter_stage(cluster, 0)

        template = heat.ClusterTemplate(cluster)
        self._configure_template(ctx, template, cluster, target_count)

        stack = template.instantiate(update_existing=self.UPDATE_STACK)
        stack.wait_till_active()

        self.inst_ids = self._populate_cluster(ctx, cluster, stack)

        # stage 1: wait until the instances' networks are ready
        cluster = _enter_stage(cluster, 1)

        awaited = g.get_instances(cluster, self.inst_ids)
        self._await_networks(cluster, awaited)

        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return

        # stage 2: prepare all instances
        cluster = _enter_stage(cluster, 2)

        launched = g.get_instances(cluster, self.inst_ids)
        volumes.mount_to_instances(launched)

        self._configure_instances(cluster)
Beispiel #6
0
    def create_cluster(self, cluster):
        """Create the cluster's instances through a ``_CreateLauncher``.

        The current node group counts are remembered as the launch target
        and then nullified before the cluster is re-read and launched.

        On failure, if the cluster still exists, the error is logged, the
        cluster is set to "Error" (with the exception text as status
        description) and creation is rolled back; if it is gone, only a
        message is logged.

        :param cluster: cluster to create
        """
        ctx = context.ctx()
        creator = _CreateLauncher()

        try:
            ng_targets = self._get_ng_counts(cluster)
            self._nullify_ng_counts(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            creator.launch_instances(ctx, cluster, ng_targets)
        except Exception as exc:
            # NOTE(review): the context manager presumably re-raises the
            # original exception when this block exits -- confirm.
            with excutils.save_and_reraise_exception():
                if g.check_cluster_exists(cluster):
                    self._log_operation_exception(
                        "Can't start cluster '%s' (reason: %s)",
                        cluster, exc)

                    failure = {"status": "Error",
                               "status_description": str(exc)}
                    cluster = conductor.cluster_update(ctx, cluster, failure)
                    LOG.info(g.format_cluster_status(cluster))
                    self._rollback_cluster_creation(cluster)
                else:
                    # the cluster was deleted meanwhile: nothing to mark
                    # or roll back
                    LOG.info(g.format_cluster_deleted_message(cluster))
Beispiel #7
0
    def create_cluster(self, cluster):
        """Create the cluster's instances and then add its volumes.

        The current node group counts are remembered as the launch target
        and then nullified; the cluster is re-read before launching and
        again before ``_add_volumes`` is called.

        On failure, if the cluster still exists, the error is logged, the
        cluster is switched to "Error" (with the exception text as status
        description) and creation is rolled back; if it is gone, only a
        message is logged.

        :param cluster: cluster to create
        """
        ctx = context.ctx()
        creator = _CreateLauncher()

        try:
            ng_targets = self._get_ng_counts(cluster)
            self._nullify_ng_counts(cluster)

            cluster = conductor.cluster_get(ctx, cluster)
            creator.launch_instances(ctx, cluster, ng_targets)

            cluster = conductor.cluster_get(ctx, cluster)
            self._add_volumes(ctx, cluster)
        except Exception as exc:
            # NOTE(review): the context manager presumably re-raises the
            # original exception when this block exits -- confirm.
            with excutils.save_and_reraise_exception():
                if g.check_cluster_exists(cluster):
                    self._log_operation_exception(
                        _LW("Can't start cluster '%(cluster)s' "
                            "(reason: %(reason)s)"), cluster, exc)

                    description = six.text_type(exc)
                    cluster = g.change_cluster_status(
                        cluster, "Error", status_description=description)
                    self._rollback_cluster_creation(cluster)
                else:
                    # the cluster was deleted meanwhile: nothing to mark
                    # or roll back
                    LOG.info(g.format_cluster_deleted_message(cluster))
Beispiel #8
0
def _provision_scaled_cluster(cluster_id, node_group_id_map):
    """Scale an existing cluster to the counts in ``node_group_id_map``.

    Steps: decommission surplus instances through the plugin
    ("Decommissioning"), scale the infrastructure through ``INFRA``
    ("Scaling"), let the plugin set up any new instances ("Configuring")
    and finally mark the cluster "Active".

    If the plugin fails while setting up new instances, the error is
    logged and the cluster is switched to "Error"; if the cluster no
    longer exists, only a message is logged. In both cases the function
    returns without re-raising.

    :param cluster_id: id of the cluster to scale
    :param node_group_id_map: mapping indexed by node group id that yields
        the new instance count for every node group of the cluster
    """
    # the context returned by the preparation step is not needed here
    _ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    surplus = []
    for ng in cluster.node_groups:
        target = node_group_id_map[ng.id]
        if target < ng.count:
            # everything beyond the new count has to go
            surplus.extend(ng.instances[target:ng.count])

    if surplus:
        plugin.decommission_nodes(cluster, surplus)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")
    new_ids = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if new_ids:
        cluster = g.change_cluster_status(cluster, "Configuring")
        try:
            new_instances = g.get_instances(cluster, new_ids)
            plugin.scale_cluster(cluster, new_instances)
        except Exception as exc:
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            failure_args = {'name': cluster.name, 'reason': exc}
            LOG.exception(
                _LE("Can't scale cluster '%(name)s' (reason: %(reason)s)"),
                failure_args)

            g.change_cluster_status(cluster, "Error")
            return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    g.change_cluster_status(cluster, "Active")
Beispiel #9
0
    def scale_cluster(self, cluster, target_count):
        """Scale the cluster to ``target_count`` through a ``_ScaleLauncher``.

        The node group counts read before launching are kept so that a
        failed launch can be rolled back. After a successful rollback the
        cluster is set back to "Active"; if the rollback itself fails the
        cluster is set to "Error" and the rollback error is raised.
        Whatever happens, the cluster is finally re-read and passed to
        ``g.clean_cluster_from_empty_ng``.

        :param cluster: cluster to scale
        :param target_count: forwarded to the launcher and, on failure,
            to ``_rollback_cluster_scaling``
        :returns: ``launcher.inst_ids``
        :raises Exception: NOTE(review): the failure handler runs inside
            ``excutils.save_and_reraise_exception()``, which presumably
            re-raises the launch error once handling is done -- confirm
        """
        ctx = context.ctx()

        # counts to restore if the launch fails
        rollback_count = self._get_ng_counts(cluster)

        launcher = _ScaleLauncher()

        try:
            launcher.launch_instances(ctx, cluster, target_count)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                # nothing to roll back if the cluster is gone
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return
                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

                cluster = conductor.cluster_get(ctx, cluster)

                try:
                    self._rollback_cluster_scaling(
                        ctx, cluster, rollback_count, target_count)
                except Exception:
                    if not g.check_cluster_exists(cluster):
                        LOG.info(g.format_cluster_deleted_message(cluster))
                        return
                    # if something fails during the rollback, we stop
                    # doing anything further
                    cluster = conductor.cluster_update(ctx, cluster,
                                                       {"status": "Error"})
                    LOG.info(g.format_cluster_status(cluster))
                    LOG.error("Unable to complete rollback, aborting")
                    raise

                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Active"})
                LOG.info(g.format_cluster_status(cluster))
                # ``warning`` replaces the deprecated ``warn`` alias
                LOG.warning(
                    "Rollback successful. Throwing off an initial exception.")
        finally:
            # runs on success and on failure alike
            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)

        return launcher.inst_ids