Example #1
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        stack = osc.heat().stacks.get(cluster.stack_id)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if stack.stack_status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when stack status is '
                          '"%s"') % stack.stack_status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.ScaleManager(context, osc, cluster)

        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

        _update_stack(context, osc, cluster, manager, rollback)
        self._poll_and_check(osc, cluster)

        return cluster
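
Note: all of these snippets funnel through conductor_utils.notify_about_cluster_operation, which emits a CADF-style event using pycadf taxonomy constants (ACTION_UPDATE, OUTCOME_PENDING, and so on). Below is a minimal sketch of what such a wrapper could look like, assuming an oslo.messaging Notifier is available; the event-type naming and payload fields are illustrative, not Magnum's actual notification schema.

    from pycadf import cadftaxonomy as taxonomy

    def notify_about_cluster_operation(notifier, context, action, outcome,
                                       cluster=None):
        # Hypothetical event type, e.g. 'magnum.cluster.update'.
        event_type = 'magnum.cluster.%s' % action
        payload = {
            'outcome': outcome,  # e.g. taxonomy.OUTCOME_PENDING
            'cluster_uuid': getattr(cluster, 'uuid', None),
        }
        if outcome == taxonomy.OUTCOME_FAILURE:
            notifier.error(context, event_type, payload)
        else:
            notifier.info(context, event_type, payload)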
Example #2
    def cluster_create(self, context, cluster, create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING)
            created_stack = _create_stack(context, osc, cluster,
                                          create_timeout)
        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.create()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        cluster.stack_id = created_stack['stack']['id']
        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.create()

        self._poll_and_check(osc, cluster)

        return cluster
Example #3
 def update_status(self):
     LOG.debug("Updating status for cluster %s", self.cluster.id)
     # get the driver for the cluster
     cdriver = driver.Driver.get_driver_for_cluster(self.ctx, self.cluster)
     # ask the driver to sync status
     cdriver.update_cluster_status(self.ctx, self.cluster)
     LOG.debug("Status for cluster %s updated to %s (%s)", self.cluster.id,
               self.cluster.status, self.cluster.status_reason)
     # status update notifications
     if self.cluster.status.endswith("_COMPLETE"):
         conductor_utils.notify_about_cluster_operation(
             self.ctx, self.status_to_event[self.cluster.status],
             taxonomy.OUTCOME_SUCCESS, self.cluster)
     if self.cluster.status.endswith("_FAILED"):
         conductor_utils.notify_about_cluster_operation(
             self.ctx, self.status_to_event[self.cluster.status],
             taxonomy.OUTCOME_FAILURE, self.cluster)
     # if we're done with it, delete it
     if self.cluster.status == objects.fields.ClusterStatus.DELETE_COMPLETE:
         # delete all the nodegroups that belong to this cluster
         for ng in objects.NodeGroup.list(self.ctx, self.cluster.uuid):
             ng.destroy()
         self.cluster.destroy()
     # end the "loop"
     raise loopingcall.LoopingCallDone()
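
update_status is written to be driven by an oslo.service looping call: raising loopingcall.LoopingCallDone() is what ends the periodic loop. A minimal sketch of how such a poller could be started, where job stands in for whatever object exposes update_status (e.g. the monitor above):

    from oslo_service import loopingcall

    # Call job.update_status() every 10 seconds until the callback
    # raises loopingcall.LoopingCallDone().
    poller = loopingcall.FixedIntervalLoopingCall(job.update_status)
    poller.start(interval=10, initial_delay=0).wait()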
Example #4
 def update_status(self):
     LOG.debug("Updating status for cluster %s", self.cluster.id)
     # get the driver for the cluster
     cdriver = driver.Driver.get_driver_for_cluster(self.ctx, self.cluster)
     # ask the driver to sync status
     cdriver.update_cluster_status(self.ctx, self.cluster)
     LOG.debug("Status for cluster %s updated to %s (%s)",
               self.cluster.id, self.cluster.status,
               self.cluster.status_reason)
     # status update notifications
     if self.cluster.status.endswith("_COMPLETE"):
         conductor_utils.notify_about_cluster_operation(
             self.ctx, self.status_to_event[self.cluster.status],
             taxonomy.OUTCOME_SUCCESS)
     if self.cluster.status.endswith("_FAILED"):
         conductor_utils.notify_about_cluster_operation(
             self.ctx, self.status_to_event[self.cluster.status],
             taxonomy.OUTCOME_FAILURE)
     # if we're done with it, delete it
     if self.cluster.status == objects.fields.ClusterStatus.DELETE_COMPLETE:
         # delete all the nodegroups that belong to this cluster
         for ng in objects.NodeGroup.list(self.ctx, self.cluster.uuid):
             ng.destroy()
         self.cluster.destroy()
     # end the "loop"
     raise loopingcall.LoopingCallDone()
Example #5
    def cluster_create(self, context, cluster, master_count, node_count,
                       create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.status_reason = None
        cluster.create()

        # Master nodegroup
        master_ng = conductor_utils._get_nodegroup_object(context,
                                                          cluster,
                                                          master_count,
                                                          is_master=True)
        master_ng.create()
        # Minion nodegroup
        minion_ng = conductor_utils._get_nodegroup_object(context,
                                                          cluster,
                                                          node_count,
                                                          is_master=False)
        minion_ng.create()

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING,
                cluster)
            # Get driver
            cluster_driver = driver.Driver.get_driver_for_cluster(
                context, cluster)
            # Create cluster
            cluster_driver.create_cluster(context, cluster, create_timeout)
            cluster.save()
            for ng in cluster.nodegroups:
                ng.stack_id = cluster.stack_id
                ng.save()

        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE,
                cluster)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        return cluster
Example #6
    def cluster_create(self, context, cluster, master_count, node_count,
                       create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.status_reason = None
        cluster.create()

        # Master nodegroup
        master_ng = conductor_utils._get_nodegroup_object(
            context, cluster, master_count, is_master=True)
        master_ng.create()
        # Minion nodegroup
        minion_ng = conductor_utils._get_nodegroup_object(
            context, cluster, node_count, is_master=False)
        minion_ng.create()

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING)
            # Get driver
            cluster_driver = driver.Driver.get_driver_for_cluster(context,
                                                                  cluster)
            # Create cluster
            cluster_driver.create_cluster(context, cluster, create_timeout)
            cluster.save()

        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        return cluster
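
The except block in cluster_create maps Heat's HTTPBadRequest to Magnum's InvalidParameterValue so API callers get a 400-style error, and re-raises everything else unchanged. A standalone sketch of that translation pattern; the exception classes here are stand-ins for heatclient.exc.HTTPBadRequest and Magnum's real exception class:

    import six

    class HTTPBadRequest(Exception):         # stand-in for heatclient.exc
        pass

    class InvalidParameterValue(Exception):  # stand-in for magnum.common.exception
        pass

    def reraise_create_error(e):
        """Re-raise e, converting bad requests to a Magnum-style exception."""
        if isinstance(e, HTTPBadRequest):
            # Wrap the original message so the API layer can return it.
            raise InvalidParameterValue(six.text_type(e))
        raise e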
Example #7
    def rotate_ca_certificate(self, context, cluster):
        LOG.info('start rotate_ca_certificate for cluster: %s', cluster.uuid)

        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        try:
            # re-generate the ca certs
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            cluster_driver = driver.Driver.get_driver_for_cluster(
                context, cluster)
            cluster_driver.rotate_ca_certificate(context, cluster)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #8
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        stack = osc.heat().stacks.get(cluster.stack_id)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if stack.stack_status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when stack status is '
                          '"%s"') % stack.stack_status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Create cluster
        cluster_driver.update_stack(context, osc, cluster, manager, rollback)
        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
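
Several of these examples resolve a driver from the cluster template's (server_type, cluster_distro, coe) triple via driver.Driver.get_driver. A toy sketch of that kind of keyed dispatch; the dict below stands in for Magnum's real plugin discovery, and the registered driver is a made-up placeholder:

    # Toy registry, keyed the same way Driver.get_driver is called above.
    _DRIVERS = {
        ('vm', 'fedora-coreos', 'kubernetes'): object,  # placeholder class
    }

    def get_driver(server_type, cluster_distro, coe):
        try:
            return _DRIVERS[(server_type, cluster_distro, coe)]()
        except KeyError:
            raise LookupError('no driver for %s/%s/%s'
                              % (server_type, cluster_distro, coe))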
Example #9
    def cluster_create(self, context, cluster, create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING)
            # Get driver
            ct = conductor_utils.retrieve_cluster_template(context, cluster)
            cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                      ct.cluster_distro,
                                                      ct.coe)
            # Create cluster
            created_stack = cluster_driver.create_stack(context, osc, cluster,
                                                        create_timeout)
        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.create()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        cluster.stack_id = created_stack['stack']['id']
        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.create()

        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
Example #10
    def cluster_update(self, context, cluster, node_count, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Updates will only be reflected in the default worker
        # nodegroup.
        worker_ng = cluster.default_ng_worker
        if worker_ng.node_count == node_count:
            return
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = worker_ng.node_count

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Update cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            worker_ng.node_count = node_count
            worker_ng.save()
            cluster_driver.update_cluster(context, cluster, manager, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            # Restore the node_count
            worker_ng.node_count = old_node_count
            worker_ng.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #11
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_conductor cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.delete_cluster(context, cluster)
            cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
            cluster.status_reason = None
        except exc.HTTPNotFound:
            LOG.info(
                'The cluster %s was not found during cluster'
                ' deletion.', cluster.id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                # delete all cluster's nodegroups
                for ng in cluster.nodegroups:
                    ng.destroy()
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info('The cluster %s has been deleted by others.', uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS,
                cluster)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE,
                cluster)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception as unexp:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE,
                cluster)
            cluster.status = fields.ClusterStatus.DELETE_FAILED
            cluster.status_reason = six.text_type(unexp)
            cluster.save()
            raise

        cluster.save()
        return None
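
When Heat reports the stack as already gone, the handler still has to remove Magnum's own database records, and order matters: nodegroups reference the cluster, so they are destroyed first. A condensed restatement of that cleanup, as it appears inline in the HTTPNotFound branch above:

    def purge_cluster_records(cluster):
        # Destroy dependent nodegroup rows before the cluster row itself,
        # mirroring the HTTPNotFound branch of cluster_delete above.
        for ng in cluster.nodegroups:
            ng.destroy()
        cluster.destroy()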
Example #12
    def cluster_upgrade(self,
                        context,
                        cluster,
                        cluster_template,
                        max_batch_size,
                        nodegroup,
                        rollback=False):
        LOG.debug('cluster_conductor cluster_upgrade')

        # osc = clients.OpenStackClients(context)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            operation = _('Upgrading a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Upgrade cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.upgrade_cluster(context, cluster, cluster_template,
                                           max_batch_size, nodegroup, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
            nodegroup.status_reason = six.text_type(e)
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        nodegroup.save()
        cluster.save()
        return cluster
Example #13
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_heat cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)

        stack_id = cluster.stack_id
        # NOTE(sdake): This will execute a stack_delete operation and ignore
        # HTTPNotFound exceptions (stack wasn't present).  If Heat couldn't
        # find the stack representing the cluster, a user has likely deleted
        # the stack outside the context of Magnum, and the contents of the
        # cluster are forever lost.
        #
        # Any other exception is not handled here and is raised to the caller.
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
            osc.heat().stacks.delete(stack_id)
        except exc.HTTPNotFound:
            LOG.info(
                _LI('The stack %s was not found during cluster'
                    ' deletion.'), stack_id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info(_LI('The cluster %s has been deleted by others.'),
                         uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise

        cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
        cluster.save()

        self._poll_and_check(osc, cluster)

        return None
Example #14
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_conductor cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
            cluster_driver.delete_cluster(context, cluster)
            cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
            cluster.status_reason = None
        except exc.HTTPNotFound:
            LOG.info('The cluster %s was not found during cluster'
                     ' deletion.', cluster.id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                # delete all cluster's nodegroups
                for ng in cluster.nodegroups:
                    ng.destroy()
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info('The cluster %s has been deleted by others.',
                         uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception as unexp:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            cluster.status = fields.ClusterStatus.DELETE_FAILED
            cluster.status_reason = six.text_type(unexp)
            cluster.save()
            raise

        cluster.save()
        return None
Example #15
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_heat cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)

        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
            cluster_driver.delete_stack(context, osc, cluster)
        except exc.HTTPNotFound:
            LOG.info(
                _LI('The stack %s was not found during cluster'
                    ' deletion.'), cluster.stack_id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info(_LI('The cluster %s has been deleted by others.'),
                         uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise

        cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
        cluster.save()

        self._poll_and_check(osc, cluster, cluster_driver)

        return None
Example #16
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Update cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            cluster_driver.update_cluster(context, cluster, manager, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #17
    def cluster_resize(self, context, cluster, node_count, nodes_to_remove,
                       nodegroup):
        LOG.debug('cluster_conductor cluster_resize')

        osc = clients.OpenStackClients(context)
        # NOTE(flwang): One of the important use cases of the /resize API is
        # supporting the auto scaling action triggered by the Kubernetes
        # Cluster Autoscaler, so two cases may happen:
        # 1. The API could be triggered very often
        # 2. Scale up or down may fail, and we would like to offer the
        #    ability to recover the cluster so that it can be resized
        #    again after a failed update.
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE,
            fields.ClusterStatus.UPDATE_FAILED,
            fields.ClusterStatus.UPDATE_IN_PROGRESS,
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            operation = _('Resizing a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        resize_manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = nodegroup.node_count

        # Resize cluster
        try:
            nodegroup.node_count = node_count
            nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.resize_cluster(context, cluster, resize_manager,
                                          node_count, nodes_to_remove,
                                          nodegroup)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.node_count = old_node_count
            nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
            nodegroup.status_reason = six.text_type(e)
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
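
The resize path backs up node_count and restores it (with a save()) if the driver call fails. As a sketch, that backup/restore-on-exception bookkeeping could be factored into a small context manager; this helper does not exist in Magnum and assumes an object with a save() method like the versioned objects above:

    import contextlib

    @contextlib.contextmanager
    def restore_on_failure(obj, attr):
        """Restore and persist obj.<attr> if the body raises."""
        backup = getattr(obj, attr)
        try:
            yield
        except Exception:
            setattr(obj, attr, backup)
            obj.save()
            raise

    # Usage, mirroring the try/except above:
    #     with restore_on_failure(nodegroup, 'node_count'):
    #         nodegroup.node_count = node_count
    #         nodegroup.save()
    #         cluster_driver.resize_cluster(...)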
Example #18
    def poll_and_check(self):
        # TODO(yuanying): temporary implementation to update api_address,
        # node_addresses and cluster status
        stack = self.openstack_client.heat().stacks.get(self.cluster.stack_id)
        self.attempts += 1
        status_to_event = {
            fields.ClusterStatus.DELETE_COMPLETE: taxonomy.ACTION_DELETE,
            fields.ClusterStatus.CREATE_COMPLETE: taxonomy.ACTION_CREATE,
            fields.ClusterStatus.UPDATE_COMPLETE: taxonomy.ACTION_UPDATE,
            fields.ClusterStatus.ROLLBACK_COMPLETE: taxonomy.ACTION_UPDATE,
            fields.ClusterStatus.CREATE_FAILED: taxonomy.ACTION_CREATE,
            fields.ClusterStatus.DELETE_FAILED: taxonomy.ACTION_DELETE,
            fields.ClusterStatus.UPDATE_FAILED: taxonomy.ACTION_UPDATE,
            fields.ClusterStatus.ROLLBACK_FAILED: taxonomy.ACTION_UPDATE
        }
        # poll_and_check runs detached and may poll for a long time, so
        # another user/client can delete the cluster/stack in the meantime.
        if stack.stack_status == fields.ClusterStatus.DELETE_COMPLETE:
            self._delete_complete()
            conductor_utils.notify_about_cluster_operation(
                self.context, status_to_event[stack.stack_status],
                taxonomy.OUTCOME_SUCCESS)
            raise loopingcall.LoopingCallDone()

        if stack.stack_status in (fields.ClusterStatus.CREATE_COMPLETE,
                                  fields.ClusterStatus.UPDATE_COMPLETE):
            self._sync_cluster_and_template_status(stack)
            conductor_utils.notify_about_cluster_operation(
                self.context, status_to_event[stack.stack_status],
                taxonomy.OUTCOME_SUCCESS)
            raise loopingcall.LoopingCallDone()
        elif stack.stack_status != self.cluster.status:
            self._sync_cluster_status(stack)

        if stack.stack_status in (fields.ClusterStatus.CREATE_FAILED,
                                  fields.ClusterStatus.DELETE_FAILED,
                                  fields.ClusterStatus.UPDATE_FAILED,
                                  fields.ClusterStatus.ROLLBACK_COMPLETE,
                                  fields.ClusterStatus.ROLLBACK_FAILED):
            self._sync_cluster_and_template_status(stack)
            self._cluster_failed(stack)
            conductor_utils.notify_about_cluster_operation(
                self.context, status_to_event[stack.stack_status],
                taxonomy.OUTCOME_FAILURE)
            raise loopingcall.LoopingCallDone()
        # Only check max attempts while the stack is being created and no
        # timeout has been set. If the timeout has been set, the loop will
        # end when the stack completes or the timeout occurs.
        if stack.stack_status == fields.ClusterStatus.CREATE_IN_PROGRESS:
            if (stack.timeout_mins is None
                    and self.attempts > CONF.cluster_heat.max_attempts):
                LOG.error(
                    _LE('Cluster check exit after %(attempts)s attempts, '
                        'stack_id: %(id)s, stack_status: %(status)s') % {
                            'attempts': CONF.cluster_heat.max_attempts,
                            'id': self.cluster.stack_id,
                            'status': stack.stack_status
                        })
                raise loopingcall.LoopingCallDone()
        else:
            if self.attempts > CONF.cluster_heat.max_attempts:
                LOG.error(
                    _LE('Cluster check exit after %(attempts)s attempts, '
                        'stack_id: %(id)s, stack_status: %(status)s') % {
                            'attempts': CONF.cluster_heat.max_attempts,
                            'id': self.cluster.stack_id,
                            'status': stack.stack_status
                        })
                raise loopingcall.LoopingCallDone()
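
The tail of poll_and_check bounds how long polling may run: while the stack is still CREATE_IN_PROGRESS, the attempt budget only applies when no stack timeout was set (a Heat timeout already bounds creation); in every other state the budget always applies. The same decision expressed as a pure function (a hypothetical helper, not in the source):

    def polling_exhausted(stack_status, timeout_mins, attempts, max_attempts):
        """Return True when the poll loop above would give up on attempts."""
        if stack_status == 'CREATE_IN_PROGRESS':
            # A stack timeout bounds creation on its own; only enforce
            # the attempt budget when no timeout was set.
            return timeout_mins is None and attempts > max_attempts
        return attempts > max_attempts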
Example #19
    def cluster_resize(self, context, cluster,
                       node_count, nodes_to_remove, nodegroup):
        LOG.debug('cluster_conductor cluster_resize')

        osc = clients.OpenStackClients(context)
        # NOTE(flwang): One of the important use cases of the /resize API is
        # supporting the auto scaling action triggered by the Kubernetes
        # Cluster Autoscaler, so two cases may happen:
        # 1. The API could be triggered very often
        # 2. Scale up or down may fail, and we would like to offer the
        #    ability to recover the cluster so that it can be resized
        #    again after a failed update.
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE,
            fields.ClusterStatus.UPDATE_FAILED,
            fields.ClusterStatus.UPDATE_IN_PROGRESS,
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Resizing a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        resize_manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = nodegroup.node_count

        # Resize cluster
        try:
            nodegroup.node_count = node_count
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            cluster_driver.resize_cluster(context, cluster, resize_manager,
                                          node_count, nodes_to_remove,
                                          nodegroup)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.node_count = old_node_count
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster