Example #1
    def test_retrieve_cluster_template(self,
                                       mock_cluster_template_get_by_uuid):
        expected_context = 'context'
        expected_cluster_template_uuid = 'ClusterTemplate_uuid'

        cluster = objects.Cluster({})
        cluster.cluster_template_id = expected_cluster_template_uuid

        utils.retrieve_cluster_template(expected_context, cluster)

        mock_cluster_template_get_by_uuid.assert_called_once_with(
            expected_context, expected_cluster_template_uuid)
Example #2
    def test_retrieve_cluster_template(self,
                                       mock_cluster_template_get_by_uuid):
        expected_context = 'context'
        expected_cluster_template_uuid = 'ClusterTemplate_uuid'

        cluster = objects.Cluster({})
        cluster.cluster_template_id = expected_cluster_template_uuid

        utils.retrieve_cluster_template(expected_context, cluster)

        mock_cluster_template_get_by_uuid.assert_called_once_with(
            expected_context,
            expected_cluster_template_uuid)
Example #3
    def test_retrieve_cluster_template(self,
                                       mock_cluster_template_get_by_uuid):
        expected_context = 'context'
        expected_cluster_template_uuid = 'ClusterTemplate_uuid'

        bay = objects.Bay({})
        bay.baymodel_id = expected_cluster_template_uuid

        utils.retrieve_cluster_template(expected_context, bay)

        mock_cluster_template_get_by_uuid.assert_called_once_with(
            expected_context,
            expected_cluster_template_uuid)
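The three tests above pin down the helper's contract: retrieve_cluster_template(context, cluster) resolves the template id stored on the cluster through the ClusterTemplate object API (the mocks presumably patch objects.ClusterTemplate.get_by_uuid), and Example #3 exercises the same helper with a legacy Bay object, whose baymodel_id carries the template id. A minimal sketch of the implied implementation, inferred from the tests rather than copied from Magnum:

def retrieve_cluster_template(context, cluster):
    # Inferred from the tests above; cluster_template_id is the attribute
    # the tests set (a legacy Bay would need its baymodel_id mapped onto it).
    return objects.ClusterTemplate.get_by_uuid(
        context, cluster.cluster_template_id)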
Example #4
def docker_for_cluster(context, cluster):
    cluster_template = conductor_utils.retrieve_cluster_template(
        context, cluster)

    ca_cert, magnum_key, magnum_cert = None, None, None
    client_kwargs = dict()
    if not cluster_template.tls_disabled:
        (ca_cert, magnum_key,
         magnum_cert) = cert_manager.create_client_files(cluster)
        client_kwargs['ca_cert'] = ca_cert.name
        client_kwargs['client_key'] = magnum_key.name
        client_kwargs['client_cert'] = magnum_cert.name

    yield DockerHTTPClient(
        cluster.api_address,
        CONF.docker.docker_remote_api_version,
        CONF.docker.default_timeout,
        **client_kwargs
    )

    if ca_cert:
        ca_cert.close()
    if magnum_key:
        magnum_key.close()
    if magnum_cert:
        magnum_cert.close()
Example #5
def docker_for_cluster(context, cluster):
    cluster_template = conductor_utils.retrieve_cluster_template(
        context, cluster)

    ca_cert, magnum_key, magnum_cert = None, None, None
    client_kwargs = dict()
    if not cluster_template.tls_disabled:
        (ca_cert, magnum_key,
         magnum_cert) = cert_manager.create_client_files(cluster, context)
        client_kwargs['ca_cert'] = ca_cert.name
        client_kwargs['client_key'] = magnum_key.name
        client_kwargs['client_cert'] = magnum_cert.name

    yield DockerHTTPClient(
        cluster.api_address,
        CONF.docker.docker_remote_api_version,
        CONF.docker.default_timeout,
        **client_kwargs
    )

    if ca_cert:
        ca_cert.close()
    if magnum_key:
        magnum_key.close()
    if magnum_cert:
        magnum_cert.close()
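Examples #4 and #5 differ only in whether create_client_files receives the context; both yield a client and close the TLS certificate files afterwards, which is the shape of a generator-based context manager, so a contextlib.contextmanager decorator is presumably applied outside the excerpt. A hypothetical usage sketch under that assumption:

# Hypothetical usage, assuming docker_for_cluster is decorated with
# @contextlib.contextmanager in the surrounding module:
with docker_for_cluster(context, cluster) as docker_client:
    docker_client.info()  # any Docker API call against the cluster
# The TLS client files are closed once the with-block exits.

Note that the cleanup after the yield only runs if the body does not raise; wrapping the yield in try/finally would make the file cleanup exception-safe.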
Example #6
    def __init__(self, openstack_client, context, cluster, cluster_driver):
        self.openstack_client = openstack_client
        self.context = context
        self.cluster = cluster
        self.cluster_template = conductor_utils.retrieve_cluster_template(
            self.context, cluster)
        self.template_def = cluster_driver.get_template_definition()
Example #7
    def rotate_ca_certificate(self, context, cluster):
        cluster_template = conductor_utils.retrieve_cluster_template(context,
                                                                     cluster)
        if cluster_template.cluster_distro not in ["fedora-coreos"]:
            raise exception.NotSupported("Rotating the CA certificate is "
                                         "not supported for cluster with "
                                         "cluster_distro: %s." %
                                         cluster_template.cluster_distro)
        osc = clients.OpenStackClients(context)
        rollback = True
        heat_params = {}

        csr_keys = x509.generate_csr_and_key(u"Kubernetes Service Account")

        heat_params['kube_service_account_key'] = \
            csr_keys["public_key"].replace("\n", "\\n")
        heat_params['kube_service_account_private_key'] = \
            csr_keys["private_key"].replace("\n", "\\n")

        fields = {
            'existing': True,
            'parameters': heat_params,
            'disable_rollback': not rollback
        }
        osc.heat().stacks.update(cluster.stack_id, **fields)
Example #8
    def cluster_update(self, context, cluster, node_count, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Updates will only be reflected in the default worker
        # nodegroup.
        worker_ng = cluster.default_ng_worker
        if worker_ng.node_count == node_count:
            return
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = worker_ng.node_count

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Update cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            worker_ng.node_count = node_count
            worker_ng.save()
            cluster_driver.update_cluster(context, cluster, manager, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            # Restore the node_count
            worker_ng.node_count = old_node_count
            worker_ng.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
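The example above (and the near-identical Example #25 later on) follows the gate/notify/rollback pattern that recurs throughout this page: refuse the operation unless the cluster status is in an allowed set, emit a PENDING notification, apply the change, and on any exception restore the old node count, mark the cluster UPDATE_FAILED and emit FAILURE. A stripped-down, self-contained sketch of that flow; none of these names are Magnum APIs:

ALLOWED_STATUSES = {"CREATE_COMPLETE", "UPDATE_COMPLETE"}

def guarded_scale(cluster, node_count, notify, do_scale):
    # Illustrative only: cluster is a plain dict, while notify and do_scale
    # stand in for the notification helper and the driver call.
    if cluster["status"] not in ALLOWED_STATUSES:
        notify("update", "failure")
        raise RuntimeError("cannot update while %s" % cluster["status"])
    old_count = cluster["node_count"]
    notify("update", "pending")
    try:
        cluster["node_count"] = node_count
        do_scale(cluster)
        cluster["status"] = "UPDATE_IN_PROGRESS"
    except Exception:
        # Mirror the conductor: restore the count and record the failure.
        cluster["node_count"] = old_count
        cluster["status"] = "UPDATE_FAILED"
        notify("update", "failure")
        raise
    return cluster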
Example #9
    def _extract_template_definition(self, context, cluster,
                                     scale_manager=None):
        cluster_template = conductor_utils.retrieve_cluster_template(context,
                                                                     cluster)
        definition = self.get_template_definition()
        return definition.extract_definition(context, cluster_template,
                                             cluster,
                                             scale_manager=scale_manager)
Example #10
    def cluster_upgrade(self,
                        context,
                        cluster,
                        cluster_template,
                        max_batch_size,
                        nodegroup,
                        rollback=False):
        LOG.debug('cluster_conductor cluster_upgrade')

        # osc = clients.OpenStackClients(context)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            operation = _('Upgrading a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Upgrade cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.upgrade_cluster(context, cluster, cluster_template,
                                           max_batch_size, nodegroup, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
            nodegroup.status_reason = six.text_type(e)
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        nodegroup.save()
        cluster.save()
        return cluster
Example #11
def _extract_template_definition(context, cluster, scale_manager=None):
    cluster_template = conductor_utils.retrieve_cluster_template(context,
                                                                 cluster)
    cluster_driver = Driver().get_driver(cluster_template.server_type,
                                         cluster_template.cluster_distro,
                                         cluster_template.coe)
    definition = cluster_driver.get_template_definition()
    return definition.extract_definition(context, cluster_template, cluster,
                                         scale_manager=scale_manager)
Example #12
    def __init__(self, openstack_client, bay):
        self.openstack_client = openstack_client
        self.context = self.openstack_client.context
        self.bay = bay
        self.attempts = 0
        self.cluster_template = conductor_utils.retrieve_cluster_template(
            self.context, bay)
        self.template_def = TDef.get_template_definition(
            self.cluster_template.server_type,
            self.cluster_template.cluster_distro, self.cluster_template.coe)
Example #13
def _extract_template_definition(context, bay, scale_manager=None):
    cluster_template = conductor_utils.retrieve_cluster_template(context, bay)
    cluster_distro = cluster_template.cluster_distro
    cluster_coe = cluster_template.coe
    cluster_server_type = cluster_template.server_type
    definition = TDef.get_template_definition(cluster_server_type,
                                              cluster_distro,
                                              cluster_coe)
    return definition.extract_definition(context, cluster_template, bay,
                                         scale_manager=scale_manager)
Example #14
    def _install_addons(self, cluster, cluster_kubectl, context):
        """Install add-on services.

        Including Calico, kube-proxy and CoreDNS.
        """
        LOG.info("Starting to install add-ons for cluster %s", cluster.uuid)

        # Add initializing tag for the new cluster.
        tag_template = self.jinja_env.get_template('addon_tag.yaml.j2')
        tag_body = tag_template.render(
            {'namespace': cluster.uuid, 'status': 'initializing'}
        )
        self.kubectl.apply(definition=tag_body)

        cluster_template = conductor_utils.retrieve_cluster_template(
            context, cluster
        )
        if cluster_template.network_driver == 'flannel':
            cluster_pod_ip_range = cluster.labels.get(
                'flannel_network_cidr', '10.100.0.0/16'
            )
        if cluster_template.network_driver == 'calico':
            cluster_pod_ip_range = cluster.labels.get(
                'calico_ipv4pool', '192.168.0.0/16'
            )

        cluster_service_ip_range = cluster.labels.get(
            'service_cluster_ip_range', '10.97.0.0/16'
        )
        service_ip_net = netaddr.IPNetwork(cluster_service_ip_range)
        cluster_dns_service_ip = service_ip_net[10]
        params = {
            'apiserver_address': cluster.master_addresses[0],
            'cluster_id': cluster.uuid,
            'pod_ip_range': cluster_pod_ip_range,
            'cluster_dns_service_ip': cluster_dns_service_ip,
            "kube_version": cluster.labels.get("kube_tag", "v1.14.3"),
        }

        LOG.info(
            'Installing calico, proxy, coredns for cluster %s',
            cluster.uuid
        )
        for t in ['calico_node_rbac.yaml.j2', 'calico.yaml.j2',
                  'kube-proxy.yaml.j2', 'coredns.yaml.j2']:
            template = self.jinja_env.get_template(t)
            body = template.render(params)
            cluster_kubectl.apply(definition=body)

        # Add initialized tag for the new cluster.
        tag_template = self.jinja_env.get_template('addon_tag.yaml.j2')
        tag_body = tag_template.render(
            {'namespace': cluster.uuid, 'status': 'initialized'}
        )
        self.kubectl.apply(definition=tag_body)
Example #15
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Update cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            cluster_driver.update_cluster(context, cluster, manager, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #16
    def delete_cluster(self, context, cluster):
        LOG.info("Starting to delete cluster %s", cluster.uuid)

        self.pre_delete_cluster(context, cluster)

        c_template = conductor_utils.retrieve_cluster_template(
            context,
            cluster
        )

        # NOTE: The fake fields are only there to keep the YAML files valid;
        # they do not affect the deletion.
        params = {
            "namespace": cluster.uuid,
            "cloud_provider_tag": "fake",
            "kube_version": "fake",
        }
        _delete_manifest = functools.partial(self._delete_manifest, params)

        LOG.info("Deleting components for cluster %s", cluster.uuid)
        for tmpl in [
            "openstack-cloud-controller-manager.yaml.j2",
            "kube-scheduler.yaml.j2", "kube-controllermgr.yaml.j2",
            "kube-apiserver.yaml.j2", "etcd.yaml.j2",
            "secrets.yaml.j2", "namespace.yaml.j2"
        ]:
            _delete_manifest(tmpl)

        # Delete floating ip if needed.
        if (self._master_lb_fip_enabled(cluster, c_template) and
                cluster.api_address):
            network_client = clients.OpenStackClients(context).neutron()
            ip = netutils.urlsplit(cluster.api_address).netloc.split(":")[0]
            fips = network_client.list_floatingips(floating_ip_address=ip)
            for fip in fips['floatingips']:
                LOG.info("Deleting floating ip %s for cluster %s",
                         fip["floating_ip_address"], cluster.uuid)
                network_client.delete_floatingip(fip['id'])

        # Delete VIP port
        LOG.info("Deleting ports for cluster %s", cluster.uuid)
        tag = {"magnum": cluster.uuid}
        tags = [jsonutils.dumps(tag)]
        neutron.delete_port_by_tags(context, tags)

        # Delete Heat stack.
        if cluster.stack_id:
            LOG.info("Deleting Heat stack %s for cluster %s",
                     cluster.stack_id, cluster.uuid)
            self._delete_stack(
                context, clients.OpenStackClients(context), cluster
            )
Example #17
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_conductor cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.delete_cluster(context, cluster)
            cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
            cluster.status_reason = None
        except exc.HTTPNotFound:
            LOG.info(
                'The cluster %s was not found during cluster'
                ' deletion.', cluster.id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                # delete all cluster's nodegroups
                for ng in cluster.nodegroups:
                    ng.destroy()
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info('The cluster %s has been deleted by others.', uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS,
                cluster)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE,
                cluster)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception as unexp:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE,
                cluster)
            cluster.status = fields.ClusterStatus.DELETE_FAILED
            cluster.status_reason = six.text_type(unexp)
            cluster.save()
            raise

        cluster.save()
        return None
Example #18
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_conductor cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
            cluster_driver.delete_cluster(context, cluster)
            cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
            cluster.status_reason = None
        except exc.HTTPNotFound:
            LOG.info('The cluster %s was not found during cluster'
                     ' deletion.', cluster.id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                # delete all cluster's nodegroups
                for ng in cluster.nodegroups:
                    ng.destroy()
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info('The cluster %s has been deleted by others.',
                         uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception as unexp:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            cluster.status = fields.ClusterStatus.DELETE_FAILED
            cluster.status_reason = six.text_type(unexp)
            cluster.save()
            raise

        cluster.save()
        return None
Example #19
    def cluster_delete(self, context, uuid):
        LOG.debug('cluster_heat cluster_delete')
        osc = clients.OpenStackClients(context)
        cluster = objects.Cluster.get_by_uuid(context, uuid)
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)

        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
            cluster_driver.delete_stack(context, osc, cluster)
        except exc.HTTPNotFound:
            LOG.info(
                _LI('The stack %s was not found during cluster'
                    ' deletion.'), cluster.stack_id)
            try:
                trust_manager.delete_trustee_and_trust(osc, context, cluster)
                cert_manager.delete_certificates_from_cluster(cluster,
                                                              context=context)
                cluster.destroy()
            except exception.ClusterNotFound:
                LOG.info(_LI('The cluster %s has been deleted by others.'),
                         uuid)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
            return None
        except exc.HTTPConflict:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise exception.OperationInProgress(cluster_name=cluster.name)
        except Exception:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
            raise

        cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
        cluster.save()

        self._poll_and_check(osc, cluster, cluster_driver)

        return None
Example #20
    def cluster_create(self, context, cluster, create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING)
            # Get driver
            ct = conductor_utils.retrieve_cluster_template(context, cluster)
            cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                      ct.cluster_distro,
                                                      ct.coe)
            # Create cluster
            created_stack = cluster_driver.create_stack(
                context, osc, cluster, create_timeout)
        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.create()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        cluster.stack_id = created_stack['stack']['id']
        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.create()

        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
Example #21
    def cluster_create(self, context, cluster, create_timeout):
        LOG.debug('cluster_heat cluster_create')

        osc = clients.OpenStackClients(context)

        try:
            # Create trustee/trust and set them to cluster
            trust_manager.create_trustee_and_trust(osc, cluster)
            # Generate certificate and set the cert reference to cluster
            cert_manager.generate_certificates_to_cluster(cluster,
                                                          context=context)
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_PENDING)
            # Get driver
            ct = conductor_utils.retrieve_cluster_template(context, cluster)
            cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                      ct.cluster_distro,
                                                      ct.coe)
            # Create cluster
            created_stack = cluster_driver.create_stack(context, osc, cluster,
                                                        create_timeout)
        except Exception as e:
            cluster.status = fields.ClusterStatus.CREATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.create()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_CREATE, taxonomy.OUTCOME_FAILURE)

            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))

                raise e
            raise

        cluster.stack_id = created_stack['stack']['id']
        cluster.status = fields.ClusterStatus.CREATE_IN_PROGRESS
        cluster.create()

        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
Example #22
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        stack = osc.heat().stacks.get(cluster.stack_id)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if stack.stack_status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when stack status is '
                          '"%s"') % stack.stack_status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Update cluster
        cluster_driver.update_stack(context, osc, cluster, manager, rollback)
        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
Example #23
    def cluster_update(self, context, cluster, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        stack = osc.heat().stacks.get(cluster.stack_id)
        allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                               fields.ClusterStatus.UPDATE_COMPLETE,
                               fields.ClusterStatus.RESUME_COMPLETE,
                               fields.ClusterStatus.RESTORE_COMPLETE,
                               fields.ClusterStatus.ROLLBACK_COMPLETE,
                               fields.ClusterStatus.SNAPSHOT_COMPLETE,
                               fields.ClusterStatus.CHECK_COMPLETE,
                               fields.ClusterStatus.ADOPT_COMPLETE)
        if stack.stack_status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when stack status is '
                          '"%s"') % stack.stack_status
            raise exception.NotSupported(operation=operation)

        delta = cluster.obj_what_changed()
        if not delta:
            return cluster

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Update cluster
        cluster_driver.update_stack(context, osc, cluster, manager, rollback)
        self._poll_and_check(osc, cluster, cluster_driver)

        return cluster
Example #24
    def cluster_resize(self, context, cluster,
                       node_count, nodes_to_remove, nodegroup):
        LOG.debug('cluster_conductor cluster_resize')

        osc = clients.OpenStackClients(context)
        # NOTE(flwang): One of the important use cases of the /resize API is
        # supporting the auto-scaling action triggered by the Kubernetes
        # Cluster Autoscaler, so two cases may happen:
        # 1. The API could be triggered very often
        # 2. Scaling up or down may fail, and we would like to offer the
        #    ability to recover the cluster so that it can be resized again
        #    after a failed update.
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE,
            fields.ClusterStatus.UPDATE_FAILED,
            fields.ClusterStatus.UPDATE_IN_PROGRESS,
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Resizing a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        resize_manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = nodegroup.node_count

        # Resize cluster
        try:
            nodegroup.node_count = node_count
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            cluster_driver.resize_cluster(context, cluster, resize_manager,
                                          node_count, nodes_to_remove,
                                          nodegroup)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.node_count = old_node_count
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #25
    def cluster_update(self, context, cluster, node_count, rollback=False):
        LOG.debug('cluster_heat cluster_update')

        osc = clients.OpenStackClients(context)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Updating a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Updates will only be reflected in the default worker
        # nodegroup.
        worker_ng = cluster.default_ng_worker
        if worker_ng.node_count == node_count:
            return
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = worker_ng.node_count

        manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Update cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            worker_ng.node_count = node_count
            worker_ng.save()
            cluster_driver.update_cluster(context, cluster, manager, rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            # Restore the node_count
            worker_ng.node_count = old_node_count
            worker_ng.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
Example #26
    def create_cluster(self, context, cluster, cluster_create_timeout):
        LOG.info("Starting to create cluster %s", cluster.uuid)

        cluster_template = conductor_utils.retrieve_cluster_template(
            context,
            cluster
        )

        cluster_service_ip_range = cluster.labels.get(
            'service_cluster_ip_range', '10.97.0.0/16'
        )
        if cluster_template.network_driver == 'flannel':
            cluster_pod_ip_range = cluster.labels.get(
                'flannel_network_cidr', '10.100.0.0/16'
            )
        if cluster_template.network_driver == 'calico':
            cluster_pod_ip_range = cluster.labels.get(
                'calico_ipv4pool', '192.168.0.0/16'
            )

        port_info = self._create_vip_port(context, cluster, cluster_template)

        # This address should be an internal IP that other services can
        # communicate with.
        self.apiserver_address = port_info["private_ip"]
        external_apiserver_address = port_info.get("public_ip",
                                                   port_info["private_ip"])

        # The master address is always the private VIP address.
        cluster.api_address = 'https://%s:6443' % external_apiserver_address
        master_ng = cluster.default_ng_master
        setattr(master_ng, "node_addresses", [self.apiserver_address])
        master_ng.save()

        self.public_network_id = (
            cluster_template.external_network_id or "public")
        if not uuidutils.is_uuid_like(self.public_network_id):
            self.public_network_id = neutron.get_network_id(
                context,
                self.public_network_id
            )

        ca_cert = cert_manager.get_cluster_ca_certificate(
            cluster,
            context=context
        )
        ca_cert_encoded = base64.b64encode(ca_cert.get_certificate())
        ca_key_encoded = base64.b64encode(ca_cert.get_decrypted_private_key())

        cloud_provider_enabled = strutils.bool_from_string(
            cluster.labels.get("cloud_provider_enabled", "true")
        )

        ca_cert_encoded_str = ca_cert_encoded.decode('utf-8')
        ca_cert_encoded_str = ca_cert_encoded_str.replace("'", "")
        ca_key_encoded_str = ca_key_encoded.decode('utf-8')
        ca_key_encoded_str = ca_key_encoded_str.replace("'", "")

        params = {
            "namespace": cluster.uuid,
            "vip_port_ip": self.apiserver_address,
            "vip_external_ip": external_apiserver_address,
            "vip_port_id": port_info["port_id"],
            "service_ip_range": cluser_service_ip_range,
            "pod_ip_range": cluser_pod_ip_range,
            "ca_cert": ca_cert_encoded_str,
            "ca_key": ca_key_encoded_str,
            "subnet_id": cluster_template.fixed_subnet,
            "public_network_id": self.public_network_id,
            "cloud_provider_enabled": cloud_provider_enabled,
            "kube_version": cluster.labels.get("kube_tag", "v1.14.3"),
            "cloud_provider_tag": cluster.labels.get("cloud_provider_tag",
                                                     "v1.15.0")
        }

        # Keystone related info.
        osc = clients.OpenStackClients(context)
        params['trustee_user_id'] = cluster.trustee_user_id
        params['trustee_password'] = cluster.trustee_password
        if CONF.trust.cluster_user_trust:
            params['trust_id'] = cluster.trust_id
        else:
            params['trust_id'] = ""
        kwargs = {
            'service_type': 'identity',
            'interface': CONF.trust.trustee_keystone_interface,
            'version': 3
        }
        if CONF.trust.trustee_keystone_region_name:
            kwargs['region_name'] = CONF.trust.trustee_keystone_region_name
        params['auth_url'] = osc.url_for(**kwargs).rstrip('/')

        _apply_manifest = functools.partial(self._apply_manifest, params)

        LOG.info("Creating namespace for cluster %s", cluster.uuid)
        _apply_manifest('namespace.yaml.j2')

        # Create Secret for the new cluster CA and the kube services, the CA
        # could be referenced by various cluster components.
        LOG.info("Creating Secrets for cluster %s", cluster.uuid)
        _apply_manifest('secrets.yaml.j2')
        # TODO: Wait for all the certificates to be ready

        # etcd Service and StatefulSet
        LOG.info("Creating etcd service for cluster %s", cluster.uuid)
        _apply_manifest('etcd.yaml.j2')

        # apiserver Service and Deployment
        LOG.info("Creating kube-apiserver for cluster %s", cluster.uuid)
        _apply_manifest('kube-apiserver.yaml.j2')

        # Deploy kube-controller-manager
        LOG.info("Creating kube-controller-manager for cluster %s",
                 cluster.uuid)
        _apply_manifest('kube-controllermgr.yaml.j2')

        # Deploy kube-scheduler
        LOG.info("Creating kube-scheduler for cluster %s", cluster.uuid)
        _apply_manifest('kube-scheduler.yaml.j2')

        kubeconfig_path = self._get_kubeconfig(
            context, cluster,
            ca_cert_encoded=ca_cert_encoded
        )
        LOG.info(
            "Kubeconfig created for cluster %s, path: %s",
            cluster.uuid, kubeconfig_path
        )

        cluster_kubectl = kubectl.KubeCtl(
            bin="/usr/bin/kubectl",
            global_flags="--kubeconfig %s" % kubeconfig_path
        )

        LOG.info(
            "Waiting for all the components up and running for "
            "cluster %s", cluster.uuid
        )
        self._wait_for_apiserver(cluster.uuid, cluster_kubectl)

        if cloud_provider_enabled:
            # Deploy openstack-cloud-controller-manager
            LOG.info("Creating openstack-cloud-controller-manager for "
                     "cluster %s", cluster.uuid)
            # Create RBAC for openstack-cloud-controller-manager in the
            # cluster.
            _apply_manifest(
                "openstack-cloud-controller-manager-in-cluster.yaml.j2",
                cluster_kubectl
            )
            _apply_manifest('openstack-cloud-controller-manager.yaml.j2')

        # Create bootstrap token and the bootstrap RBAC in the new cluster
        LOG.info(
            "Creating bootstrap token and RBAC in the cluster %s",
            cluster.uuid
        )
        expiration = timeutils.utcnow() + datetime.timedelta(days=1)
        # For bootstrap token, refer to
        # https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/
        token_id = self._generate_random_string(6)
        token_secret = self._generate_random_string(16)
        bootstrap_params = {
            "token_id": token_id,
            "token_secret": token_secret,
            "expiration": expiration.strftime('%Y-%m-%dT%H:%M:%SZ'),
        }
        bootstrap_template = self.jinja_env.get_template('bootstrap.yaml.j2')
        bootstrap_body = bootstrap_template.render(bootstrap_params)
        cluster_kubectl.apply(definition=bootstrap_body)

        self.bootstrap_token = "%s.%s" % (token_id, token_secret)

        # Grant privileges to the 'kubernetes' user so that the apiserver
        # can access the kubelet for operations like logs, exec, etc.
        # The user name here must be the same as the apiserver CN in
        # secrets.yaml.j2
        cluster_kubectl.execute(
            "create clusterrolebinding kube-apiserver --clusterrole "
            "cluster-admin --user kubernetes"
        )

        # Start creating VMs and bootstrapping the kubelet
        LOG.info("Creating worker nodes for cluster %s", cluster.uuid)
        super(Driver, self).create_cluster(
            context, cluster, cluster_create_timeout
        )
Example #27
    def update_cluster_status(self, context, cluster):
        """Updates the cluster status.

        This method should be finished within the periodic interval (10s).

        :param context: Admin context.
        :param cluster: Cluster object.
        """
        if cluster.status == fields.ClusterStatus.CREATE_IN_PROGRESS:
            if cluster.stack_id is None:
                return

            stack_ctx = mag_ctx.make_cluster_context(cluster)
            os_clients = clients.OpenStackClients(stack_ctx)
            stack = os_clients.heat().stacks.get(
                cluster.stack_id,
                resolve_outputs=False
            )

            if stack.stack_status == fields.ClusterStatus.CREATE_COMPLETE:
                stack_ctx = mag_ctx.make_cluster_context(cluster)
                kubeconfig_path = self._get_kubeconfig(stack_ctx, cluster)
                cluster_kubectl = kubectl.KubeCtl(
                    bin="/usr/bin/kubectl",
                    global_flags="--kubeconfig %s" % kubeconfig_path
                )

                ns = self.kubectl.get("namespace %s" % cluster.uuid)
                labels = ns['metadata'].get('labels', {})

                if not labels.get('magnum.k8s.io/status'):
                    self._install_addons(cluster, cluster_kubectl, context)
                    return

                if self._workers_ready(cluster, cluster_kubectl):
                    LOG.info(
                        'Cluster %s is created successfully', cluster.uuid
                    )

                    # Update the worker addresses in the cluster from the Heat
                    # stack output.
                    stack = os_clients.heat().stacks.get(
                        cluster.stack_id,
                        resolve_outputs=True
                    )
                    template_def = self.get_template_definition()
                    c_template = conductor_utils.retrieve_cluster_template(
                        context,
                        cluster
                    )
                    template_def.update_outputs(stack, c_template, cluster)

                    cluster.status = fields.ClusterStatus.CREATE_COMPLETE
                    cluster.save()
            elif stack.stack_status in (
                fields.ClusterStatus.CREATE_FAILED,
                fields.ClusterStatus.DELETE_FAILED,
                fields.ClusterStatus.UPDATE_FAILED,
                fields.ClusterStatus.ROLLBACK_COMPLETE,
                fields.ClusterStatus.ROLLBACK_FAILED
            ):
                self._sync_cluster_status(cluster, stack)
                LOG.error('Failed to create cluster %s', cluster.uuid)

        elif cluster.status == fields.ClusterStatus.DELETE_IN_PROGRESS:
            # Check if the namespace is deleted.
            ns_template = self.jinja_env.get_template('namespace.yaml.j2')
            ns_body = ns_template.render({"namespace": cluster.uuid})
            namespaces = self.kubectl.get('namespace')
            names = [n['metadata']['name'] for n in namespaces]

            if cluster.uuid not in names:
                LOG.debug(
                    "Namespace has been deleted for cluster %s",
                    cluster.uuid
                )
                stack_ctx = mag_ctx.make_cluster_context(cluster)
                os_client = clients.OpenStackClients(stack_ctx)

                try:
                    trust_manager.delete_trustee_and_trust(
                        os_client,
                        context,
                        cluster
                    )
                    cert_manager.delete_certificates_from_cluster(
                        cluster,
                        context=context
                    )
                    cert_manager.delete_client_files(cluster, context=context)
                except exception.ClusterNotFound:
                    LOG.info(
                        'The cluster %s has been deleted by others.',
                        cluster.uuid
                    )

                LOG.info('Cluster %s has been deleted.', cluster.uuid)

                cluster.status = fields.ClusterStatus.DELETE_COMPLETE
                cluster.save()
Example #28
    def cluster_resize(self, context, cluster, node_count, nodes_to_remove,
                       nodegroup):
        LOG.debug('cluster_conductor cluster_resize')

        osc = clients.OpenStackClients(context)
        # NOTE(flwang): One of the important use cases of the /resize API is
        # supporting the auto-scaling action triggered by the Kubernetes
        # Cluster Autoscaler, so two cases may happen:
        # 1. The API could be triggered very often
        # 2. Scaling up or down may fail, and we would like to offer the
        #    ability to recover the cluster so that it can be resized again
        #    after a failed update.
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE,
            fields.ClusterStatus.UPDATE_FAILED,
            fields.ClusterStatus.UPDATE_IN_PROGRESS,
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            operation = _('Resizing a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        resize_manager = scale_manager.get_scale_manager(context, osc, cluster)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro, ct.coe)
        # Backup the old node count so that we can restore it
        # in case of an exception.
        old_node_count = nodegroup.node_count

        # Resize cluster
        try:
            nodegroup.node_count = node_count
            nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
                cluster)
            cluster_driver.resize_cluster(context, cluster, resize_manager,
                                          node_count, nodes_to_remove,
                                          nodegroup)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            nodegroup.node_count = old_node_count
            nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
            nodegroup.status_reason = six.text_type(e)
            nodegroup.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
                cluster)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster