Example #1
0
    def create_cluster_thread(self):
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise Exception('Cluster already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp_resource = vdc.create_vapp(self.cluster_name,
                                            description='cluster %s' %
                                            self.cluster_name,
                                            network=network_name,
                                            fence_mode='bridged')
            t = self.client_tenant.get_task_monitor().wait_for_status(
                task=vapp_resource.Tasks.Task[0],
                timeout=60,
                poll_frequency=2,
                fail_on_status=None,
                expected_target_statuses=[
                    TaskStatus.SUCCESS, TaskStatus.ABORTED, TaskStatus.ERROR,
                    TaskStatus.CANCELED
                ],
                callback=None)
            assert t.get('status').lower() == TaskStatus.SUCCESS.value
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                t = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().\
                    wait_for_status(
                        task=t,
                        timeout=600,
                        poll_frequency=5,
                        fail_on_status=None,
                        expected_target_statuses=[TaskStatus.SUCCESS],
                        callback=None)
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            add_nodes(1,
                      template,
                      TYPE_MASTER,
                      self.config,
                      self.client_tenant,
                      org,
                      vdc,
                      vapp,
                      self.body,
                      wait=True)

            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))

            vapp.reload()
            init_cluster(self.config, vapp, template)

            master_ip = get_master_ip(self.config, vapp, template)
            t = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                  master_ip)
            self.client_tenant.get_task_monitor().\
                wait_for_status(
                    task=t,
                    timeout=600,
                    poll_frequency=5,
                    fail_on_status=None,
                    expected_target_statuses=[TaskStatus.SUCCESS],
                    callback=None)

            if self.body['node_count'] > 0:

                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                add_nodes(self.body['node_count'],
                          template,
                          TYPE_NODE,
                          self.config,
                          self.client_tenant,
                          org,
                          vdc,
                          vapp,
                          self.body,
                          wait=True)
                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)

            self.update_task(TaskStatus.SUCCESS,
                             self.op,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))

        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))
    def _create_cluster_async(self, *args,
                              org_name, ovdc_name, cluster_name, cluster_id,
                              template_name, template_revision, num_workers,
                              network_name, num_cpu, mb_memory,
                              storage_profile_name, ssh_key_filepath,
                              enable_nfs, rollback):
        org = vcd_utils.get_org(self.tenant_client, org_name=org_name)
        vdc = vcd_utils.get_vdc(
            self.tenant_client, vdc_name=ovdc_name, org=org)

        LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}"
                     f" with {num_workers} worker nodes, "
                     f"storage profile={storage_profile_name}")
        try:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating cluster vApp {cluster_name}({cluster_id})")
            try:
                vapp_resource = \
                    vdc.create_vapp(cluster_name,
                                    description=f"cluster {cluster_name}",
                                    network=network_name,
                                    fence_mode='bridged')
            except Exception as e:
                msg = f"Error while creating vApp: {e}"
                LOGGER.debug(str(e))
                raise ClusterOperationError(msg)
            self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0]) # noqa: E501

            template = get_template(template_name, template_revision)

            tags = {
                ClusterMetadataKey.CLUSTER_ID: cluster_id,
                ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version, # noqa: E501
                ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME], # noqa: E501
                ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION] # noqa: E501
            }
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            task = vapp.set_multiple_metadata(tags)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating master node for "
                        f"{cluster_name} ({cluster_id})")
            vapp.reload()
            server_config = utils.get_server_runtime_config()
            catalog_name = server_config['broker']['catalog']
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=1,
                          node_type=NodeType.MASTER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise MasterNodeCreationError("Error adding master node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Initializing cluster {cluster_name} ({cluster_id})")
            vapp.reload()
            init_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])
            master_ip = get_master_ip(vapp)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating {num_workers} node(s) for "
                        f"{cluster_name}({cluster_id})")
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=num_workers,
                          node_type=NodeType.WORKER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise WorkerNodeCreationError("Error creating worker node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Adding {num_workers} node(s) to "
                        f"{cluster_name}({cluster_id})")
            vapp.reload()
            join_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])

            if enable_nfs:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating NFS node for "
                            f"{cluster_name} ({cluster_id})")
                try:
                    add_nodes(client=self.tenant_client,
                              num_nodes=1,
                              node_type=NodeType.NFS,
                              org=org,
                              vdc=vdc,
                              vapp=vapp,
                              catalog_name=catalog_name,
                              template=template,
                              network_name=network_name,
                              num_cpu=num_cpu,
                              memory_in_mb=mb_memory,
                              storage_profile=storage_profile_name,
                              ssh_key_filepath=ssh_key_filepath)
                except Exception as e:
                    raise NFSNodeCreationError("Error creating NFS node:",
                                               str(e))

            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created cluster {cluster_name} ({cluster_id})")
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            if rollback:
                msg = f"Error creating cluster {cluster_name}. " \
                      f"Deleting cluster (rollback=True)"
                self._update_task(TaskStatus.RUNNING, message=msg)
                LOGGER.info(msg)
                try:
                    cluster = get_cluster(self.tenant_client,
                                          cluster_name,
                                          cluster_id=cluster_id,
                                          org_name=org_name,
                                          ovdc_name=ovdc_name)
                    self._delete_cluster(cluster_name=cluster_name,
                                         cluster_vdc_href=cluster['vdc_href'])
                except Exception:
                    LOGGER.error(f"Failed to delete cluster {cluster_name}",
                                 exc_info=True)
            LOGGER.error(f"Error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
            # raising an exception here prints a stacktrace to server console
        except Exception as e:
            LOGGER.error(f"Unknown error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()
    def create_cluster_thread(self):
        network_name = self.req_spec.get(RequestKey.NETWORK_NAME)
        try:
            clusters = load_from_metadata(self.tenant_client,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                                "already exists.")

            org_resource = self.tenant_client.get_org_by_name(
                self.req_spec.get(RequestKey.ORG_NAME))
            org = Org(self.tenant_client, resource=org_resource)
            vdc_resource = org.get_vdc(self.req_spec.get(RequestKey.OVDC_NAME))
            vdc = VDC(self.tenant_client, resource=vdc_resource)
            template = self._get_template()
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating cluster vApp {self.cluster_name}"
                        f"({self.cluster_id})")
            try:
                vapp_resource = vdc.create_vapp(
                    self.cluster_name,
                    description=f"cluster {self.cluster_name}",
                    network=network_name,
                    fence_mode='bridged')
            except Exception as e:
                raise ClusterOperationError(
                    "Error while creating vApp:", str(e))

            self.tenant_client.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.tenant_client.get_task_monitor().wait_for_status(task)
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating master node for {self.cluster_name}"
                        f"({self.cluster_id})")
            vapp.reload()

            server_config = get_server_runtime_config()
            try:
                add_nodes(1, template, NodeType.MASTER, server_config,
                          self.tenant_client, org, vdc, vapp, self.req_spec)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Initializing cluster {self.cluster_name}"
                        f"({self.cluster_id})")
            vapp.reload()
            init_cluster(server_config, vapp, template)
            master_ip = get_master_ip(server_config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            if self.req_spec.get(RequestKey.NUM_WORKERS) > 0:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) for "
                            f"{self.cluster_name}({self.cluster_id})")
                try:
                    add_nodes(self.req_spec.get(RequestKey.NUM_WORKERS),
                              template, NodeType.WORKER, server_config,
                              self.tenant_client, org, vdc, vapp,
                              self.req_spec)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to "
                            f"{self.cluster_name}({self.cluster_id})")
                vapp.reload()
                join_cluster(server_config, vapp, template)
            if self.req_spec.get(RequestKey.ENABLE_NFS):
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating NFS node for {self.cluster_name}"
                            f"({self.cluster_id})")
                try:
                    add_nodes(1, template, NodeType.NFS,
                              server_config, self.tenant_client, org, vdc,
                              vapp, self.req_spec)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created cluster {self.cluster_name}"
                        f"({self.cluster_id})")
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY]
                [ERROR_DESCRIPTION_KEY],
                stack_trace=stack_trace)
            raise e
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()
    def create_cluster_thread(self):
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.tenant_client,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                                "already exists.")
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.tenant_client, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                vapp_resource = vdc.create_vapp(self.cluster_name,
                                                description='cluster %s' %
                                                self.cluster_name,
                                                network=network_name,
                                                fence_mode='bridged')
            except Exception as e:
                raise ClusterOperationError('Error while creating vApp:',
                                            str(e))

            self.tenant_client.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.tenant_client.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()

            server_config = get_server_runtime_config()
            try:
                add_nodes(1, template, TYPE_MASTER, server_config,
                          self.tenant_client, org, vdc, vapp, self.body)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(server_config, vapp, template)
            master_ip = get_master_ip(server_config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                try:
                    add_nodes(self.body['node_count'], template, TYPE_NODE,
                              server_config, self.tenant_client, org, vdc,
                              vapp, self.body)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(server_config, vapp, template)
            if self.body['enable_nfs']:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating NFS node for %s(%s)' %
                                 (self.cluster_name, self.cluster_id))
                try:
                    add_nodes(1, template, TYPE_NFS, server_config,
                              self.tenant_client, org, vdc, vapp, self.body)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)
            raise e
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()