def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    instances_to_delete = []
    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")
    instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if instance_ids:
        cluster = g.change_cluster_status(cluster, "Configuring")
        instances = g.get_instances(cluster, instance_ids)
        plugin.scale_cluster(cluster, instances)

    g.change_cluster_status(cluster, "Active")

def launch_instances(self, cluster, target_count):
    # create all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[0])

    tmpl = heat.ClusterTemplate(cluster)
    self._configure_template(tmpl, cluster, target_count)
    stack = tmpl.instantiate(update_existing=self.UPDATE_STACK,
                             disable_rollback=self.DISABLE_ROLLBACK)
    heat.wait_stack_completion(stack.heat_stack)

    self.inst_ids = self._populate_cluster(cluster, stack)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, self.STAGES[1])

    instances = g.get_instances(cluster, self.inst_ids)
    self._await_networks(cluster, instances)

    # prepare all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[2])

    instances = g.get_instances(cluster, self.inst_ids)
    volumes.mount_to_instances(instances)
    self._configure_instances(cluster)

def _launch_instances(self, cluster, target_count, stages,
                      update_stack=False, disable_rollback=True):
    # create all instances
    cluster = g.change_cluster_status(cluster, stages[0])
    inst_ids = self._create_instances(
        cluster, target_count, update_stack, disable_rollback)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, stages[1])
    instances = g.get_instances(cluster, inst_ids)
    self._await_networks(cluster, instances)

    # prepare all instances
    cluster = g.change_cluster_status(cluster, stages[2])
    instances = g.get_instances(cluster, inst_ids)
    volumes.mount_to_instances(instances)
    self._configure_instances(cluster)

    return inst_ids

def _provision_cluster(cluster_id):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    cluster = _update_sahara_info(ctx, cluster)

    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust_for_cluster(cluster)

    # updating cluster infra
    cluster = g.change_cluster_status(cluster, "InfraUpdating")
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    INFRA.create_cluster(cluster)

    # configure cluster
    cluster = g.change_cluster_status(cluster, "Configuring")
    plugin.configure_cluster(cluster)

    # starting prepared and configured cluster
    cluster = g.change_cluster_status(cluster, "Starting")
    plugin.start_cluster(cluster)

    # cluster is now up and ready
    cluster = g.change_cluster_status(cluster, "Active")

    # schedule pending job executions for the cluster
    for je in conductor.job_execution_get_all(ctx, cluster_id=cluster.id):
        job_manager.run_job(je.id)

def launch_instances(self, ctx, cluster, target_count):
    # create all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[0])

    tmpl = heat.ClusterTemplate(cluster)
    self._configure_template(ctx, tmpl, cluster, target_count)
    stack = tmpl.instantiate(update_existing=self.UPDATE_STACK)
    heat.wait_stack_completion(stack.heat_stack)

    self.inst_ids = self._populate_cluster(ctx, cluster, stack)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, self.STAGES[1])

    instances = g.get_instances(cluster, self.inst_ids)
    self._await_networks(cluster, instances)

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    # prepare all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[2])

    instances = g.get_instances(cluster, self.inst_ids)
    volumes.mount_to_instances(instances)
    self._configure_instances(cluster)

def test_change_cluster_status(self):
    cluster = self._make_sample()
    cluster = general.change_cluster_status(cluster, "Deleting", "desc")
    self.assertEqual("Deleting", cluster.status)
    self.assertEqual("desc", cluster.status_description)

    # a cluster already marked "Deleting" must not transition further
    general.change_cluster_status(cluster, "Spawning")
    self.assertEqual("Deleting", cluster.status)

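The test above pins down the contract that every snippet in this collection relies on: change_cluster_status persists the new status (and optional description) and returns the updated cluster, but refuses to move a cluster that is already being deleted. A minimal, self-contained sketch of that behavior follows; the Cluster dataclass and in-place update are illustrative stand-ins, not the real conductor-backed implementation.

from dataclasses import dataclass


@dataclass
class Cluster:
    # illustrative stand-in for the conductor-backed cluster object
    status: str = "New"
    status_description: str = ""


def change_cluster_status(cluster, status, status_description=None):
    # a missing cluster or one already marked "Deleting" is left alone;
    # callers always re-bind the returned object
    if cluster is None or cluster.status == "Deleting":
        return cluster
    cluster.status = status
    if status_description is not None:
        cluster.status_description = status_description
    return cluster


# mirrors the assertions in the test above
c = change_cluster_status(Cluster(), "Deleting", "desc")
assert c.status == "Deleting" and c.status_description == "desc"
change_cluster_status(c, "Spawning")
assert c.status == "Deleting"
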
def create_cluster(self, cluster):
    ctx = context.ctx()

    self._update_rollback_strategy(cluster, shutdown=True)

    # create all instances
    cluster = g.change_cluster_status(cluster, "Spawning")
    self._create_instances(cluster)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, "Waiting")
    instances = g.get_instances(cluster)

    self._await_active(cluster, instances)

    self._assign_floating_ips(instances)

    self._await_networks(cluster, instances)

    cluster = conductor.cluster_get(ctx, cluster)

    # attach volumes
    volumes.attach_to_instances(g.get_instances(cluster))

    # prepare all instances
    cluster = g.change_cluster_status(cluster, "Preparing")
    self._configure_instances(cluster)

    self._update_rollback_strategy(cluster)

def _scale_cluster_instances(self, cluster, node_group_id_map):
    ctx = context.ctx()
    aa_groups = self._generate_anti_affinity_groups(cluster)

    # split node groups into instances to delete and groups to enlarge
    instances_to_delete = []
    node_groups_to_enlarge = []

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]

        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]
        elif new_count > node_group.count:
            node_groups_to_enlarge.append(node_group)

    if instances_to_delete:
        cluster = g.change_cluster_status(cluster, "Deleting Instances")

        for instance in instances_to_delete:
            self._shutdown_instance(instance)

    cluster = conductor.cluster_get(ctx, cluster)

    instances_to_add = []
    if node_groups_to_enlarge:
        cluster = g.change_cluster_status(cluster, "Adding Instances")

        for node_group in node_groups_to_enlarge:
            count = node_group_id_map[node_group.id]
            for idx in six.moves.xrange(node_group.count + 1, count + 1):
                instance_id = self._run_instance(cluster, node_group, idx,
                                                 aa_groups)
                instances_to_add.append(instance_id)

    return instances_to_add

def _scale_cluster_instances(self, cluster, node_group_id_map):
    ctx = context.ctx()

    aa_group = None
    old_aa_groups = None
    if cluster.anti_affinity:
        aa_group = self._find_aa_server_group(cluster)
        if not aa_group:
            old_aa_groups = self._generate_anti_affinity_groups(cluster)

    instances_to_delete = []
    node_groups_to_enlarge = set()
    node_groups_to_delete = set()

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]

        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]
            if new_count == 0:
                node_groups_to_delete.add(node_group.id)
        elif new_count > node_group.count:
            node_groups_to_enlarge.add(node_group.id)
            if node_group.count == 0 and node_group.auto_security_group:
                self._create_auto_security_group(node_group)

    if instances_to_delete:
        cluster = g.change_cluster_status(cluster, "Deleting Instances")

        for instance in instances_to_delete:
            with context.set_current_instance_id(instance.instance_id):
                self._shutdown_instance(instance)

        self._await_deleted(cluster, instances_to_delete)
        for ng in cluster.node_groups:
            if ng.id in node_groups_to_delete:
                self._delete_auto_security_group(ng)

    cluster = conductor.cluster_get(ctx, cluster)

    instances_to_add = []
    if node_groups_to_enlarge:
        cpo.add_provisioning_step(
            cluster.id, _("Add instances"),
            self._count_instances_to_scale(
                node_groups_to_enlarge, node_group_id_map, cluster))

        cluster = g.change_cluster_status(cluster, "Adding Instances")
        for ng in cluster.node_groups:
            if ng.id in node_groups_to_enlarge:
                count = node_group_id_map[ng.id]
                for idx in six.moves.xrange(ng.count + 1, count + 1):
                    instance_id = self._start_instance(
                        cluster, ng, idx, aa_group, old_aa_groups)
                    instances_to_add.append(instance_id)

    return instances_to_add

def _provision_cluster(cluster_id):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust_for_cluster(cluster)

    # updating cluster infra
    cluster = g.change_cluster_status(cluster, "InfraUpdating")
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    INFRA.create_cluster(cluster)

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    # configure cluster
    cluster = g.change_cluster_status(cluster, "Configuring")
    try:
        plugin.configure_cluster(cluster)
    except Exception as ex:
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return
        LOG.exception(
            _LE("Can't configure cluster '%(name)s' (reason: %(reason)s)"),
            {'name': cluster.name, 'reason': ex})
        g.change_cluster_status(cluster, "Error")
        return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    # starting prepared and configured cluster
    cluster = g.change_cluster_status(cluster, "Starting")
    try:
        plugin.start_cluster(cluster)
    except Exception as ex:
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return
        LOG.exception(
            _LE("Can't start services for cluster '%(name)s' (reason: "
                "%(reason)s)"), {'name': cluster.name, 'reason': ex})
        g.change_cluster_status(cluster, "Error")
        return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    # cluster is now up and ready
    cluster = g.change_cluster_status(cluster, "Active")

    # schedule pending job executions for the cluster
    for je in conductor.job_execution_get_all(ctx, cluster_id=cluster.id):
        job_manager.run_job(je.id)

def create_cluster(self, cluster):
    version = cluster.hadoop_version
    handler = self.version_factory.get_version_handler(version)

    cluster_spec = handler.get_cluster_spec(
        cluster, self._map_to_user_inputs(version, cluster.cluster_configs))
    hosts = self._get_servers(cluster)
    ambari_info = self.get_ambari_info(cluster_spec)
    self.cluster_ambari_mapping[cluster.name] = ambari_info
    rpm = self._get_rpm_uri(cluster_spec)

    servers = []
    for host in hosts:
        host_role = utils.get_host_role(host)
        servers.append(
            h.HadoopServer(host, cluster_spec.node_groups[host_role],
                           ambari_rpm=rpm))

    self._provision_cluster(cluster.name, cluster_spec, ambari_info,
                            servers, cluster.hadoop_version)

    # add the topology data file and script if rack awareness is enabled
    self._configure_topology_for_cluster(cluster, servers)

    LOG.info(_LI("Install of Hadoop stack successful."))

    # add service urls
    self._set_cluster_info(cluster, cluster_spec)

    # check if HDFS HA is enabled; set it up if so
    if cluster_spec.is_hdfs_ha_enabled(cluster):
        cluster = g.change_cluster_status(cluster, "Configuring HA")
        self.configure_hdfs_ha(cluster)

def terminate_cluster(id):
    context.set_current_cluster_id(id)
    cluster = g.change_cluster_status(id, "Deleting")

    OPS.terminate_cluster(id)
    sender.notify(context.ctx(), cluster.id, cluster.name, cluster.status,
                  "delete")

def create_cluster(self, cluster):
    ctx = context.ctx()

    launcher = _CreateLauncher()

    try:
        target_count = self._get_ng_counts(cluster)
        self._nullify_ng_counts(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        launcher.launch_instances(ctx, cluster, target_count)

        cluster = conductor.cluster_get(ctx, cluster)
        self._add_volumes(ctx, cluster)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                _LW("Can't start cluster '%(cluster)s' "
                    "(reason: %(reason)s)"), cluster, ex)

            cluster = g.change_cluster_status(
                cluster, "Error", status_description=six.text_type(ex))

            self._rollback_cluster_creation(cluster)

def scale_cluster(self, cluster, node_group_id_map):
    ctx = context.ctx()

    cluster = g.change_cluster_status(cluster, "Scaling")
    instance_ids = self._scale_cluster_instances(cluster,
                                                 node_group_id_map)

    self._update_rollback_strategy(cluster, instance_ids=instance_ids)

    cluster = conductor.cluster_get(ctx, cluster)
    g.clean_cluster_from_empty_ng(cluster)

    cluster = conductor.cluster_get(ctx, cluster)
    instances = g.get_instances(cluster, instance_ids)

    self._await_active(cluster, instances)

    self._assign_floating_ips(instances)

    self._await_networks(cluster, instances)

    cluster = conductor.cluster_get(ctx, cluster)

    volumes.attach_to_instances(g.get_instances(cluster, instance_ids))

    # we should reach this point with a valid cluster: if instance
    # creation failed, all extra instances were removed above
    if instance_ids:
        self._configure_instances(cluster)

    self._update_rollback_strategy(cluster)

    return instance_ids

def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(cluster, "Error",
                                    status_description=six.text_type(e))

    OPS.provision_cluster(cluster.id)

    return cluster

def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    sender.notify(ctx, cluster.id, cluster.name, "New", "create")
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(cluster, "Error",
                                    status_description=six.text_type(e))

    OPS.provision_cluster(cluster.id)

    return cluster

def scale_cluster(self, cluster, target_count):
    ctx = context.ctx()

    rollback_count = self._get_ng_counts(cluster)

    launcher = _ScaleLauncher()

    try:
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                _LW("Can't scale cluster '%(cluster)s' "
                    "(reason: %(reason)s)"), cluster, ex)

            cluster = conductor.cluster_get(ctx, cluster)

            try:
                self._rollback_cluster_scaling(
                    ctx, cluster, rollback_count, target_count)
            except Exception:
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return
                # if something fails during the rollback, we stop
                # doing anything further
                cluster = g.change_cluster_status(cluster, "Error")
                LOG.error(_LE("Unable to complete rollback, aborting"))
                raise

            cluster = g.change_cluster_status(cluster, "Active")
            LOG.warn(
                _LW("Rollback successful. "
                    "Re-raising the initial exception."))
    finally:
        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)

    # note: the return must sit outside the finally clause, otherwise
    # it would swallow the exception re-raised above
    return launcher.inst_ids

def launch_instances(self, cluster, target_count):
    # create all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[0])

    self.create_instances(cluster, target_count)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, self.STAGES[1])

    instances = g.get_instances(cluster, self.inst_ids)
    self._await_networks(cluster, instances)

    # prepare all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[2])

    instances = g.get_instances(cluster, self.inst_ids)
    volumes.mount_to_instances(instances)
    self._configure_instances(cluster)

def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    context.set_current_cluster_id(cluster.id)
    sender.notify(ctx, cluster.id, cluster.name, "New", "create")
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    _add_ports_for_auto_sg(ctx, cluster, plugin)

    # validating cluster
    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        quotas.check_cluster(cluster)
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(cluster, "Error", six.text_type(e))

    OPS.provision_cluster(cluster.id)

    return cluster

def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    sender.notify(ctx, cluster.id, cluster.name, "New", "create")
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    _add_ports_for_auto_sg(ctx, cluster, plugin)

    # validating cluster
    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        quotas.check_cluster(cluster)
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(cluster, "Error", six.text_type(e))

    OPS.provision_cluster(cluster.id)

    return cluster

def scale_cluster(id, data):
    context.set_current_cluster_id(id)
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get('resize_node_groups', [])
    additional_node_groups = data.get('add_node_groups', [])

    # the next map is the main object we will work with:
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {}
    for ng in existing_node_groups:
        ng_id = g.find(cluster.node_groups, name=ng['name'])['id']
        to_be_enlarged.update({ng_id: ng['count']})

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)
    cluster = conductor.cluster_get(ctx, cluster)
    _add_ports_for_auto_sg(ctx, cluster, plugin)

    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        quotas.check_scaling(cluster, to_be_enlarged, additional)
        plugin.recommend_configs(cluster)
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.clean_cluster_from_empty_ng(cluster)
            g.change_cluster_status(cluster, "Active", six.text_type(e))

    # if we are here, validation was successful, so extend the
    # to_be_enlarged map with the newly added node groups
    to_be_enlarged.update(additional)

    for node_group in cluster.node_groups:
        if node_group.id not in to_be_enlarged:
            to_be_enlarged[node_group.id] = node_group.count

    OPS.provision_scaled_cluster(id, to_be_enlarged)
    return cluster

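For reference, the shape of the data payload consumed above follows directly from the lookups in the code; the keys are the ones actually read, while the node-group name and counts are invented examples.

# illustrative scale request; only the keys read above are real,
# the node-group name and counts are made-up examples
data = {
    "resize_node_groups": [
        # each entry is resolved via g.find(cluster.node_groups, name=...)
        {"name": "worker", "count": 5},
    ],
    # new node-group definitions, passed to construct_ngs_for_scaling(...)
    "add_node_groups": [],
}
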
def launch_instances(self, cluster, target_count):
    # create all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[0])

    cpo.add_provisioning_step(cluster.id, _("Create Heat stack"), 1)
    with context.InstanceInfoManager([cluster.id, None, None, None]):
        self.create_instances(cluster, target_count)

    # wait until all instances are up and networks are ready
    cluster = g.change_cluster_status(cluster, self.STAGES[1])

    instances = g.get_instances(cluster, self.inst_ids)
    self._await_networks(cluster, instances)

    # prepare all instances
    cluster = g.change_cluster_status(cluster, self.STAGES[2])

    instances = g.get_instances(cluster, self.inst_ids)
    volumes.mount_to_instances(instances)
    self._configure_instances(cluster)

def _cluster_create(values, plugin):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    context.set_current_cluster_id(cluster.id)
    sender.notify(ctx, cluster.id, cluster.name, "New", "create")
    _add_ports_for_auto_sg(ctx, cluster, plugin)

    # validating cluster
    try:
        plugin.recommend_configs(cluster)
        cluster = g.change_cluster_status(cluster, "Validating")
        quotas.check_cluster(cluster)
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(cluster, "Error", six.text_type(e))

    OPS.provision_cluster(cluster.id)

    return cluster

def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    try:
        # Decommissioning surplus nodes with the plugin
        cluster = g.change_cluster_status(cluster, "Decommissioning")

        instances_to_delete = []
        for node_group in cluster.node_groups:
            new_count = node_group_id_map[node_group.id]
            if new_count < node_group.count:
                instances_to_delete += node_group.instances[
                    new_count:node_group.count]

        if instances_to_delete:
            context.set_step_type(_("Plugin: decommission cluster"))
            plugin.decommission_nodes(cluster, instances_to_delete)

        # Scaling infrastructure
        cluster = g.change_cluster_status(cluster, "Scaling")
        context.set_step_type(_("Engine: scale cluster"))
        instance_ids = INFRA.scale_cluster(cluster, node_group_id_map)

        # Setting up new nodes with the plugin
        if instance_ids:
            ntp_service.configure_ntp(cluster_id)
            cluster = g.change_cluster_status(cluster, "Configuring")
            instances = g.get_instances(cluster, instance_ids)
            context.set_step_type(_("Plugin: scale cluster"))
            plugin.scale_cluster(cluster, instances)

        g.change_cluster_status(cluster, "Active")
    finally:
        if CONF.use_identity_api_v3 and not cluster.is_transient:
            trusts.delete_trust_from_cluster(cluster)

def _provision_cluster(cluster_id):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    try:
        cluster = _update_sahara_info(ctx, cluster)

        # updating cluster infra
        cluster = g.change_cluster_status(cluster, "InfraUpdating")
        plugin.update_infra(cluster)

        # creating instances and configuring them
        cluster = conductor.cluster_get(ctx, cluster_id)
        context.set_step_type(_("Engine: create cluster"))
        INFRA.create_cluster(cluster)

        # configure cluster
        cluster = g.change_cluster_status(cluster, "Configuring")
        shares.mount_shares(cluster)
        context.set_step_type(_("Plugin: configure cluster"))
        plugin.configure_cluster(cluster)

        # starting prepared and configured cluster
        ntp_service.configure_ntp(cluster_id)
        cluster = g.change_cluster_status(cluster, "Starting")
        context.set_step_type(_("Plugin: start cluster"))
        plugin.start_cluster(cluster)

        # cluster is now up and ready
        cluster = g.change_cluster_status(cluster, "Active")

        # schedule pending job executions for the cluster
        for je in conductor.job_execution_get_all(ctx,
                                                  cluster_id=cluster.id):
            job_manager.run_job(je.id)
    finally:
        if CONF.use_identity_api_v3 and not cluster.is_transient:
            trusts.delete_trust_from_cluster(cluster)

def _provision_cluster(cluster_id):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    try:
        cluster = _update_sahara_info(ctx, cluster)

        # updating cluster infra
        cluster = g.change_cluster_status(cluster, "InfraUpdating")
        plugin.update_infra(cluster)

        # creating instances and configuring them
        cluster = conductor.cluster_get(ctx, cluster_id)
        context.set_step_type(_("Engine: create cluster"))
        INFRA.create_cluster(cluster)

        # configure cluster
        cluster = g.change_cluster_status(cluster, "Configuring")
        context.set_step_type(_("Plugin: configure cluster"))
        plugin.configure_cluster(cluster)

        # starting prepared and configured cluster
        ntp_service.configure_ntp(cluster_id)
        cluster = g.change_cluster_status(cluster, "Starting")
        context.set_step_type(_("Plugin: start cluster"))
        plugin.start_cluster(cluster)

        # cluster is now up and ready
        cluster = g.change_cluster_status(cluster, "Active")

        # schedule pending job executions for the cluster
        for je in conductor.job_execution_get_all(ctx,
                                                  cluster_id=cluster.id):
            job_manager.run_job(je.id)
    finally:
        if CONF.use_identity_api_v3 and not cluster.is_transient:
            trusts.delete_trust_from_cluster(cluster)

def _provision_scaled_cluster(cluster_id, node_group_id_map):
    ctx, cluster, plugin = _prepare_provisioning(cluster_id)

    # Decommissioning surplus nodes with the plugin
    cluster = g.change_cluster_status(cluster, "Decommissioning")

    instances_to_delete = []
    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]
        if new_count < node_group.count:
            instances_to_delete += node_group.instances[
                new_count:node_group.count]

    if instances_to_delete:
        plugin.decommission_nodes(cluster, instances_to_delete)

    # Scaling infrastructure
    cluster = g.change_cluster_status(cluster, "Scaling")
    instances = INFRA.scale_cluster(cluster, node_group_id_map)

    # Setting up new nodes with the plugin
    if instances:
        cluster = g.change_cluster_status(cluster, "Configuring")
        try:
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            LOG.exception(
                _LE("Can't scale cluster '%(name)s' (reason: %(reason)s)"),
                {'name': cluster.name, 'reason': ex})
            g.change_cluster_status(cluster, "Error")
            return

    if not g.check_cluster_exists(cluster):
        LOG.info(g.format_cluster_deleted_message(cluster))
        return

    g.change_cluster_status(cluster, "Active")

def wrapper(cluster_id, *args, **kwds):
    ctx = context.ctx()
    try:
        # Clearing status description before executing
        g.change_cluster_status_description(cluster_id, "")
        f(cluster_id, *args, **kwds)
    except Exception as ex:
        # something happened during cluster operation
        cluster = conductor.cluster_get(ctx, cluster_id)

        # check if cluster still exists (it might have been removed)
        if cluster is None or cluster.status == 'Deleting':
            LOG.debug("Cluster id={id} was deleted or marked for "
                      "deletion. Canceling current operation.".format(
                          id=cluster_id))
            return

        msg = six.text_type(ex)
        LOG.error(_LE("Error during operating on cluster {name} (reason: "
                      "{reason})").format(name=cluster.name, reason=msg))

        try:
            # trying to rollback
            desc = description.format(reason=msg)
            if _rollback_cluster(cluster, ex):
                g.change_cluster_status(cluster, "Active", desc)
            else:
                g.change_cluster_status(cluster, "Error", desc)
        except Exception as rex:
            cluster = conductor.cluster_get(ctx, cluster_id)

            # check if cluster still exists (it might have been
            # removed during rollback)
            if cluster is None or cluster.status == 'Deleting':
                LOG.debug("Cluster id={id} was deleted or marked for "
                          "deletion. Canceling current operation."
                          .format(id=cluster_id))
                return

            LOG.error(_LE("Error during rollback of cluster {name} "
                          "(reason: {reason})").format(
                              name=cluster.name,
                              reason=six.text_type(rex)))
            desc = "{0}, {1}".format(msg, six.text_type(rex))
            g.change_cluster_status(
                cluster, "Error", description.format(reason=desc))

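The wrapper above closes over two free variables: f (the wrapped operation) and description (an error template with a {reason} placeholder), which implies an enclosing decorator factory. A hedged sketch of that outer structure follows; the factory name is an assumption, and the body is elided to the wrapper shown above.

import functools


def ops_error_handler(description):
    # hypothetical factory name; `description` is the error template
    # later formatted with `{reason}` inside the wrapper above
    def decorator(f):
        @functools.wraps(f)
        def wrapper(cluster_id, *args, **kwds):
            ...  # body as shown in the wrapper above
        return wrapper
    return decorator
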
def wrapper(cluster_id, *args, **kwds):
    try:
        f(cluster_id, *args, **kwds)
    except Exception as ex:
        # something happened during cluster operation
        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, cluster_id)

        # check if cluster still exists (it might have been removed)
        if cluster is None or cluster.status == 'Deleting':
            LOG.info(_LI("Cluster %s was deleted or marked for "
                         "deletion. Canceling current operation."),
                     cluster_id)
            return

        LOG.exception(
            _LE("Error during operation on cluster '%(name)s' (reason: "
                "%(reason)s)"), {'name': cluster.name, 'reason': ex})

        try:
            # trying to rollback
            if _rollback_cluster(cluster, ex):
                g.change_cluster_status(cluster, "Active")
            else:
                g.change_cluster_status(cluster, "Error")
        except Exception as rex:
            cluster = conductor.cluster_get(ctx, cluster_id)

            # check if cluster still exists (it might have been
            # removed during rollback)
            if cluster is None:
                LOG.info(_LI("Cluster with %s was deleted. Canceling "
                             "current operation."), cluster_id)
                return

            LOG.exception(
                _LE("Error during rollback of cluster '%(name)s' "
                    "(reason: %(reason)s)"),
                {'name': cluster.name, 'reason': rex})
            g.change_cluster_status(cluster, "Error")

def wrapper(cluster_id, *args, **kwds):
    try:
        f(cluster_id, *args, **kwds)
    except Exception as ex:
        # something happened during cluster operation
        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, cluster_id)

        # check if cluster still exists (it might have been removed)
        if cluster is None:
            LOG.info(_LI("Cluster with %s was deleted. Canceling current "
                         "operation."), cluster_id)
            return

        LOG.exception(
            _LE("Error during operation on cluster '%(name)s' (reason: "
                "%(reason)s)"), {'name': cluster.name, 'reason': ex})

        try:
            # trying to rollback
            if _rollback_cluster(cluster, ex):
                g.change_cluster_status(cluster, "Active")
            else:
                g.change_cluster_status(cluster, "Error")
        except Exception as rex:
            cluster = conductor.cluster_get(ctx, cluster_id)

            # check if cluster still exists (it might have been
            # removed during rollback)
            if cluster is None:
                LOG.info(_LI("Cluster with %s was deleted. Canceling "
                             "current operation."), cluster_id)
                return

            LOG.exception(
                _LE("Error during rollback of cluster '%(name)s' "
                    "(reason: %(reason)s)"),
                {'name': cluster.name, 'reason': rex})
            g.change_cluster_status(cluster, "Error")

def terminate_cluster(id):
    g.change_cluster_status(id, "Deleting")

    OPS.terminate_cluster(id)