def test_get_hadoop_ssh_keys(self):
    cluster_dict = {
        'name': 'cluster1',
        'plugin_name': 'mock_plugin',
        'hadoop_version': 'mock_version',
        'default_image_id': 'initial',
        'node_groups': [tu._make_ng_dict("ng1", "f1", ["s1"], 1)]}

    cluster1 = conductor.cluster_create(context.ctx(), cluster_dict)
    (private_key1, public_key1) = c_h.get_hadoop_ssh_keys(cluster1)

    # should store keys for old cluster
    cluster1 = conductor.cluster_get(context.ctx(), cluster1)
    (private_key2, public_key2) = c_h.get_hadoop_ssh_keys(cluster1)

    self.assertEqual(public_key1, public_key2)
    self.assertEqual(private_key1, private_key2)

    # should generate new keys for new cluster
    cluster_dict.update({'name': 'cluster2'})
    cluster2 = conductor.cluster_create(context.ctx(), cluster_dict)
    (private_key3, public_key3) = c_h.get_hadoop_ssh_keys(cluster2)

    self.assertNotEqual(public_key1, public_key3)
    self.assertNotEqual(private_key1, private_key3)
def test_get_hadoop_ssh_keys(self):
    cluster_dict = {
        'name': 'cluster1',
        'plugin_name': 'mock_plugin',
        'hadoop_version': 'mock_version',
        'default_image_id': 'initial',
        'node_groups': [tu.make_ng_dict("ng1", "f1", ["s1"], 1)]
    }

    cluster1 = conductor.cluster_create(context.ctx(), cluster_dict)
    (private_key1, public_key1) = c_h.get_hadoop_ssh_keys(cluster1)

    # should store keys for old cluster
    cluster1 = conductor.cluster_get(context.ctx(), cluster1)
    (private_key2, public_key2) = c_h.get_hadoop_ssh_keys(cluster1)

    self.assertEqual(public_key1, public_key2)
    self.assertEqual(private_key1, private_key2)

    # should generate new keys for new cluster
    cluster_dict.update({'name': 'cluster2'})
    cluster2 = conductor.cluster_create(context.ctx(), cluster_dict)
    (private_key3, public_key3) = c_h.get_hadoop_ssh_keys(cluster2)

    self.assertNotEqual(public_key1, public_key3)
    self.assertNotEqual(private_key1, private_key3)
def get_raw_binary(job_binary):
    url = job_binary.url
    if url.startswith("savanna-db://"):
        res = db.get_raw_data(context.ctx(), job_binary)

    if url.startswith(su.SWIFT_INTERNAL_PREFIX):
        res = i_swift.get_raw_data(context.ctx(), job_binary)

    return res
def get_raw_binary(job_binary):
    url = job_binary.url
    if url.startswith("savanna-db://"):
        res = db.get_raw_data(context.ctx(), job_binary)

    # TODO(mattf): remove support for OLD_SWIFT_INTERNAL_PREFIX
    if url.startswith(su.SWIFT_INTERNAL_PREFIX) or (
            url.startswith(su.OLD_SWIFT_INTERNAL_PREFIX)):
        res = i_swift.get_raw_data(context.ctx(), job_binary)

    return res
def _provision_cluster(cluster_id):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # updating cluster infra
    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "InfraUpdating"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    i.create_cluster(cluster)

    # configure cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Configuring"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.configure_cluster(cluster)

    # starting prepared and configured cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Starting"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.start_cluster(cluster)

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))

    # schedule pending job executions for the cluster
    for je in conductor.job_execution_get_all(ctx, cluster_id=cluster.id):
        jm.run_job(ctx, je)
def _set_cluster_info(self, cluster, cluster_spec, hosts, ambari_info):
    info = {}
    try:
        jobtracker_ip = self._determine_host_for_server_component(
            'JOBTRACKER', cluster_spec, hosts).management_ip
    except Exception:
        pass
    else:
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jobtracker_ip
        }

    try:
        namenode_ip = self._determine_host_for_server_component(
            'NAMENODE', cluster_spec, hosts).management_ip
    except Exception:
        pass
    else:
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % namenode_ip
        }

    info['Ambari Console'] = {
        'Web UI': 'http://%s' % ambari_info.get_address()
    }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
def test_ip_assignment_use_no_floating(self, cfg, novaclient):
    cfg.CONF.use_floating_ips = False
    nova = _create_nova_mock(novaclient)

    node_groups = [
        _make_ng_dict("test_group_1", "test_flavor",
                      ["data node", "test tracker"], 2, 'pool'),
        _make_ng_dict("test_group_2", "test_flavor",
                      ["name node", "test tracker"], 1)
    ]

    ctx = context.ctx()
    cluster = _create_cluster_mock(node_groups, ["data node"])
    instances._create_instances(cluster)

    cluster = conductor.cluster_get(ctx, cluster)
    instances_list = instances.get_instances(cluster)

    instances._assign_floating_ips(instances_list)

    nova.floating_ips.create.assert_has_calls(
        [mock.call("pool"), mock.call("pool")],
        any_order=False)

    self.assertEqual(nova.floating_ips.create.call_count, 2,
                     "Unexpected number of floating IPs created.")
def validate_scaling(self, cluster, existing, additional):
    orig_existing_count = {}
    ctx = context.ctx()
    try:
        for ng_id in existing:
            node_group = self._get_by_id(cluster.node_groups, ng_id)
            if node_group:
                orig_existing_count[ng_id] = node_group.count
                conductor.node_group_update(
                    ctx, node_group, {'count': int(existing[ng_id])})
            else:
                raise RuntimeError(
                    'Node group not found: {0}'.format(ng_id))

        for ng_id in additional:
            node_group = self._get_by_id(cluster.node_groups, ng_id)
            if node_group:
                conductor.node_group_update(
                    ctx, node_group, {'count': int(additional[ng_id])})
            else:
                raise RuntimeError(
                    'Node group not found: {0}'.format(ng_id))

        self.validate(cluster)
    finally:
        for ng_id in additional:
            node_group = self._get_by_id(cluster.node_groups, ng_id)
            conductor.node_group_update(ctx, node_group, {'count': 0})

        for ng_id in orig_existing_count:
            node_group = self._get_by_id(cluster.node_groups, ng_id)
            conductor.node_group_update(
                ctx, node_group, {'count': orig_existing_count[ng_id]})
def test_remove_instance(self):
    ctx = context.ctx()
    cluster_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)

    _id = cluster_db_obj["id"]
    ng_id = cluster_db_obj["node_groups"][-1]["id"]
    count = cluster_db_obj["node_groups"][-1]["count"]

    node_group = self._add_instance(ctx, ng_id)
    instance_id = node_group["instances"][-1]["id"]

    cluster_db_obj = self.api.cluster_get(ctx, _id)
    for ng in cluster_db_obj["node_groups"]:
        if ng["id"] != ng_id:
            continue
        self.assertEqual(count + 1, ng["count"])

    self.api.instance_remove(ctx, instance_id)
    cluster_db_obj = self.api.cluster_get(ctx, _id)
    for ng in cluster_db_obj["node_groups"]:
        if ng["id"] != ng_id:
            continue
        self.assertEqual(count, ng["count"])

    with self.assertRaises(RuntimeError):
        self.api.instance_remove(ctx, instance_id)
def clean_cluster_from_empty_ng(cluster):
    ctx = context.ctx()
    for ng in cluster.node_groups:
        if ng.count == 0:
            conductor.node_group_remove(ctx, ng)

    return conductor.cluster_get(ctx, cluster)
def init_instances_ips(instance, server):
    """Extracts internal and management ips.

    The first ip that falls into one of the nova network CIDRs is used
    as the internal ip. If the use_floating_ips flag is set, the
    management ip will be the first non-internal ip.
    """
    ctx = context.ctx()

    if instance.internal_ip and instance.management_ip:
        return True

    management_ip = instance.management_ip
    internal_ip = instance.internal_ip

    for network_label in server.networks:
        nova_network = nova.client().networks.find(label=network_label)
        network = netaddr.IPNetwork(nova_network.cidr)
        for ip in server.networks[network_label]:
            if netaddr.IPAddress(ip) in network:
                internal_ip = instance.internal_ip or ip
            else:
                management_ip = instance.management_ip or ip

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(ctx, instance, {
        "management_ip": management_ip,
        "internal_ip": internal_ip
    })

    return internal_ip and management_ip
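The CIDR membership test above is what classifies an address as internal versus management. A minimal, self-contained sketch of that check with netaddr; the network and addresses below are made-up values, not data produced by this code:

import netaddr

# hypothetical tenant network CIDR; the real one comes from nova networks
network = netaddr.IPNetwork('10.0.0.0/24')

for ip in ['10.0.0.5', '172.24.4.10']:
    if netaddr.IPAddress(ip) in network:
        print('%s -> internal ip' % ip)      # inside the CIDR
    else:
        print('%s -> management ip' % ip)    # outside the CIDR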
def _set_cluster_info(self, cluster, cluster_spec):
    info = {}
    for service in cluster_spec.services:
        if service.deployed:
            service.register_service_urls(cluster_spec, info)

    conductor.cluster_update(context.ctx(), cluster, {'info': info})
def init_instances_ips(instance):
    """Extracts internal and management ips.

    The first ip from the nova network CIDRs is used as the internal
    ip. If the use_floating_ips flag is set, the management ip will be
    the first non-internal ip.
    """
    server = nova.get_instance_info(instance)

    management_ip = None
    internal_ip = None

    for network_label, addresses in six.iteritems(server.addresses):
        for address in addresses:
            if address['OS-EXT-IPS:type'] == 'fixed':
                internal_ip = internal_ip or address['addr']
            else:
                management_ip = management_ip or address['addr']

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(context.ctx(), instance,
                              {"management_ip": management_ip,
                               "internal_ip": internal_ip})

    return internal_ip and management_ip
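For reference, a standalone sketch of how the loop above splits addresses by type. The addresses dict is a hand-written example of the structure nova typically returns for a server, not output captured from this code:

import six

# hypothetical nova "addresses" payload for one server
addresses = {
    'private': [
        {'addr': '10.0.0.3', 'OS-EXT-IPS:type': 'fixed'},
        {'addr': '172.24.4.228', 'OS-EXT-IPS:type': 'floating'},
    ]
}

internal_ip = None
management_ip = None
for network_label, addrs in six.iteritems(addresses):
    for address in addrs:
        if address['OS-EXT-IPS:type'] == 'fixed':
            internal_ip = internal_ip or address['addr']
        else:
            management_ip = management_ip or address['addr']

print(internal_ip, management_ip)  # 10.0.0.3 172.24.4.228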
def _set_cluster_info(self, cluster, cluster_spec, ambari_info):
    info = {}
    try:
        jobtracker_ip = cluster_spec.determine_host_for_server_component(
            'JOBTRACKER').management_ip
    except Exception:
        pass
    else:
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jobtracker_ip
        }

    try:
        namenode_ip = cluster_spec.determine_host_for_server_component(
            'NAMENODE').management_ip
    except Exception:
        pass
    else:
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % namenode_ip
        }

    info['Ambari Console'] = {
        'Web UI': 'http://%s' % ambari_info.get_address()
    }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
def _provision_cluster(cluster_id):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # updating cluster infra
    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "InfraUpdating"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    i.create_cluster(cluster)

    # configure cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Configuring"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.configure_cluster(cluster)

    # starting prepared and configured cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Starting"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.start_cluster(cluster)

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
def test_crud_operation_create_list_delete_update(self):
    ctx = context.ctx()
    job = self.api.job_create(ctx, SAMPLE_JOB)
    ds_input = self.api.data_source_create(ctx, SAMPLE_DATA_SOURCE)
    SAMPLE_DATA_OUTPUT = copy.copy(SAMPLE_DATA_SOURCE)
    SAMPLE_DATA_OUTPUT['name'] = 'output'
    ds_output = self.api.data_source_create(ctx, SAMPLE_DATA_OUTPUT)

    SAMPLE_JOB_EXECUTION['job_id'] = job['id']
    SAMPLE_JOB_EXECUTION['input_id'] = ds_input['id']
    SAMPLE_JOB_EXECUTION['output_id'] = ds_output['id']

    self.api.job_execution_create(ctx, SAMPLE_JOB_EXECUTION)

    lst = self.api.job_execution_get_all(ctx)
    self.assertEqual(len(lst), 1)

    job_ex_id = lst[0]['id']

    self.assertEqual(lst[0]['progress'], 0.1)
    self.api.job_execution_update(ctx, job_ex_id, {'progress': '0.2'})
    updated_job = self.api.job_execution_get(ctx, job_ex_id)
    self.assertEqual(updated_job['progress'], 0.2)

    self.api.job_execution_destroy(ctx, job_ex_id)

    lst = self.api.job_execution_get_all(ctx)
    self.assertEqual(len(lst), 0)
def scale_cluster(id, data):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get("resize_node_groups", [])
    additional_node_groups = data.get("add_node_groups", [])

    # the next map is the main object we will work with
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {}
    for ng in existing_node_groups:
        ng_id = g.find(cluster.node_groups, name=ng["name"])["id"]
        to_be_enlarged.update({ng_id: ng["count"]})

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)

    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception:
        with excutils.save_and_reraise_exception():
            i.clean_cluster_from_empty_ng(cluster)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # If we are here validation is successful.
    # So let's update to_be_enlarged map:
    to_be_enlarged.update(additional)

    context.spawn("cluster-scaling-%s" % id,
                  _provision_nodes, id, to_be_enlarged)
    return conductor.cluster_get(ctx, id)
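To make the shape of to_be_enlarged concrete, here is a small self-contained illustration of how it is built from the request body. The request payload, node group names and ids, and the find() helper are all invented stand-ins for this example, not the real g.find or conductor objects:

# hypothetical scale request and current node groups
data = {"resize_node_groups": [{"name": "workers", "count": 5}]}
node_groups = [{"id": "ng-1", "name": "workers", "count": 3}]

def find(lst, **kwargs):
    # simplified stand-in for g.find: first element matching all kwargs
    for item in lst:
        if all(item.get(k) == v for k, v in kwargs.items()):
            return item

to_be_enlarged = {}
for ng in data.get("resize_node_groups", []):
    ng_id = find(node_groups, name=ng["name"])["id"]
    to_be_enlarged.update({ng_id: ng["count"]})

print(to_be_enlarged)  # {'ng-1': 5}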
def test_one_node_groups_and_one_affinity_group(self, novaclient):
    node_groups = [_make_ng_dict('test_group', 'test_flavor',
                                 ['data node'], 2)]

    cluster = _create_cluster_mock(node_groups, ["data node"])
    nova = _create_nova_mock(novaclient)
    instances._create_instances(cluster)
    userdata = _generate_user_data_script(cluster)

    nova.servers.create.assert_has_calls(
        [mock.call("test_cluster-test_group-001",
                   "initial",
                   "test_flavor",
                   scheduler_hints=None,
                   userdata=userdata,
                   key_name='user_keypair'),
         mock.call("test_cluster-test_group-002",
                   "initial",
                   "test_flavor",
                   scheduler_hints={'different_host': ["1"]},
                   userdata=userdata,
                   key_name='user_keypair')],
        any_order=False)

    ctx = context.ctx()
    cluster_obj = conductor.cluster_get_all(ctx)[0]
    self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(e)})
            LOG.info(g.format_cluster_status(cluster))

    context.spawn("cluster-creating-%s" % cluster.id,
                  _provision_cluster, cluster.id)
    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust(cluster)

    return conductor.cluster_get(ctx, cluster.id)
def test_cluster_terminate(self, terminate_cluster, get_job_status):
    cfg.CONF.set_override("use_identity_api_v3", True)
    try:
        ctx = context.ctx()
        job = self.api.job_create(ctx, te.SAMPLE_JOB)
        ds = self.api.data_source_create(ctx, te.SAMPLE_DATA_SOURCE)

        c = tc.SAMPLE_CLUSTER.copy()
        c["status"] = "Active"
        c["id"] = "1"
        c["name"] = "1"
        self.api.cluster_create(ctx, c)
        c["id"] = "2"
        c["name"] = "2"
        self.api.cluster_create(ctx, c)

        self._create_job_execution({"end_time": datetime.datetime.now(),
                                    "id": 1,
                                    "cluster_id": "1"},
                                   job, ds, ds)
        self._create_job_execution({"end_time": None,
                                    "id": 2,
                                    "cluster_id": "2"},
                                   job, ds, ds)
        self._create_job_execution({"end_time": None,
                                    "id": 3,
                                    "cluster_id": "2"},
                                   job, ds, ds)

        p.SavannaPeriodicTasks().terminate_unneeded_clusters(None)
        self.assertEqual(terminate_cluster.call_count, 1)
        terminate_cluster.assert_has_calls([mock.call(u'1')])
    finally:
        cfg.CONF.clear_override("use_identity_api_v3")
def init_instances_ips(instance):
    """Extracts internal and management ips.

    The first ip from the nova network CIDRs is used as the internal
    ip. If the use_floating_ips flag is set, the management ip will be
    the first non-internal ip.
    """
    server = nova.get_instance_info(instance)

    management_ip = None
    internal_ip = None

    for network_label, addresses in six.iteritems(server.addresses):
        for address in addresses:
            if address['OS-EXT-IPS:type'] == 'fixed':
                internal_ip = internal_ip or address['addr']
            else:
                management_ip = management_ip or address['addr']

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(context.ctx(), instance, {
        "management_ip": management_ip,
        "internal_ip": internal_ip
    })

    return internal_ip and management_ip
def test_cluster_fields(self):
    ctx = context.ctx()
    cl_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
    self.assertIsInstance(cl_db_obj, dict)

    for key, val in SAMPLE_CLUSTER.items():
        if key == 'node_groups':
            # this will be checked separately
            continue
        self.assertEqual(val, cl_db_obj.get(key),
                         "Key not found %s" % key)

    for ng in cl_db_obj["node_groups"]:
        ng.pop("created_at")
        ng.pop("updated_at")
        ng.pop("id")
        self.assertEqual(ng.pop("cluster_id"), cl_db_obj["id"])
        ng.pop("image_id")
        self.assertEqual(ng.pop("instances"), [])
        ng.pop("node_configs")
        ng.pop("node_group_template_id")
        ng.pop("volume_mount_prefix")
        ng.pop("volumes_size")
        ng.pop("volumes_per_node")
        ng.pop("floating_ip_pool")
        ng.pop("image_username")
        ng.pop("tenant_id")

    self.assertEqual(SAMPLE_CLUSTER["node_groups"],
                     cl_db_obj["node_groups"])
def _set_cluster_info(self, cluster):
    mng = u.get_instances(cluster, 'manager')[0]
    nn = u.get_namenode(cluster)
    jt = u.get_jobtracker(cluster)
    oozie = u.get_oozie(cluster)

    # TODO(alazarev) make port configurable (bug #1262895)
    info = {'IDH Manager': {
        'Web UI': 'https://%s:9443' % mng.management_ip
    }}

    if jt:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jt.management_ip
        }
        # TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
    if nn:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % nn.management_ip
        }
        # TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

    if oozie:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
def test_node_group_add_from_template(self):
    ctx = context.ctx()

    # create cluster
    sample_copy = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
    cluster = self.api.cluster_create(ctx, sample_copy)

    # create node_group_template
    ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
    ng_tmpl['volumes_size'] = 10
    ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
    ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

    # add node group to cluster
    ng = copy.deepcopy(test_clusters.SAMPLE_CLUSTER['node_groups'][0])
    ng['node_group_template_id'] = ng_tmpl['id']
    ng['count'] = 5
    ng['name'] = 'ng_3'
    self.api.node_group_add(ctx, cluster['id'], ng)

    # refetch cluster
    cluster = self.api.cluster_get(ctx, cluster['id'])

    for node_group in cluster['node_groups']:
        if node_group['name'] == 'ng_3':
            self.assertEqual(['p1', 'p2'], node_group['node_processes'])
            self.assertEqual(10, node_group['volumes_size'])
            self.assertEqual(CORRECT_CONF, node_group['node_configs'])
            self.assertEqual(5, node_group['count'])
def _scale_cluster(cluster, target_count):
    ctx = context.ctx()

    rollback_count = _get_ng_counts(cluster)

    launcher = _ScaleLauncher()

    try:
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        LOG.warn("Can't scale cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_get(ctx, cluster)

            try:
                _rollback_cluster_scaling(ctx, cluster, rollback_count,
                                          target_count)
            except Exception:
                # if something fails during the rollback, we stop
                # doing anything further
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Error"})
                LOG.info(g.format_cluster_status(cluster))
                LOG.error("Unable to complete rollback, aborting")
                raise

            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))
            LOG.warn("Rollback successful. Re-raising the initial exception.")
    finally:
        cluster = conductor.cluster_get(ctx, cluster)
        _clean_cluster_from_empty_ng(cluster)

    return launcher.inst_ids
def test_clt_fields(self):
    ctx = context.ctx()
    clt_db_obj_id = self.api.cluster_template_create(ctx, SAMPLE_CLT)['id']

    clt_db_obj = self.api.cluster_template_get(ctx, clt_db_obj_id)
    self.assertIsInstance(clt_db_obj, dict)

    for key, val in SAMPLE_CLT.items():
        if key == 'node_groups':
            # this will be checked separately
            continue
        self.assertEqual(val, clt_db_obj.get(key),
                         "Key not found %s" % key)

    for ng in clt_db_obj["node_groups"]:
        ng.pop("created_at")
        ng.pop("updated_at")
        ng.pop("id")
        self.assertEqual(ng.pop("cluster_template_id"), clt_db_obj_id)
        ng.pop("image_id")
        ng.pop("node_configs")
        ng.pop("node_group_template_id")
        ng.pop("volume_mount_prefix")
        ng.pop("volumes_size")
        ng.pop("volumes_per_node")

    self.assertListEqual(SAMPLE_CLT["node_groups"],
                         clt_db_obj["node_groups"])
def _await_networks(instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not _check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = get_instances(cluster, ips_assigned)

    accessible_instances = set()
    while len(accessible_instances) != len(instances):
        if not _check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in accessible_instances:
                if _check_if_accessible(instance):
                    accessible_instances.add(instance.id)

        context.sleep(1)
def execute_job(job_id, data):

    # Elements common to all job types
    cluster_id = data['cluster_id']
    configs = data.get('job_configs', {})

    ctx = context.current()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    instance = plugin.get_oozie_server(cluster)

    extra = {}
    info = None
    if CONF.use_namespaces and not CONF.use_floating_ips:
        info = instance.remote().get_neutron_info()
        extra['neutron'] = info

    # Not in Java job types but present for all others
    input_id = data.get('input_id', None)
    output_id = data.get('output_id', None)

    # Since we will use a unified class in the database, we pass
    # a superset for all job types
    job_ex_dict = {'input_id': input_id, 'output_id': output_id,
                   'job_id': job_id, 'cluster_id': cluster_id,
                   'info': {'status': 'Pending'}, 'job_configs': configs,
                   'extra': extra}
    job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)

    context.spawn("Starting Job Execution %s" % job_execution.id,
                  manager.run_job, job_execution)
    return job_execution
def create_cluster(cluster):
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        _create_instances(cluster)

        # wait for all instances to come up and become accessible
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        cluster = _await_instances(cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))

        _configure_instances(cluster)
    except Exception as ex:
        LOG.warn("Can't start cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(ex)})
            LOG.info(g.format_cluster_status(cluster))
            _rollback_cluster_creation(cluster, ex)
def test_one_node_groups_and_no_affinity_group(self, novaclient):
    node_groups = [
        _make_ng_dict('test_group', 'test_flavor',
                      ['data node', 'task tracker'], 2)
    ]

    cluster = _create_cluster_mock(node_groups, [])
    nova = _create_nova_mock(novaclient)
    instances._create_instances(cluster)
    userdata = _generate_user_data_script(cluster)

    nova.servers.create.assert_has_calls([
        mock.call("test_cluster-test_group-001",
                  "initial",
                  "test_flavor",
                  scheduler_hints=None,
                  userdata=userdata,
                  key_name='user_keypair'),
        mock.call("test_cluster-test_group-002",
                  "initial",
                  "test_flavor",
                  scheduler_hints=None,
                  userdata=userdata,
                  key_name='user_keypair')
    ], any_order=False)

    ctx = context.ctx()
    cluster_obj = conductor.cluster_get_all(ctx)[0]
    self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
def convert(self, config, plugin_name, version, cluster_template_create):
    normalized_config = clusterspec.ClusterSpec(config).normalize()

    # TODO(jspeidel): can we get the name (first arg) from somewhere?

    node_groups = []
    for ng in normalized_config.node_groups:
        node_group = {
            "name": ng.name,
            "flavor_id": ng.flavor,
            "node_processes": ng.node_processes,
            "count": ng.count
        }
        node_groups.append(node_group)

    cluster_configs = dict()
    for entry in normalized_config.cluster_configs:
        ci = entry.config
        # get the associated service dictionary
        target = entry.config.applicable_target
        service_dict = cluster_configs.get(target, {})
        service_dict[ci.name] = entry.value
        cluster_configs[target] = service_dict

    ctx = context.ctx()
    return cluster_template_create(ctx,
                                   {"name": uuidutils.generate_uuid(),
                                    "plugin_name": plugin_name,
                                    "hadoop_version": version,
                                    "node_groups": node_groups,
                                    "cluster_configs": cluster_configs})
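To make the grouping step concrete, here is a self-contained sketch that builds the same {target: {name: value}} structure from a few fake normalized entries. The Entry/Config namedtuples and the config names are stand-ins for illustration, not the real clusterspec objects:

import collections

# stand-ins for the normalized config entries used above
Config = collections.namedtuple('Config', ['name', 'applicable_target'])
Entry = collections.namedtuple('Entry', ['config', 'value'])

entries = [
    Entry(Config('mapred.reduce.tasks', 'MAPREDUCE'), 2),
    Entry(Config('dfs.replication', 'HDFS'), 3),
    Entry(Config('mapred.map.tasks', 'MAPREDUCE'), 4),
]

cluster_configs = dict()
for entry in entries:
    ci = entry.config
    target = ci.applicable_target
    service_dict = cluster_configs.get(target, {})
    service_dict[ci.name] = entry.value
    cluster_configs[target] = service_dict

print(cluster_configs)
# {'MAPREDUCE': {'mapred.reduce.tasks': 2, 'mapred.map.tasks': 4},
#  'HDFS': {'dfs.replication': 3}}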
def _run_instance(cluster, node_group, idx, aa_groups, userdata):
    """Create an instance using the nova client and persist it into the DB."""
    session = context.ctx().session
    name = '%s-%s-%03d' % (cluster.name, node_group.name, idx)

    # aa_groups: node process -> instance ids
    aa_ids = []
    for node_process in node_group.node_processes:
        aa_ids += aa_groups.get(node_process) or []

    # create instances only at hosts w/ no instances w/ aa-enabled processes
    hints = {'different_host': list(set(aa_ids))} if aa_ids else None

    context.model_save(node_group)

    nova_instance = nova.client().servers.create(
        name, node_group.get_image_id(), node_group.flavor_id,
        scheduler_hints=hints, userdata=userdata,
        key_name=cluster.user_keypair_id)

    with session.begin():
        instance = m.Instance(node_group.id, nova_instance.id, name)
        node_group.instances.append(instance)
        session.add(instance)

    # save instance id to aa_groups to support aa feature
    for node_process in node_group.node_processes:
        if node_process in cluster.anti_affinity:
            aa_group_ids = aa_groups.get(node_process, [])
            aa_group_ids.append(nova_instance.id)
            aa_groups[node_process] = aa_group_ids

    return instance
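A self-contained sketch of how the anti-affinity scheduler hint is derived from aa_groups. The process names and instance ids below are made up for illustration:

# hypothetical state: node process -> ids of instances already running it
aa_groups = {'datanode': ['id-1', 'id-2'], 'tasktracker': ['id-2']}
node_processes = ['datanode', 'tasktracker']

aa_ids = []
for node_process in node_processes:
    aa_ids += aa_groups.get(node_process) or []

# ask the scheduler to place the new server on a different host than
# every instance that already runs an anti-affinity process
hints = {'different_host': list(set(aa_ids))} if aa_ids else None
print(hints)  # {'different_host': ['id-1', 'id-2']} (order may vary)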
def _await_networks(cluster, instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = _get_instances(cluster, ips_assigned)

    with context.ThreadGroup() as tg:
        for instance in instances:
            tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                     _wait_until_accessible, instance)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
def create_hadoop_ssh_keys(cluster):
    private_key, public_key = crypto.generate_key_pair()
    extra = {
        'hadoop_private_ssh_key': private_key,
        'hadoop_public_ssh_key': public_key
    }
    return conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
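crypto.generate_key_pair itself is not shown here. As a rough, hedged approximation of what it might return, the sketch below builds an RSA key pair with paramiko; the helper name is invented and the real savanna crypto module may work differently:

import io

import paramiko


def generate_key_pair_sketch(bits=2048):
    # illustrative only: produce a PEM private key and an OpenSSH public key
    key = paramiko.RSAKey.generate(bits)
    buf = io.StringIO()
    key.write_private_key(buf)
    private_key = buf.getvalue()
    public_key = 'ssh-rsa %s' % key.get_base64()
    return private_key, public_key


private_key, public_key = generate_key_pair_sketch()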
def test_ip_assignment_use_no_floating(self, cfg, novaclient):
    cfg.CONF.use_floating_ips = False
    nova = _create_nova_mock(novaclient)

    node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                 ["data node", "test tracker"], 2, 'pool'),
                   _make_ng_dict("test_group_2", "test_flavor",
                                 ["name node", "test tracker"], 1)]

    ctx = context.ctx()
    cluster = _create_cluster_mock(node_groups, ["data node"])
    instances._create_instances(cluster)

    cluster = conductor.cluster_get(ctx, cluster)
    instances_list = instances._get_instances(cluster)

    instances._assign_floating_ips(instances_list)

    nova.floating_ips.create.assert_has_calls(
        [mock.call("pool"), mock.call("pool")],
        any_order=False
    )

    self.assertEqual(nova.floating_ips.create.call_count, 2,
                     "Unexpected number of floating IPs created.")
def _await_networks(self, cluster, instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = g.get_instances(cluster, ips_assigned)

    with context.ThreadGroup() as tg:
        for instance in instances:
            tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                     self._wait_until_accessible, instance)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
def _set_cluster_info(self, cluster):
    nn = utils.get_namenode(cluster)
    jt = utils.get_jobtracker(cluster)
    oozie = utils.get_oozie(cluster)
    info = {}

    if jt:
        address = c_helper.get_config_value(
            'MapReduce', 'mapred.job.tracker.http.address', cluster)
        port = address[address.rfind(':') + 1:]
        info['MapReduce'] = {
            'Web UI': 'http://%s:%s' % (jt.management_ip, port)
        }

    if nn:
        address = c_helper.get_config_value('HDFS', 'dfs.http.address',
                                            cluster)
        port = address[address.rfind(':') + 1:]
        info['HDFS'] = {
            'Web UI': 'http://%s:%s' % (nn.management_ip, port)
        }

    if oozie:
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
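The port extraction above is plain string slicing after the last colon. A tiny standalone example; the address value is invented for illustration:

# hypothetical value of mapred.job.tracker.http.address
address = '0.0.0.0:50030'
port = address[address.rfind(':') + 1:]
print(port)  # '50030'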
def test_one_node_groups_and_no_affinity_group(self, novaclient):
    node_groups = [m.NodeGroup("test_group", "test_flavor",
                               ["data node", "test tracker"], 2)]
    node_groups[0]._username = "******"

    cluster = _create_cluster_mock(node_groups, [])
    nova = _create_nova_mock(novaclient)
    instances._create_instances(cluster)
    userdata = _generate_user_data_script(cluster)

    nova.servers.create.assert_has_calls(
        [mock.call("test_cluster-test_group-001",
                   "initial",
                   "test_flavor",
                   scheduler_hints=None,
                   userdata=userdata,
                   key_name='user_keypair'),
         mock.call("test_cluster-test_group-002",
                   "initial",
                   "test_flavor",
                   scheduler_hints=None,
                   userdata=userdata,
                   key_name='user_keypair')],
        any_order=False)

    session = ctx.ctx().session
    with session.begin():
        self.assertEqual(session.query(m.Instance).count(), 2)
def convert(self, config, plugin_name, version, cluster_template_create):
    normalized_config = clusterspec.ClusterSpec(config).normalize()

    # TODO(jspeidel): can we get the name (first arg) from somewhere?

    node_groups = []
    for ng in normalized_config.node_groups:
        node_group = {
            "name": ng.name,
            "flavor_id": ng.flavor,
            "node_processes": ng.node_processes,
            "count": ng.count
        }
        node_groups.append(node_group)

    cluster_configs = dict()
    for entry in normalized_config.cluster_configs:
        ci = entry.config
        # get the associated service dictionary
        target = entry.config.applicable_target
        service_dict = cluster_configs.get(target, {})
        service_dict[ci.name] = entry.value
        cluster_configs[target] = service_dict

    ctx = context.ctx()
    return cluster_template_create(
        ctx,
        {
            "name": uuidutils.generate_uuid(),
            "plugin_name": plugin_name,
            "hadoop_version": version,
            "node_groups": node_groups,
            "cluster_configs": cluster_configs
        })
def test_remove_instance(self):
    ctx = context.ctx()
    cluster_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)

    _id = cluster_db_obj["id"]
    ng_id = cluster_db_obj["node_groups"][-1]["id"]
    count = cluster_db_obj["node_groups"][-1]["count"]

    instance_id = self._add_instance(ctx, ng_id)

    cluster_db_obj = self.api.cluster_get(ctx, _id)
    for ng in cluster_db_obj["node_groups"]:
        if ng["id"] != ng_id:
            continue
        self.assertEqual(count + 1, ng["count"])

    self.api.instance_remove(ctx, instance_id)
    cluster_db_obj = self.api.cluster_get(ctx, _id)
    for ng in cluster_db_obj["node_groups"]:
        if ng["id"] != ng_id:
            continue
        self.assertEqual(count, ng["count"])

    with self.assertRaises(ex.NotFoundException):
        self.api.instance_remove(ctx, instance_id)
def test_cluster_create_from_templates(self):
    ctx = context.ctx()

    # create node_group_template
    ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
    ng_tmpl['volumes_size'] = '10'
    ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
    ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

    # create cluster template
    cl_tmpl = self.api.cluster_template_create(ctx,
                                               test_templates.SAMPLE_CLT)

    # create cluster
    cluster_val = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
    cluster_val['cluster_template_id'] = cl_tmpl['id']
    cluster_val['node_groups'][0]['node_group_template_id'] = ng_tmpl['id']
    cluster = self.api.cluster_create(ctx, cluster_val)
    self.assertEqual(CORRECT_CONF, cluster['cluster_configs'])

    for node_group in cluster['node_groups']:
        if node_group['name'] == 'ng_1':
            self.assertEqual(['p1', 'p2'], node_group['node_processes'])
            self.assertEqual(10, node_group['volumes_size'])
            self.assertEqual(CORRECT_CONF, node_group['node_configs'])
def _shutdown_instance(self, instance):
    ctx = context.ctx()

    if instance.node_group.floating_ip_pool:
        try:
            networks.delete_floating_ip(instance.instance_id)
        except nova_exceptions.NotFound:
            LOG.warn("Attempted to delete non-existent floating IP in "
                     "pool %s from instance %s",
                     instance.node_group.floating_ip_pool,
                     instance.instance_id)

    try:
        volumes.detach_from_instance(instance)
    except Exception:
        LOG.warn("Detaching volumes from instance %s failed",
                 instance.instance_id)

    try:
        nova.client().servers.delete(instance.instance_id)
    except nova_exceptions.NotFound:
        LOG.warn("Attempted to delete non-existent instance %s",
                 instance.instance_id)

    conductor.instance_remove(ctx, instance)
def test_node_group_add_from_template(self):
    ctx = context.ctx()

    # create cluster
    cluster = self.api.cluster_create(ctx, test_clusters.SAMPLE_CLUSTER)

    # create node_group_template
    ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
    ng_tmpl['volumes_size'] = '10'
    ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
    ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

    # add node group to cluster
    ng = test_clusters.SAMPLE_CLUSTER['node_groups'][0].copy()
    ng['node_group_template_id'] = ng_tmpl['id']
    ng['count'] = 5
    ng['name'] = 'ng_3'
    self.api.node_group_add(ctx, cluster['id'], ng)

    # refetch cluster
    cluster = self.api.cluster_get(ctx, cluster['id'])

    for node_group in cluster['node_groups']:
        if node_group['name'] == 'ng_3':
            self.assertEqual(['p1', 'p2'], node_group['node_processes'])
            self.assertEqual(10, node_group['volumes_size'])
            self.assertEqual(CORRECT_CONF, node_group['node_configs'])
            self.assertEqual(5, node_group['count'])
def test_cluster_create_from_templates(self):
    ctx = context.ctx()

    # create node_group_template
    ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
    ng_tmpl['volumes_size'] = 10
    ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
    ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

    # create cluster template
    cl_tmpl = self.api.cluster_template_create(ctx,
                                               test_templates.SAMPLE_CLT)

    # create cluster
    cluster_val = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
    cluster_val['cluster_template_id'] = cl_tmpl['id']
    cluster_val['node_groups'][0]['node_group_template_id'] = ng_tmpl['id']
    cluster = self.api.cluster_create(ctx, cluster_val)
    self.assertEqual(CORRECT_CONF, cluster['cluster_configs'])

    for node_group in cluster['node_groups']:
        if node_group['name'] == 'ng_1':
            self.assertEqual(['p1', 'p2'], node_group['node_processes'])
            self.assertEqual(10, node_group['volumes_size'])
            self.assertEqual(CORRECT_CONF, node_group['node_configs'])
def _provision_nodes(id, node_group_id_map):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    cluster = conductor.cluster_update(ctx, cluster, {"status": "Scaling"})
    LOG.info(g.format_cluster_status(cluster))
    instances = i.scale_cluster(cluster, node_group_id_map, plugin)

    if instances:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Configuring"})
        LOG.info(g.format_cluster_status(cluster))
        try:
            plugin.scale_cluster(cluster, i.get_instances(cluster, instances))
        except Exception as ex:
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            conductor.cluster_update(ctx, cluster, {"status": "Error"})
            LOG.info(g.format_cluster_status(cluster))
            return

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
def _set_cluster_info(self, cluster):
    nn = utils.get_namenode(cluster)
    jt = utils.get_jobtracker(cluster)
    oozie = utils.get_oozie(cluster)
    info = {}

    if jt:
        address = c_helper.get_config_value(
            'MapReduce', 'mapred.job.tracker.http.address', cluster)
        port = address[address.rfind(':') + 1:]
        info['MapReduce'] = {
            'Web UI': 'http://%s:%s' % (jt.management_ip, port)
        }
        # TODO(aignatov) change from hardcoded value
        info['MapReduce']['JobTracker'] = '%s:8021' % jt.hostname()

    if nn:
        address = c_helper.get_config_value(
            'HDFS', 'dfs.http.address', cluster)
        port = address[address.rfind(':') + 1:]
        info['HDFS'] = {
            'Web UI': 'http://%s:%s' % (nn.management_ip, port)
        }
        # TODO(aignatov) change from hardcoded value
        info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

    if oozie:
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
def test_one_node_groups_and_no_affinity_group(self, novaclient):
    node_groups = [_make_ng_dict("test_group", "test_flavor",
                                 ["data node", "task tracker"], 2)]

    cluster = _create_cluster_mock(node_groups, [])
    nova = _create_nova_mock(novaclient)
    instances._create_instances(cluster)
    userdata = _generate_user_data_script(cluster)

    nova.servers.create.assert_has_calls(
        [mock.call("test_cluster-test_group-001",
                   "initial",
                   "test_flavor",
                   scheduler_hints=None,
                   userdata=userdata,
                   key_name="user_keypair"),
         mock.call("test_cluster-test_group-002",
                   "initial",
                   "test_flavor",
                   scheduler_hints=None,
                   userdata=userdata,
                   key_name="user_keypair")],
        any_order=False)

    ctx = context.ctx()
    cluster_obj = conductor.cluster_get_all(ctx)[0]
    self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
def _run_instance(cluster, node_group, idx, aa_groups, userdata):
    """Create an instance using the nova client and persist it into the DB."""
    ctx = context.ctx()
    name = '%s-%s-%03d' % (cluster.name, node_group.name, idx)

    # aa_groups: node process -> instance ids
    aa_ids = []
    for node_process in node_group.node_processes:
        aa_ids += aa_groups.get(node_process) or []

    # create instances only at hosts w/ no instances w/ aa-enabled processes
    hints = {'different_host': list(set(aa_ids))} if aa_ids else None

    nova_instance = nova.client().servers.create(
        name, node_group.get_image_id(), node_group.flavor_id,
        scheduler_hints=hints, userdata=userdata,
        key_name=cluster.user_keypair_id)

    instance_id = conductor.instance_add(ctx, node_group, {
        "instance_id": nova_instance.id,
        "instance_name": name
    })

    # save instance id to aa_groups to support aa feature
    for node_process in node_group.node_processes:
        if node_process in cluster.anti_affinity:
            aa_group_ids = aa_groups.get(node_process, [])
            aa_group_ids.append(nova_instance.id)
            aa_groups[node_process] = aa_group_ids

    return instance_id
def init_instances_ips(instance, server):
    """Extracts internal and management ips.

    The first ip that falls into one of the nova network CIDRs is used
    as the internal ip. If the use_floating_ips flag is set, the
    management ip will be the first non-internal ip.
    """
    ctx = context.ctx()

    if instance.internal_ip and instance.management_ip:
        return True

    management_ip = instance.management_ip
    internal_ip = instance.internal_ip

    for network_label in server.networks:
        nova_network = nova.client().networks.find(label=network_label)
        network = netaddr.IPNetwork(nova_network.cidr)
        for ip in server.networks[network_label]:
            if netaddr.IPAddress(ip) in network:
                internal_ip = instance.internal_ip or ip
            else:
                management_ip = instance.management_ip or ip

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(ctx, instance,
                              {"management_ip": management_ip,
                               "internal_ip": internal_ip})

    return internal_ip and management_ip
def create_cluster(cluster):
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        _create_instances(cluster)

        # wait for all instances to come up and become accessible
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        cluster = _await_instances(cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))

        _configure_instances(cluster)
    except Exception as ex:
        LOG.warn("Can't start cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster, {
                "status": "Error",
                "status_description": str(ex)
            })
            LOG.info(g.format_cluster_status(cluster))
            _rollback_cluster_creation(cluster, ex)
def test_cluster_fields(self):
    ctx = context.ctx()
    cl_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
    self.assertIsInstance(cl_db_obj, dict)

    for key, val in SAMPLE_CLUSTER.items():
        if key == 'node_groups':
            # this will be checked separately
            continue
        self.assertEqual(val, cl_db_obj.get(key),
                         "Key not found %s" % key)

    for ng in cl_db_obj["node_groups"]:
        ng.pop("created_at")
        ng.pop("updated_at")
        ng.pop("id")
        self.assertEqual(ng.pop("cluster_id"), cl_db_obj["id"])
        ng.pop("image_id")
        self.assertEqual(ng.pop("instances"), [])
        ng.pop("node_configs")
        ng.pop("node_group_template_id")
        ng.pop("volume_mount_prefix")
        ng.pop("volumes_size")
        ng.pop("volumes_per_node")

    self.assertListEqual(SAMPLE_CLUSTER["node_groups"],
                         cl_db_obj["node_groups"])
def test_cluster_terminate(self, terminate_cluster, get_job_status):
    CONF.use_identity_api_v3 = True

    ctx = context.ctx()
    job = self.api.job_create(ctx, te.SAMPLE_JOB)
    ds = self.api.data_source_create(ctx, te.SAMPLE_DATA_SOURCE)

    c = tc.SAMPLE_CLUSTER.copy()
    c["status"] = "Active"
    c["id"] = "1"
    c["name"] = "1"
    self.api.cluster_create(ctx, c)
    c["id"] = "2"
    c["name"] = "2"
    self.api.cluster_create(ctx, c)

    self._create_job_execution({"end_time": datetime.datetime.now(),
                                "id": 1,
                                "cluster_id": "1"},
                               job, ds, ds)
    self._create_job_execution({"end_time": None,
                                "id": 2,
                                "cluster_id": "2"},
                               job, ds, ds)
    self._create_job_execution({"end_time": None,
                                "id": 3,
                                "cluster_id": "2"},
                               job, ds, ds)

    p.SavannaPeriodicTasks().terminate_unneeded_clusters(None)

    self.assertEqual(1, len(terminate_cluster.call_args_list))
    terminated_cluster_id = terminate_cluster.call_args_list[0][0][0]
    self.assertEqual('1', terminated_cluster_id)