Example #1
    def test_get_hadoop_ssh_keys(self):
        cluster_dict = {
            'name': 'cluster1',
            'plugin_name': 'mock_plugin',
            'hadoop_version': 'mock_version',
            'default_image_id': 'initial',
            'node_groups': [tu._make_ng_dict("ng1", "f1", ["s1"], 1)]}

        cluster1 = conductor.cluster_create(context.ctx(), cluster_dict)
        (private_key1, public_key1) = c_h.get_hadoop_ssh_keys(cluster1)

        # keys should be stored and reused for the existing cluster
        cluster1 = conductor.cluster_get(context.ctx(), cluster1)
        (private_key2, public_key2) = c_h.get_hadoop_ssh_keys(cluster1)

        self.assertEqual(public_key1, public_key2)
        self.assertEqual(private_key1, private_key2)

        # new keys should be generated for a new cluster
        cluster_dict.update({'name': 'cluster2'})
        cluster2 = conductor.cluster_create(context.ctx(), cluster_dict)
        (private_key3, public_key3) = c_h.get_hadoop_ssh_keys(cluster2)

        self.assertNotEqual(public_key1, public_key3)
        self.assertNotEqual(private_key1, private_key3)
Example #2
    def test_get_hadoop_ssh_keys(self):
        cluster_dict = {
            'name': 'cluster1',
            'plugin_name': 'mock_plugin',
            'hadoop_version': 'mock_version',
            'default_image_id': 'initial',
            'node_groups': [tu.make_ng_dict("ng1", "f1", ["s1"], 1)]
        }

        cluster1 = conductor.cluster_create(context.ctx(), cluster_dict)
        (private_key1, public_key1) = c_h.get_hadoop_ssh_keys(cluster1)

        # keys should be stored and reused for the existing cluster
        cluster1 = conductor.cluster_get(context.ctx(), cluster1)
        (private_key2, public_key2) = c_h.get_hadoop_ssh_keys(cluster1)

        self.assertEqual(public_key1, public_key2)
        self.assertEqual(private_key1, private_key2)

        # new keys should be generated for a new cluster
        cluster_dict.update({'name': 'cluster2'})
        cluster2 = conductor.cluster_create(context.ctx(), cluster_dict)
        (private_key3, public_key3) = c_h.get_hadoop_ssh_keys(cluster2)

        self.assertNotEqual(public_key1, public_key3)
        self.assertNotEqual(private_key1, private_key3)
Example #3
def get_raw_binary(job_binary):
    url = job_binary.url
    if url.startswith("savanna-db://"):
        res = db.get_raw_data(context.ctx(), job_binary)

    if url.startswith(su.SWIFT_INTERNAL_PREFIX):
        res = i_swift.get_raw_data(context.ctx(), job_binary)

    return res
Example #4
def get_raw_binary(job_binary):
    url = job_binary.url
    if url.startswith("savanna-db://"):
        res = db.get_raw_data(context.ctx(), job_binary)

    # TODO(mattf): remove support for OLD_SWIFT_INTERNAL_PREFIX
    if url.startswith(su.SWIFT_INTERNAL_PREFIX) or (
            url.startswith(su.OLD_SWIFT_INTERNAL_PREFIX)):
        res = i_swift.get_raw_data(context.ctx(), job_binary)

    return res
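
In both variants above, res is assigned only when the URL matches a known prefix, so an unrecognized scheme would reach the return with res unbound. Below is a minimal defensive sketch, assuming the same db/i_swift helpers and su prefixes; the explicit RuntimeError is an illustration, not the project's actual error handling:

def get_raw_binary_checked(job_binary):
    # Dispatch on the URL scheme and fail loudly for unsupported ones.
    url = job_binary.url
    if url.startswith("savanna-db://"):
        return db.get_raw_data(context.ctx(), job_binary)
    if url.startswith(su.SWIFT_INTERNAL_PREFIX) or (
            url.startswith(su.OLD_SWIFT_INTERNAL_PREFIX)):
        return i_swift.get_raw_data(context.ctx(), job_binary)
    # Assumption: an unknown scheme is treated as an error.
    raise RuntimeError("Unsupported job binary URL: %s" % url)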
Example #5
def _provision_cluster(cluster_id):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # updating cluster infra
    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "InfraUpdating"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    i.create_cluster(cluster)

    # configure cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Configuring"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.configure_cluster(cluster)

    # starting prepared and configured cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Starting"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.start_cluster(cluster)

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))

    # schedule execution pending job for cluster
    for je in conductor.job_execution_get_all(ctx, cluster_id=cluster.id):
        jm.run_job(ctx, je)
Example #6
    def _set_cluster_info(self, cluster, cluster_spec, hosts, ambari_info):
        info = {}

        try:
            jobtracker_ip = self._determine_host_for_server_component(
                'JOBTRACKER', cluster_spec, hosts).management_ip
        except Exception:
            pass
        else:
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jobtracker_ip
            }

        try:
            namenode_ip = self._determine_host_for_server_component(
                'NAMENODE', cluster_spec, hosts).management_ip
        except Exception:
            pass
        else:
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % namenode_ip
            }

        info['Ambari Console'] = {
            'Web UI': 'http://%s' % ambari_info.get_address()
        }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #7
    def test_ip_assignment_use_no_floating(self, cfg, novaclient):

        cfg.CONF.use_floating_ips = False
        nova = _create_nova_mock(novaclient)

        node_groups = [
            _make_ng_dict("test_group_1", "test_flavor",
                          ["data node", "test tracker"], 2, 'pool'),
            _make_ng_dict("test_group_2", "test_flavor",
                          ["name node", "test tracker"], 1)
        ]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["data node"])
        instances._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = instances.get_instances(cluster)

        instances._assign_floating_ips(instances_list)

        nova.floating_ips.create.assert_has_calls(
            [mock.call("pool"), mock.call("pool")], any_order=False)

        self.assertEqual(nova.floating_ips.create.call_count, 2,
                         "Unexpected number of floating IP create calls.")
Example #8
    def validate_scaling(self, cluster, existing, additional):
        orig_existing_count = {}
        ctx = context.ctx()
        try:
            for ng_id in existing:
                node_group = self._get_by_id(cluster.node_groups, ng_id)
                if node_group:
                    orig_existing_count[ng_id] = node_group.count
                    conductor.node_group_update(
                        ctx, node_group, {'count': int(existing[ng_id])})
                else:
                    raise RuntimeError(
                        'Node group not found: {0}'.format(ng_id))
            for ng_id in additional:
                node_group = self._get_by_id(cluster.node_groups, ng_id)
                if node_group:
                    conductor.node_group_update(
                        ctx, node_group, {'count': int(additional[ng_id])})
                else:
                    raise RuntimeError(
                        'Node group not found: {0}'.format(ng_id))

            self.validate(cluster)

        finally:
            for ng_id in additional:
                node_group = self._get_by_id(cluster.node_groups, ng_id)
                conductor.node_group_update(ctx, node_group, {'count': 0})
            for ng_id in orig_existing_count:
                node_group = self._get_by_id(cluster.node_groups, ng_id)
                conductor.node_group_update(
                    ctx, node_group, {'count': orig_existing_count[ng_id]})
Example #9
    def test_remove_instance(self):
        ctx = context.ctx()
        cluster_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
        _id = cluster_db_obj["id"]

        ng_id = cluster_db_obj["node_groups"][-1]["id"]
        count = cluster_db_obj["node_groups"][-1]["count"]

        node_group = self._add_instance(ctx, ng_id)
        instance_id = node_group["instances"][-1]["id"]

        cluster_db_obj = self.api.cluster_get(ctx, _id)
        for ng in cluster_db_obj["node_groups"]:
            if ng["id"] != ng_id:
                continue

            self.assertEqual(count + 1, ng["count"])

        self.api.instance_remove(ctx, instance_id)

        cluster_db_obj = self.api.cluster_get(ctx, _id)
        for ng in cluster_db_obj["node_groups"]:
            if ng["id"] != ng_id:
                continue

            self.assertEqual(count, ng["count"])

        with self.assertRaises(RuntimeError):
            self.api.instance_remove(ctx, instance_id)
Example #10
def clean_cluster_from_empty_ng(cluster):
    ctx = context.ctx()
    for ng in cluster.node_groups:
        if ng.count == 0:
            conductor.node_group_remove(ctx, ng)

    return conductor.cluster_get(ctx, cluster)
Example #11
def init_instances_ips(instance, server):
    """Extracts internal and management ips.

    The first IP from the Nova network CIDRs is used as the internal IP.
    If the use_floating_ips flag is set, the management IP will be the
    first non-internal IP.
    """
    ctx = context.ctx()

    if instance.internal_ip and instance.management_ip:
        return True

    management_ip = instance.management_ip
    internal_ip = instance.internal_ip

    for network_label in server.networks:
        nova_network = nova.client().networks.find(label=network_label)
        network = netaddr.IPNetwork(nova_network.cidr)
        for ip in server.networks[network_label]:
            if netaddr.IPAddress(ip) in network:
                internal_ip = instance.internal_ip or ip
            else:
                management_ip = instance.management_ip or ip

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(ctx, instance, {
        "management_ip": management_ip,
        "internal_ip": internal_ip
    })

    return internal_ip and management_ip
Example #12
    def _set_cluster_info(self, cluster, cluster_spec):
        info = {}
        for service in cluster_spec.services:
            if service.deployed:
                service.register_service_urls(cluster_spec, info)

        conductor.cluster_update(context.ctx(), cluster, {'info': info})
Example #13
def init_instances_ips(instance):
    """Extracts internal and management ips.

    The first IP from the Nova network CIDRs is used as the internal IP.
    If the use_floating_ips flag is set, the management IP will be the
    first non-internal IP.
    """

    server = nova.get_instance_info(instance)

    management_ip = None
    internal_ip = None

    for network_label, addresses in six.iteritems(server.addresses):
        for address in addresses:
            if address['OS-EXT-IPS:type'] == 'fixed':
                internal_ip = internal_ip or address['addr']
            else:
                management_ip = management_ip or address['addr']

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(context.ctx(), instance,
                              {"management_ip": management_ip,
                               "internal_ip": internal_ip})

    return internal_ip and management_ip
Example #14
    def _set_cluster_info(self, cluster, cluster_spec, ambari_info):
        info = {}

        try:
            jobtracker_ip = cluster_spec.determine_host_for_server_component(
                'JOBTRACKER').management_ip
        except Exception:
            pass
        else:
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jobtracker_ip
            }

        try:
            namenode_ip = cluster_spec.determine_host_for_server_component(
                'NAMENODE').management_ip
        except Exception:
            pass
        else:
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % namenode_ip
            }

        info['Ambari Console'] = {
            'Web UI': 'http://%s' % ambari_info.get_address()
        }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #15
def _provision_cluster(cluster_id):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # updating cluster infra
    cluster = conductor.cluster_update(ctx, cluster,
                                       {"status": "InfraUpdating"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.update_infra(cluster)

    # creating instances and configuring them
    cluster = conductor.cluster_get(ctx, cluster_id)
    i.create_cluster(cluster)

    # configure cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Configuring"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.configure_cluster(cluster)

    # starting prepared and configured cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Starting"})
    LOG.info(g.format_cluster_status(cluster))
    plugin.start_cluster(cluster)

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
Example #16
    def test_crud_operation_create_list_delete_update(self):
        ctx = context.ctx()
        job = self.api.job_create(ctx, SAMPLE_JOB)
        ds_input = self.api.data_source_create(ctx, SAMPLE_DATA_SOURCE)
        SAMPLE_DATA_OUTPUT = copy.copy(SAMPLE_DATA_SOURCE)
        SAMPLE_DATA_OUTPUT['name'] = 'output'
        ds_output = self.api.data_source_create(ctx, SAMPLE_DATA_OUTPUT)

        SAMPLE_JOB_EXECUTION['job_id'] = job['id']
        SAMPLE_JOB_EXECUTION['input_id'] = ds_input['id']
        SAMPLE_JOB_EXECUTION['output_id'] = ds_output['id']

        self.api.job_execution_create(ctx, SAMPLE_JOB_EXECUTION)

        lst = self.api.job_execution_get_all(ctx)
        self.assertEqual(len(lst), 1)

        job_ex_id = lst[0]['id']

        self.assertEqual(lst[0]['progress'], 0.1)
        self.api.job_execution_update(ctx, job_ex_id, {'progress': '0.2'})
        updated_job = self.api.job_execution_get(ctx, job_ex_id)
        self.assertEqual(updated_job['progress'], 0.2)

        self.api.job_execution_destroy(ctx, job_ex_id)

        lst = self.api.job_execution_get_all(ctx)
        self.assertEqual(len(lst), 0)
Example #17
def scale_cluster(id, data):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get("resize_node_groups", [])
    additional_node_groups = data.get("add_node_groups", [])

    # the next map is the main object we will work with
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {}
    for ng in existing_node_groups:
        ng_id = g.find(cluster.node_groups, name=ng["name"])["id"]
        to_be_enlarged.update({ng_id: ng["count"]})

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)

    try:
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception:
        with excutils.save_and_reraise_exception():
            i.clean_cluster_from_empty_ng(cluster)
            cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # Validation succeeded, so update the to_be_enlarged map:
    to_be_enlarged.update(additional)

    context.spawn("cluster-scaling-%s" % id, _provision_nodes, id, to_be_enlarged)
    return conductor.cluster_get(ctx, id)
Example #18
    def test_one_node_groups_and_one_affinity_group(self, novaclient):
        node_groups = [_make_ng_dict('test_group', 'test_flavor',
                                     ['data node'], 2)]
        cluster = _create_cluster_mock(node_groups, ["data node"])
        nova = _create_nova_mock(novaclient)
        instances._create_instances(cluster)
        userdata = _generate_user_data_script(cluster)

        nova.servers.create.assert_has_calls(
            [mock.call("test_cluster-test_group-001",
                       "initial",
                       "test_flavor",
                       scheduler_hints=None,
                       userdata=userdata,
                       key_name='user_keypair'),
             mock.call("test_cluster-test_group-002",
                       "initial",
                       "test_flavor",
                       scheduler_hints={'different_host': ["1"]},
                       userdata=userdata,
                       key_name='user_keypair')],
            any_order=False)

        ctx = context.ctx()
        cluster_obj = conductor.cluster_get_all(ctx)[0]
        self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
Example #19
def create_cluster(values):
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))

        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(e)})
            LOG.info(g.format_cluster_status(cluster))

    context.spawn("cluster-creating-%s" % cluster.id,
                  _provision_cluster, cluster.id)
    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust(cluster)

    return conductor.cluster_get(ctx, cluster.id)
Example #20
    def test_cluster_terminate(self, terminate_cluster, get_job_status):
        cfg.CONF.set_override("use_identity_api_v3", True)
        try:
            ctx = context.ctx()
            job = self.api.job_create(ctx, te.SAMPLE_JOB)
            ds = self.api.data_source_create(ctx, te.SAMPLE_DATA_SOURCE)
            c = tc.SAMPLE_CLUSTER.copy()
            c["status"] = "Active"
            c["id"] = "1"
            c["name"] = "1"
            self.api.cluster_create(ctx, c)
            c["id"] = "2"
            c["name"] = "2"
            self.api.cluster_create(ctx, c)
            self._create_job_execution({"end_time": datetime.datetime.now(),
                                        "id": 1,
                                        "cluster_id": "1"},
                                       job, ds, ds)
            self._create_job_execution({"end_time": None,
                                        "id": 2,
                                        "cluster_id": "2"},
                                       job, ds, ds)
            self._create_job_execution({"end_time": None,
                                        "id": 3,
                                        "cluster_id": "2"},
                                       job, ds, ds)
            p.SavannaPeriodicTasks().terminate_unneeded_clusters(None)
            self.assertEqual(terminate_cluster.call_count, 1)
            terminate_cluster.assert_has_calls([mock.call(u'1')])
        finally:
            cfg.CONF.clear_override("use_identity_api_v3")
Example #21
def init_instances_ips(instance):
    """Extracts internal and management ips.

    The first IP from the Nova network CIDRs is used as the internal IP.
    If the use_floating_ips flag is set, the management IP will be the
    first non-internal IP.
    """

    server = nova.get_instance_info(instance)

    management_ip = None
    internal_ip = None

    for network_label, addresses in six.iteritems(server.addresses):
        for address in addresses:
            if address['OS-EXT-IPS:type'] == 'fixed':
                internal_ip = internal_ip or address['addr']
            else:
                management_ip = management_ip or address['addr']

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(context.ctx(), instance, {
        "management_ip": management_ip,
        "internal_ip": internal_ip
    })

    return internal_ip and management_ip
Example #22
    def test_cluster_fields(self):
        ctx = context.ctx()
        cl_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
        self.assertIsInstance(cl_db_obj, dict)

        for key, val in SAMPLE_CLUSTER.items():
            if key == 'node_groups':
                # this will be checked separately
                continue
            self.assertEqual(val, cl_db_obj.get(key), "Key not found %s" % key)

        for ng in cl_db_obj["node_groups"]:
            ng.pop("created_at")
            ng.pop("updated_at")
            ng.pop("id")
            self.assertEqual(ng.pop("cluster_id"), cl_db_obj["id"])
            ng.pop("image_id")
            self.assertEqual(ng.pop("instances"), [])
            ng.pop("node_configs")
            ng.pop("node_group_template_id")
            ng.pop("volume_mount_prefix")
            ng.pop("volumes_size")
            ng.pop("volumes_per_node")
            ng.pop("floating_ip_pool")
            ng.pop("image_username")
            ng.pop("tenant_id")

        self.assertEqual(SAMPLE_CLUSTER["node_groups"],
                         cl_db_obj["node_groups"])
Example #23
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_jobtracker(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {'IDH Manager': {
            'Web UI': 'https://%s:9443' % mng.management_ip
        }}

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % nn.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #24
    def test_node_group_add_from_template(self):
        ctx = context.ctx()

        # create cluster
        sample_copy = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
        cluster = self.api.cluster_create(ctx, sample_copy)

        # create node_group_template
        ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
        ng_tmpl['volumes_size'] = 10
        ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
        ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

        # add node group to cluster
        ng = copy.deepcopy(test_clusters.SAMPLE_CLUSTER['node_groups'][0])
        ng['node_group_template_id'] = ng_tmpl['id']
        ng['count'] = 5
        ng['name'] = 'ng_3'
        self.api.node_group_add(ctx, cluster['id'], ng)

        # refetch cluster
        cluster = self.api.cluster_get(ctx, cluster['id'])

        for node_group in cluster['node_groups']:
            if node_group['name'] == 'ng_3':
                self.assertEqual(['p1', 'p2'], node_group['node_processes'])
                self.assertEqual(10, node_group['volumes_size'])
                self.assertEqual(CORRECT_CONF, node_group['node_configs'])
                self.assertEqual(5, node_group['count'])
Example #25
def _scale_cluster(cluster, target_count):
    ctx = context.ctx()

    rollback_count = _get_ng_counts(cluster)

    launcher = _ScaleLauncher()

    try:
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        LOG.warn("Can't scale cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_get(ctx, cluster)

            try:
                _rollback_cluster_scaling(ctx, cluster, rollback_count, target_count)
            except Exception:
                # if something fails during the rollback, we stop
                # doing anything further
                cluster = conductor.cluster_update(ctx, cluster, {"status": "Error"})
                LOG.info(g.format_cluster_status(cluster))
                LOG.error("Unable to complete rollback, aborting")
                raise

            cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))
            LOG.warn("Rollback successful. Throwing off an initial exception.")
    finally:
        cluster = conductor.cluster_get(ctx, cluster)
        _clean_cluster_from_empty_ng(cluster)

    return launcher.inst_ids
Example #26
    def test_clt_fields(self):
        ctx = context.ctx()
        clt_db_obj_id = self.api.cluster_template_create(ctx, SAMPLE_CLT)['id']

        clt_db_obj = self.api.cluster_template_get(ctx, clt_db_obj_id)
        self.assertIsInstance(clt_db_obj, dict)

        for key, val in SAMPLE_CLT.items():
            if key == 'node_groups':
                # this will be checked separately
                continue
            self.assertEqual(val, clt_db_obj.get(key),
                             "Key not found %s" % key)

        for ng in clt_db_obj["node_groups"]:
            ng.pop("created_at")
            ng.pop("updated_at")
            ng.pop("id")
            self.assertEqual(ng.pop("cluster_template_id"), clt_db_obj_id)
            ng.pop("image_id")
            ng.pop("node_configs")
            ng.pop("node_group_template_id")
            ng.pop("volume_mount_prefix")
            ng.pop("volumes_size")
            ng.pop("volumes_per_node")

        self.assertListEqual(SAMPLE_CLT["node_groups"],
                             clt_db_obj["node_groups"])
Example #27
def _await_networks(instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not _check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = get_instances(cluster, ips_assigned)

    accessible_instances = set()
    while len(accessible_instances) != len(instances):
        if not _check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in accessible_instances:
                if _check_if_accessible(instance):
                    accessible_instances.add(instance.id)

        context.sleep(1)
Example #28
def execute_job(job_id, data):

    # Elements common to all job types
    cluster_id = data['cluster_id']
    configs = data.get('job_configs', {})

    ctx = context.current()
    cluster = conductor.cluster_get(ctx, cluster_id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    instance = plugin.get_oozie_server(cluster)

    extra = {}
    info = None
    if CONF.use_namespaces and not CONF.use_floating_ips:
        info = instance.remote().get_neutron_info()
        extra['neutron'] = info

    # Not in Java job types but present for all others
    input_id = data.get('input_id', None)
    output_id = data.get('output_id', None)

    # Since we will use a unified class in the database, we pass
    # a superset for all job types
    job_ex_dict = {'input_id': input_id, 'output_id': output_id,
                   'job_id': job_id, 'cluster_id': cluster_id,
                   'info': {'status': 'Pending'}, 'job_configs': configs,
                   'extra': extra}
    job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)

    context.spawn("Starting Job Execution %s" % job_execution.id,
                  manager.run_job, job_execution)
    return job_execution
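
For reference, a hypothetical payload shaped to match the keys that execute_job reads above (all values are illustrative placeholders):

sample_data = {
    'cluster_id': 'cluster-uuid',            # required; resolved via conductor
    'job_configs': {},                       # optional; defaults to {}
    'input_id': 'input-data-source-uuid',    # absent for Java job types
    'output_id': 'output-data-source-uuid',
}
# job_execution = execute_job('job-uuid', sample_data)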
Example #29
def create_cluster(cluster):
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        _create_instances(cluster)

        # wait until all instances are up and accessible
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        cluster = _await_instances(cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))

        _configure_instances(cluster)
    except Exception as ex:
        LOG.warn("Can't start cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(ex)})
            LOG.info(g.format_cluster_status(cluster))
            _rollback_cluster_creation(cluster, ex)
Example #30
    def _set_cluster_info(self, cluster, cluster_spec):
        info = {}
        for service in cluster_spec.services:
            if service.deployed:
                service.register_service_urls(cluster_spec, info)

        conductor.cluster_update(context.ctx(), cluster, {'info': info})
Example #31
def clean_cluster_from_empty_ng(cluster):
    ctx = context.ctx()
    for ng in cluster.node_groups:
        if ng.count == 0:
            conductor.node_group_remove(ctx, ng)

    return conductor.cluster_get(ctx, cluster)
Example #32
    def test_one_node_groups_and_no_affinity_group(self, novaclient):
        node_groups = [
            _make_ng_dict('test_group', 'test_flavor',
                          ['data node', 'task tracker'], 2)
        ]

        cluster = _create_cluster_mock(node_groups, [])
        nova = _create_nova_mock(novaclient)
        instances._create_instances(cluster)
        userdata = _generate_user_data_script(cluster)

        nova.servers.create.assert_has_calls([
            mock.call("test_cluster-test_group-001",
                      "initial",
                      "test_flavor",
                      scheduler_hints=None,
                      userdata=userdata,
                      key_name='user_keypair'),
            mock.call("test_cluster-test_group-002",
                      "initial",
                      "test_flavor",
                      scheduler_hints=None,
                      userdata=userdata,
                      key_name='user_keypair')
        ], any_order=False)

        ctx = context.ctx()
        cluster_obj = conductor.cluster_get_all(ctx)[0]
        self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
Example #33
    def convert(self, config, plugin_name, version, cluster_template_create):
        normalized_config = clusterspec.ClusterSpec(config).normalize()

        #TODO(jspeidel):  can we get the name (first arg) from somewhere?

        node_groups = []
        for ng in normalized_config.node_groups:
            node_group = {
                "name": ng.name,
                "flavor_id": ng.flavor,
                "node_processes": ng.node_processes,
                "count": ng.count
            }
            node_groups.append(node_group)

        cluster_configs = dict()
        for entry in normalized_config.cluster_configs:
            ci = entry.config
            # get the associated service dictionary
            target = entry.config.applicable_target
            service_dict = cluster_configs.get(target, {})
            service_dict[ci.name] = entry.value
            cluster_configs[target] = service_dict

        ctx = context.ctx()
        return cluster_template_create(ctx,
                                       {"name": uuidutils.generate_uuid(),
                                        "plugin_name": plugin_name,
                                        "hadoop_version": version,
                                        "node_groups": node_groups,
                                        "cluster_configs": cluster_configs})
Example #34
def _run_instance(cluster, node_group, idx, aa_groups, userdata):
    """Create instance using nova client and persist them into DB."""
    session = context.ctx().session
    name = '%s-%s-%03d' % (cluster.name, node_group.name, idx)

    # aa_groups: node process -> instance ids
    aa_ids = []
    for node_process in node_group.node_processes:
        aa_ids += aa_groups.get(node_process) or []

    # place instances only on hosts without instances of aa-enabled processes
    hints = {'different_host': list(set(aa_ids))} if aa_ids else None

    context.model_save(node_group)

    nova_instance = nova.client().servers.create(
        name, node_group.get_image_id(), node_group.flavor_id,
        scheduler_hints=hints, userdata=userdata,
        key_name=cluster.user_keypair_id)

    with session.begin():
        instance = m.Instance(node_group.id, nova_instance.id, name)
        node_group.instances.append(instance)
        session.add(instance)

    # save instance id to aa_groups to support aa feature
    for node_process in node_group.node_processes:
        if node_process in cluster.anti_affinity:
            aa_group_ids = aa_groups.get(node_process, [])
            aa_group_ids.append(nova_instance.id)
            aa_groups[node_process] = aa_group_ids

    return instance
Example #35
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_jobtracker(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {'IDH Manager': {
            'Web UI': 'https://%s:9443' % mng.management_ip
        }}

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % nn.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #36
def _await_networks(cluster, instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = _get_instances(cluster, ips_assigned)

    with context.ThreadGroup() as tg:
        for instance in instances:
            tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                     _wait_until_accessible, instance)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
Example #37
def create_hadoop_ssh_keys(cluster):
    private_key, public_key = crypto.generate_key_pair()
    extra = {
        'hadoop_private_ssh_key': private_key,
        'hadoop_public_ssh_key': public_key
    }
    return conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
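
The tests in Examples #1 and #2 expect get_hadoop_ssh_keys to return the stored pair for an existing cluster and a fresh pair for a new one. A minimal sketch of such a companion helper, assuming the keys live under cluster.extra exactly as written by create_hadoop_ssh_keys above (an illustration, not necessarily the project's implementation):

def get_hadoop_ssh_keys(cluster):
    # Reuse previously stored keys; generate and persist them on first use.
    extra = cluster.extra or {}
    private_key = extra.get('hadoop_private_ssh_key')
    public_key = extra.get('hadoop_public_ssh_key')
    if not (private_key and public_key):
        cluster = create_hadoop_ssh_keys(cluster)
        private_key = cluster.extra['hadoop_private_ssh_key']
        public_key = cluster.extra['hadoop_public_ssh_key']
    return private_key, public_key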
Example #38
    def test_ip_assignment_use_no_floating(self, cfg, novaclient):

        cfg.CONF.use_floating_ips = False
        nova = _create_nova_mock(novaclient)

        node_groups = [_make_ng_dict("test_group_1", "test_flavor",
                                     ["data node", "test tracker"], 2, 'pool'),
                       _make_ng_dict("test_group_2", "test_flavor",
                                     ["name node", "test tracker"], 1)]

        ctx = context.ctx()
        cluster = _create_cluster_mock(node_groups, ["data node"])
        instances._create_instances(cluster)

        cluster = conductor.cluster_get(ctx, cluster)
        instances_list = instances._get_instances(cluster)

        instances._assign_floating_ips(instances_list)

        nova.floating_ips.create.assert_has_calls(
            [mock.call("pool"),
             mock.call("pool")],
            any_order=False
        )

        self.assertEqual(nova.floating_ips.create.call_count, 2,
                         "Unexpected number of floating IP create calls.")
Example #39
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        ips_assigned = set()
        while len(ips_assigned) != len(instances):
            if not g.check_cluster_exists(instances[0].node_group.cluster):
                return
            for instance in instances:
                if instance.id not in ips_assigned:
                    if networks.init_instances_ips(instance):
                        ips_assigned.add(instance.id)

            context.sleep(1)

        LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
        instances = g.get_instances(cluster, ips_assigned)

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
Example #40
    def _set_cluster_info(self, cluster):
        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        info = {}

        if jt:
            address = c_helper.get_config_value(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            port = address[address.rfind(':') + 1:]
            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, port)
            }

        if nn:
            address = c_helper.get_config_value('HDFS', 'dfs.http.address',
                                                cluster)
            port = address[address.rfind(':') + 1:]
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, port)
            }

        if oozie:
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #41
def create_hadoop_ssh_keys(cluster):
    private_key, public_key = crypto.generate_key_pair()
    extra = {
        'hadoop_private_ssh_key': private_key,
        'hadoop_public_ssh_key': public_key
    }
    return conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
Example #42
    def test_one_node_groups_and_no_affinity_group(self, novaclient):
        node_groups = [m.NodeGroup("test_group", "test_flavor",
                                   ["data node", "test tracker"], 2)]
        node_groups[0]._username = "******"
        cluster = _create_cluster_mock(node_groups, [])
        nova = _create_nova_mock(novaclient)
        instances._create_instances(cluster)
        userdata = _generate_user_data_script(cluster)

        nova.servers.create.assert_has_calls(
            [mock.call("test_cluster-test_group-001",
                       "initial",
                       "test_flavor",
                       scheduler_hints=None,
                       userdata=userdata,
                       key_name='user_keypair'),
             mock.call("test_cluster-test_group-002",
                       "initial",
                       "test_flavor",
                       scheduler_hints=None,
                       userdata=userdata,
                       key_name='user_keypair')],
            any_order=False)

        session = ctx.ctx().session
        with session.begin():
            self.assertEqual(session.query(m.Instance).count(), 2)
Example #43
    def convert(self, config, plugin_name, version, cluster_template_create):
        normalized_config = clusterspec.ClusterSpec(config).normalize()

        #TODO(jspeidel):  can we get the name (first arg) from somewhere?

        node_groups = []
        for ng in normalized_config.node_groups:
            node_group = {
                "name": ng.name,
                "flavor_id": ng.flavor,
                "node_processes": ng.node_processes,
                "count": ng.count
            }
            node_groups.append(node_group)

        cluster_configs = dict()
        for entry in normalized_config.cluster_configs:
            ci = entry.config
            # get the associated service dictionary
            target = entry.config.applicable_target
            service_dict = cluster_configs.get(target, {})
            service_dict[ci.name] = entry.value
            cluster_configs[target] = service_dict

        ctx = context.ctx()
        return cluster_template_create(
            ctx, {
                "name": uuidutils.generate_uuid(),
                "plugin_name": plugin_name,
                "hadoop_version": version,
                "node_groups": node_groups,
                "cluster_configs": cluster_configs
            })
Example #44
    def test_remove_instance(self):
        ctx = context.ctx()
        cluster_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
        _id = cluster_db_obj["id"]

        ng_id = cluster_db_obj["node_groups"][-1]["id"]
        count = cluster_db_obj["node_groups"][-1]["count"]

        instance_id = self._add_instance(ctx, ng_id)

        cluster_db_obj = self.api.cluster_get(ctx, _id)
        for ng in cluster_db_obj["node_groups"]:
            if ng["id"] != ng_id:
                continue

            self.assertEqual(count + 1, ng["count"])

        self.api.instance_remove(ctx, instance_id)

        cluster_db_obj = self.api.cluster_get(ctx, _id)
        for ng in cluster_db_obj["node_groups"]:
            if ng["id"] != ng_id:
                continue

            self.assertEqual(count, ng["count"])

        with self.assertRaises(ex.NotFoundException):
            self.api.instance_remove(ctx, instance_id)
Example #45
    def test_cluster_create_from_templates(self):
        ctx = context.ctx()

        # create node_group_template
        ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
        ng_tmpl['volumes_size'] = '10'
        ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
        ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

        # create cluster template
        cl_tmpl = self.api.cluster_template_create(ctx,
                                                   test_templates.SAMPLE_CLT)

        # create cluster
        cluster_val = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
        cluster_val['cluster_template_id'] = cl_tmpl['id']
        cluster_val['node_groups'][0]['node_group_template_id'] = ng_tmpl['id']
        cluster = self.api.cluster_create(ctx, cluster_val)
        self.assertEqual(CORRECT_CONF, cluster['cluster_configs'])

        for node_group in cluster['node_groups']:
            if node_group['name'] == 'ng_1':
                self.assertEqual(['p1', 'p2'], node_group['node_processes'])
                self.assertEqual(10, node_group['volumes_size'])
                self.assertEqual(CORRECT_CONF, node_group['node_configs'])
Example #46
    def _shutdown_instance(self, instance):
        ctx = context.ctx()

        if instance.node_group.floating_ip_pool:
            try:
                networks.delete_floating_ip(instance.instance_id)
            except nova_exceptions.NotFound:
                LOG.warn(
                    "Attempted to delete non-existent floating IP in "
                    "pool %s from instancie %s",
                    instance.node_group.floating_ip_pool, instance.instance_id)

        try:
            volumes.detach_from_instance(instance)
        except Exception:
            LOG.warn("Detaching volumes from instance %s failed",
                     instance.instance_id)

        try:
            nova.client().servers.delete(instance.instance_id)
        except nova_exceptions.NotFound:
            LOG.warn("Attempted to delete non-existent instance %s",
                     instance.instance_id)

        conductor.instance_remove(ctx, instance)
Example #47
    def test_node_group_add_from_template(self):
        ctx = context.ctx()

        # create cluster
        cluster = self.api.cluster_create(ctx, test_clusters.SAMPLE_CLUSTER)

        # create node_group_template
        ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
        ng_tmpl['volumes_size'] = '10'
        ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
        ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

        # add node group to cluster
        ng = test_clusters.SAMPLE_CLUSTER['node_groups'][0].copy()
        ng['node_group_template_id'] = ng_tmpl['id']
        ng['count'] = 5
        ng['name'] = 'ng_3'
        self.api.node_group_add(ctx, cluster['id'], ng)

        # refetch cluster
        cluster = self.api.cluster_get(ctx, cluster['id'])

        for node_group in cluster['node_groups']:
            if node_group['name'] == 'ng_3':
                self.assertEqual(['p1', 'p2'], node_group['node_processes'])
                self.assertEqual(10, node_group['volumes_size'])
                self.assertEqual(CORRECT_CONF, node_group['node_configs'])
                self.assertEqual(5, node_group['count'])
Example #48
    def test_cluster_create_from_templates(self):
        ctx = context.ctx()

        # create node_group_template
        ng_tmpl = copy.deepcopy(test_templates.SAMPLE_NGT)
        ng_tmpl['volumes_size'] = 10
        ng_tmpl['node_configs']['service_1']['config_2'] = 'value_2'
        ng_tmpl = self.api.node_group_template_create(ctx, ng_tmpl)

        # create cluster template
        cl_tmpl = self.api.cluster_template_create(ctx,
                                                   test_templates.SAMPLE_CLT)

        # create cluster
        cluster_val = copy.deepcopy(test_clusters.SAMPLE_CLUSTER)
        cluster_val['cluster_template_id'] = cl_tmpl['id']
        cluster_val['node_groups'][0]['node_group_template_id'] = ng_tmpl['id']
        cluster = self.api.cluster_create(ctx, cluster_val)
        self.assertEqual(CORRECT_CONF, cluster['cluster_configs'])

        for node_group in cluster['node_groups']:
            if node_group['name'] == 'ng_1':
                self.assertEqual(['p1', 'p2'], node_group['node_processes'])
                self.assertEqual(10, node_group['volumes_size'])
                self.assertEqual(CORRECT_CONF, node_group['node_configs'])
Example #49
    def test_crud_operation_create_list_delete_update(self):
        ctx = context.ctx()
        job = self.api.job_create(ctx, SAMPLE_JOB)
        ds_input = self.api.data_source_create(ctx, SAMPLE_DATA_SOURCE)
        SAMPLE_DATA_OUTPUT = copy.copy(SAMPLE_DATA_SOURCE)
        SAMPLE_DATA_OUTPUT['name'] = 'output'
        ds_output = self.api.data_source_create(ctx, SAMPLE_DATA_OUTPUT)

        SAMPLE_JOB_EXECUTION['job_id'] = job['id']
        SAMPLE_JOB_EXECUTION['input_id'] = ds_input['id']
        SAMPLE_JOB_EXECUTION['output_id'] = ds_output['id']

        self.api.job_execution_create(ctx, SAMPLE_JOB_EXECUTION)

        lst = self.api.job_execution_get_all(ctx)
        self.assertEqual(len(lst), 1)

        job_ex_id = lst[0]['id']

        self.assertEqual(lst[0]['progress'], 0.1)
        self.api.job_execution_update(ctx, job_ex_id, {'progress': '0.2'})
        updated_job = self.api.job_execution_get(ctx, job_ex_id)
        self.assertEqual(updated_job['progress'], 0.2)

        self.api.job_execution_destroy(ctx, job_ex_id)

        lst = self.api.job_execution_get_all(ctx)
        self.assertEqual(len(lst), 0)
Example #50
def _provision_nodes(id, node_group_id_map):
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    cluster = conductor.cluster_update(ctx, cluster, {"status": "Scaling"})
    LOG.info(g.format_cluster_status(cluster))
    instances = i.scale_cluster(cluster, node_group_id_map, plugin)

    if instances:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Configuring"})
        LOG.info(g.format_cluster_status(cluster))
        try:
            plugin.scale_cluster(cluster, i.get_instances(cluster, instances))
        except Exception as ex:
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            conductor.cluster_update(ctx, cluster, {"status": "Error"})
            LOG.info(g.format_cluster_status(cluster))
            return

    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
Example #51
    def _set_cluster_info(self, cluster):
        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        info = {}

        if jt:
            address = c_helper.get_config_value(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            port = address[address.rfind(':') + 1:]
            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, port)
            }
            #TODO(aignatov) change from hardcode value
            info['MapReduce']['JobTracker'] = '%s:8021' % jt.hostname()

        if nn:
            address = c_helper.get_config_value(
                'HDFS', 'dfs.http.address', cluster)
            port = address[address.rfind(':') + 1:]
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, port)
            }
            #TODO(aignatov) change from hardcode value
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #52
    def test_one_node_groups_and_no_affinity_group(self, novaclient):
        node_groups = [_make_ng_dict("test_group", "test_flavor", ["data node", "task tracker"], 2)]

        cluster = _create_cluster_mock(node_groups, [])
        nova = _create_nova_mock(novaclient)
        instances._create_instances(cluster)
        userdata = _generate_user_data_script(cluster)

        nova.servers.create.assert_has_calls(
            [
                mock.call(
                    "test_cluster-test_group-001",
                    "initial",
                    "test_flavor",
                    scheduler_hints=None,
                    userdata=userdata,
                    key_name="user_keypair",
                ),
                mock.call(
                    "test_cluster-test_group-002",
                    "initial",
                    "test_flavor",
                    scheduler_hints=None,
                    userdata=userdata,
                    key_name="user_keypair",
                ),
            ],
            any_order=False,
        )

        ctx = context.ctx()
        cluster_obj = conductor.cluster_get_all(ctx)[0]
        self.assertEqual(len(cluster_obj.node_groups[0].instances), 2)
Example #53
def _run_instance(cluster, node_group, idx, aa_groups, userdata):
    """Create instance using nova client and persist them into DB."""
    ctx = context.ctx()
    name = '%s-%s-%03d' % (cluster.name, node_group.name, idx)

    # aa_groups: node process -> instance ids
    aa_ids = []
    for node_process in node_group.node_processes:
        aa_ids += aa_groups.get(node_process) or []

    # place instances only on hosts without instances of aa-enabled processes
    hints = {'different_host': list(set(aa_ids))} if aa_ids else None

    nova_instance = nova.client().servers.create(
        name,
        node_group.get_image_id(),
        node_group.flavor_id,
        scheduler_hints=hints,
        userdata=userdata,
        key_name=cluster.user_keypair_id)

    instance_id = conductor.instance_add(ctx, node_group, {
        "instance_id": nova_instance.id,
        "instance_name": name
    })
    # save instance id to aa_groups to support aa feature
    for node_process in node_group.node_processes:
        if node_process in cluster.anti_affinity:
            aa_group_ids = aa_groups.get(node_process, [])
            aa_group_ids.append(nova_instance.id)
            aa_groups[node_process] = aa_group_ids

    return instance_id
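
A small hypothetical driver loop (not from the project) showing how one shared aa_groups dict is threaded through _run_instance so that later instances of anti-affinity-enabled processes receive 'different_host' scheduler hints:

def _create_instances_sketch(cluster, userdata):
    # node process name -> ids of instances already running that process
    aa_groups = {}
    for node_group in cluster.node_groups:
        for idx in range(1, node_group.count + 1):
            _run_instance(cluster, node_group, idx, aa_groups, userdata)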
Example #54
def init_instances_ips(instance, server):
    """Extracts internal and management ips.

    The first IP from the Nova network CIDRs is used as the internal IP.
    If the use_floating_ips flag is set, the management IP will be the
    first non-internal IP.
    """
    ctx = context.ctx()

    if instance.internal_ip and instance.management_ip:
        return True

    management_ip = instance.management_ip
    internal_ip = instance.internal_ip

    for network_label in server.networks:
        nova_network = nova.client().networks.find(label=network_label)
        network = netaddr.IPNetwork(nova_network.cidr)
        for ip in server.networks[network_label]:
            if netaddr.IPAddress(ip) in network:
                internal_ip = instance.internal_ip or ip
            else:
                management_ip = instance.management_ip or ip

    if not CONF.use_floating_ips:
        management_ip = internal_ip

    conductor.instance_update(ctx, instance, {"management_ip": management_ip,
                                              "internal_ip": internal_ip})

    return internal_ip and management_ip
Example #55
def create_cluster(cluster):
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        _create_instances(cluster)

        # wait until all instances are up and accessible
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        cluster = _await_instances(cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))

        _configure_instances(cluster)
    except Exception as ex:
        LOG.warn("Can't start cluster '%s' (reason: %s)", cluster.name, ex)
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster, {
                "status": "Error",
                "status_description": str(ex)
            })
            LOG.info(g.format_cluster_status(cluster))
            _rollback_cluster_creation(cluster, ex)
Example #56
    def test_cluster_fields(self):
        ctx = context.ctx()
        cl_db_obj = self.api.cluster_create(ctx, SAMPLE_CLUSTER)
        self.assertIsInstance(cl_db_obj, dict)

        for key, val in SAMPLE_CLUSTER.items():
            if key == 'node_groups':
                # this will be checked separately
                continue
            self.assertEqual(val, cl_db_obj.get(key),
                             "Key not found %s" % key)

        for ng in cl_db_obj["node_groups"]:
            ng.pop("created_at")
            ng.pop("updated_at")
            ng.pop("id")
            self.assertEqual(ng.pop("cluster_id"), cl_db_obj["id"])
            ng.pop("image_id")
            self.assertEqual(ng.pop("instances"), [])
            ng.pop("node_configs")
            ng.pop("node_group_template_id")
            ng.pop("volume_mount_prefix")
            ng.pop("volumes_size")
            ng.pop("volumes_per_node")

        self.assertListEqual(SAMPLE_CLUSTER["node_groups"],
                             cl_db_obj["node_groups"])
Example #57
    def test_cluster_terminate(self, terminate_cluster, get_job_status):
        CONF.use_identity_api_v3 = True
        ctx = context.ctx()
        job = self.api.job_create(ctx, te.SAMPLE_JOB)
        ds = self.api.data_source_create(ctx, te.SAMPLE_DATA_SOURCE)
        c = tc.SAMPLE_CLUSTER.copy()
        c["status"] = "Active"
        c["id"] = "1"
        c["name"] = "1"
        self.api.cluster_create(ctx, c)
        c["id"] = "2"
        c["name"] = "2"
        self.api.cluster_create(ctx, c)
        self._create_job_execution({"end_time": datetime.datetime.now(),
                                    "id": 1,
                                    "cluster_id": "1"},
                                   job, ds, ds)
        self._create_job_execution({"end_time": None,
                                    "id": 2,
                                    "cluster_id": "2"},
                                   job, ds, ds)
        self._create_job_execution({"end_time": None,
                                    "id": 3,
                                    "cluster_id": "2"},
                                   job, ds, ds)
        p.SavannaPeriodicTasks().terminate_unneeded_clusters(None)
        self.assertEqual(1, len(terminate_cluster.call_args_list))
        terminated_cluster_id = terminate_cluster.call_args_list[0][0][0]
        self.assertEqual('1', terminated_cluster_id)