Beispiel #1
0
    def test_is_data_locality_enabled(self, get_config_value):
        self.CONF.enable_data_locality = False
        enabled = c_helper.is_data_locality_enabled(self.pctx, self.cluster)
        self.assertEqual(enabled, False)

        self.CONF.enable_data_locality = True
        target = c_helper.ENABLE_DATA_LOCALITY.applicable_target
        name = c_helper.ENABLE_DATA_LOCALITY.name
        c_helper.is_data_locality_enabled(self.pctx, self.cluster)
        get_config_value.assert_called_once_with(self.pctx, target,
                                                 name, self.cluster)
Beispiel #2
0
def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        "hadoop_user": HADOOP_USER,
        "hadoop_group": HADOOP_GROUP,
        "hadoop_conf_dir": HADOOP_CONF_DIR,
        "oozie_conf_dir": OOZIE_CONF_DIR,
        "hadoop_name_dirs": " ".join(dirs["hadoop_name_dirs"]),
        "hadoop_data_dirs": " ".join(dirs["hadoop_data_dirs"]),
        "hadoop_log_dir": dirs["hadoop_log_dir"],
        "hadoop_secure_dn_log_dir": dirs["hadoop_secure_dn_log_dir"],
        "yarn_log_dir": dirs["yarn_log_dir"],
    }
    post_conf_script = f.get_file_text("plugins/vanilla/hadoop2/resources/post_conf.template")
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to("/tmp/post_conf.sh", post_conf_script)
        r.execute_command("chmod +x /tmp/post_conf.sh")
        r.execute_command("sudo /tmp/post_conf.sh")

        if c_helper.is_data_locality_enabled(pctx, instance.cluster):
            t_script = HADOOP_CONF_DIR + "/topology.sh"
            r.write_file_to(
                t_script, f.get_file_text("plugins/vanilla/hadoop2/resources/topology.sh"), run_as_root=True
            )
            r.execute_command("chmod +x " + t_script, run_as_root=True)
Beispiel #3
0
def _post_configuration(pctx, instance):
    node_group = instance.node_group
    dirs = _get_hadoop_dirs(node_group)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(pctx,
                                             instance.node_group.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(t_script, f.get_file_text(
                            'plugins/vanilla/hadoop2/resources/topology.sh'),
                            run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)
Beispiel #4
0
def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(pctx,
                                             instance.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(t_script, f.get_file_text(
                            'plugins/vanilla/hadoop2/resources/topology.sh'),
                            run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)
Beispiel #5
0
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.info(_LI("Node group awareness is not implemented in YARN yet "
                     "so enable_hypervisor_awareness set to False explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)
Beispiel #6
0
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.info(_LI("Node group awareness is not implemented in YARN yet "
                     "so enable_hypervisor_awareness set to False explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)
Beispiel #7
0
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        "Hadoop": {"fs.defaultFS": "hdfs://%s:9000" % nn_hostname},
        "HDFS": {
            "dfs.namenode.name.dir": ",".join(dirs["hadoop_name_dirs"]),
            "dfs.datanode.data.dir": ",".join(dirs["hadoop_data_dirs"]),
            "dfs.hosts": "%s/dn-include" % HADOOP_CONF_DIR,
            "dfs.hosts.exclude": "%s/dn-exclude" % HADOOP_CONF_DIR,
        },
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs["YARN"] = {
            "yarn.nodemanager.aux-services": "mapreduce_shuffle",
            "yarn.resourcemanager.hostname": "%s" % res_hostname,
            "yarn.resourcemanager.nodes.include-path": "%s/nm-include" % (HADOOP_CONF_DIR),
            "yarn.resourcemanager.nodes.exclude-path": "%s/nm-exclude" % (HADOOP_CONF_DIR),
        }
        confs["MapReduce"] = {"mapreduce.framework.name": "yarn"}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs["MapReduce"]["mapreduce.jobhistory.address"] = "%s:10020" % hs_hostname

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {"hadoop.proxyuser.hadoop.hosts": "*", "hadoop.proxyuser.hadoop.groups": "hadoop"}
        confs["Hadoop"].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs["JobFlow"] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config["name"]] = config["value"]

        confs["Hadoop"].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs["Hadoop"].update(th.TOPOLOGY_CONFIG)
        confs["Hadoop"].update({"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            "hive.warehouse.subdir.inherit.perms": True,
            "javax.jdo.option.ConnectionURL": "jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true",
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update(
                {
                    "javax.jdo.option.ConnectionURL": "jdbc:mysql://%s/metastore" % hive_hostname,
                    "javax.jdo.option.ConnectionDriverName": "com.mysql.jdbc.Driver",
                    "javax.jdo.option.ConnectionUserName": "******",
                    "javax.jdo.option.ConnectionPassword": "******",
                    "datanucleus.autoCreateSchema": "false",
                    "datanucleus.fixedDatastore": "true",
                    "hive.metastore.uris": "thrift://%s:9083" % hive_hostname,
                }
            )

        proxy_configs = cluster.cluster_configs.get("proxy_configs")
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            key = key_manager.API().get(context.current(), proxy_configs["proxy_password"])
            password = key.get_encoded()
            hive_cfg.update(
                {
                    swift.HADOOP_SWIFT_USERNAME: proxy_configs["proxy_username"],
                    swift.HADOOP_SWIFT_PASSWORD: password,
                    swift.HADOOP_SWIFT_TRUST_ID: proxy_configs["proxy_trust_id"],
                    swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name,
                }
            )

        confs["Hive"] = hive_cfg

    return confs
Beispiel #8
0
def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.namenode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
Beispiel #9
0
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
Beispiel #10
0
def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.namenode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    return confs
Beispiel #11
0
def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.namenode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services':
            'mapreduce_shuffle',
            'yarn.resourcemanager.hostname':
            '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path':
            '%s/nm-include' % (HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path':
            '%s/nm-exclude' % (HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {'mapreduce.framework.name': 'yarn'}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = ("%s:10020" %
                                                                  hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    return confs