Beispiel #1
0
 def _get_mapred_site_node_aware_props(self):
     """Return node-awareness mapred-site properties as a name->value dict.

     Drops mapred.task.cache.levels (the original comment marks it as
     causing failures) and sets mapreduce.jobtracker.taskcache.levels
     to '3' in its place.
     """
     props = {item['name']: item['value']
              for item in topo.vm_awareness_mapred_config()}
     # This config causes failure
     del props['mapred.task.cache.levels']
     props['mapreduce.jobtracker.taskcache.levels'] = '3'
     return props
Beispiel #2
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the JOBTRACKER component.

        Replaces the %JT_HOST% token in mapred-site, merges
        data-locality (rack awareness) properties when enabled, and
        points the MapReduce local dirs at the storage paths shared by
        all TASKTRACKER node groups.
        """
        jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
        if jt_hosts:
            jt_props = {
                'mapred-site': [
                    'mapred.job.tracker',
                    'mapred.job.tracker.http.address',
                    'mapreduce.history.server.http.address']}

            self._replace_config_token(
                cluster_spec, '%JT_HOST%', jt_hosts.pop().fqdn(), jt_props)

        # data locality/rack awareness prop processing
        mapred_site = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for entry in th.vm_awareness_mapred_config():
                mapred_site[entry['name']] = entry['value']

        # process storage paths to accommodate ephemeral or cinder storage
        # NOTE:  mapred.system.dir is an HDFS namespace path (not a filesystem
        # path) so the default path should suffice
        tt_groups = cluster_spec.get_node_groups_containing_component(
            'TASKTRACKER')
        if tt_groups:
            shared_paths = self._get_common_paths(tt_groups)
            mapred_site['mapred.local.dir'] = self._generate_storage_path(
                shared_paths, '/hadoop/mapred')
            cluster_spec.configurations['global']['mapred_local_dir'] = (
                self._generate_storage_path(shared_paths, '/hadoop/mapred'))
Beispiel #3
0
 def _get_mapred_site_node_aware_props(self):
     """Return node-awareness mapred-site properties as a name/value dict.

     Built from topo.vm_awareness_mapred_config(); the
     mapred.task.cache.levels property is removed (causes failures) and
     mapreduce.jobtracker.taskcache.levels is set to '3' instead.
     """
     result = topo.vm_awareness_mapred_config()
     # Flatten the list of {'name': ..., 'value': ...} entries into a dict.
     result = {c['name']: c['value'] for c in result}
     # This config causes failure; drop it and use the
     # mapreduce.jobtracker.taskcache.levels setting below instead.
     result.pop('mapred.task.cache.levels')
     result['mapreduce.jobtracker.taskcache.levels'] = '3'
     return result
Beispiel #4
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the RESOURCEMANAGER component.

        Replaces the %RM_HOST% token in the yarn-site properties, merges
        data-locality properties into mapred-site when enabled, and sets
        the NodeManager local dirs from the NODEMANAGER node groups'
        common storage paths.
        """
        rm_hosts = cluster_spec.determine_component_hosts('RESOURCEMANAGER')
        if rm_hosts:
            # yarn-site properties that contain the %RM_HOST% token.
            props = {'yarn-site': ['yarn.resourcemanager.'
                                   'resource-tracker.address',
                                   'yarn.resourcemanager.hostname',
                                   'yarn.resourcemanager.address',
                                   'yarn.resourcemanager.scheduler.address',
                                   'yarn.resourcemanager.webapp.address',
                                   'yarn.log.server.url',
                                   'yarn.resourcemanager.admin.address']}

            self._replace_config_token(
                cluster_spec, '%RM_HOST%', rm_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']

        # process storage paths to accommodate ephemeral or cinder storage
        nm_node_groups = cluster_spec.get_node_groups_containing_component(
            'NODEMANAGER')
        if nm_node_groups:
            common_paths = self._get_common_paths(nm_node_groups)
            # NOTE(review): this writes the YARN property
            # yarn.nodemanager.local-dirs into the mapred-site config; a
            # sibling implementation in this file writes it into the
            # yarn-site config instead — confirm which file YARN actually
            # reads this from for this plugin version.
            mapred_site_config['yarn.nodemanager.local-dirs'] = (
                self._generate_storage_path(common_paths,
                                            '/hadoop/yarn/local'))
Beispiel #5
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the RESOURCEMANAGER component.

        Replaces the %RM_HOST% token in yarn-site, merges data-locality
        properties into mapred-site when enabled, and sets the
        NodeManager local dirs from the NODEMANAGER node groups' common
        storage paths.
        """
        rm_hosts = cluster_spec.determine_component_hosts('RESOURCEMANAGER')
        if rm_hosts:
            rm_props = {
                'yarn-site': [
                    'yarn.resourcemanager.resource-tracker.address',
                    'yarn.resourcemanager.hostname',
                    'yarn.resourcemanager.address',
                    'yarn.resourcemanager.scheduler.address',
                    'yarn.resourcemanager.webapp.address',
                    'yarn.log.server.url',
                    'yarn.resourcemanager.admin.address',
                ]
            }

            self._replace_config_token(
                cluster_spec, '%RM_HOST%', rm_hosts.pop().fqdn(), rm_props)

        # data locality/rack awareness prop processing
        mapred_site = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for entry in th.vm_awareness_mapred_config():
                mapred_site[entry['name']] = entry['value']

        # process storage paths to accommodate ephemeral or cinder storage
        yarn_site = cluster_spec.configurations['yarn-site']
        nm_groups = cluster_spec.get_node_groups_containing_component(
            'NODEMANAGER')
        if nm_groups:
            storage_paths = self._get_common_paths(nm_groups)
            yarn_site['yarn.nodemanager.local-dirs'] = (
                self._generate_storage_path(storage_paths,
                                            '/hadoop/yarn/local'))
Beispiel #6
0
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Generate the Hadoop XML config files for the given node group.

    Returns a dict keyed by config file name ('core-site',
    'mapred-site', 'hdfs-site', plus 'hive-site'/'oozie-site' when
    those services are deployed) whose values are the rendered XML.

    :param cluster: cluster the node group belongs to
    :param node_group: node group whose configuration is rendered
    :param hive_mysql_passwd: MySQL password injected into Hive configs
    """
    oozie_hostname = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy so the += below cannot mutate the module-level MAPRED_DEFAULT:
    # binding the name directly and then using in-place += would append
    # the vm-awareness entries to MAPRED_DEFAULT itself on every call.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        cfg = all_cfg
        cfg_filter = HIVE_DEFAULT
        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        # With Keystone v3 and proxy users, swift trust info must be
        # injected into the Hive config set before rendering.
        if CONF.use_identity_api_v3 and proxy_configs:
            cfg, cfg_filter = _inject_swift_trust_info(cfg,
                                                       cfg_filter,
                                                       proxy_configs)
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(cfg, cfg_filter)})
        LOG.debug('Generated hive-site.xml for hive {host}'.format(
            host=hive_hostname))

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie {host}'.format(
            host=oozie_hostname))

    return xml_configs
Beispiel #7
0
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Generate the Hadoop XML config files for the given node group.

    Returns a dict keyed by config file name ('core-site',
    'mapred-site', 'hdfs-site', plus 'hive-site'/'oozie-site' when
    those services are deployed) whose values are the rendered XML.

    :param cluster: cluster the node group belongs to
    :param node_group: node group whose configuration is rendered
    :param hive_mysql_passwd: MySQL password injected into Hive configs
    """
    oozie_hostname = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy so the += below cannot mutate the module-level MAPRED_DEFAULT:
    # binding the name directly and then using in-place += would append
    # the vm-awareness entries to MAPRED_DEFAULT itself on every call.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        cfg = all_cfg
        cfg_filter = HIVE_DEFAULT
        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        # With Keystone v3 and proxy users, swift trust info must be
        # injected into the Hive config set before rendering.
        if CONF.use_identity_api_v3 and proxy_configs:
            cfg, cfg_filter = _inject_swift_trust_info(cfg, cfg_filter,
                                                       proxy_configs)
        xml_configs.update({'hive-site': x.create_hadoop_xml(cfg, cfg_filter)})
        LOG.debug('Generated hive-site.xml for hive {host}'.format(
            host=hive_hostname))

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie {host}'.format(
            host=oozie_hostname))

    return xml_configs
Beispiel #8
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the HISTORYSERVER component.

        Replaces the %HS_HOST% token in the mapred-site job-history
        addresses and merges data-locality properties when enabled.
        """
        hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
        if hs_hosts:
            hs_props = {
                'mapred-site': ['mapreduce.jobhistory.webapp.address',
                                'mapreduce.jobhistory.address']}

            self._replace_config_token(
                cluster_spec, '%HS_HOST%', hs_hosts.pop().fqdn(), hs_props)

        # data locality/rack awareness prop processing
        mapred_site = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for entry in th.vm_awareness_mapred_config():
                mapred_site[entry['name']] = entry['value']
Beispiel #9
0
    def test_map_red_config(self):
        """Check vm_awareness_mapred_config returns exactly the three
        expected node-group-awareness properties."""
        props = th.vm_awareness_mapred_config()
        self.assertEqual(3, len(props))
        # Strip the free-form description so the dict comparisons below
        # only look at name/value.
        for prop in props:
            del prop['description']

        expected = [
            {'name': "mapred.jobtracker.nodegroup.aware",
             'value': 'true'},
            {'name': "mapred.task.cache.levels",
             'value': '3'},
            {'name': "mapred.jobtracker.jobSchedulable",
             'value': 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'},
        ]
        for item in expected:
            self.assertIn(item, props)
Beispiel #10
0
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Generate the Hadoop XML config files for the given node group.

    Returns a dict keyed by config file name ('core-site',
    'mapred-site', 'hdfs-site', plus 'hive-site'/'oozie-site' when
    those services are deployed) whose values are the rendered XML.

    :param cluster: cluster the node group belongs to
    :param node_group: node group whose configuration is rendered
    :param hive_mysql_passwd: MySQL password injected into Hive configs
    """
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy so the += below cannot mutate the module-level MAPRED_DEFAULT:
    # binding the name directly and then using in-place += would append
    # the vm-awareness entries to MAPRED_DEFAULT itself on every call.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        # was '% s' — stray space flag in the conversion spec
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        # was '% s' — stray space flag in the conversion spec
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Beispiel #11
0
    def test_map_red_config(self):
        """Verify vm_awareness_mapred_config returns exactly the three
        expected node-group-awareness name/value properties."""
        result = th.vm_awareness_mapred_config()
        self.assertEqual(3, len(result))
        # Strip the free-form description so the dict comparisons below
        # only look at name/value.
        for item in result:
            del item['description']

        self.assertIn({'name': "mapred.jobtracker.nodegroup.aware",
                       'value': 'true'},
                      result)

        self.assertIn({'name': "mapred.task.cache.levels",
                       'value': '3'},
                      result)
        className = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
        self.assertIn({'name': "mapred.jobtracker.jobSchedulable",
                       'value': className},
                      result)
Beispiel #12
0
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Generate the Hadoop XML config files for the given node group.

    Returns a dict keyed by config file name ('core-site',
    'mapred-site', 'hdfs-site', plus 'hive-site'/'oozie-site' when
    those services are deployed) whose values are the rendered XML.

    :param cluster: cluster the node group belongs to
    :param node_group: node group whose configuration is rendered
    :param hive_mysql_passwd: MySQL password injected into Hive configs
    """
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_sahara_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy so the += below cannot mutate the module-level MAPRED_DEFAULT:
    # binding the name directly and then using in-place += would append
    # the vm-awareness entries to MAPRED_DEFAULT itself on every call.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        # was '% s' — stray space flag in the conversion spec
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        # was '% s' — stray space flag in the conversion spec
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Beispiel #13
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the HISTORYSERVER component.

        Replaces the %HS_HOST% token in the job-history addresses and,
        when data locality is enabled, merges the rack-awareness
        properties into mapred-site.
        """
        hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
        if hs_hosts:
            history_props = {'mapred-site': [
                'mapreduce.jobhistory.webapp.address',
                'mapreduce.jobhistory.address']}

            self._replace_config_token(
                cluster_spec, '%HS_HOST%', hs_hosts.pop().fqdn(),
                history_props)

        # data locality/rack awareness prop processing
        site = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for entry in th.vm_awareness_mapred_config():
                site[entry['name']] = entry['value']
Beispiel #14
0
    def finalize_configuration(self, cluster_spec):
        """Finish configuration for the MapReduce 1 (JOBTRACKER) service.

        Replaces the %JT_HOST% and %HS_HOST% tokens, merges
        data-locality properties into mapred-site when enabled, and
        rewrites the MapReduce local dirs to the storage paths common to
        all TASKTRACKER node groups.
        """
        jt_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
        if jt_hosts:
            # mapred-site properties that contain the %JT_HOST% token.
            props = {
                'mapred-site': [
                    'mapred.job.tracker', 'mapred.job.tracker.http.address',
                    'mapreduce.history.server.http.address'
                ]
            }

            self._replace_config_token(cluster_spec, '%JT_HOST%',
                                       jt_hosts.pop().fqdn(), props)

        # HISTORYSERVER component now a part of MapReduce 1 in Ambari 1.6.0
        hs_hosts = cluster_spec.determine_component_hosts('HISTORYSERVER')
        if hs_hosts:
            props = {'mapred-site': ['mapreduce.jobhistory.webapp.address']}

            self._replace_config_token(cluster_spec, '%HS_HOST%',
                                       hs_hosts.pop().fqdn(), props)

        # data locality/rack awareness prop processing
        mapred_site_config = cluster_spec.configurations['mapred-site']
        if CONF.enable_data_locality:
            for prop in th.vm_awareness_mapred_config():
                mapred_site_config[prop['name']] = prop['value']

        # process storage paths to accommodate ephemeral or cinder storage
        # NOTE:  mapred.system.dir is an HDFS namespace path (not a filesystem
        # path) so the default path should suffice
        tt_node_groups = cluster_spec.get_node_groups_containing_component(
            'TASKTRACKER')
        if tt_node_groups:
            global_config = cluster_spec.configurations['global']
            common_paths = self._get_common_paths(tt_node_groups)
            # Same generated path goes to both mapred-site and global config.
            mapred_site_config['mapred.local.dir'] = (
                self._generate_storage_path(common_paths, '/hadoop/mapred'))
            global_config['mapred_local_dir'] = self._generate_storage_path(
                common_paths, '/hadoop/mapred')
Beispiel #15
0
 def _get_mapred_site_props(self, context):
     """Return node-awareness mapred-site properties as a name/value dict.

     Yields an empty dict when the context is not node aware.
     """
     if not context.is_node_aware:
         return {}
     return {conf['name']: conf['value']
             for conf in topo.vm_awareness_mapred_config()}