def finalize_configuration(self, cluster_spec):
    """Finalize the YARN-related configuration held by ``cluster_spec``.

    Three steps are performed, all of which update
    ``cluster_spec.configurations`` in place:

    * if a RESOURCEMANAGER host exists, the ``%RM_HOST%`` token in the
      listed ``yarn-site`` properties is replaced with that host's FQDN;
    * if ``CONF.enable_data_locality`` is set, every property returned by
      ``th.vm_awareness_mapred_config()`` is copied into ``mapred-site``;
    * if any node group contains NODEMANAGER, the NodeManager local
      directories are generated from the node groups' common paths.

    :param cluster_spec: cluster specification to finalize
    """
    rm_hosts = cluster_spec.determine_component_hosts('RESOURCEMANAGER')
    if rm_hosts:
        props = {
            'yarn-site': [
                'yarn.resourcemanager.resource-tracker.address',
                'yarn.resourcemanager.hostname',
                'yarn.resourcemanager.address',
                'yarn.resourcemanager.scheduler.address',
                'yarn.resourcemanager.webapp.address',
                'yarn.log.server.url',
                'yarn.resourcemanager.admin.address',
            ]
        }

        self._replace_config_token(
            cluster_spec, '%RM_HOST%', rm_hosts.pop().fqdn(), props)

    # data locality/rack awareness prop processing
    mapred_site_config = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for prop in th.vm_awareness_mapred_config():
            mapred_site_config[prop['name']] = prop['value']

    # process storage paths to accommodate ephemeral or cinder storage
    nm_node_groups = cluster_spec.get_node_groups_containing_component(
        'NODEMANAGER')
    if nm_node_groups:
        common_paths = self._get_common_paths(nm_node_groups)
        # 'yarn.nodemanager.local-dirs' is a YARN NodeManager property, so
        # it is stored in 'yarn-site' (the same configuration the
        # ResourceManager properties above live in), not in 'mapred-site'.
        yarn_site_config = cluster_spec.configurations['yarn-site']
        yarn_site_config['yarn.nodemanager.local-dirs'] = (
            self._generate_storage_path(common_paths,
                                        '/hadoop/yarn/local'))
def finalize_configuration(self, cluster_spec):
    """Finalize the MapReduce-related configuration in ``cluster_spec``.

    Substitutes the ``%JT_HOST%`` token when a JOBTRACKER host exists,
    copies the VM-awareness properties into ``mapred-site`` when
    ``CONF.enable_data_locality`` is set, and generates the local
    directory settings when some node group contains TASKTRACKER.

    :param cluster_spec: cluster specification, updated in place
    """
    job_trackers = cluster_spec.determine_component_hosts('JOBTRACKER')
    if job_trackers:
        token_props = {
            'mapred-site': [
                'mapred.job.tracker',
                'mapred.job.tracker.http.address',
                'mapreduce.history.server.http.address',
            ],
        }
        jt_fqdn = job_trackers.pop().fqdn()
        self._replace_config_token(
            cluster_spec, '%JT_HOST%', jt_fqdn, token_props)

    # data locality/rack awareness prop processing
    mapred_site = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for entry in th.vm_awareness_mapred_config():
            mapred_site[entry['name']] = entry['value']

    # process storage paths to accommodate ephemeral or cinder storage
    # NOTE: mapred.system.dir is an HDFS namespace path (not a filesystem
    # path) so the default path should suffice
    task_tracker_groups = (
        cluster_spec.get_node_groups_containing_component('TASKTRACKER'))
    if not task_tracker_groups:
        return

    global_settings = cluster_spec.configurations['global']
    shared_paths = self._get_common_paths(task_tracker_groups)

    site_local_dir = self._generate_storage_path(
        shared_paths, '/hadoop/mapred')
    mapred_site['mapred.local.dir'] = site_local_dir

    global_local_dir = self._generate_storage_path(
        shared_paths, '/hadoop/mapred')
    global_settings['mapred_local_dir'] = global_local_dir
def finalize_configuration(self, cluster_spec):
    """Apply the final MapReduce settings to ``cluster_spec``.

    * Replaces ``%JT_HOST%`` in the job tracker properties with the FQDN
      of a JOBTRACKER host, if one was determined.
    * Adds the properties from ``th.vm_awareness_mapred_config()`` to
      ``mapred-site`` when ``CONF.enable_data_locality`` is enabled.
    * Sets the MapReduce local directory in both ``mapred-site`` and
      ``global`` when a node group contains TASKTRACKER.

    :param cluster_spec: cluster specification, modified in place
    """
    tracker_hosts = cluster_spec.determine_component_hosts('JOBTRACKER')
    if tracker_hosts:
        jt_props = {'mapred-site': ['mapred.job.tracker',
                                    'mapred.job.tracker.http.address',
                                    'mapreduce.history.server.http.address']}
        tracker_fqdn = tracker_hosts.pop().fqdn()
        self._replace_config_token(
            cluster_spec, '%JT_HOST%', tracker_fqdn, jt_props)

    # data locality/rack awareness prop processing
    site_cfg = cluster_spec.configurations['mapred-site']
    if CONF.enable_data_locality:
        for awareness_prop in th.vm_awareness_mapred_config():
            prop_name = awareness_prop['name']
            site_cfg[prop_name] = awareness_prop['value']

    # process storage paths to accommodate ephemeral or cinder storage
    # NOTE: mapred.system.dir is an HDFS namespace path (not a filesystem
    # path) so the default path should suffice
    tt_groups = cluster_spec.get_node_groups_containing_component(
        'TASKTRACKER')
    if tt_groups:
        global_cfg = cluster_spec.configurations['global']
        paths = self._get_common_paths(tt_groups)
        local_dir_suffix = '/hadoop/mapred'
        site_cfg['mapred.local.dir'] = self._generate_storage_path(
            paths, local_dir_suffix)
        global_cfg['mapred_local_dir'] = self._generate_storage_path(
            paths, local_dir_suffix)
def test_map_red_config(self):
    """Check the entries returned by ``th.vm_awareness_mapred_config()``."""
    props = th.vm_awareness_mapred_config()
    self.assertEqual(3, len(props))

    # Descriptions are not part of the comparison; drop them so entries
    # can be matched against plain name/value dicts.
    for entry in props:
        del entry['description']

    schedulable_class = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
    expected_entries = (
        {'name': "mapred.jobtracker.nodegroup.aware", 'value': 'true'},
        {'name': "mapred.task.cache.levels", 'value': '3'},
        {'name': "mapred.jobtracker.jobSchedulable",
         'value': schedulable_class},
    )
    for expected in expected_entries:
        self.assertIn(expected, props)
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Generate the Hadoop XML configs for ``node_group``.

    The effective settings start from ``generate_savanna_configs``, are
    overridden by the node group's user-defined XML configs, and are then
    passed through ``generate_cfg_from_general``. They are rendered with
    ``x.create_hadoop_xml`` against the per-file default lists.

    :param cluster: cluster used to look up the oozie and hive server
        instances and to generate the base configs
    :param node_group: node group whose ``configuration()`` provides the
        user-defined configs
    :param hive_mysql_passwd: passed through to ``get_general_configs``
    :returns: dict keyed by config file name (``'core-site'``,
        ``'mapred-site'``, ``'hdfs-site'`` and, when the respective
        hostname is available, ``'hive-site'`` / ``'oozie-site'``)
    """
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Work on a copy: ``mapred_all += ...`` below would otherwise extend the
    # shared MAPRED_DEFAULT object in place (if it is a list), making the
    # VM-awareness entries accumulate across calls. ``core_all`` is already
    # a fresh object produced by ``+``.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def test_map_red_config(self):
    """Verify the MapReduce VM-awareness config produced by ``th``."""
    config_items = th.vm_awareness_mapred_config()
    self.assertEqual(3, len(config_items))

    # Remove the description key from every item before comparing.
    for config_item in config_items:
        del config_item['description']

    node_group_aware = {'name': "mapred.jobtracker.nodegroup.aware",
                        'value': 'true'}
    self.assertIn(node_group_aware, config_items)

    cache_levels = {'name': "mapred.task.cache.levels", 'value': '3'}
    self.assertIn(cache_levels, config_items)

    class_name = 'org.apache.hadoop.mapred.JobSchedulableWithNodeGroup'
    job_schedulable = {'name': "mapred.jobtracker.jobSchedulable",
                       'value': class_name}
    self.assertIn(job_schedulable, config_items)
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    """Build the Hadoop XML configuration files for a node group.

    Base configs come from ``generate_savanna_configs``; the node group's
    user-defined XML configs are layered on top and the result is run
    through ``generate_cfg_from_general``. Each output file is rendered by
    ``x.create_hadoop_xml`` using the matching list of defaults.

    :param cluster: cluster used for the oozie / hive server lookups and
        for generating the base configs
    :param node_group: node group supplying user configs through
        ``configuration()``
    :param hive_mysql_passwd: forwarded to ``get_general_configs``
    :returns: dict mapping ``'core-site'``, ``'mapred-site'`` and
        ``'hdfs-site'`` (plus ``'hive-site'`` / ``'oozie-site'`` when the
        corresponding hostname is truthy) to the rendered XML
    """
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy the defaults so that the ``+=`` below cannot extend the shared
    # MAPRED_DEFAULT object in place (which, for a list, would append the
    # VM-awareness entries again on every call).
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def finalize_configuration(self, cluster_spec):
    """Finalize the history server settings in ``cluster_spec``.

    Replaces the ``%HS_HOST%`` token in the job history properties when a
    HISTORYSERVER host exists, then copies the VM-awareness properties
    into ``mapred-site`` if ``CONF.enable_data_locality`` is set.

    :param cluster_spec: cluster specification, updated in place
    """
    history_servers = cluster_spec.determine_component_hosts('HISTORYSERVER')
    if history_servers:
        token_props = {
            'mapred-site': [
                'mapreduce.jobhistory.webapp.address',
                'mapreduce.jobhistory.address',
            ],
        }
        hs_fqdn = history_servers.pop().fqdn()
        self._replace_config_token(
            cluster_spec, '%HS_HOST%', hs_fqdn, token_props)

    # data locality/rack awareness prop processing
    mapred_site = cluster_spec.configurations['mapred-site']
    if not CONF.enable_data_locality:
        return

    for entry in th.vm_awareness_mapred_config():
        mapred_site[entry['name']] = entry['value']
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname,
                         passwd_hive_mysql):
    """Generate the Hadoop XML configs from hostnames and user configs.

    A base dict of HDFS settings is built from ``nn_hostname`` and
    ``storage_path``; MapReduce, Oozie and Hive settings are added when
    the respective hostname is truthy. User-defined configs from
    ``configs`` override those values, and the result is passed through
    ``generate_cfg_from_general`` before being rendered with
    ``x.create_hadoop_xml``.

    :param configs: user-defined configs, read via ``extract_xml_confs``
    :param storage_path: passed to ``extract_hadoop_path`` to build the
        HDFS and MapReduce directory settings
    :param nn_hostname: name node hostname used in ``fs.default.name``
    :param jt_hostname: job tracker hostname; falsy to skip MapReduce
        settings
    :param oozie_hostname: oozie hostname; falsy to skip Oozie settings
    :param hive_hostname: hive hostname; falsy to skip Hive settings
    :param passwd_hive_mysql: forwarded to ``get_general_configs``
    :returns: dict mapping ``'core-site'``, ``'mapred-site'`` and
        ``'hdfs-site'`` (plus ``'hive-site'`` / ``'oozie-site'`` when the
        corresponding hostname is truthy) to the rendered XML
    """
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Work on a copy: ``mapred_all += ...`` below would otherwise extend the
    # shared MAPRED_DEFAULT object in place (if it is a list), making the
    # VM-awareness entries accumulate across calls. ``core_all`` is already
    # a fresh object produced by ``+``.
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname,
                         passwd_hive_mysql):
    """Build the Hadoop XML configuration files for the given hosts.

    Starts from HDFS settings derived from ``nn_hostname`` and
    ``storage_path``, adds MapReduce / Oozie / Hive settings for each
    truthy hostname, overlays the user-defined configs from ``configs``,
    runs the result through ``generate_cfg_from_general`` and renders
    each file with ``x.create_hadoop_xml``.

    :param configs: user-defined configs, read via ``extract_xml_confs``
    :param storage_path: passed to ``extract_hadoop_path`` for the HDFS
        and MapReduce directory settings
    :param nn_hostname: name node hostname used in ``fs.default.name``
    :param jt_hostname: job tracker hostname; falsy to skip MapReduce
        settings
    :param oozie_hostname: oozie hostname; falsy to skip Oozie settings
    :param hive_hostname: hive hostname; falsy to skip Hive settings
    :param passwd_hive_mysql: forwarded to ``get_general_configs``
    :returns: dict keyed by config file name (``'core-site'``,
        ``'mapred-site'``, ``'hdfs-site'`` and optionally ``'hive-site'``
        / ``'oozie-site'``)
    """
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if user enabled it
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # invoking applied configs to appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    # Copy the defaults so that the ``+=`` below cannot extend the shared
    # MAPRED_DEFAULT object in place (which, for a list, would append the
    # VM-awareness entries again on every call).
    mapred_all = list(MAPRED_DEFAULT)

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs