def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozies_hostnames):
    # common configs depend on the provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozies_hostnames:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': ",".join(
                ["localhost"] + oozies_hostnames),
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')

    # insert user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed: general configs, not only Swift
    swift_in_config = False
    if ('general' in configs and
            ENABLE_SWIFT.name in configs['general']):
        swift_in_config = True
    if ((swift_in_config and configs['general'][ENABLE_SWIFT.name]) or
            (not swift_in_config and ENABLE_SWIFT.default_value)):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # render the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    return xml_configs
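# The Swift toggle above honors an explicit user choice in the 'general'
# configs and falls back to the flag's default when the key is absent.
# A minimal, self-contained sketch of that decision; ENABLE_SWIFT here is
# a stand-in namedtuple and the 'Enable Swift' name is an assumption, not
# the project's actual config object:
import collections

Config = collections.namedtuple('Config', ['name', 'default_value'])
ENABLE_SWIFT = Config(name='Enable Swift', default_value=True)


def swift_enabled(configs):
    general = configs.get('general', {})
    if ENABLE_SWIFT.name in general:
        # an explicit user choice wins over the default
        return bool(general[ENABLE_SWIFT.name])
    return ENABLE_SWIFT.default_value


assert swift_enabled({}) is True
assert swift_enabled({'general': {'Enable Swift': False}}) is False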
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname):
    # common configs depend on the provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        "fs.default.name": "hdfs://%s:8020" % nn_hostname,
        "dfs.name.dir": extract_hadoop_path(storage_path,
                                            "/lib/hadoop/hdfs/namenode"),
        "dfs.data.dir": extract_hadoop_path(storage_path,
                                            "/lib/hadoop/hdfs/datanode"),
        "dfs.hosts": "/etc/hadoop/dn.incl",
        "dfs.hosts.exclude": "/etc/hadoop/dn.excl",
    }

    if jt_hostname:
        mr_cfg = {
            "mapred.job.tracker": "%s:8021" % jt_hostname,
            "mapred.system.dir": extract_hadoop_path(storage_path,
                                                     "/mapred/mapredsystem"),
            "mapred.local.dir": extract_hadoop_path(storage_path,
                                                    "/lib/hadoop/mapred"),
            "mapred.hosts": "/etc/hadoop/tt.incl",
            "mapred.hosts.exclude": "/etc/hadoop/tt.excl",
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            "hadoop.proxyuser.hadoop.hosts": "localhost," + oozie_hostname,
            "hadoop.proxyuser.hadoop.groups": "hadoop",
        }
        cfg.update(o_cfg)
        LOG.debug("Applied Oozie configs for core-site.xml")
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug("Applied Oozie configs for oozie-site.xml")

    # insert user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed: general configs, not only Swift
    swift_in_config = False
    if "general" in configs and ENABLE_SWIFT.name in configs["general"]:
        swift_in_config = True
    if ((swift_in_config and configs["general"][ENABLE_SWIFT.name]) or
            (not swift_in_config and ENABLE_SWIFT.default_value)):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # render the applied configs into the appropriate xml files
    xml_configs = {
        "core-site": x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        "mapred-site": x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        "hdfs-site": x.create_hadoop_xml(cfg, HDFS_DEFAULT),
    }

    if oozie_hostname:
        xml_configs.update(
            {"oozie-site": x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug("Generated oozie-site.xml for oozie %s", oozie_hostname)

    return xml_configs
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))
    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # insert user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # render the applied configs into the appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)
        # apply vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job-type-specific setup here, for example
    # uploading the hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml, hdfs_user)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
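# client.add_job above hands the job_parameters dict, serialized to
# Hadoop-style XML, to Oozie's job-submission endpoint. A bare-bones sketch
# of that submission over Oozie's documented REST API (POST /v1/jobs with an
# XML configuration body returns {"id": "<oozie-job-id>"}); the project's
# OozieClient is assumed to do roughly this, so treat it as illustrative:
import requests


def submit_oozie_job(oozie_url, job_xml):
    # oozie_url is e.g. "http://<oozie-host>:11000/oozie"
    resp = requests.post(
        '%s/v1/jobs' % oozie_url.rstrip('/'),
        data=job_xml,
        headers={'Content-Type': 'application/xml;charset=UTF-8'})
    resp.raise_for_status()
    return resp.json()['id']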
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job-type-specific setup here, for example
    # uploading the hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml, hdfs_user)

    jt_path = cluster['info']['MapReduce']['JobTracker']
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
def test_create_hadoop_xml(self):
    conf = x.load_hadoop_xml_defaults(
        'tests/unit/resources/test-default.xml')
    self.assertEqual(x.create_hadoop_xml({'name1': 'some_val1',
                                          'name2': 2}, conf),
                     """<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>name2</name>
    <value>2</value>
  </property>
  <property>
    <name>name1</name>
    <value>some_val1</value>
  </property>
</configuration>
""")
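# A minimal sketch of a create_hadoop_xml-style serializer that would
# produce output shaped like the test expects: each dict entry becomes a
# <property> with <name>/<value> children under a <configuration> root.
# The project's real implementation (and its second config-filter argument)
# may differ; this only illustrates the dict-to-XML mapping:
import xml.dom.minidom as minidom


def create_hadoop_xml_sketch(configs):
    doc = minidom.Document()
    # the stylesheet processing instruction seen in the expected output
    doc.appendChild(doc.createProcessingInstruction(
        'xml-stylesheet', 'type="text/xsl" href="configuration.xsl"'))
    root = doc.createElement('configuration')
    doc.appendChild(root)
    for name, value in configs.items():
        prop = doc.createElement('property')
        for tag, text in [('name', name), ('value', str(value))]:
            elem = doc.createElement(tag)
            elem.appendChild(doc.createTextNode(text))
            prop.appendChild(elem)
        root.appendChild(prop)
    return doc.toprettyxml(indent='  ')


print(create_hadoop_xml_sketch({'name1': 'some_val1', 'name2': 2}))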
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != "Active":
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job_origin)

    creator = workflow_factory.get_creator(job.type, job_origin)

    # Do other job-type-specific setup here, for example
    # uploading the hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml)

    jt_path = "%s:8021" % u.get_jobtracker(cluster).hostname
    nn_path = "hdfs://%s:8020" % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster["info"]["JobFlow"]["Oozie"] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {"oozie_job_id": oozie_job_id,
         "start_time": datetime.datetime.now()})

    return job_execution
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job_origin)

    if job.type == 'Hive':
        upload_hive_site(cluster, wf_dir)

    wf_xml = build_workflow_for_job(job.type, job_execution, job_origin,
                                    input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
def test_create_hadoop_xml(self):
    conf = x.load_hadoop_xml_defaults(
        'tests/unit/resources/test-default.xml')
    self.assertEqual(
        x.create_hadoop_xml({
            'name1': 'some_val1',
            'name2': 2
        }, conf),
        """<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>name2</name>
    <value>2</value>
  </property>
  <property>
    <name>name1</name>
    <value>some_val1</value>
  </property>
</configuration>
""")
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_file(u.get_jobtracker(cluster), wf_dir, job_origin, job)

    wf_xml = build_workflow_for_job(job.type, input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname):
    set_general_configs(hive_hostname)
    # common configs depend on the provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # insert user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, GENERAL_CONFS)

    # render the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname):
    # common configs depend on the provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    # insert user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed: general configs, not only Swift
    swift_in_config = False
    if ('general' in configs and
            ENABLE_SWIFT.name in configs['general']):
        swift_in_config = True
    if ((swift_in_config and configs['general'][ENABLE_SWIFT.name]) or
            (not swift_in_config and ENABLE_SWIFT.default_value)):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # render the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname, passwd_hive_mysql):
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # common configs depend on the provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # insert user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # apply swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # render the applied configs into the appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)
        # apply vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    if not edp.compare_job_type(job.type, 'Java'):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx,
                                                  job_execution.output_id)
    else:
        input_source = None
        output_source = None
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job-type-specific setup here, for example
    # uploading the hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(oozie_server, wf_dir, wf_xml,
                                            hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })
    client.run_job(job_execution, oozie_job_id)

    return job_execution
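# After run_job returns, callers typically poll Oozie until the workflow
# reaches a terminal state. A rough sketch against Oozie's documented REST
# API (GET /v1/job/<id>?show=info returns JSON including a "status" field);
# the project's OozieClient wraps this, so the URL layout and state names
# here are illustrative rather than the project's exact code:
import time

import requests

TERMINAL_STATES = ('SUCCEEDED', 'KILLED', 'FAILED')


def wait_for_oozie_job(oozie_url, oozie_job_id, interval=5):
    # oozie_url is e.g. "http://<oozie-host>:11000/oozie"
    while True:
        resp = requests.get(
            '%s/v1/job/%s' % (oozie_url.rstrip('/'), oozie_job_id),
            params={'show': 'info'})
        resp.raise_for_status()
        status = resp.json()['status']
        if status in TERMINAL_STATES:
            return status
        time.sleep(interval)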