Example #1
def generate_xml_configs(configs, storage_path, nn_hostname,
                         jt_hostname, oozies_hostnames):
    # inserting common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozies_hostnames:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': ",".join(
                ["localhost"] + oozies_hostnames),
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed. General configs, not only Swift
    swift_in_config = False
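    # an explicit ENABLE_SWIFT value in the user's 'general' configs wins;
    # otherwise the option's default_value decides whether swift is applied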
    if ('general' in configs and
            ENABLE_SWIFT.name in configs['general']):
        swift_in_config = True
    if ((swift_in_config and configs['general'][ENABLE_SWIFT.name]) or
            (not swift_in_config and ENABLE_SWIFT.default_value)):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # rendering the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    return xml_configs
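
A minimal usage sketch for the function above, assuming it is imported from
its defining module; the hostnames, storage path, and /etc/hadoop target
directory are placeholders rather than values taken from the example:

xml_configs = generate_xml_configs(
    configs={}, storage_path='/mnt', nn_hostname='master',
    jt_hostname='master', oozies_hostnames=['oozie-1'])
for name, xml in xml_configs.items():
    # e.g. the 'core-site' entry is written out as /etc/hadoop/core-site.xml
    with open('/etc/hadoop/%s.xml' % name, 'w') as f:
        f.write(xml)
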
Example #2
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname, oozie_hostname):
    # inserting common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        "fs.default.name": "hdfs://%s:8020" % nn_hostname,
        "dfs.name.dir": extract_hadoop_path(storage_path, "/lib/hadoop/hdfs/namenode"),
        "dfs.data.dir": extract_hadoop_path(storage_path, "/lib/hadoop/hdfs/datanode"),
        "dfs.hosts": "/etc/hadoop/dn.incl",
        "dfs.hosts.exclude": "/etc/hadoop/dn.excl",
    }

    if jt_hostname:
        mr_cfg = {
            "mapred.job.tracker": "%s:8021" % jt_hostname,
            "mapred.system.dir": extract_hadoop_path(storage_path, "/mapred/mapredsystem"),
            "mapred.local.dir": extract_hadoop_path(storage_path, "/lib/hadoop/mapred"),
            "mapred.hosts": "/etc/hadoop/tt.incl",
            "mapred.hosts.exclude": "/etc/hadoop/tt.excl",
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            "hadoop.proxyuser.hadoop.hosts": "localhost," + oozie_hostname,
            "hadoop.proxyuser.hadoop.groups": "hadoop",
        }
        cfg.update(o_cfg)
        LOG.debug("Applied Oozie configs for core-site.xml")
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug("Applied Oozie configs for oozie-site.xml")

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed. General configs, not only Swift
    swift_in_config = False
    if "general" in configs and ENABLE_SWIFT.name in configs["general"]:
        swift_in_config = True
    if (swift_in_config and configs["general"][ENABLE_SWIFT.name]) or (
        not swift_in_config and ENABLE_SWIFT.default_value
    ):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # rendering the applied configs into the appropriate xml files
    xml_configs = {
        "core-site": x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        "mapred-site": x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        "hdfs-site": x.create_hadoop_xml(cfg, HDFS_DEFAULT),
    }

    if oozie_hostname:
        xml_configs.update({"oozie-site": x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug("Generated oozie-site.xml for oozie % s", oozie_hostname)

    return xml_configs
Example #3
def generate_xml_configs(cluster, node_group, hive_mysql_passwd):
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    ng_configs = node_group.configuration()

    general_cfg = get_general_configs(hive_hostname, hive_mysql_passwd)

    all_cfg = generate_savanna_configs(cluster, node_group)

    # inserting user-defined configs
    for key, value in extract_xml_confs(ng_configs):
        all_cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    all_cfg = generate_cfg_from_general(all_cfg, ng_configs, general_cfg)

    # rendering the applied configs into the appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        all_cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(all_cfg, core_all),
        'mapred-site': x.create_hadoop_xml(all_cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(all_cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(all_cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(all_cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #4
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster), wf_dir,
                                            wf_xml, hdfs_user)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
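
For illustration: based on the unit test for create_hadoop_xml later in this
listing, the payload handed to client.add_job above should be a Hadoop-style
configuration document whose <property> entries mirror job_parameters (the
call here passes no defaults list, so that second parameter is presumably
optional). Abridged, with 'master' as a placeholder hostname:

<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>jobTracker</name>
    <value>master:8021</value>
  </property>
  <property>
    <name>nameNode</name>
    <value>hdfs://master:8020</value>
  </property>
  <property>
    <name>oozie.use.system.libpath</name>
    <value>true</value>
  </property>
</configuration>
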
Example #5
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)

    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml, hdfs_user)

    jt_path = cluster['info']['MapReduce']['JobTracker']
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {"jobTracker": jt_path,
                      "nameNode": nn_path,
                      "user.name": hdfs_user,
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Example #6
    def test_create_hadoop_xml(self):
        conf = x.load_hadoop_xml_defaults(
            'tests/unit/resources/test-default.xml')
        self.assertEqual(x.create_hadoop_xml({'name1': 'some_val1',
                                              'name2': 2}, conf),
                         """<?xml version="1.0" ?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>name2</name>
    <value>2</value>
  </property>
  <property>
    <name>name1</name>
    <value>some_val1</value>
  </property>
</configuration>
""")
Example #7
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != "Active":
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job_origin)

    creator = workflow_factory.get_creator(job.type, job_origin)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs, input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster), wf_dir, wf_xml)

    jt_path = "%s:8021" % u.get_jobtracker(cluster).hostname
    nn_path = "hdfs://%s:8020" % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster["info"]["JobFlow"]["Oozie"] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {"oozie_job_id": oozie_job_id, "start_time": datetime.datetime.now()}
    )

    return job_execution
Example #8
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job_origin)

    if job.type == 'Hive':
        upload_hive_site(cluster, wf_dir)

    wf_xml = build_workflow_for_job(job.type, job_execution, job_origin,
                                    input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {"jobTracker": jt_path,
                      "nameNode": nn_path,
                      "user.name": "hadoop",
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Example #9
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution.status

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_file(u.get_jobtracker(cluster), wf_dir, job_origin, job)

    wf_xml = build_workflow_for_job(job.type, input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster), wf_dir,
                                            wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Example #10
def generate_xml_configs(configs, storage_path, nn_hostname,
                         jt_hostname, oozie_hostname, hive_hostname):
    set_general_configs(hive_hostname)
    # inserting common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, GENERAL_CONFS)

    # rendering the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update({'hive-site':
                            x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update({'oozie-site':
                            x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #11
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname):
    # inserting common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = []
    # TODO(aignatov): should be changed. General configs, not only Swift
    swift_in_config = False
    if 'general' in configs and ENABLE_SWIFT.name in configs['general']:
        swift_in_config = True
    if ((swift_in_config and configs['general'][ENABLE_SWIFT.name])
            or (not swift_in_config and ENABLE_SWIFT.default_value)):
        swift_xml_confs = swift.get_swift_configs()
        cfg.update(extract_name_values(swift_xml_confs))
        LOG.info("Swift integration is enabled")

    # rendering the applied configs into the appropriate xml files
    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, CORE_DEFAULT + swift_xml_confs),
        'mapred-site': x.create_hadoop_xml(cfg, MAPRED_DEFAULT),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #12
def generate_xml_configs(configs, storage_path, nn_hostname, jt_hostname,
                         oozie_hostname, hive_hostname, passwd_hive_mysql):
    general_cfg = get_general_configs(hive_hostname, passwd_hive_mysql)
    # inserting common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    # inserting user-defined configs
    for key, value in extract_xml_confs(configs):
        cfg[key] = value

    # applying swift configs if the user enabled them
    swift_xml_confs = swift.get_swift_configs()
    cfg = generate_cfg_from_general(cfg, configs, general_cfg)

    # rendering the applied configs into the appropriate xml files
    core_all = CORE_DEFAULT + swift_xml_confs
    mapred_all = MAPRED_DEFAULT

    if CONF.enable_data_locality:
        cfg.update(topology.TOPOLOGY_CONFIG)

        # applying vm awareness configs
        core_all += topology.vm_awareness_core_config()
        mapred_all += topology.vm_awareness_mapred_config()

    xml_configs = {
        'core-site': x.create_hadoop_xml(cfg, core_all),
        'mapred-site': x.create_hadoop_xml(cfg, mapred_all),
        'hdfs-site': x.create_hadoop_xml(cfg, HDFS_DEFAULT)
    }

    if hive_hostname:
        xml_configs.update(
            {'hive-site': x.create_hadoop_xml(cfg, HIVE_DEFAULT)})
        LOG.debug('Generated hive-site.xml for hive %s', hive_hostname)

    if oozie_hostname:
        xml_configs.update(
            {'oozie-site': x.create_hadoop_xml(cfg, o_h.OOZIE_DEFAULT)})
        LOG.debug('Generated oozie-site.xml for oozie %s', oozie_hostname)

    return xml_configs
Example #13
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
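    # Java jobs carry no registered input/output data sources, so the
    # lookups below are skipped for them and both sources stay None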
    if not edp.compare_job_type(job.type, 'Java'):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(oozie_server, wf_dir, wf_xml,
                                            hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })
    client.run_job(job_execution, oozie_job_id)

    return job_execution