Example #1
    def test_get_namenode(self):
        cl = tu.create_cluster('cl1', 't1', 'vanilla', '1.2.1',
                               [self.ng_manager, self.ng_namenode])
        self.assertEqual('nn1', u.get_namenode(cl).instance_id)

        cl = tu.create_cluster('cl1', 't1', 'vanilla', '1.2.1',
                               [self.ng_manager])
        self.assertIsNone(u.get_namenode(cl))
Example #2
    def test_get_namenode(self):
        cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                               [self.ng_manager, self.ng_namenode])
        self.assertEqual('nn1', u.get_namenode(cl).instance_id)

        cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                               [self.ng_manager])
        self.assertIsNone(u.get_namenode(cl))
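For context, the helper these two tests exercise is small. The sketch below is an assumption about its shape, based on the vanilla-plugin convention of resolving an instance by the node process it runs; the generic get_instance lookup shown here is not quoted from any example in this list.

# Hedged sketch of the helper under test: return the single instance that
# runs the 'namenode' process, or None when no node group provides one.
def get_instance(cluster, node_process):
    # Collect every instance whose node group lists the requested process.
    instances = [instance
                 for node_group in cluster.node_groups
                 if node_process in node_group.node_processes
                 for instance in node_group.instances]
    return instances[0] if instances else None


def get_namenode(cluster):
    return get_instance(cluster, 'namenode')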
Example #3
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, 'secondarynamenode')

        rm = vu.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, 'resourcemanager')

        run.start_dn_nm_processes(utils.get_instances(cluster))

        run.await_datanodes(cluster)

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(self.pctx, oo)

        hiveserver = vu.get_hiveserver(cluster)
        if hiveserver:
            run.start_hiveserver_process(self.pctx, hiveserver)

        self._set_cluster_info(cluster)
Example #4
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        rm = vu.get_resourcemanager(cluster)
        hs = vu.get_historyserver(cluster)
        oo = vu.get_oozie(cluster)

        info = {}

        if rm:
            info["YARN"] = {
                "Web UI": "http://%s:%s" % (rm.management_ip, "8088"),
                "ResourceManager": "http://%s:%s" % (rm.management_ip, "8032"),
            }

        if nn:
            info["HDFS"] = {
                "Web UI": "http://%s:%s" % (nn.management_ip, "50070"),
                "NameNode": "hdfs://%s:%s" % (nn.hostname(), "9000"),
            }

        if oo:
            info["JobFlow"] = {"Oozie": "http://%s:%s" % (oo.management_ip, "11000")}

        if hs:
            info["MapReduce JobHistory Server"] = {"Web UI": "http://%s:%s" % (hs.management_ip, "19888")}

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {"info": info})
Example #5
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)
        oozie = vu.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config("MapReduce", "mapred.job.tracker.http.address", cluster)
            jt_port = c_helper.get_port_from_config("MapReduce", "mapred.job.tracker", cluster)

            info["MapReduce"] = {
                "Web UI": "http://%s:%s" % (jt.management_ip, ui_port),
                "JobTracker": "%s:%s" % (jt.hostname(), jt_port),
            }

        if nn:
            ui_port = c_helper.get_port_from_config("HDFS", "dfs.http.address", cluster)
            nn_port = c_helper.get_port_from_config("HDFS", "fs.default.name", cluster)

            info["HDFS"] = {
                "Web UI": "http://%s:%s" % (nn.management_ip, ui_port),
                "NameNode": "hdfs://%s:%s" % (nn.hostname(), nn_port),
            }

        if oozie:
            # TODO(yrunts) replace the hardcoded value
            info["JobFlow"] = {"Oozie": "http://%s:11000" % oozie.management_ip}

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {"info": info})
Example #6
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, 'secondarynamenode')

        rm = vu.get_resourcemanager(cluster)
        run.start_yarn_process(rm, 'resourcemanager')

        for dn in vu.get_datanodes(cluster):
            run.start_hadoop_process(dn, 'datanode')

        run.await_datanodes(cluster)

        for nm in vu.get_nodemanagers(cluster):
            run.start_yarn_process(nm, 'nodemanager')

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(oo)

        self._set_cluster_info(cluster)
Example #7
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        rm = vu.get_resourcemanager(cluster)
        hs = vu.get_historyserver(cluster)
        oo = vu.get_oozie(cluster)

        info = {}

        if rm:
            info['YARN'] = {
                'Web UI': 'http://%s:%s' % (rm.management_ip, '8088'),
                'ResourceManager': 'http://%s:%s' % (rm.management_ip, '8032')
            }

        if nn:
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, '50070'),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
            }

        if oo:
            info['JobFlow'] = {
                'Oozie': 'http://%s:%s' % (oo.management_ip, '11000')
            }

        if hs:
            info['MapReduce JobHistory Server'] = {
                'Web UI': 'http://%s:%s' % (hs.management_ip, '19888')
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #8
def generate_sahara_configs(cluster, node_group=None):
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    jt_hostname = vu.get_instance_hostname(vu.get_jobtracker(cluster))
    oozie_hostname = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # insert common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    cfg = {
        'fs.default.name':
        'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts':
        '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude':
        '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker':
            '%s:8021' % jt_hostname,
            'mapred.system.dir':
            extract_hadoop_path(storage_path, '/mapred/mapredsystem'),
            'mapred.local.dir':
            extract_hadoop_path(storage_path, '/lib/hadoop/mapred'),
            'mapred.hosts':
            '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude':
            '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms':
            True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg
Example #9
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, 'secondarynamenode')

        rm = vu.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, 'resourcemanager')

        run.start_all_processes(utils.get_instances(cluster),
                                ['datanode', 'nodemanager'])

        run.await_datanodes(cluster)

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(self.pctx, oo)

        hiveserver = vu.get_hiveserver(cluster)
        if hiveserver:
            run.start_hiveserver_process(self.pctx, hiveserver)

        self._set_cluster_info(cluster)
Example #10
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, "secondarynamenode")

        rm = vu.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, "resourcemanager")

        for dn in vu.get_datanodes(cluster):
            run.start_hadoop_process(dn, "datanode")

        run.await_datanodes(cluster)

        for nm in vu.get_nodemanagers(cluster):
            run.start_yarn_process(nm, "nodemanager")

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(oo)

        self._set_cluster_info(cluster)
Example #11
def _get_hadoop_configs(node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services':
            'mapreduce_shuffle',
            'yarn.resourcemanager.hostname':
            '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path':
            '%s/nm-include' % (HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path':
            '%s/nm-exclude' % (HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {'mapreduce.framework.name': 'yarn'}

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.get_config_value(c_helper.ENABLE_SWIFT.applicable_target,
                                 c_helper.ENABLE_SWIFT.name, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    return confs, c_helper.get_env_configs()
Example #12
def _get_hadoop_configs(node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.get_config_value(c_helper.ENABLE_SWIFT.applicable_target,
                                 c_helper.ENABLE_SWIFT.name, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    return confs, c_helper.get_env_configs()
Example #13
    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)

        run.refresh_nodes(remote.get_remote(vu.get_namenode(cluster)), "dfsadmin")
        jt = vu.get_jobtracker(cluster)
        if jt:
            run.refresh_nodes(remote.get_remote(jt), "mradmin")

        self._start_tt_dn_processes(instances)
Example #14
    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)

        run.refresh_nodes(remote.get_remote(vu.get_namenode(cluster)),
                          "dfsadmin")
        jt = vu.get_jobtracker(cluster)
        if jt:
            run.refresh_nodes(remote.get_remote(jt), "mradmin")

        self._start_tt_dn_processes(instances)
Example #15
    def _start_oozie(self, cluster, oozie):
        nn_instance = vu.get_namenode(cluster)

        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info(_LI("Oozie service at {host} has been started").format(host=nn_instance.hostname()))
Example #16
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    l_message = _("Waiting on %s datanodes to start up") % datanodes_count
    with vu.get_namenode(cluster).remote() as r:
        poll_utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                'remote': r, 'count': datanodes_count})
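_check_datanodes_count is referenced by this polling helper but never shown in this list. A minimal sketch follows, assuming each live datanode contributes one 'Hostname:' line to the dfsadmin report (the same line format parsed by the get_datanodes_status example later in this list); it is not the quoted implementation.

def _check_datanodes_count(remote, count):
    if count < 1:
        return True
    # execute_command is assumed to return (exit_code, stdout), as in the
    # get_datanodes_status example.
    _code, report = remote.execute_command(
        'sudo su - -c "hdfs dfsadmin -report" hadoop')
    return report.count('Hostname:') >= count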
Example #17
    def _start_oozie(self, cluster, oozie):
        nn_instance = vu.get_namenode(cluster)

        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info(_LI("Oozie service at '%s' has been started"),
                     nn_instance.hostname())
Example #18
    def _start_oozie(self, cluster, oozie):
        nn_instance = vu.get_namenode(cluster)

        with remote.get_remote(oozie) as r:
            with context.set_current_instance_id(oozie.instance_id):
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info(_LI("Oozie service has been started"))
Example #19
def get_datanodes_status(cluster):
    statuses = {}
    namenode = u.get_namenode(cluster)
    status_regexp = r'^Hostname: (.*)\nDecommission Status : (.*)$'
    matcher = re.compile(status_regexp, re.MULTILINE)
    dfs_report = namenode.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -report" hadoop')[1]

    for host, status in matcher.findall(dfs_report):
        statuses[host] = status.lower()

    return statuses
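Because the map above keys each datanode hostname to its lower-cased decommission status, follow-up filters stay trivial. A small sketch, assuming the report uses the 'decommissioned' status string (an assumption, not quoted code):

def get_decommissioned_datanodes(cluster):
    statuses = get_datanodes_status(cluster)
    # Keep only hosts whose status reads 'decommissioned'; other values such
    # as 'normal' are skipped.
    return [host for host, status in statuses.items()
            if status == 'decommissioned']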
Example #20
def generate_sahara_configs(cluster, node_group=None):
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    jt_hostname = vu.get_instance_hostname(vu.get_jobtracker(cluster))
    oozie_hostname = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # insert common configs that depend on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg
Example #21
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        l_message = _("Waiting on %s datanodes to start up") % datanodes_count
        LOG.info(l_message)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            poll_utils.plugin_option_poll(
                cluster, run.check_datanodes_count,
                c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                    'remote': r,
                    'count': datanodes_count})
Example #22
    def start_cluster(self, cluster):
        nn_instance = vu.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = vu.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info(_LI("Hadoop services in cluster %s have been started"),
                 cluster.name)

        oozie = vu.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info(_LI("Oozie service at '%s' has been started"),
                         nn_instance.hostname())

        hive_server = vu.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r)
                    run.hive_metastore_start(r)
                    LOG.info(_LI("Hive Metastore server at %s has been "
                                 "started"),
                             hive_server.hostname())

        LOG.info(_LI('Cluster %s has been started successfully'), cluster.name)
        self._set_cluster_info(cluster)
Example #23
    def start_cluster(self, cluster):
        nn_instance = vu.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = vu.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info(_LI("Hadoop services in cluster %s have been started"),
                 cluster.name)

        oozie = vu.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info(_LI("Oozie service at '%s' has been started"),
                         nn_instance.hostname())

        hive_server = vu.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r, cluster.extra['hive_mysql_passwd'])
                    run.hive_metastore_start(r)
                    LOG.info(
                        _LI("Hive Metastore server at %s has been "
                            "started"), hive_server.hostname())

        LOG.info(_LI('Cluster %s has been started successfully'), cluster.name)
        self._set_cluster_info(cluster)
Example #24
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % datanodes_count)
    with vu.get_namenode(cluster).remote() as r:
        while True:
            if _check_datanodes_count(r, datanodes_count):
                LOG.info("Datanodes on cluster %s has been started" % cluster.name)
                return

            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info("Stop waiting datanodes on cluster %s since it has " "been deleted" % cluster.name)
                return
Example #25
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % datanodes_count)
    with vu.get_namenode(cluster).remote() as r:
        while True:
            if _check_datanodes_count(r, datanodes_count):
                LOG.info('Datanodes on cluster %s have been started' %
                         cluster.name)
                return

            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info('Stopped waiting for datanodes on cluster %s since '
                         'it has been deleted' % cluster.name)
                return
Example #26
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            while True:
                if run.check_datanodes_count(r, datanodes_count):
                    LOG.info(_LI('Datanodes on cluster %s have been started'),
                             cluster.name)
                    return

                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    LOG.info(
                        _LI('Stopped waiting for datanodes on cluster %s'
                            ' since it has been deleted'), cluster.name)
                    return
Example #27
    def decommission_nodes(self, cluster, instances):
        tts = vu.get_tasktrackers(cluster)
        dns = vu.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'tasktracker' in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
Example #28
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            while True:
                if run.check_datanodes_count(r, datanodes_count):
                    LOG.info(
                        _LI('Datanodes on cluster %s have been started'),
                        cluster.name)
                    return

                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    LOG.info(
                        _LI('Stopped waiting for datanodes on cluster %s'
                            ' since it has been deleted'), cluster.name)
                    return
Example #29
    def decommission_nodes(self, cluster, instances):
        tts = vu.get_tasktrackers(cluster)
        dns = vu.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if "datanode" in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if "tasktracker" in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
Example #30
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)
        oozie = vu.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            jt_port = c_helper.get_port_from_config('MapReduce',
                                                    'mapred.job.tracker',
                                                    cluster)

            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
                'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
            }

        if nn:
            ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                    cluster)
            nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                    cluster)

            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
            }

        if oozie:
            # TODO(yrunts) replace the hardcoded value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #31
    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)
        oozie = vu.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            jt_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker', cluster)

            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
                'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
            }

        if nn:
            ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                    cluster)
            nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                    cluster)

            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
            }

        if oozie:
            # TODO(yrunts) replace the hardcoded value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Example #32
def start_namenode(cluster):
    nn = vu.get_namenode(cluster)
    _start_namenode(nn)
Example #33
    def start_namenode(self, cluster):
        nn = vu.get_namenode(cluster)
        self._start_namenode(nn)
Example #34
def _get_hadoop_configs(pctx, node_group):
    cluster = node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
Example #35
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.datanode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        }
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs['YARN'] = {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        }
        confs['MapReduce'] = {
            'mapreduce.framework.name': 'yarn'
        }
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs['MapReduce']['mapreduce.jobhistory.address'] = (
                "%s:10020" % hs_hostname)

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update({"topology.script.file.name":
                                HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update({
                'javax.jdo.option.ConnectionURL':
                'jdbc:mysql://%s/metastore' % hive_hostname,
                'javax.jdo.option.ConnectionDriverName':
                'com.mysql.jdbc.Driver',
                'javax.jdo.option.ConnectionUserName': '******',
                'javax.jdo.option.ConnectionPassword': '******',
                'datanucleus.autoCreateSchema': 'false',
                'datanucleus.fixedDatastore': 'true',
                'hive.metastore.uris': 'thrift://%s:9083' % hive_hostname,
            })

        proxy_configs = cluster.cluster_configs.get('proxy_configs')
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            hive_cfg.update({
                swift.HADOOP_SWIFT_USERNAME: proxy_configs['proxy_username'],
                swift.HADOOP_SWIFT_PASSWORD: proxy_configs['proxy_password'],
                swift.HADOOP_SWIFT_TRUST_ID: proxy_configs['proxy_trust_id'],
                swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name
            })

        confs['Hive'] = hive_cfg

    return confs
Example #36
def _get_hadoop_configs(pctx, instance):
    cluster = instance.node_group.cluster
    nn_hostname = vu.get_instance_hostname(vu.get_namenode(cluster))
    dirs = _get_hadoop_dirs(instance)
    confs = {
        "Hadoop": {"fs.defaultFS": "hdfs://%s:9000" % nn_hostname},
        "HDFS": {
            "dfs.namenode.name.dir": ",".join(dirs["hadoop_name_dirs"]),
            "dfs.datanode.data.dir": ",".join(dirs["hadoop_data_dirs"]),
            "dfs.hosts": "%s/dn-include" % HADOOP_CONF_DIR,
            "dfs.hosts.exclude": "%s/dn-exclude" % HADOOP_CONF_DIR,
        },
    }

    res_hostname = vu.get_instance_hostname(vu.get_resourcemanager(cluster))
    if res_hostname:
        confs["YARN"] = {
            "yarn.nodemanager.aux-services": "mapreduce_shuffle",
            "yarn.resourcemanager.hostname": "%s" % res_hostname,
            "yarn.resourcemanager.nodes.include-path": "%s/nm-include" % (HADOOP_CONF_DIR),
            "yarn.resourcemanager.nodes.exclude-path": "%s/nm-exclude" % (HADOOP_CONF_DIR),
        }
        confs["MapReduce"] = {"mapreduce.framework.name": "yarn"}
        hs_hostname = vu.get_instance_hostname(vu.get_historyserver(cluster))
        if hs_hostname:
            confs["MapReduce"]["mapreduce.jobhistory.address"] = "%s:10020" % hs_hostname

    oozie = vu.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {"hadoop.proxyuser.hadoop.hosts": "*", "hadoop.proxyuser.hadoop.groups": "hadoop"}
        confs["Hadoop"].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(pctx, cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs["JobFlow"] = oozie_cfg

    if c_helper.is_swift_enabled(pctx, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config["name"]] = config["value"]

        confs["Hadoop"].update(swift_configs)

    if c_helper.is_data_locality_enabled(pctx, cluster):
        confs["Hadoop"].update(th.TOPOLOGY_CONFIG)
        confs["Hadoop"].update({"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    hive_hostname = vu.get_instance_hostname(vu.get_hiveserver(cluster))
    if hive_hostname:
        hive_cfg = {
            "hive.warehouse.subdir.inherit.perms": True,
            "javax.jdo.option.ConnectionURL": "jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true",
        }

        if c_helper.is_mysql_enabled(pctx, cluster):
            hive_cfg.update(
                {
                    "javax.jdo.option.ConnectionURL": "jdbc:mysql://%s/metastore" % hive_hostname,
                    "javax.jdo.option.ConnectionDriverName": "com.mysql.jdbc.Driver",
                    "javax.jdo.option.ConnectionUserName": "******",
                    "javax.jdo.option.ConnectionPassword": "******",
                    "datanucleus.autoCreateSchema": "false",
                    "datanucleus.fixedDatastore": "true",
                    "hive.metastore.uris": "thrift://%s:9083" % hive_hostname,
                }
            )

        proxy_configs = cluster.cluster_configs.get("proxy_configs")
        if proxy_configs and c_helper.is_swift_enabled(pctx, cluster):
            key = key_manager.API().get(context.current(), proxy_configs["proxy_password"])
            password = key.get_encoded()
            hive_cfg.update(
                {
                    swift.HADOOP_SWIFT_USERNAME: proxy_configs["proxy_username"],
                    swift.HADOOP_SWIFT_PASSWORD: password,
                    swift.HADOOP_SWIFT_TRUST_ID: proxy_configs["proxy_trust_id"],
                    swift.HADOOP_SWIFT_DOMAIN_NAME: CONF.proxy_user_domain_name,
                }
            )

        confs["Hive"] = hive_cfg

    return confs
Example #37
def refresh_hadoop_nodes(cluster):
    nn = vu.get_namenode(cluster)
    nn.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -refreshNodes" hadoop')