Ejemplo n.º 1
0
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_jobtracker(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {'IDH Manager': {
            'Web UI': 'https://%s:9443' % mng.management_ip
        }}

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % nn.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Ejemplo n.º 2
0
def generate_sahara_configs(cluster, node_group=None):
    nn_hostname = _get_hostname(utils.get_namenode(cluster))
    jt_hostname = _get_hostname(utils.get_jobtracker(cluster))
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    cfg = {
        'fs.default.name':
        'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir':
        extract_hadoop_path(storage_path, '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts':
        '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude':
        '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker':
            '%s:8021' % jt_hostname,
            'mapred.system.dir':
            extract_hadoop_path(storage_path, '/mapred/mapredsystem'),
            'mapred.local.dir':
            extract_hadoop_path(storage_path, '/lib/hadoop/mapred'),
            'mapred.hosts':
            '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude':
            '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms':
            True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg
Ejemplo n.º 3
0
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_jobtracker(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {
            'IDH Manager': {
                'Web UI': 'https://%s:9443' % mng.management_ip
            }
        }

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {'Web UI': 'http://%s:50070' % nn.management_ip}
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Ejemplo n.º 4
0
def _configure_services(client, cluster):
    nn_host = u.get_namenode(cluster).fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    jt_host = u.get_jobtracker(cluster).fqdn() if u.get_jobtracker(
        cluster) else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    oozie_host = u.get_oozie(cluster).fqdn() if u.get_oozie(
        cluster) else None
    hive_host = u.get_hiveserver(cluster).fqdn() if u.get_hiveserver(
        cluster) else None

    services = []
    if u.get_namenode(cluster):
        services += ['hdfs']

    if u.get_jobtracker(cluster):
        services += ['mapred']

    if oozie_host:
        services += ['oozie']
        services += ['pig']

    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if jt_host:
        client.services.mapred.add_nodes('JobTracker', [jt_host])
        client.services.mapred.add_nodes('TaskTracker', tt_hosts)
Ejemplo n.º 5
0
def _configure_services(client, cluster):
    nn_host = u.get_namenode(cluster).fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    jt_host = u.get_jobtracker(cluster).fqdn() if u.get_jobtracker(
        cluster) else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    oozie_host = u.get_oozie(cluster).fqdn() if u.get_oozie(cluster) else None
    hive_host = u.get_hiveserver(cluster).fqdn() if u.get_hiveserver(
        cluster) else None

    services = []
    if u.get_namenode(cluster):
        services += ['hdfs']

    if u.get_jobtracker(cluster):
        services += ['mapred']

    if oozie_host:
        services += ['oozie']
        services += ['pig']

    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if jt_host:
        client.services.mapred.add_nodes('JobTracker', [jt_host])
        client.services.mapred.add_nodes('TaskTracker', tt_hosts)
Ejemplo n.º 6
0
    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)

        run.refresh_nodes(remote.get_remote(
            utils.get_namenode(cluster)), "dfsadmin")
        jt = utils.get_jobtracker(cluster)
        if jt:
            run.refresh_nodes(remote.get_remote(jt), "mradmin")

        self._start_tt_dn_processes(instances)
Ejemplo n.º 7
0
    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)

        run.refresh_nodes(remote.get_remote(utils.get_namenode(cluster)),
                          "dfsadmin")
        jt = utils.get_jobtracker(cluster)
        if jt:
            run.refresh_nodes(remote.get_remote(jt), "mradmin")

        self._start_tt_dn_processes(instances)
Ejemplo n.º 8
0
def generate_sahara_configs(cluster, node_group=None):
    nn_hostname = _get_hostname(utils.get_namenode(cluster))
    jt_hostname = _get_hostname(utils.get_jobtracker(cluster))
    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg
Ejemplo n.º 9
0
    def _validate_additional_ng_scaling(self, cluster, additional):
        jt = u.get_jobtracker(cluster)
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = self._get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale nodegroup"
                    " with processes: " + ' '.join(ng.node_processes))
            if not jt and 'tasktracker' in ng.node_processes:
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale node group with "
                    "processes which have no master-processes run "
                    "in cluster")
Ejemplo n.º 10
0
    def start_cluster(self, cluster):
        nn_instance = utils.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = utils.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info("Hadoop services in cluster %s have been started" %
                 cluster.name)

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info("Oozie service at '%s' has been started",
                         nn_instance.hostname())

        hive_server = utils.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r)
                    run.hive_metastore_start(r)
                    LOG.info("Hive Metastore server at %s has been started",
                             hive_server.hostname())

        LOG.info('Cluster %s has been started successfully' % cluster.name)
        self._set_cluster_info(cluster)
Ejemplo n.º 11
0
    def start_cluster(self, cluster):
        nn_instance = utils.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = utils.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info("Hadoop services in cluster %s have been started" %
                 cluster.name)

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info("Oozie service at '%s' has been started",
                         nn_instance.hostname())

        hive_server = utils.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r)
                    run.hive_metastore_start(r)
                    LOG.info("Hive Metastore server at %s has been started",
                             hive_server.hostname())

        LOG.info('Cluster %s has been started successfully' % cluster.name)
        self._set_cluster_info(cluster)
Ejemplo n.º 12
0
    def _validate_additional_ng_scaling(self, cluster, additional):
        jt = u.get_jobtracker(cluster)
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = self._get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale nodegroup"
                             " with processes: " +
                             ' '.join(ng.node_processes))
            if not jt and 'tasktracker' in ng.node_processes:
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, "Intel plugin cannot scale node group with "
                             "processes which have no master-processes run "
                             "in cluster")
Ejemplo n.º 13
0
def start_cluster(cluster):
    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    LOG.debug("Starting hadoop services")
    client.services.hdfs.start()

    if u.get_jobtracker(cluster):
        client.services.mapred.start()

    if u.get_hiveserver(cluster):
        client.services.hive.start()

    if u.get_oozie(cluster):
        LOG.info("Setup oozie")
        _setup_oozie(cluster)

        client.services.oozie.start()
Ejemplo n.º 14
0
def start_cluster(cluster):
    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    LOG.debug("Starting hadoop services")
    client.services.hdfs.start()

    if u.get_jobtracker(cluster):
        client.services.mapred.start()

    if u.get_hiveserver(cluster):
        client.services.hive.start()

    if u.get_oozie(cluster):
        LOG.info("Setup oozie")
        _setup_oozie(cluster)

        client.services.oozie.start()
Ejemplo n.º 15
0
    def decommission_nodes(self, cluster, instances):
        tts = utils.get_tasktrackers(cluster)
        dns = utils.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'tasktracker' in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
Ejemplo n.º 16
0
    def decommission_nodes(self, cluster, instances):
        tts = utils.get_tasktrackers(cluster)
        dns = utils.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'tasktracker' in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
Ejemplo n.º 17
0
    def _set_cluster_info(self, cluster):
        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            jt_port = c_helper.get_port_from_config('MapReduce',
                                                    'mapred.job.tracker',
                                                    cluster)

            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
                'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
            }

        if nn:
            ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                    cluster)
            nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                    cluster)

            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
            }

        if oozie:
            #TODO(yrunts) change from hardcode value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Ejemplo n.º 18
0
    def _set_cluster_info(self, cluster):
        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            jt_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker', cluster)

            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
                'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
            }

        if nn:
            ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                    cluster)
            nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                    cluster)

            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
            }

        if oozie:
            #TODO(yrunts) change from hardcode value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})