Example #1
    def test_get_instances(self):
        res = pu.get_instances(self.cluster)
        self.assertEqual([
            FakeInstance("1"), FakeInstance("2"), FakeInstance("3")], res)

        res = pu.get_instances(self.cluster, "node_process1")
        self.assertEqual([FakeInstance("1")], res)
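The tests above exercise get_instances(cluster, node_process=None): called with only a cluster it returns every instance, and with a process name it returns only the instances whose node group runs that process. A minimal sketch of that behaviour, assuming node groups expose node_processes and instances attributes as in the sahara-style plugin utilities (illustrative only, not the library's actual implementation):

def get_instances(cluster, node_process=None):
    # Illustrative sketch: gather instances from every node group,
    # optionally keeping only the node groups that run node_process.
    instances = []
    for node_group in cluster.node_groups:
        if node_process is None or node_process in node_group.node_processes:
            instances.extend(node_group.instances)
    return instances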
Example #2
 def test_get_instances(self):
     self.assertEqual(5, len(u.get_instances(self.c1)))
     self.assertEqual([], u.get_instances(self.c1, "wrong-process"))
     self.assertEqual(self.ng1.instances, u.get_instances(self.c1, "nn"))
     instances = list(self.ng2.instances)
     instances += self.ng3.instances
     self.assertEqual(instances, u.get_instances(self.c1, "dn"))
Example #3
def _configure_hdfs_site(cluster, blueprint):
    props = _find_hdfs_site(blueprint)

    props["dfs.client.failover.proxy.provider.hdfs-ha"] = (
        "org.apache.hadoop.hdfs.server.namenode.ha." "ConfiguredFailoverProxyProvider"
    )
    props["dfs.ha.automatic-failover.enabled"] = "true"
    props["dfs.ha.fencing.methods"] = "shell(/bin/true)"
    props["dfs.nameservices"] = "hdfs-ha"

    jns = utils.get_instances(cluster, p_common.JOURNAL_NODE)
    journalnodes_concat = ";".join(["%s:8485" % i.fqdn() for i in jns])
    journalnodes_value = "qjournal://%s/hdfs-ha" % journalnodes_concat
    props["dfs.namenode.shared.edits.dir"] = journalnodes_value

    nns = utils.get_instances(cluster, p_common.NAMENODE)
    nn_id_concat = ",".join([i.instance_name for i in nns])
    props["dfs.ha.namenodes.hdfs-ha"] = nn_id_concat

    props["dfs.namenode.http-address"] = "%s:50070" % nns[0].fqdn()
    props["dfs.namenode.https-address"] = "%s:50470" % nns[0].fqdn()
    for i in nns:
        props["dfs.namenode.http-address.hdfs-ha.%s" % i.instance_name] = "%s:50070" % i.fqdn()
        props["dfs.namenode.https-address.hdfs-ha.%s" % i.instance_name] = "%s:50470" % i.fqdn()
        props["dfs.namenode.rpc-address.hdfs-ha.%s" % i.instance_name] = "%s:8020" % i.fqdn()

    return blueprint
Example #4
 def test_get_instances(self):
     self.assertEqual(len(u.get_instances(self.c1)), 5)
     self.assertEqual(u.get_instances(self.c1, 'wrong-process'), [])
     self.assertEqual(u.get_instances(self.c1, 'nn'),
                      self.ng1.instances)
     instances = list(self.ng2.instances)
     instances += self.ng3.instances
     self.assertEqual(u.get_instances(self.c1, 'dn'), instances)
Example #5
def restart_nns_and_rms(cluster):
    nns = plugin_utils.get_instances(cluster, p_common.NAMENODE)
    for nn in nns:
        restart_namenode(cluster, nn)

    rms = plugin_utils.get_instances(cluster, p_common.RESOURCEMANAGER)
    for rm in rms:
        restart_resourcemanager(cluster, rm)
Example #6
 def deploy_kerberos(self, cluster):
     all_instances = plugin_utils.get_instances(cluster)
     namenodes = plugin_utils.get_instances(cluster, 'namenode')
     server = None
     if len(namenodes) > 0:
         server = namenodes[0]
     elif len(all_instances) > 0:
         server = all_instances[0]
     if server:
         krb.deploy_infrastructure(cluster, server)
Example #7
 def get_plain_instances(self):
     fs = self.get_fs_instances()
     zk = self.get_zk_instances()
     cldb = self.get_cldb_instances()
     zk_fs_cldb = zk + fs + cldb
     instances = u.get_instances(self.get_cluster())
     return [i for i in instances if i not in zk_fs_cldb]
Example #8
    def start_cluster(self, cluster):
        nn_instance = utils.get_instance(cluster, "namenode")
        sm_instance = utils.get_instance(cluster, "master")
        dn_instances = utils.get_instances(cluster, "datanode")

        # Start the name node
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        # start the data nodes
        self._start_slave_datanode_processes(dn_instances)

        LOG.info(_LI("Hadoop services in cluster %s have been started"),
                 cluster.name)

        with remote.get_remote(nn_instance) as r:
            r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
            r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                              "/user/$USER/")

        # start spark nodes
        if sm_instance:
            with remote.get_remote(sm_instance) as r:
                run.start_spark_master(r, self._spark_home(cluster))
                LOG.info(_LI("Spark service at '%s' has been started"),
                         sm_instance.hostname())

        LOG.info(_LI('Cluster %s has been started successfully'),
                 cluster.name)
        self._set_cluster_info(cluster)
Example #9
def start_cluster(cluster):
    if pu.get_oozie(cluster):
        _install_extjs(cluster)

    if pu.get_hive_metastore(cluster):
        _configure_hive(cluster)

    if pu.get_sentry(cluster):
        _configure_sentry(cluster)

    cu.first_run(cluster)

    if c_helper.is_swift_enabled(cluster):
        instances = gu.get_instances(cluster)
        _configure_swift(instances)

    if pu.get_hive_metastore(cluster):
        _put_hive_hdfs_xml(cluster)

    if pu.get_flumes(cluster):
        cm_cluster = cu.get_cloudera_cluster(cluster)
        flume = cm_cluster.get_service(cu.FLUME_SERVICE_NAME)
        cu.start_service(flume)

    cu.restart_mgmt_service(cluster)
Example #10
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, 'secondarynamenode')

        rm = vu.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, 'resourcemanager')

        run.start_dn_nm_processes(utils.get_instances(cluster))

        run.await_datanodes(cluster)

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(self.pctx, oo)

        hiveserver = vu.get_hiveserver(cluster)
        if hiveserver:
            run.start_hiveserver_process(self.pctx, hiveserver)

        self._set_cluster_info(cluster)
Example #11
    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and ("datanode" in
                                                   ng.node_processes):
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name, _("Spark plugin cannot scale nodegroup"
                                   " with processes: %s") %
                        ' '.join(ng.node_processes))

        dn_amount = len(utils.get_instances(cluster, "datanode"))
        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name, _("Spark plugin cannot shrink cluster because "
                                "there would be not enough nodes for HDFS "
                                "replicas (replication factor is %s)") %
                rep_factor)
Example #12
    def _extract_configs_to_extra(self, cluster):
        sp_master = utils.get_instance(cluster, "master")
        sp_slaves = utils.get_instances(cluster, "slave")

        extra = dict()

        config_master = config_slaves = ''
        if sp_master is not None:
            config_master = c_helper.generate_spark_env_configs(cluster)

        if sp_slaves is not None:
            slavenames = []
            for slave in sp_slaves:
                slavenames.append(slave.hostname())
            config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
        else:
            config_slaves = "\n"

        # Any node that might be used to run spark-submit will need
        # these libs for swift integration
        config_defaults = c_helper.generate_spark_executor_classpath(cluster)

        extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)

        extra['sp_master'] = config_master
        extra['sp_slaves'] = config_slaves
        extra['sp_defaults'] = config_defaults

        if c_helper.is_data_locality_enabled(cluster):
            topology_data = th.generate_topology_map(
                cluster, CONF.enable_hypervisor_awareness)
            extra['topology_data'] = "\n".join(
                [k + " " + v for k, v in topology_data.items()]) + "\n"

        return extra
Example #13
def setup_agents(cluster, instances=None):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    if not instances:
        instances = plugin_utils.get_instances(cluster)
    _setup_agents(instances, manager_address)
Example #14
def _configure_hbase_site(cluster, blueprint):
    props = _find_hbase_site(blueprint)

    props["hbase.regionserver.global.memstore.lowerLimit"] = "0.38"
    props["hbase.regionserver.global.memstore.upperLimit"] = "0.4"
    props["hbase.regionserver.handler.count"] = "60"
    props["hbase.regionserver.info.port"] = "60030"
    props["hbase.regionserver.storefile.refresh.period"] = "20"

    props["hbase.rootdir"] = "hdfs://hdfs-ha/apps/hbase/data"

    props["hbase.security.authentication"] = "simple"
    props["hbase.security.authorization"] = "false"
    props["hbase.superuser"] = "******"
    props["hbase.tmp.dir"] = "/hadoop/hbase"
    props["hbase.zookeeper.property.clientPort"] = "2181"

    zk_instances = utils.get_instances(cluster, p_common.ZOOKEEPER_SERVER)
    zk_quorum_value = ",".join([i.fqdn() for i in zk_instances])
    props["hbase.zookeeper.quorum"] = zk_quorum_value

    props["hbase.zookeeper.useMulti"] = "true"
    props["hfile.block.cache.size"] = "0.40"
    props["zookeeper.session.timeout"] = "30000"
    props["zookeeper.znode.parent"] = "/hbase-unsecure"

    return blueprint
Example #15
def _set_default_fs(cluster, blueprint, ha_type):
    if ha_type == p_common.NAMENODE_HA:
        _find_core_site(blueprint)["fs.defaultFS"] = "hdfs://hdfs-ha"
    elif ha_type == p_common.RESOURCEMANAGER_HA:
        nn_instance = utils.get_instances(cluster, p_common.NAMENODE)[0]
        _find_core_site(blueprint)["fs.defaultFS"] = "hdfs://%s:8020" % nn_instance.fqdn()
    return blueprint
Example #16
    def start_cluster(self, cluster):
        nn_instance = utils.get_instance(cluster, "namenode")
        dn_instances = utils.get_instances(cluster, "datanode")
        zep_instance = utils.get_instance(cluster, "zeppelin")

        # Start the name node
        self._start_namenode(nn_instance)

        # start the data nodes
        self._start_datanode_processes(dn_instances)

        LOG.info(_LI("Hadoop services have been started"))

        with remote.get_remote(nn_instance) as r:
            r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
            r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                              "/user/$USER/")

        # start spark nodes
        self.start_spark(cluster)

        # start zeppelin, if necessary
        if zep_instance:
            self._start_zeppelin(zep_instance)

        LOG.info(_LI('Cluster has been started successfully'))
        self._set_cluster_info(cluster)
Example #17
    def _extract_configs_to_extra(self, cluster):
        st_master = utils.get_instance(cluster, "nimbus")
        zk_servers = utils.get_instances(cluster, "zookeeper")

        extra = dict()

        config_instances = ''
        if st_master is not None:
            if zk_servers is not None:
                zknames = []
                for zk in zk_servers:
                    zknames.append(zk.hostname())

            config_instances = c_helper.generate_storm_config(
                st_master.hostname(),
                zknames)

        config = self._convert_dict_to_yaml(config_instances)
        supervisor_conf = c_helper.generate_slave_supervisor_conf()
        nimbus_ui_conf = c_helper.generate_master_supervisor_conf()
        zk_conf = c_helper.generate_zookeeper_conf()

        for ng in cluster.node_groups:
            extra[ng.id] = {
                'st_instances': config,
                'slave_sv_conf': supervisor_conf,
                'master_sv_conf': nimbus_ui_conf,
                'zk_conf': zk_conf
            }

        return extra
Example #18
def _set_primary_and_standby_namenode(cluster, blueprint):
    props = _find_hadoop_env(blueprint)
    nns = utils.get_instances(cluster, p_common.NAMENODE)
    props["dfs_ha_initial_namenode_active"] = nns[0].fqdn()
    props["dfs_ha_initial_namenode_standby"] = nns[1].fqdn()

    return blueprint
Example #19
 def start_cluster(self, cluster):
     self._set_cluster_info(cluster)
     deploy.start_cluster(cluster)
     cluster_instances = plugin_utils.get_instances(cluster)
     swift_helper.install_ssl_certs(cluster_instances)
     deploy.add_hadoop_swift_jar(cluster_instances)
     deploy.prepare_hive(cluster)
Example #20
def _clear_exclude_files(cluster):
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo > %s/dn-exclude" hadoop' % HADOOP_CONF_DIR)
            r.execute_command(
                'sudo su - -c "echo > %s/nm-exclude" hadoop' % HADOOP_CONF_DIR)
Example #21
def configure_cluster_for_hdfs(cluster, data_source_url):
    host = urlparse.urlparse(data_source_url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # IP address hasn't been resolved; the last chance is for the VM itself
        return

    # If the cluster was already configured for this data source
    # there's no need to configure it again
    if _is_cluster_configured(cluster, etc_hosts_information.splitlines()):
        return

    etc_hosts_update = ('/tmp/etc-hosts-update'
                        '.%s' % six.text_type(uuidutils.generate_uuid()))
    tmp_etc_hosts = ('/tmp/etc-hosts'
                     '.%s' % six.text_type(uuidutils.generate_uuid()))
    update_etc_hosts_cmd = (
        'cat %(etc_hosts_update)s /etc/hosts | '
        'sort | uniq > %(tmp_etc_hosts)s && '
        'cat %(tmp_etc_hosts)s > /etc/hosts && '
        'rm -f %(tmp_etc_hosts)s %(etc_hosts_update)s' %
        {'etc_hosts_update': etc_hosts_update, 'tmp_etc_hosts': tmp_etc_hosts})

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to(etc_hosts_update, etc_hosts_information)
            r.execute_command(update_etc_hosts_cmd, run_as_root=True)
Example #22
    def _setup_instances(self, cluster, instances=None):
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        self._push_configs_to_nodes(cluster, extra, instances)
Example #23
    def start_cluster(self, cluster):
        sm_instance = utils.get_instance(cluster, "nimbus")
        sl_instances = utils.get_instances(cluster, "supervisor")
        zk_instances = utils.get_instances(cluster, "zookeeper")

        # start zookeeper processes
        self._start_zookeeper_processes(zk_instances)

        # start storm master
        if sm_instance:
            self._start_storm_master(sm_instance)

        # start storm slaves
        self._start_slave_processes(sl_instances)

        LOG.info(_LI("Cluster {cluster} has been started successfully").format(cluster=cluster.name))
        self._set_cluster_info(cluster)
Example #24
def _configure_yarn_site(cluster, blueprint):
    props = _find_yarn_site(blueprint)
    name = cluster.name
    rm_instances = utils.get_instances(cluster, p_common.RESOURCEMANAGER)

    props["hadoop.registry.rm.enabled"] = "false"

    zk_instances = utils.get_instances(cluster, p_common.ZOOKEEPER_SERVER)

    zks = ",".join(["%s:2181" % i.fqdn() for i in zk_instances])
    props["yarn.resourcemanager.zk-address"] = zks

    hs = utils.get_instance(cluster, p_common.HISTORYSERVER)
    props["yarn.log.server.url"] = "%s:19888/jobhistory/logs/" % hs.fqdn()

    props["yarn.resourcemanager.address"] = "%s:8050" % rm_instances[0].fqdn()
    props["yarn.resourcemanager.admin.address"] = "%s:8141" % rm_instances[0].fqdn()
    props["yarn.resourcemanager.cluster-id"] = name
    props["yarn.resourcemanager.ha.automatic-failover.zk-base-path"] = "/yarn-leader-election"
    props["yarn.resourcemanager.ha.enabled"] = "true"

    rm_id_concat = ",".join([i.instance_name for i in rm_instances])
    props["yarn.resourcemanager.ha.rm-ids"] = rm_id_concat

    for i in rm_instances:
        props["yarn.resourcemanager.hostname.%s" % i.instance_name] = i.fqdn()
        props["yarn.resourcemanager.webapp.address.%s" % i.instance_name] = "%s:8088" % i.fqdn()
        props["yarn.resourcemanager.webapp.https.address.%s" % i.instance_name] = "%s:8090" % i.fqdn()

    props["yarn.resourcemanager.hostname"] = rm_instances[0].fqdn()
    props["yarn.resourcemanager.recovery.enabled"] = "true"
    props["yarn.resourcemanager.resource-tracker.address"] = "%s:8025" % rm_instances[0].fqdn()
    props["yarn.resourcemanager.scheduler.address"] = "%s:8030" % rm_instances[0].fqdn()
    props["yarn.resourcemanager.store.class"] = (
        "org.apache.hadoop.yarn.server.resourcemanager.recovery." "ZKRMStateStore"
    )
    props["yarn.resourcemanager.webapp.address"] = "%s:8088" % rm_instances[0].fqdn()
    props["yarn.resourcemanager.webapp.https.address"] = "%s:8090" % rm_instances[0].fqdn()

    tls_instance = utils.get_instance(cluster, p_common.APP_TIMELINE_SERVER)
    props["yarn.timeline-service.address"] = "%s:10200" % tls_instance.fqdn()
    props["yarn.timeline-service.webapp.address"] = "%s:8188" % tls_instance.fqdn()
    props["yarn.timeline-service.webapp.https.address"] = "%s:8190" % tls_instance.fqdn()

    return blueprint
Example #25
def setup_agents(cluster):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    with context.ThreadGroup() as tg:
        for inst in plugin_utils.get_instances(cluster):
            tg.spawn("hwx-agent-setup-%s" % inst.id,
                     _setup_agent, inst, manager_address)
    LOG.debug("Ambari agents has been installed")
Example #26
def _update_exclude_files(cluster, instances):
    datanodes = _get_instances_with_service(instances, "datanode")
    nodemanagers = _get_instances_with_service(instances, "nodemanager")
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command("sudo su - -c \"echo '%s' > %s/dn-exclude\" hadoop" % (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command("sudo su - -c \"echo '%s' > %s/nm-exclude\" hadoop" % (nm_hosts, HADOOP_CONF_DIR))
Example #27
def disable_repos(cluster):
    if configs.use_base_repos_needed(cluster):
        LOG.debug("Using base repos")
        return
    instances = plugin_utils.get_instances(cluster)
    with context.ThreadGroup() as tg:
        for inst in instances:
            tg.spawn("disable-repos-%s" % inst.instance_name,
                     _disable_repos_on_inst, inst)
Example #28
def _get_cluster_hosts_information(host, cluster):
    for clust in conductor.cluster_get_all(context.ctx()):
        if clust.id == cluster.id:
            continue

        for i in u.get_instances(clust):
            if i.instance_name == host:
                return g.generate_etc_hosts(clust)

    return None
Example #29
    def _setup_instances(self, cluster, instances):
        if (CONF.use_identity_api_v3 and CONF.use_domain_for_proxy_users and
                vu.get_hiveserver(cluster) and
                c_helper.is_swift_enable(cluster)):
            cluster = proxy.create_proxy_user_for_cluster(cluster)
            instances = utils.get_instances(cluster)

        extra = self._extract_configs_to_extra(cluster)
        cluster = conductor.cluster_get(context.ctx(), cluster)
        self._push_configs_to_nodes(cluster, extra, instances)
Example #30
    def configure_swift(self, cluster, instances=None):
        if self.c_helper.is_swift_enabled(cluster):
            if not instances:
                instances = u.get_instances(cluster)
            cpo.add_provisioning_step(cluster.id, _("Configure Swift"), len(instances))

            with context.ThreadGroup() as tg:
                for i in instances:
                    tg.spawn("cdh-swift-conf-%s" % i.instance_name, self._configure_swift_to_inst, i)
            swift_helper.install_ssl_certs(instances)
Example #31
def validate_cluster_creating(cluster):
    mng_count = _get_inst_count(cluster, 'CLOUDERA_MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('CLOUDERA_MANAGER', 1,
                                                mng_count)

    zk_count = _get_inst_count(cluster, 'ZOOKEEPER_SERVER')
    nn_count = _get_inst_count(cluster, 'HDFS_NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('HDFS_NAMENODE', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'HDFS_SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('HDFS_SECONDARYNAMENODE', 1,
                                                snn_count)

    dn_count = _get_inst_count(cluster, 'HDFS_DATANODE')
    replicas = PU.get_config_value('HDFS', 'dfs_replication', cluster)
    if dn_count < replicas:
        raise ex.InvalidComponentCountException(
            'HDFS_DATANODE', replicas, dn_count,
            _('Number of datanodes must be not less than dfs_replication.'))

    jn_count = _get_inst_count(cluster, 'HDFS_JOURNALNODE')
    require_anti_affinity = PU.c_helper.get_required_anti_affinity(cluster)
    if jn_count > 0:
        if jn_count < 3:
            raise ex.InvalidComponentCountException('HDFS_JOURNALNODE',
                                                    _('not less than 3'),
                                                    jn_count)
        if not jn_count % 2:
            raise ex.InvalidComponentCountException('HDFS_JOURNALNODE',
                                                    _('be odd'), jn_count)
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='HDFS HA')
        if require_anti_affinity:
            if 'HDFS_SECONDARYNAMENODE' not in _get_anti_affinity(cluster):
                raise ex.NameNodeHAConfigurationError(
                    _('HDFS_SECONDARYNAMENODE should be enabled '
                      'in anti_affinity.'))
            if 'HDFS_NAMENODE' not in _get_anti_affinity(cluster):
                raise ex.NameNodeHAConfigurationError(
                    _('HDFS_NAMENODE should be enabled in anti_affinity.'))

    rm_count = _get_inst_count(cluster, 'YARN_RESOURCEMANAGER')
    if rm_count > 1:
        raise ex.InvalidComponentCountException('YARN_RESOURCEMANAGER',
                                                _('0 or 1'), rm_count)

    stdb_rm_count = _get_inst_count(cluster, 'YARN_STANDBYRM')
    if stdb_rm_count > 1:
        raise ex.InvalidComponentCountException('YARN_STANDBYRM', _('0 or 1'),
                                                stdb_rm_count)
    if stdb_rm_count > 0:
        if rm_count < 1:
            raise ex.RequiredServiceMissingException('YARN_RESOURCEMANAGER',
                                                     required_by='RM HA')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='RM HA')
        if require_anti_affinity:
            if 'YARN_RESOURCEMANAGER' not in _get_anti_affinity(cluster):
                raise ex.ResourceManagerHAConfigurationError(
                    _('YARN_RESOURCEMANAGER should be enabled in '
                      'anti_affinity.'))
            if 'YARN_STANDBYRM' not in _get_anti_affinity(cluster):
                raise ex.ResourceManagerHAConfigurationError(
                    _('YARN_STANDBYRM should be enabled in anti_affinity.'))

    hs_count = _get_inst_count(cluster, 'YARN_JOBHISTORY')
    if hs_count > 1:
        raise ex.InvalidComponentCountException('YARN_JOBHISTORY', _('0 or 1'),
                                                hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException(
            'YARN_JOBHISTORY', required_by='YARN_RESOURCEMANAGER')

    nm_count = _get_inst_count(cluster, 'YARN_NODEMANAGER')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException(
                'YARN_RESOURCEMANAGER', required_by='YARN_NODEMANAGER')

    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    if oo_count > 1:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', _('0 or 1'),
                                                oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='OOZIE_SERVER')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'YARN_NODEMANAGER', required_by='OOZIE_SERVER')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'YARN_JOBHISTORY', required_by='OOZIE_SERVER')

    hms_count = _get_inst_count(cluster, 'HIVE_METASTORE')
    hvs_count = _get_inst_count(cluster, 'HIVE_SERVER2')
    whc_count = _get_inst_count(cluster, 'HIVE_WEBHCAT')

    if hms_count and rm_count < 1:
        raise ex.RequiredServiceMissingException('YARN_RESOURCEMANAGER',
                                                 required_by='HIVE_METASTORE')

    if hms_count and not hvs_count:
        raise ex.RequiredServiceMissingException('HIVE_SERVER2',
                                                 required_by='HIVE_METASTORE')

    if hvs_count and not hms_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HIVE_SERVER2')

    if whc_count and not hms_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HIVE_WEBHCAT')

    hue_count = _get_inst_count(cluster, 'HUE_SERVER')
    if hue_count > 1:
        raise ex.InvalidComponentCountException('HUE_SERVER', _('0 or 1'),
                                                hue_count)

    shs_count = _get_inst_count(cluster, 'SPARK_YARN_HISTORY_SERVER')
    if shs_count > 1:
        raise ex.InvalidComponentCountException('SPARK_YARN_HISTORY_SERVER',
                                                _('0 or 1'), shs_count)
    if shs_count and not rm_count:
        raise ex.RequiredServiceMissingException(
            'YARN_RESOURCEMANAGER', required_by='SPARK_YARN_HISTORY_SERVER')

    if oo_count < 1 and hue_count:
        raise ex.RequiredServiceMissingException('OOZIE_SERVER',
                                                 required_by='HUE_SERVER')

    if hms_count < 1 and hue_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HUE_SERVER')

    hbm_count = _get_inst_count(cluster, 'HBASE_MASTER')
    hbr_count = _get_inst_count(cluster, 'HBASE_REGIONSERVER')

    if hbm_count >= 1:
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='HBASE')
        if hbr_count < 1:
            raise ex.InvalidComponentCountException('HBASE_REGIONSERVER',
                                                    _('at least 1'), hbr_count)
    elif hbr_count >= 1:
        raise ex.InvalidComponentCountException('HBASE_MASTER',
                                                _('at least 1'), hbm_count)

    a_count = _get_inst_count(cluster, 'FLUME_AGENT')
    if a_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('HDFS_DATANODE',
                                                     required_by='FLUME_AGENT')

    snt_count = _get_inst_count(cluster, 'SENTRY_SERVER')
    if snt_count > 1:
        raise ex.InvalidComponentCountException('SENTRY_SERVER', _('0 or 1'),
                                                snt_count)
    if snt_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='SENTRY_SERVER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException(
                'ZOOKEEPER', required_by='SENTRY_SERVER')

    slr_count = _get_inst_count(cluster, 'SOLR_SERVER')
    if slr_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('HDFS_DATANODE',
                                                     required_by='SOLR_SERVER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='SOLR_SERVER')

    s2s_count = _get_inst_count(cluster, 'SQOOP_SERVER')
    if s2s_count > 1:
        raise ex.InvalidComponentCountException('SQOOP_SERVER', _('0 or 1'),
                                                s2s_count)
    if s2s_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='SQOOP_SERVER')
        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'YARN_NODEMANAGER', required_by='SQOOP_SERVER')
        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'YARN_JOBHISTORY', required_by='SQOOP_SERVER')

    lhbi_count = _get_inst_count(cluster, 'HBASE_INDEXER')
    if lhbi_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='HBASE_INDEXER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException(
                'ZOOKEEPER', required_by='HBASE_INDEXER')
        if slr_count < 1:
            raise ex.RequiredServiceMissingException(
                'SOLR_SERVER', required_by='HBASE_INDEXER')
        if hbm_count < 1:
            raise ex.RequiredServiceMissingException(
                'HBASE_MASTER', required_by='HBASE_INDEXER')

    ics_count = _get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
    iss_count = _get_inst_count(cluster, 'IMPALA_STATESTORE')
    id_count = _get_inst_count(cluster, 'IMPALAD')
    if ics_count > 1:
        raise ex.InvalidComponentCountException('IMPALA_CATALOGSERVER',
                                                _('0 or 1'), ics_count)
    if iss_count > 1:
        raise ex.InvalidComponentCountException('IMPALA_STATESTORE',
                                                _('0 or 1'), iss_count)
    if ics_count == 1:
        datanodes = set(u.get_instances(cluster, "HDFS_DATANODE"))
        impalads = set(u.get_instances(cluster, "IMPALAD"))
        if len(datanodes ^ impalads) > 0:
            raise ex.InvalidClusterTopology(
                _("IMPALAD must be installed on every HDFS_DATANODE"))

        if iss_count != 1:
            raise ex.RequiredServiceMissingException('IMPALA_STATESTORE',
                                                     required_by='IMPALA')
        if id_count < 1:
            raise ex.RequiredServiceMissingException('IMPALAD',
                                                     required_by='IMPALA')
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('HDFS_DATANODE',
                                                     required_by='IMPALA')
        if hms_count < 1:
            raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                     required_by='IMPALA')

    kms_count = _get_inst_count(cluster, 'KMS')
    if kms_count > 1:
        raise ex.InvalidComponentCountException('KMS', _('0 or 1'), kms_count)
Example #32
 def get_jns(self, cluster):
     return u.get_instances(cluster, 'HDFS_JOURNALNODE')
Example #33
 def start_cluster(self, cluster):
     instances = u.get_instances(cluster)
     cluster_context = self.get_context(cluster, added=instances)
     self._node_manager.start(cluster_context)
     self._configurer.post_start(cluster_context)
Example #34
 def configure_cluster(self, cluster):
     instances = utils.get_instances(cluster)
     self._setup_instances(cluster, instances)
Example #35
def setup_maprfs_on_cluster(cluster, path_to_disk_setup_script):
    mapr_node_list = utils.get_instances(cluster, 'FileServer')
    for instance in mapr_node_list:
        setup_maprfs_on_instance(instance, path_to_disk_setup_script)
Example #36
 def get_instance(self, node_process):
     node_process_name = su.get_node_process_name(node_process)
     instances = u.get_instances(self.cluster, node_process_name)
     return instances[0] if instances else None
Example #37
 def get_hdfs_nodes(self, cluster, instances=None):
     instances = instances if instances else u.get_instances(cluster)
     return u.instances_with_services(
         instances,
         ["HDFS_DATANODE", "HDFS_NAMENODE", "HDFS_SECONDARYNAMENODE"])
Example #38
 def start_cluster(self, cluster):
     self.deploy_kerberos(cluster)
     with context.ThreadGroup() as tg:
         for instance in plugin_utils.get_instances(cluster):
             tg.spawn('fake-check-%s' % instance.id, self._check_ops,
                      instance)
Example #39
 def get_instances(self, node_process=None):
     name = _get_node_process_name(node_process)
     return u.get_instances(self.cluster, name)
Example #40
def start_zookeeper_nodes_on_cluster(cluster):
    zkeeper_node_list = utils.get_instances(cluster, names.ZOOKEEPER)
    for z_keeper_node in zkeeper_node_list:
        run_scripts.start_zookeeper(z_keeper_node.remote())
Example #41
 def configure_cluster(self, cluster):
     with context.ThreadGroup() as tg:
         for instance in plugin_utils.get_instances(cluster):
             tg.spawn('fake-write-%s' % instance.id, self._write_ops,
                      instance)
Example #42
def _is_cluster_configured(cluster, host_info):
    inst = u.get_instances(cluster)[0]
    cat_etc_hosts = 'cat /etc/hosts'
    with inst.remote() as r:
        exit_code, etc_hosts = r.execute_command(cat_etc_hosts)
        return all(host in etc_hosts for host in host_info)
Example #43
def exec_configure_sh_on_cluster(cluster):
    inst_list = utils.get_instances(cluster)
    for n in inst_list:
        exec_configure_sh_on_instance(cluster, n)
Example #44
    def _set_cluster_info(self, cluster):
        ambari_ip = plugin_utils.get_instance(
            cluster, p_common.AMBARI_SERVER).get_ip_or_dns_name()
        ambari_port = "8080"
        info = {
            p_common.AMBARI_SERVER: {
                "Web UI":
                "http://{host}:{port}".format(host=ambari_ip,
                                              port=ambari_port),
                "Username":
                "******",
                "Password":
                cluster.extra["ambari_password"]
            }
        }
        nns = plugin_utils.get_instances(cluster, p_common.NAMENODE)
        info[p_common.NAMENODE] = {}
        for idx, namenode in enumerate(nns):
            info[p_common.NAMENODE]["Web UI %s" % (idx + 1)] = (
                "http://%s:50070" % namenode.get_ip_or_dns_name())

        rms = plugin_utils.get_instances(cluster, p_common.RESOURCEMANAGER)
        info[p_common.RESOURCEMANAGER] = {}
        for idx, resourcemanager in enumerate(rms):
            info[p_common.RESOURCEMANAGER]["Web UI %s" % (idx + 1)] = (
                "http://%s:8088" % resourcemanager.get_ip_or_dns_name())

        historyserver = plugin_utils.get_instance(cluster,
                                                  p_common.HISTORYSERVER)
        if historyserver:
            info[p_common.HISTORYSERVER] = {
                "Web UI":
                "http://%s:19888" % historyserver.get_ip_or_dns_name()
            }
        atlserver = plugin_utils.get_instance(cluster,
                                              p_common.APP_TIMELINE_SERVER)
        if atlserver:
            info[p_common.APP_TIMELINE_SERVER] = {
                "Web UI": "http://%s:8188" % atlserver.get_ip_or_dns_name()
            }
        oozie = plugin_utils.get_instance(cluster, p_common.OOZIE_SERVER)
        if oozie:
            info[p_common.OOZIE_SERVER] = {
                "Web UI": "http://%s:11000/oozie" % oozie.get_ip_or_dns_name()
            }
        hbase_master = plugin_utils.get_instance(cluster,
                                                 p_common.HBASE_MASTER)
        if hbase_master:
            info[p_common.HBASE_MASTER] = {
                "Web UI":
                "http://%s:60010" % hbase_master.get_ip_or_dns_name()
            }
        falcon = plugin_utils.get_instance(cluster, p_common.FALCON_SERVER)
        if falcon:
            info[p_common.FALCON_SERVER] = {
                "Web UI": "http://%s:15000" % falcon.get_ip_or_dns_name()
            }
        storm_ui = plugin_utils.get_instance(cluster, p_common.STORM_UI_SERVER)
        if storm_ui:
            info[p_common.STORM_UI_SERVER] = {
                "Web UI": "http://%s:8744" % storm_ui.get_ip_or_dns_name()
            }
        ranger_admin = plugin_utils.get_instance(cluster,
                                                 p_common.RANGER_ADMIN)
        if ranger_admin:
            info[p_common.RANGER_ADMIN] = {
                "Web UI": "http://%s:6080" % ranger_admin.get_ip_or_dns_name(),
                "Username": "******",
                "Password": "******"
            }
        spark_hs = plugin_utils.get_instance(cluster,
                                             p_common.SPARK_JOBHISTORYSERVER)
        if spark_hs:
            info[p_common.SPARK_JOBHISTORYSERVER] = {
                "Web UI": "http://%s:18080" % spark_hs.get_ip_or_dns_name()
            }
        info.update(cluster.info.to_dict())
        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {"info": info})
        cluster = conductor.cluster_get(ctx, cluster.id)
Example #45
 def _push_namenode_configs(self, cluster, r):
     r.write_file_to(
         '/etc/hadoop/dn.incl',
         utils.generate_fqdn_host_names(
             utils.get_instances(cluster, "datanode")))
     r.write_file_to('/etc/hadoop/dn.excl', '')
Example #46
 def get_instance(self, node_process):
     name = _get_node_process_name(node_process)
     i = u.get_instances(self.cluster, name)
     return i[0] if i else None
Example #47
 def get_instances(self, node_process=None):
     if node_process is not None:
         node_process = su.get_node_process_name(node_process)
     return u.get_instances(self.cluster, node_process)
Example #48
 def get_hive_servers(self, cluster):
     return u.get_instances(cluster, 'HIVE_SERVER2')
Example #49
 def get_datanodes(self, cluster):
     return u.get_instances(cluster, 'HDFS_DATANODE')
Example #50
 def get_flumes(self, cluster):
     return u.get_instances(cluster, 'FLUME_AGENT')
Example #51
 def get_nodemanagers(self, cluster):
     return u.get_instances(cluster, 'YARN_NODEMANAGER')
Example #52
 def get_kafka_brokers(self, cluster):
     return u.get_instances(cluster, 'KAFKA_BROKER')
Example #53
 def get_zookeepers(self, cluster):
     return u.get_instances(cluster, 'ZOOKEEPER_SERVER')
Example #54
 def get_impalads(self, cluster):
     return u.get_instances(cluster, 'IMPALAD')
Example #55
 def get_solrs(self, cluster):
     return u.get_instances(cluster, 'SOLR_SERVER')
Example #56
 def configure_cluster(self, cluster):
     instances = u.get_instances(cluster)
     cluster_context = self.get_context(cluster, added=instances)
     self._configurer.configure(cluster_context)
Example #57
 def get_hbase_indexers(self, cluster):
     return u.get_instances(cluster, 'KEY_VALUE_STORE_INDEXER')
Example #58
 def get_kms(self, cluster):
     return u.get_instances(cluster, 'KMS')
Example #59
def config_user_env(cluster):
    instances = utils.get_instances(cluster)
    user_env = files.get_file_text('plugins/sandbox/hadoop2/resources/user_env.template')
    for instance in instances:
        run.config_env(instance, user_env)
Example #60
def wait_for_mfs_unlock(cluster, path_to_waiting_script):
    mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER)
    for instance in mapr_node_list:
        create_waiting_script_file(instance, path_to_waiting_script)
        exec_waiting_script_on_instance(instance)