def test_get_instances(self):
    res = pu.get_instances(self.cluster)
    self.assertEqual(
        [FakeInstance("1"), FakeInstance("2"), FakeInstance("3")], res)

    res = pu.get_instances(self.cluster, "node_process1")
    self.assertEqual([FakeInstance("1")], res)
def test_get_instances(self):
    self.assertEqual(5, len(u.get_instances(self.c1)))
    self.assertEqual([], u.get_instances(self.c1, "wrong-process"))
    self.assertEqual(self.ng1.instances, u.get_instances(self.c1, "nn"))
    instances = list(self.ng2.instances)
    instances += self.ng3.instances
    self.assertEqual(instances, u.get_instances(self.c1, "dn"))
def _configure_hdfs_site(cluster, blueprint):
    props = _find_hdfs_site(blueprint)

    props["dfs.client.failover.proxy.provider.hdfs-ha"] = (
        "org.apache.hadoop.hdfs.server.namenode.ha."
        "ConfiguredFailoverProxyProvider")
    props["dfs.ha.automatic-failover.enabled"] = "true"
    props["dfs.ha.fencing.methods"] = "shell(/bin/true)"
    props["dfs.nameservices"] = "hdfs-ha"

    jns = utils.get_instances(cluster, p_common.JOURNAL_NODE)
    journalnodes_concat = ";".join(
        ["%s:8485" % i.fqdn() for i in jns])
    journalnodes_value = "qjournal://%s/hdfs-ha" % journalnodes_concat
    props["dfs.namenode.shared.edits.dir"] = journalnodes_value

    nns = utils.get_instances(cluster, p_common.NAMENODE)
    nn_id_concat = ",".join([i.instance_name for i in nns])
    props["dfs.ha.namenodes.hdfs-ha"] = nn_id_concat

    props["dfs.namenode.http-address"] = "%s:50070" % nns[0].fqdn()
    props["dfs.namenode.https-address"] = "%s:50470" % nns[0].fqdn()
    for i in nns:
        props["dfs.namenode.http-address.hdfs-ha.%s" % i.instance_name] = (
            "%s:50070" % i.fqdn())
        props["dfs.namenode.https-address.hdfs-ha.%s" % i.instance_name] = (
            "%s:50470" % i.fqdn())
        props["dfs.namenode.rpc-address.hdfs-ha.%s" % i.instance_name] = (
            "%s:8020" % i.fqdn())

    return blueprint
def test_get_instances(self):
    self.assertEqual(len(u.get_instances(self.c1)), 5)
    self.assertEqual(u.get_instances(self.c1, 'wrong-process'), [])
    self.assertEqual(u.get_instances(self.c1, 'nn'), self.ng1.instances)
    instances = list(self.ng2.instances)
    instances += self.ng3.instances
    self.assertEqual(u.get_instances(self.c1, 'dn'), instances)
def restart_nns_and_rms(cluster):
    nns = plugin_utils.get_instances(cluster, p_common.NAMENODE)
    for nn in nns:
        restart_namenode(cluster, nn)

    rms = plugin_utils.get_instances(cluster, p_common.RESOURCEMANAGER)
    for rm in rms:
        restart_resourcemanager(cluster, rm)
def deploy_kerberos(self, cluster):
    all_instances = plugin_utils.get_instances(cluster)
    namenodes = plugin_utils.get_instances(cluster, 'namenode')
    server = None
    if len(namenodes) > 0:
        server = namenodes[0]
    elif len(all_instances) > 0:
        server = all_instances[0]
    if server:
        krb.deploy_infrastructure(cluster, server)
def get_plain_instances(self):
    fs = self.get_fs_instances()
    zk = self.get_zk_instances()
    cldb = self.get_cldb_instances()
    zk_fs_cldb = zk + fs + cldb
    instances = u.get_instances(self.get_cluster())
    return [i for i in instances if i not in zk_fs_cldb]
def start_cluster(self, cluster):
    nn_instance = utils.get_instance(cluster, "namenode")
    sm_instance = utils.get_instance(cluster, "master")
    dn_instances = utils.get_instances(cluster, "datanode")

    # Start the name node
    with remote.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")

    # start the data nodes
    self._start_slave_datanode_processes(dn_instances)
    LOG.info(_LI("Hadoop services in cluster %s have been started"),
             cluster.name)

    with remote.get_remote(nn_instance) as r:
        r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
        r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                          "/user/$USER/")

    # start spark nodes
    if sm_instance:
        with remote.get_remote(sm_instance) as r:
            run.start_spark_master(r, self._spark_home(cluster))
            LOG.info(_LI("Spark service at '%s' has been started"),
                     sm_instance.hostname())

    LOG.info(_LI('Cluster %s has been started successfully'), cluster.name)
    self._set_cluster_info(cluster)
def start_cluster(cluster):
    if pu.get_oozie(cluster):
        _install_extjs(cluster)

    if pu.get_hive_metastore(cluster):
        _configure_hive(cluster)

    if pu.get_sentry(cluster):
        _configure_sentry(cluster)

    cu.first_run(cluster)

    if c_helper.is_swift_enabled(cluster):
        instances = gu.get_instances(cluster)
        _configure_swift(instances)

    if pu.get_hive_metastore(cluster):
        _put_hive_hdfs_xml(cluster)

    if pu.get_flumes(cluster):
        cm_cluster = cu.get_cloudera_cluster(cluster)
        flume = cm_cluster.get_service(cu.FLUME_SERVICE_NAME)
        cu.start_service(flume)

    cu.restart_mgmt_service(cluster)
def start_cluster(self, cluster):
    nn = vu.get_namenode(cluster)
    run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')

    for snn in vu.get_secondarynamenodes(cluster):
        run.start_hadoop_process(snn, 'secondarynamenode')

    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.start_yarn_process(rm, 'resourcemanager')

    run.start_dn_nm_processes(utils.get_instances(cluster))

    run.await_datanodes(cluster)

    hs = vu.get_historyserver(cluster)
    if hs:
        run.start_historyserver(hs)

    oo = vu.get_oozie(cluster)
    if oo:
        run.start_oozie_process(self.pctx, oo)

    hiveserver = vu.get_hiveserver(cluster)
    if hiveserver:
        run.start_hiveserver_process(self.pctx, hiveserver)

    self._set_cluster_info(cluster)
def _validate_existing_ng_scaling(self, cluster, existing):
    scalable_processes = self._get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if (ng.count > existing[ng.id] and
                    "datanode" in ng.node_processes):
                dn_to_delete += ng.count - existing[ng.id]
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, _("Spark plugin cannot scale nodegroup"
                               " with processes: %s") %
                    ' '.join(ng.node_processes))

    dn_amount = len(utils.get_instances(cluster, "datanode"))
    rep_factor = utils.get_config_value_or_default('HDFS',
                                                   "dfs.replication",
                                                   cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        raise ex.ClusterCannotBeScaled(
            cluster.name, _("Spark plugin cannot shrink cluster because "
                            "there would be not enough nodes for HDFS "
                            "replicas (replication factor is %s)") %
            rep_factor)
def _extract_configs_to_extra(self, cluster):
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    extra['sp_master'] = config_master
    extra['sp_slaves'] = config_slaves
    extra['sp_defaults'] = config_defaults

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def setup_agents(cluster, instances=None):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    if not instances:
        instances = plugin_utils.get_instances(cluster)
    _setup_agents(instances, manager_address)
def _configure_hbase_site(cluster, blueprint):
    props = _find_hbase_site(blueprint)

    props["hbase.regionserver.global.memstore.lowerLimit"] = "0.38"
    props["hbase.regionserver.global.memstore.upperLimit"] = "0.4"
    props["hbase.regionserver.handler.count"] = "60"
    props["hbase.regionserver.info.port"] = "60030"
    props["hbase.regionserver.storefile.refresh.period"] = "20"

    props["hbase.rootdir"] = "hdfs://hdfs-ha/apps/hbase/data"

    props["hbase.security.authentication"] = "simple"
    props["hbase.security.authorization"] = "false"
    props["hbase.superuser"] = "******"
    props["hbase.tmp.dir"] = "/hadoop/hbase"
    props["hbase.zookeeper.property.clientPort"] = "2181"

    zk_instances = utils.get_instances(cluster, p_common.ZOOKEEPER_SERVER)
    zk_quorum_value = ",".join([i.fqdn() for i in zk_instances])
    props["hbase.zookeeper.quorum"] = zk_quorum_value

    props["hbase.zookeeper.useMulti"] = "true"
    props["hfile.block.cache.size"] = "0.40"
    props["zookeeper.session.timeout"] = "30000"
    props["zookeeper.znode.parent"] = "/hbase-unsecure"

    return blueprint
def _set_default_fs(cluster, blueprint, ha_type):
    if ha_type == p_common.NAMENODE_HA:
        _find_core_site(blueprint)["fs.defaultFS"] = "hdfs://hdfs-ha"
    elif ha_type == p_common.RESOURCEMANAGER_HA:
        nn_instance = utils.get_instances(cluster, p_common.NAMENODE)[0]
        _find_core_site(blueprint)["fs.defaultFS"] = (
            "hdfs://%s:8020" % nn_instance.fqdn())
    return blueprint
def start_cluster(self, cluster):
    nn_instance = utils.get_instance(cluster, "namenode")
    dn_instances = utils.get_instances(cluster, "datanode")
    zep_instance = utils.get_instance(cluster, "zeppelin")

    # Start the name node
    self._start_namenode(nn_instance)

    # start the data nodes
    self._start_datanode_processes(dn_instances)
    LOG.info(_LI("Hadoop services have been started"))

    with remote.get_remote(nn_instance) as r:
        r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
        r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                          "/user/$USER/")

    # start spark nodes
    self.start_spark(cluster)

    # start zeppelin, if necessary
    if zep_instance:
        self._start_zeppelin(zep_instance)

    LOG.info(_LI('Cluster has been started successfully'))
    self._set_cluster_info(cluster)
def _extract_configs_to_extra(self, cluster):
    st_master = utils.get_instance(cluster, "nimbus")
    zk_servers = utils.get_instances(cluster, "zookeeper")

    extra = dict()

    config_instances = ''
    if st_master is not None:
        if zk_servers is not None:
            zknames = []
            for zk in zk_servers:
                zknames.append(zk.hostname())

        config_instances = c_helper.generate_storm_config(
            st_master.hostname(), zknames)

    config = self._convert_dict_to_yaml(config_instances)
    supervisor_conf = c_helper.generate_slave_supervisor_conf()
    nimbus_ui_conf = c_helper.generate_master_supervisor_conf()
    zk_conf = c_helper.generate_zookeeper_conf()

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'st_instances': config,
            'slave_sv_conf': supervisor_conf,
            'master_sv_conf': nimbus_ui_conf,
            'zk_conf': zk_conf
        }

    return extra
def _set_primary_and_standby_namenode(cluster, blueprint):
    props = _find_hadoop_env(blueprint)
    nns = utils.get_instances(cluster, p_common.NAMENODE)
    props["dfs_ha_initial_namenode_active"] = nns[0].fqdn()
    props["dfs_ha_initial_namenode_standby"] = nns[1].fqdn()
    return blueprint
def start_cluster(self, cluster):
    self._set_cluster_info(cluster)
    deploy.start_cluster(cluster)
    cluster_instances = plugin_utils.get_instances(cluster)
    swift_helper.install_ssl_certs(cluster_instances)
    deploy.add_hadoop_swift_jar(cluster_instances)
    deploy.prepare_hive(cluster)
def _clear_exclude_files(cluster):
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo > %s/dn-exclude" hadoop'
                % HADOOP_CONF_DIR)
            r.execute_command(
                'sudo su - -c "echo > %s/nm-exclude" hadoop'
                % HADOOP_CONF_DIR)
def configure_cluster_for_hdfs(cluster, data_source_url):
    host = urlparse.urlparse(data_source_url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # Ip address hasn't been resolved, the last chance is for VM itself
        return

    # If the cluster was already configured for this data source
    # there's no need to configure it again
    if _is_cluster_configured(cluster, etc_hosts_information.splitlines()):
        return

    etc_hosts_update = ('/tmp/etc-hosts-update'
                        '.%s' % six.text_type(uuidutils.generate_uuid()))
    tmp_etc_hosts = ('/tmp/etc-hosts'
                     '.%s' % six.text_type(uuidutils.generate_uuid()))
    update_etc_hosts_cmd = (
        'cat %(etc_hosts_update)s /etc/hosts | '
        'sort | uniq > %(tmp_etc_hosts)s && '
        'cat %(tmp_etc_hosts)s > /etc/hosts && '
        'rm -f %(tmp_etc_hosts)s %(etc_hosts_update)s' %
        {'etc_hosts_update': etc_hosts_update,
         'tmp_etc_hosts': tmp_etc_hosts})

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to(etc_hosts_update, etc_hosts_information)
            r.execute_command(update_etc_hosts_cmd, run_as_root=True)
def _setup_instances(self, cluster, instances=None):
    extra = self._extract_configs_to_extra(cluster)
    if instances is None:
        instances = utils.get_instances(cluster)
    self._push_configs_to_nodes(cluster, extra, instances)
def start_cluster(self, cluster):
    sm_instance = utils.get_instance(cluster, "nimbus")
    sl_instances = utils.get_instances(cluster, "supervisor")
    zk_instances = utils.get_instances(cluster, "zookeeper")

    # start zookeeper processes
    self._start_zookeeper_processes(zk_instances)

    # start storm master
    if sm_instance:
        self._start_storm_master(sm_instance)

    # start storm slaves
    self._start_slave_processes(sl_instances)

    LOG.info(_LI("Cluster {cluster} has been started successfully").format(
        cluster=cluster.name))
    self._set_cluster_info(cluster)
def _configure_yarn_site(cluster, blueprint):
    props = _find_yarn_site(blueprint)
    name = cluster.name
    rm_instances = utils.get_instances(cluster, p_common.RESOURCEMANAGER)

    props["hadoop.registry.rm.enabled"] = "false"

    zk_instances = utils.get_instances(cluster, p_common.ZOOKEEPER_SERVER)
    zks = ",".join(["%s:2181" % i.fqdn() for i in zk_instances])
    props["yarn.resourcemanager.zk-address"] = zks

    hs = utils.get_instance(cluster, p_common.HISTORYSERVER)
    props["yarn.log.server.url"] = "%s:19888/jobhistory/logs/" % hs.fqdn()

    props["yarn.resourcemanager.address"] = "%s:8050" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.admin.address"] = "%s:8141" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.cluster-id"] = name
    props["yarn.resourcemanager.ha.automatic-failover.zk-base-path"] = (
        "/yarn-leader-election")
    props["yarn.resourcemanager.ha.enabled"] = "true"

    rm_id_concat = ",".join([i.instance_name for i in rm_instances])
    props["yarn.resourcemanager.ha.rm-ids"] = rm_id_concat

    for i in rm_instances:
        props["yarn.resourcemanager.hostname.%s" % i.instance_name] = (
            i.fqdn())
        props["yarn.resourcemanager.webapp.address.%s" %
              i.instance_name] = "%s:8088" % i.fqdn()
        props["yarn.resourcemanager.webapp.https.address.%s" %
              i.instance_name] = "%s:8090" % i.fqdn()

    props["yarn.resourcemanager.hostname"] = rm_instances[0].fqdn()
    props["yarn.resourcemanager.recovery.enabled"] = "true"
    props["yarn.resourcemanager.resource-tracker.address"] = "%s:8025" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.scheduler.address"] = "%s:8030" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.store.class"] = (
        "org.apache.hadoop.yarn.server.resourcemanager.recovery."
        "ZKRMStateStore")
    props["yarn.resourcemanager.webapp.address"] = "%s:8088" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.webapp.https.address"] = "%s:8090" % (
        rm_instances[0].fqdn())

    tls_instance = utils.get_instance(cluster, p_common.APP_TIMELINE_SERVER)
    props["yarn.timeline-service.address"] = "%s:10200" % (
        tls_instance.fqdn())
    props["yarn.timeline-service.webapp.address"] = "%s:8188" % (
        tls_instance.fqdn())
    props["yarn.timeline-service.webapp.https.address"] = "%s:8190" % (
        tls_instance.fqdn())

    return blueprint
def setup_agents(cluster):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    with context.ThreadGroup() as tg:
        for inst in plugin_utils.get_instances(cluster):
            tg.spawn("hwx-agent-setup-%s" % inst.id,
                     _setup_agent, inst, manager_address)
    LOG.debug("Ambari agents have been installed")
def _update_exclude_files(cluster, instances):
    datanodes = _get_instances_with_service(instances, "datanode")
    nodemanagers = _get_instances_with_service(instances, "nodemanager")
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                "sudo su - -c \"echo '%s' > %s/dn-exclude\" hadoop" %
                (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                "sudo su - -c \"echo '%s' > %s/nm-exclude\" hadoop" %
                (nm_hosts, HADOOP_CONF_DIR))
def disable_repos(cluster):
    if configs.use_base_repos_needed(cluster):
        LOG.debug("Using base repos")
        return
    instances = plugin_utils.get_instances(cluster)
    with context.ThreadGroup() as tg:
        for inst in instances:
            tg.spawn("disable-repos-%s" % inst.instance_name,
                     _disable_repos_on_inst, inst)
def _get_cluster_hosts_information(host, cluster):
    for clust in conductor.cluster_get_all(context.ctx()):
        if clust.id == cluster.id:
            continue

        for i in u.get_instances(clust):
            if i.instance_name == host:
                return g.generate_etc_hosts(clust)

    return None
def _setup_instances(self, cluster, instances):
    if (CONF.use_identity_api_v3 and CONF.use_domain_for_proxy_users and
            vu.get_hiveserver(cluster) and
            c_helper.is_swift_enable(cluster)):
        cluster = proxy.create_proxy_user_for_cluster(cluster)
        instances = utils.get_instances(cluster)

    extra = self._extract_configs_to_extra(cluster)
    cluster = conductor.cluster_get(context.ctx(), cluster)
    self._push_configs_to_nodes(cluster, extra, instances)
def configure_swift(self, cluster, instances=None):
    if self.c_helper.is_swift_enabled(cluster):
        if not instances:
            instances = u.get_instances(cluster)
        cpo.add_provisioning_step(
            cluster.id, _("Configure Swift"), len(instances))
        with context.ThreadGroup() as tg:
            for i in instances:
                tg.spawn("cdh-swift-conf-%s" % i.instance_name,
                         self._configure_swift_to_inst, i)
        swift_helper.install_ssl_certs(instances)
def validate_cluster_creating(cluster):
    mng_count = _get_inst_count(cluster, 'CLOUDERA_MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('CLOUDERA_MANAGER',
                                                1, mng_count)

    zk_count = _get_inst_count(cluster, 'ZOOKEEPER_SERVER')

    nn_count = _get_inst_count(cluster, 'HDFS_NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('HDFS_NAMENODE',
                                                1, nn_count)

    snn_count = _get_inst_count(cluster, 'HDFS_SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('HDFS_SECONDARYNAMENODE', 1,
                                                snn_count)

    dn_count = _get_inst_count(cluster, 'HDFS_DATANODE')
    replicas = PU.get_config_value('HDFS', 'dfs_replication', cluster)
    if dn_count < replicas:
        raise ex.InvalidComponentCountException(
            'HDFS_DATANODE', replicas, dn_count,
            _('Number of datanodes must be not less than dfs_replication.'))

    jn_count = _get_inst_count(cluster, 'HDFS_JOURNALNODE')
    require_anti_affinity = PU.c_helper.get_required_anti_affinity(cluster)
    if jn_count > 0:
        if jn_count < 3:
            raise ex.InvalidComponentCountException('HDFS_JOURNALNODE',
                                                    _('not less than 3'),
                                                    jn_count)
        if not jn_count % 2:
            raise ex.InvalidComponentCountException('HDFS_JOURNALNODE',
                                                    _('be odd'), jn_count)
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='HDFS HA')
        if require_anti_affinity:
            if 'HDFS_SECONDARYNAMENODE' not in _get_anti_affinity(cluster):
                raise ex.NameNodeHAConfigurationError(
                    _('HDFS_SECONDARYNAMENODE should be enabled '
                      'in anti_affinity.'))
            if 'HDFS_NAMENODE' not in _get_anti_affinity(cluster):
                raise ex.NameNodeHAConfigurationError(
                    _('HDFS_NAMENODE should be enabled in anti_affinity.'))

    rm_count = _get_inst_count(cluster, 'YARN_RESOURCEMANAGER')
    if rm_count > 1:
        raise ex.InvalidComponentCountException('YARN_RESOURCEMANAGER',
                                                _('0 or 1'), rm_count)

    stdb_rm_count = _get_inst_count(cluster, 'YARN_STANDBYRM')
    if stdb_rm_count > 1:
        raise ex.InvalidComponentCountException('YARN_STANDBYRM',
                                                _('0 or 1'), stdb_rm_count)
    if stdb_rm_count > 0:
        if rm_count < 1:
            raise ex.RequiredServiceMissingException('YARN_RESOURCEMANAGER',
                                                     required_by='RM HA')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='RM HA')
        if require_anti_affinity:
            if 'YARN_RESOURCEMANAGER' not in _get_anti_affinity(cluster):
                raise ex.ResourceManagerHAConfigurationError(
                    _('YARN_RESOURCEMANAGER should be enabled in '
                      'anti_affinity.'))
            if 'YARN_STANDBYRM' not in _get_anti_affinity(cluster):
                raise ex.ResourceManagerHAConfigurationError(
                    _('YARN_STANDBYRM should be enabled in anti_affinity.'))

    hs_count = _get_inst_count(cluster, 'YARN_JOBHISTORY')
    if hs_count > 1:
        raise ex.InvalidComponentCountException('YARN_JOBHISTORY',
                                                _('0 or 1'), hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException(
            'YARN_JOBHISTORY', required_by='YARN_RESOURCEMANAGER')

    nm_count = _get_inst_count(cluster, 'YARN_NODEMANAGER')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException(
                'YARN_RESOURCEMANAGER', required_by='YARN_NODEMANAGER')

    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    if oo_count > 1:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', _('0 or 1'),
                                                oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='OOZIE_SERVER')
        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'YARN_NODEMANAGER', required_by='OOZIE_SERVER')
        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'YARN_JOBHISTORY', required_by='OOZIE_SERVER')

    hms_count = _get_inst_count(cluster, 'HIVE_METASTORE')
    hvs_count = _get_inst_count(cluster, 'HIVE_SERVER2')
    whc_count = _get_inst_count(cluster, 'HIVE_WEBHCAT')

    if hms_count and rm_count < 1:
        raise ex.RequiredServiceMissingException('YARN_RESOURCEMANAGER',
                                                 required_by='HIVE_METASTORE')

    if hms_count and not hvs_count:
        raise ex.RequiredServiceMissingException('HIVE_SERVER2',
                                                 required_by='HIVE_METASTORE')

    if hvs_count and not hms_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HIVE_SERVER2')

    if whc_count and not hms_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HIVE_WEBHCAT')

    hue_count = _get_inst_count(cluster, 'HUE_SERVER')
    if hue_count > 1:
        raise ex.InvalidComponentCountException('HUE_SERVER', _('0 or 1'),
                                                hue_count)

    shs_count = _get_inst_count(cluster, 'SPARK_YARN_HISTORY_SERVER')
    if shs_count > 1:
        raise ex.InvalidComponentCountException('SPARK_YARN_HISTORY_SERVER',
                                                _('0 or 1'), shs_count)
    if shs_count and not rm_count:
        raise ex.RequiredServiceMissingException(
            'YARN_RESOURCEMANAGER', required_by='SPARK_YARN_HISTORY_SERVER')

    if oo_count < 1 and hue_count:
        raise ex.RequiredServiceMissingException('OOZIE_SERVER',
                                                 required_by='HUE_SERVER')

    if hms_count < 1 and hue_count:
        raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                 required_by='HUE_SERVER')

    hbm_count = _get_inst_count(cluster, 'HBASE_MASTER')
    hbr_count = _get_inst_count(cluster, 'HBASE_REGIONSERVER')

    if hbm_count >= 1:
        if zk_count < 1:
            raise ex.RequiredServiceMissingException('ZOOKEEPER',
                                                     required_by='HBASE')
        if hbr_count < 1:
            raise ex.InvalidComponentCountException(
                'HBASE_REGIONSERVER', _('at least 1'), hbr_count)
    elif hbr_count >= 1:
        raise ex.InvalidComponentCountException('HBASE_MASTER',
                                                _('at least 1'), hbm_count)

    a_count = _get_inst_count(cluster, 'FLUME_AGENT')
    if a_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='FLUME_AGENT')

    snt_count = _get_inst_count(cluster, 'SENTRY_SERVER')
    if snt_count > 1:
        raise ex.InvalidComponentCountException('SENTRY_SERVER', _('0 or 1'),
                                                snt_count)
    if snt_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='SENTRY_SERVER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException(
                'ZOOKEEPER', required_by='SENTRY_SERVER')

    slr_count = _get_inst_count(cluster, 'SOLR_SERVER')
    if slr_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='SOLR_SERVER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException(
                'ZOOKEEPER', required_by='SOLR_SERVER')

    s2s_count = _get_inst_count(cluster, 'SQOOP_SERVER')
    if s2s_count > 1:
        raise ex.InvalidComponentCountException('SQOOP_SERVER', _('0 or 1'),
                                                s2s_count)
    if s2s_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='SQOOP_SERVER')
        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'YARN_NODEMANAGER', required_by='SQOOP_SERVER')
        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'YARN_JOBHISTORY', required_by='SQOOP_SERVER')

    lhbi_count = _get_inst_count(cluster, 'HBASE_INDEXER')
    if lhbi_count >= 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'HDFS_DATANODE', required_by='HBASE_INDEXER')
        if zk_count < 1:
            raise ex.RequiredServiceMissingException(
                'ZOOKEEPER', required_by='HBASE_INDEXER')
        if slr_count < 1:
            raise ex.RequiredServiceMissingException(
                'SOLR_SERVER', required_by='HBASE_INDEXER')
        if hbm_count < 1:
            raise ex.RequiredServiceMissingException(
                'HBASE_MASTER', required_by='HBASE_INDEXER')

    ics_count = _get_inst_count(cluster, 'IMPALA_CATALOGSERVER')
    iss_count = _get_inst_count(cluster, 'IMPALA_STATESTORE')
    id_count = _get_inst_count(cluster, 'IMPALAD')
    if ics_count > 1:
        raise ex.InvalidComponentCountException('IMPALA_CATALOGSERVER',
                                                _('0 or 1'), ics_count)
    if iss_count > 1:
        raise ex.InvalidComponentCountException('IMPALA_STATESTORE',
                                                _('0 or 1'), iss_count)
    if ics_count == 1:
        datanodes = set(u.get_instances(cluster, "HDFS_DATANODE"))
        impalads = set(u.get_instances(cluster, "IMPALAD"))
        if len(datanodes ^ impalads) > 0:
            raise ex.InvalidClusterTopology(
                _("IMPALAD must be installed on every HDFS_DATANODE"))

        if iss_count != 1:
            raise ex.RequiredServiceMissingException('IMPALA_STATESTORE',
                                                     required_by='IMPALA')
        if id_count < 1:
            raise ex.RequiredServiceMissingException('IMPALAD',
                                                     required_by='IMPALA')
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('HDFS_DATANODE',
                                                     required_by='IMPALA')
        if hms_count < 1:
            raise ex.RequiredServiceMissingException('HIVE_METASTORE',
                                                     required_by='IMPALA')

    kms_count = _get_inst_count(cluster, 'KMS')
    if kms_count > 1:
        raise ex.InvalidComponentCountException('KMS',
                                                _('0 or 1'), kms_count)
def get_jns(self, cluster):
    return u.get_instances(cluster, 'HDFS_JOURNALNODE')
def start_cluster(self, cluster):
    instances = u.get_instances(cluster)
    cluster_context = self.get_context(cluster, added=instances)
    self._node_manager.start(cluster_context)
    self._configurer.post_start(cluster_context)
def configure_cluster(self, cluster):
    instances = utils.get_instances(cluster)
    self._setup_instances(cluster, instances)
def setup_maprfs_on_cluster(cluster, path_to_disk_setup_script):
    mapr_node_list = utils.get_instances(cluster, 'FileServer')
    for instance in mapr_node_list:
        setup_maprfs_on_instance(instance, path_to_disk_setup_script)
def get_instance(self, node_process):
    node_process_name = su.get_node_process_name(node_process)
    instances = u.get_instances(self.cluster, node_process_name)
    return instances[0] if instances else None
def get_hdfs_nodes(self, cluster, instances=None):
    instances = instances if instances else u.get_instances(cluster)
    return u.instances_with_services(
        instances, ["HDFS_DATANODE", "HDFS_NAMENODE",
                    "HDFS_SECONDARYNAMENODE"])
def start_cluster(self, cluster):
    self.deploy_kerberos(cluster)
    with context.ThreadGroup() as tg:
        for instance in plugin_utils.get_instances(cluster):
            tg.spawn('fake-check-%s' % instance.id,
                     self._check_ops, instance)
def get_instances(self, node_process=None):
    name = _get_node_process_name(node_process)
    return u.get_instances(self.cluster, name)
def start_zookeeper_nodes_on_cluster(cluster):
    zkeeper_node_list = utils.get_instances(cluster, names.ZOOKEEPER)
    for z_keeper_node in zkeeper_node_list:
        run_scripts.start_zookeeper(z_keeper_node.remote())
def configure_cluster(self, cluster):
    with context.ThreadGroup() as tg:
        for instance in plugin_utils.get_instances(cluster):
            tg.spawn('fake-write-%s' % instance.id,
                     self._write_ops, instance)
def _is_cluster_configured(cluster, host_info):
    inst = u.get_instances(cluster)[0]
    cat_etc_hosts = 'cat /etc/hosts'
    with inst.remote() as r:
        exit_code, etc_hosts = r.execute_command(cat_etc_hosts)
    return all(host in etc_hosts for host in host_info)
def exec_configure_sh_on_cluster(cluster):
    inst_list = utils.get_instances(cluster)
    for n in inst_list:
        exec_configure_sh_on_instance(cluster, n)
def _set_cluster_info(self, cluster):
    ambari_ip = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).get_ip_or_dns_name()
    ambari_port = "8080"
    info = {
        p_common.AMBARI_SERVER: {
            "Web UI": "http://{host}:{port}".format(host=ambari_ip,
                                                    port=ambari_port),
            "Username": "******",
            "Password": cluster.extra["ambari_password"]
        }
    }
    nns = plugin_utils.get_instances(cluster, p_common.NAMENODE)
    info[p_common.NAMENODE] = {}
    for idx, namenode in enumerate(nns):
        info[p_common.NAMENODE]["Web UI %s" % (idx + 1)] = (
            "http://%s:50070" % namenode.get_ip_or_dns_name())

    rms = plugin_utils.get_instances(cluster, p_common.RESOURCEMANAGER)
    info[p_common.RESOURCEMANAGER] = {}
    for idx, resourcemanager in enumerate(rms):
        info[p_common.RESOURCEMANAGER]["Web UI %s" % (idx + 1)] = (
            "http://%s:8088" % resourcemanager.get_ip_or_dns_name())

    historyserver = plugin_utils.get_instance(cluster,
                                              p_common.HISTORYSERVER)
    if historyserver:
        info[p_common.HISTORYSERVER] = {
            "Web UI": "http://%s:19888" %
                      historyserver.get_ip_or_dns_name()
        }
    atlserver = plugin_utils.get_instance(cluster,
                                          p_common.APP_TIMELINE_SERVER)
    if atlserver:
        info[p_common.APP_TIMELINE_SERVER] = {
            "Web UI": "http://%s:8188" % atlserver.get_ip_or_dns_name()
        }
    oozie = plugin_utils.get_instance(cluster, p_common.OOZIE_SERVER)
    if oozie:
        info[p_common.OOZIE_SERVER] = {
            "Web UI": "http://%s:11000/oozie" % oozie.get_ip_or_dns_name()
        }
    hbase_master = plugin_utils.get_instance(cluster,
                                             p_common.HBASE_MASTER)
    if hbase_master:
        info[p_common.HBASE_MASTER] = {
            "Web UI": "http://%s:60010" % hbase_master.get_ip_or_dns_name()
        }
    falcon = plugin_utils.get_instance(cluster, p_common.FALCON_SERVER)
    if falcon:
        info[p_common.FALCON_SERVER] = {
            "Web UI": "http://%s:15000" % falcon.get_ip_or_dns_name()
        }
    storm_ui = plugin_utils.get_instance(cluster, p_common.STORM_UI_SERVER)
    if storm_ui:
        info[p_common.STORM_UI_SERVER] = {
            "Web UI": "http://%s:8744" % storm_ui.get_ip_or_dns_name()
        }
    ranger_admin = plugin_utils.get_instance(cluster,
                                             p_common.RANGER_ADMIN)
    if ranger_admin:
        info[p_common.RANGER_ADMIN] = {
            "Web UI": "http://%s:6080" % ranger_admin.get_ip_or_dns_name(),
            "Username": "******",
            "Password": "******"
        }
    spark_hs = plugin_utils.get_instance(cluster,
                                         p_common.SPARK_JOBHISTORYSERVER)
    if spark_hs:
        info[p_common.SPARK_JOBHISTORYSERVER] = {
            "Web UI": "http://%s:18080" % spark_hs.get_ip_or_dns_name()
        }
    info.update(cluster.info.to_dict())
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"info": info})
    cluster = conductor.cluster_get(ctx, cluster.id)
def _push_namenode_configs(self, cluster, r):
    r.write_file_to(
        '/etc/hadoop/dn.incl',
        utils.generate_fqdn_host_names(
            utils.get_instances(cluster, "datanode")))
    r.write_file_to('/etc/hadoop/dn.excl', '')
def get_instance(self, node_process):
    name = _get_node_process_name(node_process)
    i = u.get_instances(self.cluster, name)
    return i[0] if i else None
def get_instances(self, node_process=None):
    if node_process is not None:
        node_process = su.get_node_process_name(node_process)
    return u.get_instances(self.cluster, node_process)
def get_hive_servers(self, cluster):
    return u.get_instances(cluster, 'HIVE_SERVER2')

def get_datanodes(self, cluster):
    return u.get_instances(cluster, 'HDFS_DATANODE')

def get_flumes(self, cluster):
    return u.get_instances(cluster, 'FLUME_AGENT')

def get_nodemanagers(self, cluster):
    return u.get_instances(cluster, 'YARN_NODEMANAGER')

def get_kafka_brokers(self, cluster):
    return u.get_instances(cluster, 'KAFKA_BROKER')

def get_zookeepers(self, cluster):
    return u.get_instances(cluster, 'ZOOKEEPER_SERVER')

def get_impalads(self, cluster):
    return u.get_instances(cluster, 'IMPALAD')

def get_solrs(self, cluster):
    return u.get_instances(cluster, 'SOLR_SERVER')
def configure_cluster(self, cluster):
    instances = u.get_instances(cluster)
    cluster_context = self.get_context(cluster, added=instances)
    self._configurer.configure(cluster_context)
def get_hbase_indexers(self, cluster):
    return u.get_instances(cluster, 'KEY_VALUE_STORE_INDEXER')

def get_kms(self, cluster):
    return u.get_instances(cluster, 'KMS')
def config_user_env(cluster):
    instances = utils.get_instances(cluster)
    user_env = files.get_file_text(
        'plugins/sandbox/hadoop2/resources/user_env.template')
    for instance in instances:
        run.config_env(instance, user_env)
def wait_for_mfs_unlock(cluster, path_to_waiting_script):
    mapr_node_list = utils.get_instances(cluster, names.FILE_SERVER)
    for instance in mapr_node_list:
        create_waiting_script_file(instance, path_to_waiting_script)
        exec_waiting_script_on_instance(instance)