def start_cluster(self, cluster):
    nn_instance = utils.get_instance(cluster, "namenode")
    dn_instances = utils.get_instances(cluster, "datanode")
    zep_instance = utils.get_instance(cluster, "zeppelin")

    # Start the name node
    self._start_namenode(nn_instance)

    # start the data nodes
    self._start_datanode_processes(dn_instances)
    LOG.info(_LI("Hadoop services have been started"))

    with remote.get_remote(nn_instance) as r:
        r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
        r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                          "/user/$USER/")

    # start spark nodes
    self.start_spark(cluster)

    # start zeppelin, if necessary
    if zep_instance:
        self._start_zeppelin(zep_instance)

    LOG.info(_LI('Cluster has been started successfully'))
    self._set_cluster_info(cluster)
def _prepare_ranger(cluster):
    ranger = plugin_utils.get_instance(cluster, p_common.RANGER_ADMIN)
    if not ranger:
        return
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    with ambari.remote() as r:
        sudo = functools.partial(r.execute_command, run_as_root=True)
        sudo("yum install -y mysql-connector-java")
        sudo("ambari-server setup --jdbc-db=mysql "
             "--jdbc-driver=/usr/share/java/mysql-connector-java.jar")
    init_db_template = (
        "create user 'root'@'%' identified by '{password}';\n"
        "set password for 'root'@'localhost' = password('{password}');")
    password = uuidutils.generate_uuid()
    extra = cluster.extra.to_dict() if cluster.extra else {}
    extra["ranger_db_password"] = password
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"extra": extra})
    with ranger.remote() as r:
        sudo = functools.partial(r.execute_command, run_as_root=True)
        # TODO(sreshetnyak): add ubuntu support
        sudo("yum install -y mysql-server")
        sudo("service mysqld start")
        r.write_file_to("/tmp/init.sql",
                        init_db_template.format(password=password))
        sudo("mysql < /tmp/init.sql")
        sudo("rm /tmp/init.sql")
def start_cluster(self, cluster): nn_instance = utils.get_instance(cluster, "namenode") sm_instance = utils.get_instance(cluster, "master") dn_instances = utils.get_instances(cluster, "datanode") # Start the name node with remote.get_remote(nn_instance) as r: run.format_namenode(r) run.start_processes(r, "namenode") # start the data nodes self._start_slave_datanode_processes(dn_instances) LOG.info(_LI("Hadoop services in cluster %s have been started"), cluster.name) with remote.get_remote(nn_instance) as r: r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/") r.execute_command("sudo -u hdfs hdfs dfs -chown $USER " "/user/$USER/") # start spark nodes if sm_instance: with remote.get_remote(sm_instance) as r: run.start_spark_master(r, self._spark_home(cluster)) LOG.info(_LI("Spark service at '%s' has been started"), sm_instance.hostname()) LOG.info(_LI('Cluster %s has been started successfully'), cluster.name) self._set_cluster_info(cluster)
def test_get_instance(self):
    self.assertRaises(ex.InvalidComponentCountException,
                      pu.get_instance, self.cluster, None)

    res = pu.get_instance(self.cluster, "node_process")
    self.assertIsNone(res)

    res = pu.get_instance(self.cluster, "node_process1")
    self.assertEqual(FakeInstance("1"), res)
def _extract_configs_to_extra(self, cluster): nn = utils.get_instance(cluster, "namenode") sp_master = utils.get_instance(cluster, "master") sp_slaves = utils.get_instances(cluster, "slave") extra = dict() config_master = config_slaves = '' if sp_master is not None: config_master = c_helper.generate_spark_env_configs(cluster) if sp_slaves is not None: slavenames = [] for slave in sp_slaves: slavenames.append(slave.hostname()) config_slaves = c_helper.generate_spark_slaves_configs(slavenames) else: config_slaves = "\n" # Any node that might be used to run spark-submit will need # these libs for swift integration config_defaults = c_helper.generate_spark_executor_classpath(cluster) extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster) for ng in cluster.node_groups: extra[ng.id] = { 'xml': c_helper.generate_xml_configs( ng.configuration(), ng.storage_paths(), nn.hostname(), None ), 'setup_script': c_helper.generate_hadoop_setup_script( ng.storage_paths(), c_helper.extract_hadoop_environment_confs( ng.configuration()) ), 'sp_master': config_master, 'sp_slaves': config_slaves, 'sp_defaults': config_defaults } if "zeppelin" in ng.node_processes: extra[ng.id].update({ "zeppelin_setup_script": c_helper.generate_zeppelin_setup_script(sp_master)}) if c_helper.is_data_locality_enabled(cluster): topology_data = th.generate_topology_map( cluster, CONF.enable_hypervisor_awareness) extra['topology_data'] = "\n".join( [k + " " + v for k, v in topology_data.items()]) + "\n" return extra
def scale_cluster(self, cluster, instances): master = utils.get_instance(cluster, "master") r_master = remote.get_remote(master) run.stop_spark(r_master, self._spark_home(cluster)) self._setup_instances(cluster, instances) nn = utils.get_instance(cluster, "namenode") run.refresh_nodes(remote.get_remote(nn), "dfsadmin") self._start_slave_datanode_processes(instances) run.start_spark_master(r_master, self._spark_home(cluster)) LOG.info(_LI("Spark master service at '%s' has been restarted"), master.hostname())
def scale_cluster(self, cluster, instances): master = utils.get_instance(cluster, "master") r_master = remote.get_remote(master) run.stop_spark(r_master, self._spark_home(cluster)) self._setup_instances(cluster, instances) nn = utils.get_instance(cluster, "namenode") run.refresh_nodes(remote.get_remote(nn), "dfsadmin") dn_instances = [instance for instance in instances if "datanode" in instance.node_group.node_processes] self._start_datanode_processes(dn_instances) run.start_spark_master(r_master, self._spark_home(cluster)) LOG.info(_LI("Spark master service has been restarted"))
def create_blueprint(cluster):
    _prepare_ranger(cluster)
    cluster = conductor.cluster_get(context.ctx(), cluster.id)
    host_groups = []
    for ng in cluster.node_groups:
        procs = p_common.get_ambari_proc_list(ng)
        procs.extend(p_common.get_clients(cluster))
        for instance in ng.instances:
            hg = {
                "name": instance.instance_name,
                "configurations": configs.get_instance_params(instance),
                "components": []
            }
            for proc in procs:
                hg["components"].append({"name": proc})
            host_groups.append(hg)
    bp = {
        "Blueprints": {
            "stack_name": "HDP",
            "stack_version": cluster.hadoop_version
        },
        "host_groups": host_groups,
        "configurations": configs.get_cluster_params(cluster)
    }
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    with ambari_client.AmbariClient(ambari, password=password) as client:
        client.create_blueprint(cluster.name, bp)
def start_cluster(cluster):
    cl_tmpl = {
        "blueprint": cluster.name,
        "default_password": uuidutils.generate_uuid(),
        "host_groups": []
    }
    for ng in cluster.node_groups:
        for instance in ng.instances:
            cl_tmpl["host_groups"].append({
                "name": instance.instance_name,
                "hosts": [{"fqdn": instance.fqdn()}]
            })
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    with ambari_client.AmbariClient(ambari, password=password) as client:
        req_id = client.create_cluster(cluster.name, cl_tmpl)["id"]
        while True:
            status = client.check_request_status(cluster.name, req_id)
            LOG.debug("Task %s in %s state. Completed %.1f%%" % (
                status["request_context"], status["request_status"],
                status["progress_percent"]))
            if status["request_status"] == "COMPLETED":
                return
            if status["request_status"] in ["IN_PROGRESS", "PENDING"]:
                context.sleep(5)
            else:
                raise p_exc.HadoopProvisionError(
                    _("Ambari request in %s state") %
                    status["request_status"])
def _extract_configs_to_extra(self, cluster):
    st_master = utils.get_instance(cluster, "nimbus")
    zk_servers = utils.get_instances(cluster, "zookeeper")

    extra = dict()

    config_instances = ''
    if st_master is not None:
        if zk_servers is not None:
            zknames = []
            for zk in zk_servers:
                zknames.append(zk.hostname())

            config_instances = c_helper.generate_storm_config(
                st_master.hostname(), zknames)

    config = self._convert_dict_to_yaml(config_instances)
    supervisor_conf = c_helper.generate_slave_supervisor_conf()
    nimbus_ui_conf = c_helper.generate_master_supervisor_conf()
    zk_conf = c_helper.generate_zookeeper_conf()

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'st_instances': config,
            'slave_sv_conf': supervisor_conf,
            'master_sv_conf': nimbus_ui_conf,
            'zk_conf': zk_conf
        }

    return extra
def __init__(self, cluster):
    super(EdpCdhSparkEngine, self).__init__(cluster)
    self.master = plugin_utils.get_instance(cluster, "CLOUDERA_MANAGER")
    self.plugin_params["spark-user"] = "******"
    self.plugin_params["spark-submit"] = "spark-submit"
    self.plugin_params["deploy-mode"] = "cluster"
    self.plugin_params["master"] = "yarn-cluster"
def _get_job_status_from_remote(self, job_execution, retries=3):
    topology_name, inst_id = self._get_instance_if_running(
        job_execution)
    if topology_name is None or inst_id is None:
        return edp.JOB_STATUSES_TERMINATED

    topology_name = self._get_topology_name(job_execution)
    master = plugin_utils.get_instance(self.cluster, "nimbus")

    cmd = (
        "%(storm)s -c nimbus.host=%(host)s "
        "list | grep %(topology_name)s | awk '{print $2}'") % (
        {
            "storm": "/usr/local/storm/bin/storm",
            "host": master.hostname(),
            "topology_name": topology_name
        })

    for i in range(retries):
        with remote.get_remote(master) as r:
            ret, stdout = r.execute_command("%s " % (cmd))

        # If the reported status is ACTIVE, the topology is still running
        if stdout.strip() == "ACTIVE":
            return {"status": edp.JOB_STATUS_RUNNING}
        else:
            if i == retries - 1:
                return {"status": edp.JOB_STATUS_KILLED}
            context.sleep(10)
def manage_host_components(cluster, instances):
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    requests_ids = []
    with ambari_client.AmbariClient(ambari, password=password) as client:
        clients = p_common.get_clients(cluster)
        for instance in instances:
            services = p_common.get_ambari_proc_list(instance.node_group)
            services.extend(clients)
            for service in services:
                client.add_service_to_host(instance, service)
                requests_ids.append(
                    client.start_service_on_host(
                        instance, service, 'INSTALLED'))
        client.wait_ambari_requests(requests_ids, cluster.name)

        # all services added and installed, let's start them
        requests_ids = []
        for instance in instances:
            services = p_common.get_ambari_proc_list(instance.node_group)
            services.extend(p_common.ALL_LIST)
            for service in services:
                requests_ids.append(
                    client.start_service_on_host(
                        instance, service, 'STARTED'))
        client.wait_ambari_requests(requests_ids, cluster.name)
def _extract_configs_to_extra(self, cluster):
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    extra['sp_master'] = config_master
    extra['sp_slaves'] = config_slaves
    extra['sp_defaults'] = config_defaults

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def setup_agents(cluster, instances=None):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    if not instances:
        instances = plugin_utils.get_instances(cluster)
    _setup_agents(instances, manager_address)
def get_alerts_data(self, service=None):
    if self._data is not None:
        # return cached data
        return self._data.get(service, []) if service else self._data
    self._data = {}
    self._cluster_services = []
    try:
        ambari = plugin_utils.get_instance(
            self.cluster, p_common.AMBARI_SERVER)
        password = self.cluster.extra.get("ambari_password")
        with client.AmbariClient(ambari, password=password) as ambari:
            resp = ambari.get_alerts_data(self.cluster)
        for alert in resp:
            alert = alert.get('Alert', {})
            service = alert.get('service_name').lower()
            if service not in self._data:
                self._data[service] = []
                self._cluster_services.append(service)
            self._data[service].append(alert)
    except Exception as e:
        prefix = _("Can't get response from Ambari Monitor")
        msg = _("%(problem)s: %(description)s") % {
            'problem': prefix, 'description': six.text_type(e)}
        # don't add the exception to the message; LOG.exception records it
        LOG.exception(prefix)
        self._exception_store = msg
def generate_spark_env_configs(cluster):
    configs = []

    # master configuration
    sp_master = utils.get_instance(cluster, "master")
    configs.append('SPARK_MASTER_IP=' + sp_master.hostname())

    # point to the hadoop conf dir so that Spark can read things
    # like the swift configuration without having to copy core-site
    # to /opt/spark/conf
    configs.append('HADOOP_CONF_DIR=' + HADOOP_CONF_DIR)

    masterport = utils.get_config_value_or_default("Spark",
                                                   "Master port",
                                                   cluster)
    if masterport and masterport != _get_spark_opt_default("Master port"):
        configs.append('SPARK_MASTER_PORT=' + str(masterport))

    masterwebport = utils.get_config_value_or_default("Spark",
                                                      "Master webui port",
                                                      cluster)
    if (masterwebport and
            masterwebport != _get_spark_opt_default("Master webui port")):
        configs.append('SPARK_MASTER_WEBUI_PORT=' + str(masterwebport))

    # configuration for workers
    workercores = utils.get_config_value_or_default("Spark",
                                                    "Worker cores",
                                                    cluster)
    if workercores and workercores != _get_spark_opt_default("Worker cores"):
        configs.append('SPARK_WORKER_CORES=' + str(workercores))

    workermemory = utils.get_config_value_or_default("Spark",
                                                     "Worker memory",
                                                     cluster)
    if (workermemory and
            workermemory != _get_spark_opt_default("Worker memory")):
        configs.append('SPARK_WORKER_MEMORY=' + str(workermemory))

    workerport = utils.get_config_value_or_default("Spark",
                                                   "Worker port",
                                                   cluster)
    if workerport and workerport != _get_spark_opt_default("Worker port"):
        configs.append('SPARK_WORKER_PORT=' + str(workerport))

    workerwebport = utils.get_config_value_or_default("Spark",
                                                      "Worker webui port",
                                                      cluster)
    if (workerwebport and
            workerwebport != _get_spark_opt_default("Worker webui port")):
        configs.append('SPARK_WORKER_WEBUI_PORT=' + str(workerwebport))

    workerinstances = utils.get_config_value_or_default("Spark",
                                                        "Worker instances",
                                                        cluster)
    if (workerinstances and
            workerinstances != _get_spark_opt_default("Worker instances")):
        configs.append('SPARK_WORKER_INSTANCES=' + str(workerinstances))
    return '\n'.join(configs)
def start_cluster(cluster):
    ambari_template = _build_ambari_cluster_template(cluster)

    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    with ambari_client.AmbariClient(ambari, password=password) as client:
        req_id = client.create_cluster(cluster.name, ambari_template)["id"]
        client.wait_ambari_request(req_id, cluster.name)
def _set_cluster_info(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    info = {}

    if nn:
        address = utils.get_config_value_or_default("HDFS",
                                                    "dfs.http.address",
                                                    cluster)
        port = address[address.rfind(":") + 1:]
        info["HDFS"] = {
            "Web UI": "http://%s:%s" % (nn.management_ip, port)
        }
        info["HDFS"]["NameNode"] = "hdfs://%s:8020" % nn.hostname()

    if sp_master:
        port = utils.get_config_value_or_default("Spark",
                                                 "Master webui port",
                                                 cluster)
        if port is not None:
            info["Spark"] = {
                "Web UI": "http://%s:%s" % (sp_master.management_ip, port)
            }
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"info": info})
def manage_config_groups(cluster, instances):
    groups = []
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    for instance in instances:
        groups.extend(configs.get_config_group(instance))
    with ambari_client.AmbariClient(ambari, password=password) as client:
        client.create_config_group(cluster, groups)
def _configure_yarn_site(cluster, blueprint):
    props = _find_yarn_site(blueprint)
    name = cluster.name
    rm_instances = utils.get_instances(cluster, p_common.RESOURCEMANAGER)

    props["hadoop.registry.rm.enabled"] = "false"

    zk_instances = utils.get_instances(cluster, p_common.ZOOKEEPER_SERVER)
    zks = ",".join(["%s:2181" % i.fqdn() for i in zk_instances])
    props["yarn.resourcemanager.zk-address"] = zks

    hs = utils.get_instance(cluster, p_common.HISTORYSERVER)
    props["yarn.log.server.url"] = "%s:19888/jobhistory/logs/" % hs.fqdn()

    props["yarn.resourcemanager.address"] = "%s:8050" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.admin.address"] = "%s:8141" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.cluster-id"] = name
    props["yarn.resourcemanager.ha.automatic-failover.zk-base-path"] = (
        "/yarn-leader-election")
    props["yarn.resourcemanager.ha.enabled"] = "true"

    rm_id_concat = ",".join([i.instance_name for i in rm_instances])
    props["yarn.resourcemanager.ha.rm-ids"] = rm_id_concat

    for i in rm_instances:
        props["yarn.resourcemanager.hostname.%s" % i.instance_name] = (
            i.fqdn())
        props["yarn.resourcemanager.webapp.address.%s" %
              i.instance_name] = "%s:8088" % i.fqdn()
        props["yarn.resourcemanager.webapp.https.address.%s" %
              i.instance_name] = "%s:8090" % i.fqdn()

    props["yarn.resourcemanager.hostname"] = rm_instances[0].fqdn()
    props["yarn.resourcemanager.recovery.enabled"] = "true"
    props["yarn.resourcemanager.resource-tracker.address"] = "%s:8025" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.scheduler.address"] = "%s:8030" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.store.class"] = (
        "org.apache.hadoop.yarn.server.resourcemanager.recovery."
        "ZKRMStateStore")
    props["yarn.resourcemanager.webapp.address"] = "%s:8088" % (
        rm_instances[0].fqdn())
    props["yarn.resourcemanager.webapp.https.address"] = "%s:8090" % (
        rm_instances[0].fqdn())

    tls_instance = utils.get_instance(cluster, p_common.APP_TIMELINE_SERVER)
    props["yarn.timeline-service.address"] = "%s:10200" % (
        tls_instance.fqdn())
    props["yarn.timeline-service.webapp.address"] = "%s:8188" % (
        tls_instance.fqdn())
    props["yarn.timeline-service.webapp.https.address"] = "%s:8190" % (
        tls_instance.fqdn())

    return blueprint
def __init__(self, cluster):
    super(EdpSparkEngine, self).__init__(cluster)
    self.master = u.get_instance(cluster, "SPARK_YARN_HISTORY_SERVER")
    self.plugin_params["spark-user"] = "******"
    self.plugin_params["spark-submit"] = "spark-submit"
    self.plugin_params["deploy-mode"] = "cluster"
    self.plugin_params["master"] = "yarn-cluster"
    driver_cp = u.get_config_value_or_default("Spark",
                                              "Executor extra classpath",
                                              self.cluster)
    self.plugin_params["driver-class-path"] = driver_cp
def __init__(self, cluster):
    super(EDPSparkEngine, self).__init__(cluster)
    # searching for spark instance
    self.master = plugin_utils.get_instance(
        cluster, p_common.SPARK_JOBHISTORYSERVER)
    self.plugin_params["spark-user"] = "******"
    self.plugin_params["spark-submit"] = "spark-submit"
    self.plugin_params["deploy-mode"] = "cluster"
    self.plugin_params["master"] = "yarn-cluster"
def setup_ambari(cluster): LOG.debug("Set up Ambari management console") ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER) with ambari.remote() as r: sudo = functools.partial(r.execute_command, run_as_root=True) sudo("ambari-server setup -s -j" " `cut -f2 -d \"=\" /etc/profile.d/99-java.sh`", timeout=1800) sudo("service ambari-server start") LOG.debug("Ambari management console installed")
def setup_agents(cluster):
    LOG.debug("Set up Ambari agents")
    manager_address = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).fqdn()
    with context.ThreadGroup() as tg:
        for inst in plugin_utils.get_instances(cluster):
            tg.spawn("hwx-agent-setup-%s" % inst.id,
                     _setup_agent, inst, manager_address)
    LOG.debug("Ambari agents have been installed")
def _set_cluster_info(self, cluster):
    st_master = utils.get_instance(cluster, "nimbus")
    info = {}

    if st_master:
        port = "8080"
        info["Storm"] = {
            "Web UI": "http://%s:%s" % (st_master.management_ip, port)
        }
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"info": info})
def _wait_all_processes_removed(cluster, instance):
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]

    with ambari_client.AmbariClient(ambari, password=password) as client:
        while True:
            hdp_processes = client.list_host_processes(cluster.name,
                                                       instance)
            if not hdp_processes:
                return
            context.sleep(5)
def update_default_ambari_password(cluster):
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    new_password = uuidutils.generate_uuid()
    with ambari_client.AmbariClient(ambari) as client:
        client.update_user_password("admin", "admin", new_password)
    extra = cluster.extra.to_dict() if cluster.extra else {}
    extra["ambari_password"] = new_password
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"extra": extra})
    cluster = conductor.cluster_get(ctx, cluster.id)
def _extract_configs_to_extra(self, cluster): nn = utils.get_instance(cluster, "namenode") sp_master = utils.get_instance(cluster, "master") sp_slaves = utils.get_instances(cluster, "slave") extra = dict() config_master = config_slaves = '' if sp_master is not None: config_master = c_helper.generate_spark_env_configs(cluster) if sp_slaves is not None: slavenames = [] for slave in sp_slaves: slavenames.append(slave.hostname()) config_slaves = c_helper.generate_spark_slaves_configs(slavenames) else: config_slaves = "\n" for ng in cluster.node_groups: extra[ng.id] = { 'xml': c_helper.generate_xml_configs( ng.configuration(), ng.storage_paths(), nn.hostname(), None, ), 'setup_script': c_helper.generate_hadoop_setup_script( ng.storage_paths(), c_helper.extract_hadoop_environment_confs( ng.configuration()) ), 'sp_master': config_master, 'sp_slaves': config_slaves } if c_helper.is_data_locality_enabled(cluster): topology_data = th.generate_topology_map( cluster, CONF.enable_hypervisor_awareness) extra['topology_data'] = "\n".join( [k + " " + v for k, v in topology_data.items()]) + "\n" return extra
def setup_ambari(cluster): LOG.debug("Set up Ambari management console") ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER) with ambari.remote() as r: sudo = functools.partial(r.execute_command, run_as_root=True) sudo("ambari-server setup -s -j" " `cut -f2 -d \"=\" /etc/profile.d/99-java.sh`", timeout=1800) redirect_file = "/tmp/%s" % uuidutils.generate_uuid() sudo("service ambari-server start >{rfile} && " "cat {rfile} && rm {rfile}".format(rfile=redirect_file)) LOG.debug("Ambari management console installed")
def get_spark_historyserver(self, cluster):
    return u.get_instance(cluster, 'SPARK_YARN_HISTORY_SERVER')
def get_sentry(self, cluster):
    return u.get_instance(cluster, 'SENTRY_SERVER')
def _get_ambari_client(cluster):
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    password = cluster.extra["ambari_password"]
    return ambari_client.AmbariClient(ambari, password=password)
def get_secondarynamenode(self, cluster):
    return u.get_instance(cluster, 'HDFS_SECONDARYNAMENODE')
def get_sqoop(self, cluster):
    return u.get_instance(cluster, 'SQOOP_SERVER')
def wait_ambari_accessible(cluster):
    ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
    kwargs = {"host": ambari.management_ip, "port": 8080}
    plugin_utils.poll(_check_port_accessible, kwargs=kwargs, timeout=300)
def get_hive_metastore(self, cluster):
    return u.get_instance(cluster, 'HIVE_METASTORE')
def get_namenode(cluster):
    return u.get_instance(cluster, 'HDFS_NAMENODE')
def start_spark(self, cluster):
    sm_instance = utils.get_instance(cluster, "master")
    if sm_instance:
        self._start_spark(cluster, sm_instance)
def get_hbase_master(self, cluster):
    return u.get_instance(cluster, 'HBASE_MASTER')
def _set_cluster_info(self, cluster):
    ambari_ip = plugin_utils.get_instance(
        cluster, p_common.AMBARI_SERVER).get_ip_or_dns_name()
    ambari_port = "8080"
    info = {
        p_common.AMBARI_SERVER: {
            "Web UI": "http://{host}:{port}".format(host=ambari_ip,
                                                    port=ambari_port),
            "Username": "******",
            "Password": cluster.extra["ambari_password"]
        }
    }
    nns = plugin_utils.get_instances(cluster, p_common.NAMENODE)
    info[p_common.NAMENODE] = {}
    for idx, namenode in enumerate(nns):
        info[p_common.NAMENODE]["Web UI %s" % (idx + 1)] = (
            "http://%s:50070" % namenode.get_ip_or_dns_name())

    rms = plugin_utils.get_instances(cluster, p_common.RESOURCEMANAGER)
    info[p_common.RESOURCEMANAGER] = {}
    for idx, resourcemanager in enumerate(rms):
        info[p_common.RESOURCEMANAGER]["Web UI %s" % (idx + 1)] = (
            "http://%s:8088" % resourcemanager.get_ip_or_dns_name())

    historyserver = plugin_utils.get_instance(cluster,
                                              p_common.HISTORYSERVER)
    if historyserver:
        info[p_common.HISTORYSERVER] = {
            "Web UI": "http://%s:19888" %
                      historyserver.get_ip_or_dns_name()
        }
    atlserver = plugin_utils.get_instance(cluster,
                                          p_common.APP_TIMELINE_SERVER)
    if atlserver:
        info[p_common.APP_TIMELINE_SERVER] = {
            "Web UI": "http://%s:8188" % atlserver.get_ip_or_dns_name()
        }
    oozie = plugin_utils.get_instance(cluster, p_common.OOZIE_SERVER)
    if oozie:
        info[p_common.OOZIE_SERVER] = {
            "Web UI": "http://%s:11000/oozie" % oozie.get_ip_or_dns_name()
        }
    hbase_master = plugin_utils.get_instance(cluster,
                                             p_common.HBASE_MASTER)
    if hbase_master:
        info[p_common.HBASE_MASTER] = {
            "Web UI": "http://%s:60010" % hbase_master.get_ip_or_dns_name()
        }
    falcon = plugin_utils.get_instance(cluster, p_common.FALCON_SERVER)
    if falcon:
        info[p_common.FALCON_SERVER] = {
            "Web UI": "http://%s:15000" % falcon.get_ip_or_dns_name()
        }
    storm_ui = plugin_utils.get_instance(cluster, p_common.STORM_UI_SERVER)
    if storm_ui:
        info[p_common.STORM_UI_SERVER] = {
            "Web UI": "http://%s:8744" % storm_ui.get_ip_or_dns_name()
        }
    ranger_admin = plugin_utils.get_instance(cluster,
                                             p_common.RANGER_ADMIN)
    if ranger_admin:
        info[p_common.RANGER_ADMIN] = {
            "Web UI": "http://%s:6080" % ranger_admin.get_ip_or_dns_name(),
            "Username": "******",
            "Password": "******"
        }
    spark_hs = plugin_utils.get_instance(cluster,
                                         p_common.SPARK_JOBHISTORYSERVER)
    if spark_hs:
        info[p_common.SPARK_JOBHISTORYSERVER] = {
            "Web UI": "http://%s:18080" % spark_hs.get_ip_or_dns_name()
        }
    info.update(cluster.info.to_dict())
    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {"info": info})
    cluster = conductor.cluster_get(ctx, cluster.id)
def get_manager(self, cluster):
    return u.get_instance(cluster, 'CLOUDERA_MANAGER')
def get_namenode(self, cluster):
    return u.get_instance(cluster, "HDFS_NAMENODE")
def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)
    data_source_urls = {}

    additional_sources, updated_job_configs = (
        job_utils.resolve_data_source_references(
            job_execution.job_configs, job_execution.id, data_source_urls)
    )

    job_execution = conductor.job_execution_update(
        ctx, job_execution, {"data_source_urls": data_source_urls})

    # We'll always run the driver program on the master
    master = plugin_utils.get_instance(self.cluster, "nimbus")

    # TODO(tmckay): wf_dir should probably be configurable.
    # The only requirement is that the dir is writable by the image user
    wf_dir = job_utils.create_workflow_dir(master, '/tmp/storm-edp', job,
                                           job_execution.id, "700")

    paths = self._upload_job_files(master, wf_dir, job,
                                   updated_job_configs)

    # We can shorten the paths in this case since we'll run out of wf_dir
    paths = [os.path.basename(p) for p in paths]

    app_jar = paths.pop(0)
    job_class = updated_job_configs["configs"]["edp.java.main_class"]
    topology_name = self._generate_topology_name(job.name)

    # Launch the storm job using storm jar
    host = master.hostname()
    args = updated_job_configs.get('args', [])
    args = " ".join([arg for arg in args])

    if args:
        args = " " + args

    cmd = (
        '%(storm_jar)s -c nimbus.host=%(host)s %(job_jar)s '
        '%(main_class)s %(topology_name)s%(args)s' % (
            {
                "storm_jar": "/usr/local/storm/bin/storm jar",
                "main_class": job_class,
                "job_jar": app_jar,
                "host": host,
                "topology_name": topology_name,
                "args": args
            }))

    job_execution = conductor.job_execution_get(ctx, job_execution.id)
    if job_execution.info['status'] == edp.JOB_STATUS_TOBEKILLED:
        return (None, edp.JOB_STATUS_KILLED, None)

    # If an exception is raised here, the job_manager will mark
    # the job failed and log the exception
    # The redirects of stdout and stderr will preserve output in the wf_dir
    with remote.get_remote(master) as r:
        # Upload the command launch script
        launch = os.path.join(wf_dir, "launch_command")
        r.write_file_to(launch, self._job_script())
        r.execute_command("chmod +x %s" % launch)
        ret, stdout = r.execute_command(
            "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!"
            % (wf_dir, cmd))

    if ret == 0:
        # Success, we'll add the wf_dir in job_execution.extra and store
        # topology_name@instance_id as the job id
        # We know the job is running so return "RUNNING"
        return (topology_name + "@" + master.id,
                edp.JOB_STATUS_RUNNING,
                {'storm-path': wf_dir})

    # Hmm, no exception but something failed.
    # Since we're using backgrounding with redirect, this is unlikely.
    raise e.EDPError(_("Storm job execution failed. Exit status = "
                       "%(status)s, stdout = %(stdout)s") %
                     {'status': ret, 'stdout': stdout})
def get_hue(self, cluster):
    return u.get_instance(cluster, 'HUE_SERVER')
def get_oozie(self, cluster):
    return u.get_instance(cluster, 'OOZIE_SERVER')
def get_kdc_server(cluster):
    return plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
def get_statestore(self, cluster):
    return u.get_instance(cluster, 'IMPALA_STATESTORE')
def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)

    # This will be a dictionary of tuples, (native_url, runtime_url)
    # keyed by data_source id
    data_source_urls = {}

    additional_sources, updated_job_configs = (
        job_utils.resolve_data_source_references(job_execution.job_configs,
                                                 job_execution.id,
                                                 data_source_urls,
                                                 self.cluster)
    )

    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {"data_source_urls": job_utils.to_url_dict(data_source_urls)})

    # Now that we've recorded the native urls, we can switch to the
    # runtime urls
    data_source_urls = job_utils.to_url_dict(data_source_urls,
                                             runtime=True)

    # We'll always run the driver program on the master
    master = plugin_utils.get_instance(self.cluster, "nimbus")

    # TODO(tmckay): wf_dir should probably be configurable.
    # The only requirement is that the dir is writable by the image user
    wf_dir = job_utils.create_workflow_dir(master, '/tmp/storm-edp', job,
                                           job_execution.id, "700")

    paths = self._upload_job_files(master, wf_dir, job,
                                   updated_job_configs)

    # We can shorten the paths in this case since we'll run out of wf_dir
    paths = [os.path.basename(p) for p in paths]

    topology_name = self._set_topology_name(job_execution, job.name)

    # Launch the storm job using storm jar
    host = master.hostname()
    cmd = self._build_command(paths, updated_job_configs, host,
                              topology_name)

    job_execution = conductor.job_execution_get(ctx, job_execution.id)
    if job_execution.info['status'] == edp.JOB_STATUS_TOBEKILLED:
        return (None, edp.JOB_STATUS_KILLED, None)

    ret, stdout = self._execute_remote_job(master, wf_dir, cmd)
    if ret == 0:
        # Success, we'll add the wf_dir in job_execution.extra and store
        # topology_name@instance_id as the job id
        # We know the job is running so return "RUNNING"
        return (topology_name + "@" + master.id,
                edp.JOB_STATUS_RUNNING,
                {'storm-path': wf_dir})

    # Hmm, no exception but something failed.
    # Since we're using backgrounding with redirect, this is unlikely.
    raise e.EDPError(
        _("Storm job execution failed. Exit status = "
          "%(status)s, stdout = %(stdout)s") %
        {'status': ret, 'stdout': stdout})
def get_catalogserver(self, cluster):
    return u.get_instance(cluster, 'IMPALA_CATALOGSERVER')
def get_oozie_server(self, cluster):
    return plugin_utils.get_instance(cluster, p_common.OOZIE_SERVER)
def get_historyserver(self, cluster):
    return u.get_instance(cluster, 'YARN_JOBHISTORY')
def get_stdb_rm(self, cluster):
    return u.get_instance(cluster, 'YARN_STANDBYRM')
def get_oozie_server_uri(self, cluster):
    oozie = plugin_utils.get_instance(cluster, p_common.OOZIE_SERVER)
    return "http://%s:11000/oozie" % oozie.management_ip
def get_instance(self, node_process):
    return u.get_instance(self.cluster, node_process)
def get_resourcemanager(self, cluster):
    return u.get_instance(cluster, 'YARN_RESOURCEMANAGER')