def createClusterEntities(cls, colo, desc, name):
    try:
        from beaver.component.falcon import Falcon
    except ImportError:
        # Import fails when Falcon is not installed on this machine. Nothing to do.
        return
    from beaver.component.hadoop import Hadoop, HDFS, YARN

    # Resolve the cluster interface endpoints from the running services.
    write_endpoint = Hadoop.getFSDefaultValue()
    webhdfs_scheme = 'webhdfs'
    if HDFS.isHttpsEnabled():
        webhdfs_scheme = 'swebhdfs'
    read_endpoint = '%s://%s:%s' % (
        webhdfs_scheme, write_endpoint.split('/')[2].split(':')[0], HDFS.getNNWebPort())
    execute_endpoint = YARN.getResourceManager()
    falconNode = Falcon.get_falcon_server()

    from beaver.component.oozie import Oozie
    oozieUrl = Oozie.getOozieUrl()

    # Build the Falcon cluster entity XML.
    entityText = "<?xml version=\"1.0\"?>" \
                 "<cluster colo=\"" + colo + "\" description=\"" + desc + "\" name=\"" + name + "\" " \
                 "xmlns=\"uri:falcon:cluster:0.1\"> " \
                 "<interfaces> " \
                 "<interface type=\"readonly\" endpoint=\"" + read_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"write\" endpoint=\"" + write_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"execute\" endpoint=\"" + execute_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"workflow\" endpoint=\"" + oozieUrl + "\" version=\"3.1\"/>" \
                 "<interface type=\"messaging\" endpoint=\"" \
                 "tcp://" + falconNode + ":61616?daemon=true\" version=\"5.1.6\"/>" \
                 "</interfaces>" \
                 "<locations>" \
                 "<location name=\"staging\" path=\"/apps/falcon/" + name + "/staging\" />" \
                 "<location name=\"temp\" path=\"/tmp\" />" \
                 "<location name=\"working\" path=\"/apps/falcon/" + name + "/working\" />" \
                 "</locations>" \
                 "<ACL owner=\"" + cls._job_user + "\" group=\"users\" permission=\"0755\"/>"
    if Hadoop.isSecure():
        realm = HDFS.getConfigValue('dfs.namenode.kerberos.principal').split('@')[1]
        entityText += "<properties> <property name=\"dfs.namenode.kerberos.principal\" value=\"nn/_HOST@" + realm + "\"/> </properties>"
    entityText += "</cluster>"

    # Write the cluster entity definition to the local workspace as <name>.xml.
    textFile = open(os.path.join(cls._local_workspace, name + ".xml"), "w")
    textFile.write("%s" % entityText)
    textFile.close()
    return
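
# --- Illustrative sketch (not part of the original suite) --------------------
# createClusterEntities above builds the entity XML by string concatenation; a
# minimal alternative sketch using the standard-library ElementTree API, which
# escapes attribute values automatically, is shown here. The endpoint values
# and the default owner are placeholders, not the ones resolved by the beaver
# helpers (cls._job_user, Hadoop/HDFS/YARN/Oozie lookups).
def _build_cluster_entity_sketch(colo, desc, name, endpoints, owner="hrt_qa"):
    import xml.etree.ElementTree as ET
    cluster = ET.Element("cluster", colo=colo, description=desc, name=name,
                         xmlns="uri:falcon:cluster:0.1")
    interfaces = ET.SubElement(cluster, "interfaces")
    # endpoints maps interface type -> (endpoint URL, interface version).
    for itype, (endpoint, version) in endpoints.items():
        ET.SubElement(interfaces, "interface", type=itype, endpoint=endpoint, version=version)
    locations = ET.SubElement(cluster, "locations")
    ET.SubElement(locations, "location", name="staging", path="/apps/falcon/%s/staging" % name)
    ET.SubElement(locations, "location", name="temp", path="/tmp")
    ET.SubElement(locations, "location", name="working", path="/apps/falcon/%s/working" % name)
    ET.SubElement(cluster, "ACL", owner=owner, group="users", permission="0755")
    return ET.tostring(cluster)

# Example call with placeholder endpoints:
# _build_cluster_entity_sketch(
#     "default", "test cluster", "primaryCluster",
#     {"readonly": ("webhdfs://nn-host:50070", "0.20.2"),
#      "write": ("hdfs://nn-host:8020", "0.20.2"),
#      "execute": ("rm-host:8050", "0.20.2"),
#      "workflow": ("http://oozie-host:11000/oozie", "3.1"),
#      "messaging": ("tcp://falcon-host:61616?daemon=true", "5.1.6")})
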
def formatNN_SetupHDFS(duReservedValue, mod_conf_path):
    """
    Format NN. Setup HDFS dirs for MR jobs.
    Note that these permissions are too wide for default HDP use.
    """
    datanodes = HDFS.getDatanodes()
    logger.info("datanodes = %s" % datanodes)

    # Stop HDFS, format the NameNode, and wipe stale DataNode block data.
    HDFS.stopDatanodes()
    HDFS.stopNamenode()
    HDFS.formatNN(force=True, logoutput=True)
    for dn in datanodes:
        Machine.rm(
            user=Machine.getAdminUser(),
            host=dn,
            filepath="%s/current" % HDFS.getConfigValue("dfs.datanode.data.dir"),
            isdir=True)

    # Restart HDFS with the modified configuration and wait for it to settle.
    balancerModifyConfig(duReservedValue)
    HDFS.startNamenode(mod_conf_path)
    HDFS.startDatanodes(mod_conf_path)
    sleepTime = 45
    logger.info("sleep for %s sec" % sleepTime)
    time.sleep(sleepTime)

    # Stage the MR framework tarball under /hdp/apps/<version>/mapreduce.
    version = Hadoop.getShortVersion()
    paths = [
        "/hdp", "/hdp/apps", "/hdp/apps/%s" % version,
        "/hdp/apps/%s/mapreduce" % version
    ]
    for path in paths:
        HDFS.mkdir(path=path, user=HDFS_USER)
    HDFS.chmod(runasUser=HDFS_USER, perm="777", directory="/hdp", recursive=True)
    HDFS.copyFromLocal(
        localpath="/usr/hdp/current/hadoop-client/mapreduce.tar.gz",
        hdfspath="/hdp/apps/%s/mapreduce/" % version)
    sleepTime = 45
    logger.info("sleep for %s sec for MR tarball replication" % sleepTime)
    time.sleep(sleepTime)

    # Create log-aggregation, job-history, and user home directories.
    paths = [
        "/app-logs", "/app-logs/hrt_qa", "/app-logs/hrt_qa/logs", "/mr-history"
    ]
    for path in paths:
        HDFS.mkdir(path=path, user=HDFS_USER)
    HDFS.chmod(runasUser=HDFS_USER, perm="777", directory="/app-logs", recursive=True)
    HDFS.chmod(runasUser=HDFS_USER, perm="777", directory="/mr-history", recursive=True)
    HDFS.mkdir(path="/user", user=HDFS_USER)
    HDFS.mkdir(path="/user/hrt_qa", user=HDFS_USER)
    HDFS.chown(runasUser=HDFS_USER, new_owner="hrt_qa:hrt_qa", directory="/user/hrt_qa", recursive=False)
    HDFS.chmod(runasUser="******", perm="770", directory="/user/hrt_qa", recursive=True)
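
# --- Illustrative sketch (not part of the original suite) --------------------
# The directory setup in formatNN_SetupHDFS repeats one mkdir/chmod pattern per
# directory tree; the same layout could be expressed as data and applied in a
# loop. This sketch assumes only the beaver HDFS helpers already used above
# (HDFS.mkdir / HDFS.chmod) with the same keyword signatures.
def _setup_hdfs_dirs_sketch(layout, hdfs_user):
    from beaver.component.hadoop import HDFS
    # Create every directory first, then apply the (recursive) permissions,
    # so a recursive chmod also covers subdirectories listed later.
    for path, _ in layout:
        HDFS.mkdir(path=path, user=hdfs_user)
    for path, perm in layout:
        if perm is not None:
            HDFS.chmod(runasUser=hdfs_user, perm=perm, directory=path, recursive=True)

# Example layout mirroring the log-aggregation directories created above,
# as (path, perm-or-None) pairs:
# _setup_hdfs_dirs_sketch(
#     [("/app-logs", "777"), ("/app-logs/hrt_qa", None),
#      ("/app-logs/hrt_qa/logs", None), ("/mr-history", "777")],
#     HDFS_USER)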