def run_background_job(cls, runSmokeTestSetup=True, config=None, flagFile="/tmp/flagFile"):
    '''
    Uploads files to HDFS before the upgrade starts and runs a long-running sleep job in the background
    :return: number of applications started
    '''
    # start long running application which performs I/O operations (BUG-23838)
    #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
    #jobArgs = {"mapred.job.queue.name": cls._queue}
    #HadoopJobHelper.runSleepJob(numOfMaps=1, numOfReduce=1, mapSleepTime="10000000", reduceSleepTime="100", extraJobArg=jobArgs, runInBackground=True, config=config, directoutput=False)
    #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background=True)
    # load generator
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    HDFS.deleteDirectory(flagFile)
    slavelist = HDFS.getDatanodes()
    jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
        HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir, cls._lgTestDataDir, flagFile)
    Hadoop.runInBackground(jobCmd)
    time.sleep(15)
    return 1
def updateJobProperties(cls, propFile, properties=None, haEnabled=False, debug=False):
    fileSystemName = Hadoop.getFSDefaultValue()
    jobTrackerIP = MAPRED.getJobtrackerAddress()
    jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]

    if not properties:
        properties = {}
    if 'nameNode' not in properties:
        properties['nameNode'] = fileSystemName
    if 'jobTracker' not in properties:
        properties['jobTracker'] = jobTracker

    if "hcatalog" in propFile:
        if Hadoop.isSecure():
            kerberosPrincipal = Hive.getConfigValue("hive.metastore.kerberos.principal")
            properties['hive.metastore.kerberos.principal'] = kerberosPrincipal
        logger.info("Updating for hcatalog workflow")
        hcatNode = Hive.getConfigValue("hive.metastore.uris").replace('thrift', 'hcat')
        logger.info("Hcat node is " + hcatNode)
        properties['hcatNode'] = hcatNode

    if Hadoop.isSecure():
        # determine the namenode and the jobtracker principal
        if haEnabled:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeByState('active'))
        else:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeHttpAddress()[0])
        jtPrincipal = MAPRED.getMasterPrincipal().replace('_HOST', jobTrackerIP[0])
        properties['dfs.namenode.kerberos.principal'] = nnPrincipal
        properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal

    wfPath = util.getPropertyValueFromFile(propFile, "oozie.wf.application.path")
    if wfPath is not None and wfPath.find("hdfs://localhost:9000") != -1:
        wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
        logger.info("Value of replaced oozie.wf.application.path is " + wfPath)
        properties['oozie.wf.application.path'] = wfPath

    util.writePropertiesToFile(propFile, propFile, properties)

    if debug:
        logger.info('Content of properties file %s' % propFile)
        # print the file to the console
        f = open(propFile, 'r')
        logger.info(f.read())
        f.close()
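# Illustrative sketch (not part of the framework): a minimal Java-style
# .properties read/update/write round trip, which is roughly the cycle the
# util.getPropertyValueFromFile / util.writePropertiesToFile helpers perform
# above. This hypothetical helper ignores escapes and line continuations.
def _demo_update_properties(path, updates):
    props = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key, _, value = line.partition('=')
            props[key.strip()] = value.strip()
    props.update(updates)
    with open(path, 'w') as f:
        for key, value in sorted(props.items()):
            f.write('%s=%s\n' % (key, value))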
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Upload Data to HDFS before Upgrade starts
    Creates /user/hrt_qa/test_rollingupgrade dir on HDFS
    Upload 20 files to /user/hrt_qa/test_rollingupgrade
    '''
    if not cls._base_hdfs_dir:
        cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get('hadoop', 'HADOOPQA_USER')
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, force=True)
    ruAssert("HDFS", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'HDFS_RU_TEST')
    localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
    HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1, 20, 40, 1000)
    HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"), cls._base_hdfs_dir)

    # set up for loadGenerator
    cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
    cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
    cls._lgStructureDir = Machine.getTempDir() + "/structure"

    # test dir setup
    HDFS.deleteDirectory(cls._lgTestDataDir)
    HDFS.deleteDirectory(cls._lgTestOutputDir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(Machine.getAdminUser(), command, None, None, None, "True", Machine.getAdminPasswd())
    command = "mkdir " + cls._lgStructureDir
    stdout = Machine.runas(None, command, None, None, None, "True", None)
    Machine.chmod("777", cls._lgStructureDir, "True", Machine.getAdminUser(), None, Machine.getAdminPasswd())

    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    # structure generator
    jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] StructureGenerator failed")
    # data generator
    jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()
def getComponnetsToTest(cls, compFile, depFile):
    '''
    Get the components that are being tested according to depFile
    '''
    # read in the config file
    conf = RuSetup.readJson(compFile)
    isStandalone = conf[RuSetup.CONF_STANDALONE]
    RuSetup._skipQueue = set(conf[RuSetup.CONF_SKIP_QUEUE])
    RuSetup._defaultQueue = conf[RuSetup.CONF_DEFAULT_QUEUE]
    if isStandalone:
        # get the components to test
        returnSet = set(conf[RuSetup.CONF_COMPONENTS_TEST])
    else:
        returnSet = set(RuSetup.getComponentsAffected(compFile, depFile))
    # skip tests according to cluster settings
    if not HDFS.isHAEnabled():
        logger.info("Skip HDFS since HA is not enabled")
        returnSet.discard("hdfs")
    # as discussed in the RU standup for 11/13, enable storm-slider on
    # non-HA clusters and standalone storm on HA clusters
    if YARN.isHAEnabled():
        returnSet.discard("storm-slider")
    else:
        returnSet.discard("storm")
    if Hadoop.isEncrypted():
        returnSet.discard("knox")
        returnSet.discard("falcon")
    if Hadoop.isTez():
        logger.info("Add tez since Hadoop.isTez()")
        returnSet.add("tez")
    else:
        logger.info("Make sure tez is not in the list since Hadoop.isTez() is false")
        returnSet.discard("tez")
    # Note: beaver.component.xa is always importable, even if xa is not installed,
    # so this block works even on clusters without xa
    from beaver.component.xa import Xa
    if Xa.isArgusInstalled():
        logger.info("Add argus since argus is there")
        returnSet.add("argus")
    else:
        logger.info("Make sure argus is not in the list since it's not available")
        returnSet.discard("argus")
    return list(returnSet)
def restartRemoteActiveNN(cls, wait=10, host=None):
    if host is None:
        host = Config.get("falcon", "HOST2")
    active_namenode = cls.getActiveNN(host, "NAMENODE")
    logger.info("Current active NameNode before killing: %s" % active_namenode)
    Hadoop.killService('namenode', hdfs_user, active_namenode)
    cls.stopComponent(host, "NAMENODE", active_namenode)
    time.sleep(wait)
    cls.startComponent(host, "NAMENODE", active_namenode)
    logger.info("Active NameNode after restart: %s" % active_namenode)
    return
def getAttemptIdsForJobIdAndStoreInFile(jobId, myTask="map"):
    artifactsDir = CommonHadoopEnv.getArtifactsDir()
    saveFilePath = os.path.join(artifactsDir, "AttemptIdFile")
    listAttemptCmd = " job -list-attempt-ids " + jobId + " " + myTask + " running "
    out = Hadoop.run(listAttemptCmd)
    # persist the attempt ids (stdout of the command) for later processing
    util.writeToFile(out[1], saveFilePath)
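# Illustrative companion sketch: reading back the attempt-id file written
# above. "AttemptIdFile" under ARTIFACTS_DIR is the path the helper uses; the
# line filter assumes attempt ids follow the usual "attempt_" prefix.
def _demo_read_attempt_ids():
    path = os.path.join(CommonHadoopEnv.getArtifactsDir(), "AttemptIdFile")
    with open(path) as f:
        return [line.strip() for line in f if line.strip().startswith("attempt_")]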
def submit_storm_hive_topology(cls, tcId, className, args, useStandaloneCmd):
    if Hadoop.isSecure():
        user_realm = None
        if Config.hasOption('machine', 'USER_REALM'):
            user_realm = Config.get('machine', 'USER_REALM', '')
        else:
            nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
            atloc = nnKerbPrincipal.find("@")
            if atloc != -1:
                user_realm = nnKerbPrincipal[atloc + 1:]
        if user_realm is not None:
            args += " " + Machine.getHeadlessUserKeytab(Config.getEnv('USER')) + \
                    " " + Config.getEnv('USER') + '@' + user_realm

    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HIVE_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    logger.info(exit_code)
    ruAssert("Storm", exit_code == 0, "[StormHiveSubmit] %s Failed" % tcId)
def _postStopAction(cls, service):
    if service == 'hiveserver2':
        logger.info("Hard kill Tez sessions")
        yarn_user = YARN.getYarnUser()
        apps = YARN.getApplicationIDList(state='NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING')
        if len(apps) > 0:
            for app in apps:
                YARN.killApplicationAs(app, user=yarn_user)
            time.sleep(5)

        logger.info("Hard kill the HS2 application if still running")
        admin_user = Machine.getAdminUser()
        hosts = cls.getServiceHosts(service)
        port = cls.getHiveserver2ThriftPort()
        for host in hosts:
            pid = Machine.getPIDByPort(port, host=host, user=admin_user)
            if pid:
                logger.info("Found process for '%s' with PID %d" % (service, pid))
                Machine.killProcessRemote(pid, host=host, user=admin_user)
                time.sleep(2)

        logger.info("Hard kill proc_llap daemon due to BUG-62657")
        allnodes = util.getAllNodes() if Machine.isHumboldt() else Hadoop.getAllNodes()
        for node in allnodes:
            proc_llap_pids = Machine.getProcessListRemote(
                node, format="%U %p %P %a", filter="proc_llap", logoutput=True)
            if len(proc_llap_pids) != 0:
                proc_llap_pid = Machine.getPidFromString(proc_llap_pids[0], yarn_user)
                if proc_llap_pid:
                    logger.info("Found proc_llap process with PID %d on %s" % (proc_llap_pid, node))
                    Machine.killProcessRemote(proc_llap_pid, host=node, user=admin_user)
                    time.sleep(2)
def getHSIInstances(cls):
    """
    Returns the list of active and passive Hive Server Interactive instances.
    """
    hsi_dict = {'active': [], 'passive': []}
    zkhosts = Zookeeper.getZKHosts()
    if Hadoop.isSecure():
        zNode = 'hs2ActivePassiveHA-sasl'
    else:
        zNode = 'hs2ActivePassiveHA-unsecure'
    exit_code, stdout = Zookeeper.runZKCli("ls /%s/instances" % zNode, server=zkhosts[0])
    instances = stdout.split('\n')[-1]
    logger.info(instances)
    if instances == '[]':
        return hsi_dict
    for each_bracket in ['[', ']']:
        instances = instances.replace(each_bracket, '')
    instances_list = instances.split(', ')
    for each_instance in instances_list:
        exit_code, out = Zookeeper.runZKCli("get /%s/instances/%s" % (zNode, each_instance), server=zkhosts[0])
        json_data = None
        for line in out.split("\n"):
            if re.search(".*JSONServiceRecord.*", line):
                json_data = line
                break
        if json_data is None:
            # no service record found for this instance; skip it
            continue
        instance_dict = ast.literal_eval(json_data)
        instance_host = instance_dict['hive.server2.thrift.bind.host']
        if instance_dict['internal'][0]['api'] == 'passiveEndpoint':
            hsi_dict['passive'].append(instance_host)
        else:
            hsi_dict['active'].append(instance_host)
    logger.info(hsi_dict)
    return hsi_dict
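# Minimal standalone sketch of the ZooKeeper output parsing used above,
# assuming the last stdout line of a `zkCli ls` call looks like
# "[inst-0, inst-1]". Purely illustrative; the helper name is hypothetical.
def _demo_parse_zk_children(ls_stdout):
    last = ls_stdout.strip().split('\n')[-1]  # e.g. "[inst-0, inst-1]"
    inner = last.strip('[]').strip()
    return [child.strip() for child in inner.split(',')] if inner else []

# _demo_parse_zk_children("...\n[inst-0, inst-1]") -> ['inst-0', 'inst-1']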
def getClasspathForJdbcClient():
    jdbc_standalone_jar = getStandaloneHiveJdbcJar()
    assert jdbc_standalone_jar, "No JDBC standalone jar found"
    classpath = [jdbc_standalone_jar]
    hadoop_home = Config.get('hadoop', 'HADOOP_HOME')
    if Hadoop.isHadoop2() and Machine.type() == 'Windows':
        hadoop_home = os.path.join(hadoop_home, "share", "hadoop", "common")
    hadoop_common_jar = util.findMatchingFiles(hadoop_home, "hadoop-common-*[!(tests)].jar", depth=1)
    assert len(hadoop_common_jar) > 0, "No hadoop-common.jar found"
    classpath.append(hadoop_common_jar[0])
    if Hadoop.isSecure():
        hadoop_auth_jar = util.findMatchingFiles(hadoop_home, "hadoop-auth-*[!(tests)].jar", depth=1)
        assert len(hadoop_auth_jar) > 0, "No hadoop-auth.jar found"
        classpath.append(hadoop_auth_jar[0])
    classpath.append(Config.get('hadoop', 'HADOOP_CONF'))
    return os.pathsep.join(classpath)
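# Illustrative usage (hypothetical driver class and JDBC URL): launching a JDBC
# client with the classpath assembled above. os.pathsep keeps the join portable
# between ':' on Linux and ';' on Windows.
def _demo_run_jdbc_client():
    cmd = 'java -cp "%s" org.example.JdbcSmokeTest "jdbc:hive2://localhost:10000/default"' % (
        getClasspathForJdbcClient())
    return Machine.run(cmd)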
def __init__(self, host='localhost', port=None, isHttps=False):
    if port is None:
        # parentheses make the original operator precedence explicit:
        # `and` binds tighter than `or`
        if Hadoop.isEncrypted() or (Ambari.is_ambari_encrypted() and not Machine.isHumboldt()):
            port = 8443
            isHttps = True
        else:
            port = 8080

    if isHttps or self.isCloudbreak():
        self.baseUrl = 'https://' + host
    else:
        self.baseUrl = 'http://' + host

    if self.isCloudbreak():
        self.baseUrl = self.baseUrl + '/ambari'
    else:
        self.baseUrl = self.baseUrl + ':' + str(port)

    if Machine.isHumboldt():
        self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin') + ':HdpCli123!'
        ambari_gateway = Config.get('machine', 'GATEWAY').replace("-ssh", "")
        self.baseUrl = 'https://%s' % ambari_gateway
    elif Machine.getInstaller() == 'cloudbreak':
        self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin') + ':cloudbreak1'
    else:
        self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin') + ':' + \
                                 Config.get('ambari', 'AMBARI_PASSWORD', 'admin')

    self.urlLogin = self.baseUrl + '#/login'
    self.urlGetClusters = self.baseUrl + '/api/v1/clusters'
    self.urlGetAmbClusters = self.baseUrl + '/api/v1/services'
    self.urlConfig = '/configurations'
    self.backupDataJson = dict()
    self.logger = logging.getLogger(__name__)
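# Illustrative usage sketch, assuming the `requests` library is available and
# that the Ambari REST API answers /api/v1/clusters with basic auth; `client`
# is an instance of whatever class owns the __init__ above.
def _demo_list_clusters(client):
    import requests
    user, _, password = client.username_password.partition(':')
    resp = requests.get(client.urlGetClusters, auth=(user, password), verify=False)
    resp.raise_for_status()
    return [item['Clusters']['cluster_name'] for item in resp.json().get('items', [])]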
def run_client_smoketest(cls, config=None, env=None):
    '''
    Run wordcount job passing env variables
    :param config: Configuration location
    :param env: Set environment variables
    '''
    logger.info("**** Running HDFS CLI Test ****")
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS started ")
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput_cli'
    HDFS.deleteDirectory(SmokeOutputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s wordcount "-Dmapreduce.reduce.input.limit=-1" "-D%s=%s" %s %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name", cls._queue,
        cls._SmokeInputDir, SmokeOutputDir)
    exit_code, stdout = Hadoop.run(jobCmd, env=env)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] Hdfs smoketest failed")
    # capture the exit code of the cleanup itself rather than re-checking the job
    exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] could not delete: " + SmokeOutputDir)
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS finished ")
def runas(cls, user, cmd, cwd=None, env=None, logoutput=True, runInBackground=False):
    runCmd = Config.get('pig', 'PIG_CMD') + " " + cmd
    # initialize env
    if not env:
        env = {}
    # get a kerberos ticket on secure clusters
    if Hadoop.isSecure():
        if user is None:
            user = Config.getEnv('USER')
        kerbTicket = Machine.getKerberosTicket(user)
        env['KRB5CCNAME'] = kerbTicket
        user = None
    if runInBackground:
        return Machine.runinbackgroundAs(user, runCmd, cwd=cwd, env=env)
    else:
        return Machine.runas(user, runCmd, cwd=cwd, env=env, logoutput=logoutput)
def share_lib_update(cls, fs=None):
    oozie_server = cls.getOozieUrl()
    cmd = " admin -oozie " + oozie_server + " -sharelibupdate"
    if Hadoop.isSecure():
        cmd = " -Doozie.auth.token.cache=false " + cmd
    return cls.runas(cls._oozie_user, cmd)
def getIsSecure(cls):
    # the cluster is secure iff hadoop.security.authentication resolves to "kerberos"
    return Hadoop.getConfigValue("hadoop.security.authentication", "kerberos") == "kerberos"
def setup(cls, S3_AWS_ACCESS_KEY=None, S3_AWS_SECRET=None):
    Machine.installPackageWithPip(packages="awscli", hosts=Hadoop.getAllNodes(), logoutput=True)
    aws_home = "/root/.aws"
    if not os.path.exists(aws_home):
        Machine.makedirs(ADMIN_USER, GATEWAY_NODE, aws_home, ADMIN_PWD)
    util.writeToFile("[default]\nregion = us-west-2\noutput=json", os.path.join(ARTIFACTS_DIR, "config"))
    if S3_AWS_ACCESS_KEY:
        cls._aws_access_key_id = S3_AWS_ACCESS_KEY
    else:
        cls._aws_access_key_id = Config.get('machine', 'S3_AWS_ACCESS_KEY')
    if S3_AWS_SECRET:
        cls._aws_secret_access_key = S3_AWS_SECRET
    else:
        cls._aws_secret_access_key = Config.get('machine', 'S3_AWS_SECRET')
    util.writeToFile(
        "[default]\naws_access_key_id = %s\naws_secret_access_key = %s" %
        (cls._aws_access_key_id, cls._aws_secret_access_key),
        os.path.join(ARTIFACTS_DIR, "credentials"))
    Machine.runas(ADMIN_USER, "chown %s '%s/config'" % (ADMIN_USER, ARTIFACTS_DIR), GATEWAY_NODE, ADMIN_PWD)
    Machine.runas(ADMIN_USER, "chown %s '%s/credentials'" % (ADMIN_USER, ARTIFACTS_DIR), GATEWAY_NODE, ADMIN_PWD)
    Machine.copy(os.path.join(ARTIFACTS_DIR, "config"), aws_home, ADMIN_USER, ADMIN_PWD)
    Machine.copy(os.path.join(ARTIFACTS_DIR, "credentials"), aws_home, ADMIN_USER, ADMIN_PWD)
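# Optional sanity check, as a sketch: once the config and credentials files are
# in place, `aws sts get-caller-identity` confirms the CLI can authenticate.
# Assumes the awscli package installed above puts the `aws` binary on PATH.
def _demo_verify_aws_cli():
    import subprocess
    return subprocess.call(["aws", "sts", "get-caller-identity"]) == 0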
def getNameNodeURL(nameservice2=False):
    if Hadoop.isEncrypted():
        baseUrl = "https://%s" % HDFS.getNamenodeHttpsAddress(nameservice2)
    else:
        baseUrl = "http://%s" % HDFS.getNamenodeHttpAddress(nameservice2)
    logger.info("URL being returned is - %s" % baseUrl)
    return baseUrl
def balancerModifyConfig(duReservedValue):
    """
    Create /tmp/hadoopConf on all nodes with a modified hdfs-site.xml
    :param duReservedValue: value for dfs.datanode.du.reserved
    :return: None
    """
    Hadoop.modifyConfig(
        {
            'hdfs-site.xml': {
                'dfs.datanode.du.reserved': duReservedValue,
                'dfs.replication': 1,
                'dfs.namenode.heartbeat.recheck-interval': 5000,
                'dfs.namenode.stale.datanode.interval': 1000,
                'dfs.namenode.replication.interval': 1
            },
        }, {'services': ['all']})
def verifyLongRunningJob(cls):
    '''
    Validate long running background job after end of all component upgrade
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    logger.info("Stop the Flume agents before verification")
    open(cls._data_stop, 'a').close()
    time.sleep(60)
    agent1.stop()
    agent2.stop()
    time.sleep(60)
    logger.info("Verifying the sinked data from Flume agent")
    exit_code, stdout, stderr = Hadoop.runas(
        cls._hdfs_user, "dfs -cat %s/*" % cls._hdfs_test_dir, logoutput=False, stderr_as_stdout=False)
    if exit_code != 0:
        logger.error("Following error during the HDFS cat while fetching Flume data: %s" % stderr)
    if not util.compareOutputToFileIgnoreDupsAndOrder(stdout, cls._data_file):
        UpgradePerNode.reportProgress(
            "[FAILED][FLUME][BGJob] Long running test for Flume failed while verifying data")
    else:
        UpgradePerNode.reportProgress(
            "### [PASSED][FLUME][BGJob] Long running test validation for Flume passed ####")
def createClusterEntities(cls, colo, desc, name):
    try:
        from beaver.component.falcon import Falcon
    except ImportError:
        # Import fails when Falcon is not installed on this machine. Nothing to do.
        return

    from beaver.component.hadoop import Hadoop, HDFS, YARN
    write_endpoint = Hadoop.getFSDefaultValue()
    webhdfs_scheme = 'webhdfs'
    if HDFS.isHttpsEnabled():
        webhdfs_scheme = 'swebhdfs'
    read_endpoint = '%s://%s:%s' % (
        webhdfs_scheme, write_endpoint.split('/')[2].split(':')[0], HDFS.getNNWebPort())
    execute_endpoint = YARN.getResourceManager()
    falconNode = Falcon.get_falcon_server()

    from beaver.component.oozie import Oozie
    oozieUrl = Oozie.getOozieUrl()
    entityText = (
        "<?xml version=\"1.0\"?>"
        "<cluster colo=\"" + colo + "\" description=\"" + desc + "\" name=\"" + name + "\" "
        "xmlns=\"uri:falcon:cluster:0.1\"> "
        "<interfaces> "
        "<interface type=\"readonly\" endpoint=\"" + read_endpoint + "\" version=\"0.20.2\"/> "
        "<interface type=\"write\" endpoint=\"" + write_endpoint + "\" version=\"0.20.2\"/> "
        "<interface type=\"execute\" endpoint=\"" + execute_endpoint + "\" version=\"0.20.2\"/> "
        "<interface type=\"workflow\" endpoint=\"" + oozieUrl + "\" version=\"3.1\"/>"
        "<interface type=\"messaging\" endpoint=\""
        "tcp://" + falconNode + ":61616?daemon=true\" version=\"5.1.6\"/>"
        "</interfaces>"
        "<locations>"
        "<location name=\"staging\" path=\"/apps/falcon/" + name + "/staging\" />"
        "<location name=\"temp\" path=\"/tmp\" />"
        "<location name=\"working\" path=\"/apps/falcon/" + name + "/working\" />"
        "</locations>"
        "<ACL owner=\"" + cls._job_user + "\" group=\"users\" permission=\"0755\"/>")
    if Hadoop.isSecure():
        realm = HDFS.getConfigValue('dfs.namenode.kerberos.principal').split('@')[1]
        entityText += (
            "<properties> <property name=\"dfs.namenode.kerberos.principal\" "
            "value=\"nn/_HOST@" + realm + "\"/> </properties>")
    entityText += "</cluster>"
    textFile = open(os.path.join(cls._local_workspace, name + ".xml"), "w")
    textFile.write("%s" % entityText)
    textFile.close()
    return
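# A minimal alternative sketch of the same entity construction using
# xml.etree.ElementTree instead of string concatenation. Endpoint values and
# the helper name are placeholders; attribute order in the serialized output
# may differ from the hand-built string above.
def _demo_build_cluster_entity(colo, desc, name, endpoints, owner):
    import xml.etree.ElementTree as ET
    cluster = ET.Element('cluster', {
        'colo': colo, 'description': desc, 'name': name,
        'xmlns': 'uri:falcon:cluster:0.1'})
    interfaces = ET.SubElement(cluster, 'interfaces')
    # endpoints: dict mapping interface type -> (endpoint URL, version)
    for itype, (endpoint, version) in endpoints.items():
        ET.SubElement(interfaces, 'interface',
                      {'type': itype, 'endpoint': endpoint, 'version': version})
    locations = ET.SubElement(cluster, 'locations')
    for lname, path in [('staging', '/apps/falcon/%s/staging' % name),
                        ('temp', '/tmp'),
                        ('working', '/apps/falcon/%s/working' % name)]:
        ET.SubElement(locations, 'location', {'name': lname, 'path': path})
    ET.SubElement(cluster, 'ACL', {'owner': owner, 'group': 'users', 'permission': '0755'})
    return ET.tostring(cluster)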
def getLLAPDaemonPidsHosts():
    hosts = []
    llapdaemon_pids = []
    if not Machine.isHumboldt():
        nodes = Hadoop.getAllNodes()
    else:
        nodes = HDFS.getDatanodes()
    for node in nodes:
        pids = Machine.getProcessListRemote(
            node, format="%U %p %P %a",
            filter="org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon", logoutput=True)
        if pids:
            if Hadoop.isSecure():
                pid = Machine.getPidFromString(pids[0], Config.get('hive', 'HIVE_USER'))
            else:
                pid = Machine.getPidFromString(pids[0], Config.get('hadoop', 'YARN_USER'))
            llapdaemon_pids.append(pid)
            hosts.append(node)
    return llapdaemon_pids, hosts
def insertFileIntoHdfs(fileName):
    pathFileName = '/user/' + HADOOPQA_USER + '/' + fileName
    if not HDFS.fileExists(pathFileName):
        sourceFile = DATA_PATH + '/' + fileName
        putCmd = "dfs -put " + sourceFile + ' ' + pathFileName
        out = Hadoop.run(putCmd)
        return out
def getAmbariURL():
    ambariHost = socket.getfqdn()
    if Hadoop.isEncrypted():
        baseUrl = "https://%s:8443" % ambariHost
    else:
        baseUrl = "http://%s:8080" % ambariHost
    logger.info("URL being returned is - %s" % baseUrl)
    return baseUrl
def setup_storm_hdfs_topology(cls, useStandaloneCmd):
    storm_version = Storm.getVersion(useStandaloneCmd=True)
    try:
        file_obj = open(HDFS_CONFIG_FILE, 'w')
        if Hadoop.isSecure():
            file_obj.write('hdfs.keytab.file: ' + Machine.getHeadlessUserKeytab(user=HADOOPQA_USER) + '\n')
            file_obj.write('hdfs.kerberos.principal: ' + Machine.get_user_principal(user=HADOOPQA_USER) + '\n')
    finally:
        file_obj.close()

    HDFS.createDirectory("/tmp/mySeqTopology", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest2", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/foo", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident-seq", HDFS_USER, "777", False)

    Machine.copy(JAVA_HDFS_SRC_DIR, LOCAL_HDFS_WORK_DIR, user=None, passwd=None)
    if not Machine.isWindows():
        (exit_code, _) = Maven.run(
            'package',
            cwd=LOCAL_HDFS_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl()
            })
    else:
        filepath = os.path.join(MOD_CONF_PATH, "core-site.xml")
        (exit_code, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' % (
                HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER, storm_version,
                HADOOP_CONF_MAVEN_PARAMETER, HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER, HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
                CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl()),
            cwd=LOCAL_HDFS_WORK_DIR)
    ruAssert("Storm", exit_code == 0, "[StormHDFSSetup] maven package command failed")
def getAllNodes(cls):
    '''
    Get all nodes according to the platform used
    '''
    if Machine.isHumboldt():
        allNodes = util.getAllNodes()
    else:
        allNodes = Hadoop.getAllNodes()
    return allNodes
def getBaseUrl(self):
    from beaver.component.ambari import Ambari
    GRAFANA_HOST = Ambari.getHostsForComponent('METRICS_GRAFANA')[0]
    if Hadoop.isEncrypted() or Machine.isHumboldt():
        GRAFANA_URL = "https://%s:3000/dashboard/db/hbase-tuning" % GRAFANA_HOST
    else:
        GRAFANA_URL = "http://%s:3000/dashboard/db/hbase-tuning" % GRAFANA_HOST
    self.base_url = GRAFANA_URL
    return self.base_url
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[INFO][FLUME][BGJobSetup] Long running job setup for Flume component started")
    from beaver.component.flume import Agent
    global agent1
    global agent2
    if not os.path.exists(cls._local_work_dir):
        os.mkdir(cls._local_work_dir)
    shutil.copy(cls._flume_datagen_src, cls._local_work_dir)
    agent1 = Agent(cls._local_work_dir)
    agent2 = Agent(cls._local_work_dir)
    for outdir in (cls._agent1_chkpt_dir, cls._agent1_data_dir, cls._agent2_chkpt_dir, cls._agent2_data_dir):
        os.mkdir(outdir)
    logger.info("Preparing the Flume configs for long running test")
    propertyMap = {}
    namenode = Hadoop.getFSDefaultValue()
    propertyMap['agent2.sinks.hdfsSink.hdfs.path'] = "%s%s" % (namenode, cls._hdfs_test_dir)
    if Hadoop.isSecure():
        user_realm = None
        if Config.hasOption('machine', 'USER_REALM'):
            user_realm = Config.get('machine', 'USER_REALM', '')
        else:
            nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
            atloc = nnKerbPrincipal.find("@")
            if atloc != -1:
                user_realm = nnKerbPrincipal[atloc + 1:]
        if user_realm:
            propertyMap['agent2.sinks.hdfsSink.hdfs.kerberosPrincipal'] = cls._test_user + '@' + user_realm
            propertyMap['agent2.sinks.hdfsSink.hdfs.kerberosKeytab'] = \
                Machine.getHeadlessUserKeytab(cls._test_user)
    util.writePropertiesToFile(
        os.path.join(cls._flume_test_conf, 'longrunning.properties'), cls._flume_test_src, propertyMap)
def verifyOozieAppsAndJobsSucceeded(workflowIds, logPrefix, localDir, testMap, action_name='wc', checkJob=True):
    '''
    Verifies that all apps and jobs submitted/created via Oozie have passed all the validations.
    :param workflowIds: List of workflow ids to verify.
    :param logPrefix: Log prefix for YARN app logs.
    :param localDir: Path to local log dir.
    :return: Bool status indicating whether validation succeeded.
    '''
    appIds = []
    jobIds = []
    dLog = {}
    appStatus = True
    jobStatus = True
    wprStatus = True
    # check the job and app status for each workflow we launched
    if Hadoop.isHadoop2():
        # get all the app and job ids
        for workflowId in workflowIds:
            if action_name != 'None':
                stdout = Oozie.getJobInfo('%s@%s' % (workflowId, action_name), verbose=True, retry=True)
            else:
                stdout = Oozie.getJobInfo('%s' % workflowId, verbose=True, retry=True)
            ids = Oozie.getJobAndAppIds(stdout)
            for entry in ids:
                appIds.append(entry['application'])
                jobIds.append(entry['job'])
        # get the app and job status for all the jobs we found
        appStatus, appLog = YARN.checkAppsSucceeded(appIds, logPrefix=logPrefix, localDir=localDir)
        dLog.update(appLog)
        if checkJob:
            jobStatus, jobLog = YARN.checkJobsSucceeded(jobIds)
            dLog.update(jobLog)
        for key, value in dLog.items():
            logger.info("%s -> %s" % (key, value))
        wprStatus, d = verifyWorkPreservingRMRestart(jobIds, testMap)
        for k, v in d.items():
            logger.info("%s -> %s" % (k, v))
    logger.info("appStatus: %s jobStatus: %s wprStatus: %s" % (appStatus, jobStatus, wprStatus))
    return appStatus and jobStatus and wprStatus
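# Illustrative call, as a sketch: aggregate verification of a single
# Oozie-launched wordcount workflow. The workflow id, paths, and the empty
# testMap are placeholders; the shape testMap must take is framework-specific.
def _demo_verify_single_workflow(workflow_id):
    return verifyOozieAppsAndJobsSucceeded(
        [workflow_id], logPrefix='ru-oozie', localDir='/tmp/oozie-logs',
        testMap={}, action_name='wc')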
def runOozieJobsCmdAs(cls, user, cmd, cwd=None, env=None, logoutput=True, retry=False,
                      num_of_retries=5, wait=30, oozie_server=None):
    if not env:
        env = {}
    if Hadoop.isSecure():
        if user is None:
            user = Config.getEnv('USER')
        kerbTicket = Machine.getKerberosTicket(user)
        env['KRB5CCNAME'] = kerbTicket
        user = None
    # if the oozie server was not passed in, find it
    logger.info("OOZIE SERVER: %s" % oozie_server)
    if not oozie_server:
        oozie_server = cls.getOozieUrl()
    logger.info("OOZIE SERVER: %s" % oozie_server)
    env['JAVA_HOME'] = Config.get("machine", "JAVA_HOME")
    if Machine.type() == 'Windows':
        paramsList = cmd.split()
        escapedCmd = ""
        for param in paramsList:
            if param[0] != '"' and param[-1] != '"':
                escapedCmd = escapedCmd + "\"" + param + "\"" + " "
            else:
                escapedCmd = escapedCmd + param + " "
        oozie_cmd = OOZIE_CMD + " jobs -oozie " + oozie_server + " " + escapedCmd
    else:
        oozie_cmd = OOZIE_CMD + " jobs -oozie " + oozie_server + " " + cmd
    exit_code, output = Machine.runas(user, oozie_cmd, cwd=cwd, env=env, logoutput=logoutput)
    # handle retries if the caller asked for them; sleep between attempts,
    # not after the final one
    count = 1
    while retry and exit_code != 0 and count < num_of_retries:
        time.sleep(wait)
        exit_code, output = Machine.runas(user, oozie_cmd, cwd=cwd, env=env, logoutput=logoutput)
        count += 1
    return exit_code, output
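# The general form of the retry loop above, as a standalone sketch: retry a
# callable returning (exit_code, output) until it succeeds or the attempts
# are exhausted, sleeping between attempts.
def _demo_retry(run, attempts=5, wait=30):
    import time
    exit_code, output = run()
    for _ in range(attempts - 1):
        if exit_code == 0:
            break
        time.sleep(wait)
        exit_code, output = run()
    return exit_code, output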
def ensure_jns_have_new_txn(cls, nodes, last_tx_id):
    num_of_jns = len(nodes)
    actual_tx_ids = {}
    jns_updated = 0
    protocol = 'http'
    jn_port = '8480'
    if Hadoop.isEncrypted():
        protocol = 'https'
        jn_port = '8481'
    # time out after 3 mins
    time_out = 3 * 60
    # sleep for 10s between iterations
    step_time = 10
    itr = int(time_out / step_time)
    for i in range(itr):
        logger.info(
            '******************** Check if all Journal Nodes are updated, iteration %s of %s *************************'
            % (i + 1, itr))
        for node in nodes:
            # if all JNs are updated, we are done
            if jns_updated == num_of_jns:
                return
            # if this JN is already caught up, skip it
            if node in actual_tx_ids and actual_tx_ids[node] and int(actual_tx_ids[node]) >= last_tx_id:
                continue
            # otherwise fetch its last written transaction id and compare
            url = '%s://%s:%s/jmx' % (protocol, node, jn_port)
            actual_tx_ids[node] = util.getJMXData(
                url, 'Hadoop:service=JournalNode,name=Journal-', 'LastWrittenTxId')
            logger.info(
                '******************** JN: %s LAST TX ID: %s *************************' % (node, last_tx_id))
            if int(actual_tx_ids[node]) >= last_tx_id:
                jns_updated += 1
        # if all JNs are updated, we are done
        if jns_updated == num_of_jns:
            return
        time.sleep(step_time)
    ruAssert("HDFS", jns_updated == num_of_jns)
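# Standalone sketch of the JMX probe used above, assuming the JournalNode's
# /jmx servlet returns JSON with a "beans" list and the journal bean's name
# starts with "Hadoop:service=JournalNode,name=Journal-". Helper name and
# defaults are illustrative only.
def _demo_last_written_tx_id(host, port=8480, protocol='http'):
    import json
    import urllib2  # urllib.request on Python 3
    data = json.load(urllib2.urlopen('%s://%s:%s/jmx' % (protocol, host, port)))
    for bean in data.get('beans', []):
        if bean.get('name', '').startswith('Hadoop:service=JournalNode,name=Journal-'):
            return int(bean['LastWrittenTxId'])
    return None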
def share_lib_setup(cls, fs=None):
    # only need to do this on windows
    if not Machine.isWindows():
        # return exit code of 0 and empty stdout
        return 0, ''
    oozie_setup_cmd = os.path.join(OOZIE_HOME, 'bin', 'oozie-setup.cmd')
    oozie_sharelib_location = os.path.join(OOZIE_HOME, "share")
    if not fs:
        fs = Hadoop.getFSDefaultValue()
    cmd = 'sharelib create -fs %s -locallib %s' % (fs, oozie_sharelib_location)
    return cls.runas(cls._oozie_user, cmd, base_cmd=oozie_setup_cmd)
def runLoadGenerator(cls, numOfNodes=1, elapsedTime=100):
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    flagFile = UpgradePerNode._HDFS_FLAG_FILE
    # fetch the jar again as we don't know which version this job will run with
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    # load generator
    jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=hdfs -mr %s %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime %s -flagFile %s' % (
        HADOOP_TEST_JAR, numOfNodes, cls._lgTestOutputDir, cls._lgTestDataDir, elapsedTime, flagFile)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] LoadGenerator failed")
def serverStop(cls, cwd=None, env=None, logoutput=True, user=None):
    # `user` is now a parameter; the original referenced it without defining it
    cmd = SQOOP2_SERVER + "stop"  # Config.get('sqoop2', 'SQOOP2_CMD')
    osenv = {"JAVA_HOME": Config.get('machine', 'JAVA_HOME')}
    if Hadoop.isSecure():
        if user is None:
            user = Config.getEnv('USER')
        kerbTicket = Machine.getKerberosTicket(user)
        osenv['KRB5CCNAME'] = kerbTicket
        user = None
    if env:
        for key, value in env.items():
            osenv[key] = value
    return Machine.run(cmd, cwd=cwd, env=osenv, logoutput=logoutput)
def test_CleanUpOfFilesAfterFailedJob():
    testCaseDescription = "testCleanUpOfFilesAfterFailedJob"
    testCaseId = "cleanup03"
    util.displayTestCaseMessage(testCaseDescription, testCaseId)
    fileCreated = "FileCreatedByJob.log"
    out = insertFileIntoHdfs(CREATE_FILE)
    assert out[0] == 0

    logger.info("Try to get the JobTracker")
    JOBTRACKER = MAPRED.getJobTracker()
    assert JOBTRACKER is not None

    logger.info("Submitting a streaming job that will create a file")
    localFilePath = getFullPathOfFile(CREATE_FILE_PATH_IN_LOCAL)
    hadoopStreamingCmdFormat = 'jar %s -files %s -input %s -output %s -mapper "python %s" -reducer NONE'
    jobJarHadoopStreamingCmd = hadoopStreamingCmdFormat % (
        HADOOP_STREAMING_JAR, localFilePath, CREATE_FILE_PATH_IN_HADOOP, OUT_PATH_IN_HADOOP, CREATE_FILE)
    logger.info(jobJarHadoopStreamingCmd)
    out = Hadoop.runInBackground(jobJarHadoopStreamingCmd)
    time.sleep(20)

    logger.info("Try to get the job id")
    for i in range(1, 5):
        jobId = MAPRED.getJobID()
        if validateJobId(jobId):
            break
        time.sleep(10)
    assert jobId.startswith('job_')
    logger.info("Got JobId %s successfully" % jobId)

    logger.info("Try to get the attempt id")
    attemptId = MAPRED.getAttemptIdsForJobId(jobId)
    assert attemptId.startswith("attempt_")

    logger.info("Try to get a TaskTracker")
    taskTrackersList = Hadoop.getTasktrackers()
    taskTracker = taskTrackersList[0].rstrip("\n")
    logger.info("TaskTracker running the map task is " + taskTracker)
    time.sleep(20)

    logFileDirList = getLocalDirInfo(taskTracker)
    logger.info("Log file list: " + logFileDirList)
    logFileDirList = logFileDirList.split(',')
    isExistedTempFile = False
    for logFileDir in logFileDirList:
        logger.info("Directory of log file: " + logFileDir)
        isExistedTempFile = checkJobCreatedTempFileInTT(
            logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
        if isExistedTempFile:
            break
    assert isExistedTempFile

    # Now fail the job
    getAttemptIdsForJobIdAndStoreInFile(jobId)
    attemptIdCount = MAPRED.checkForNewAttemptIds(jobId)
    assert len(attemptIdCount) != 0
    while len(attemptIdCount) != 0:
        logger.info("Since there are attempt ids, proceeding to fail them")
        MAPRED.failAttempts(attemptIdCount)
        attemptIdCount = MAPRED.checkForNewAttemptIds(jobId)

    logger.info("Check job status")
    isJobFailed = MAPRED.isJobFailed(jobId)
    if not isJobFailed:
        logger.info("The job could not be failed successfully; unable to proceed with the tests")
    assert isJobFailed

    # the temp file created by the job must be gone once the job has failed
    isExistedTempFile = checkJobCreatedTempFileInTT(
        logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
    if isExistedTempFile:
        logger.info("The test case to check that files are cleared after failing the job failed")
        logger.info("The file created by the job still exists even after the job failed")
    assert not isExistedTempFile
def test_CleanUpOfFilesAfterJobCompletionForFilesWithSymLink():
    testCaseDescription = "test_CleanUpOfFilesAfterJobCompletionForFilesWithSymLink"
    testCaseId = "cleanup04"
    util.displayTestCaseMessage(testCaseDescription, testCaseId)
    fileCreated = "mysymlink.txt"
    out = insertFileIntoHdfs(CREATE_FILE_2)
    assert out[0] == 0
    time.sleep(15)

    logger.info("Try to get the JobTracker")
    JOBTRACKER = MAPRED.getJobTracker()
    assert JOBTRACKER is not None

    localFilePath = getFullPathOfFile(CREATE_FILE_2_PATH_IN_LOCAL)
    hadoopStreamingCmdFormat = 'jar %s -files %s -input %s -output %s -mapper "python %s" -reducer NONE'
    jobJarHadoopStreamingCmd = hadoopStreamingCmdFormat % (
        HADOOP_STREAMING_JAR, localFilePath, CREATE_FILE_2_PATH_IN_HADOOP, OUT_PATH_IN_HADOOP, CREATE_FILE_2)
    logger.info(jobJarHadoopStreamingCmd)
    out = Hadoop.runInBackground(jobJarHadoopStreamingCmd)
    time.sleep(15)

    logger.info("Try to get the job id")
    for i in range(1, 5):
        jobId = MAPRED.getJobID()
        if validateJobId(jobId):
            break
        time.sleep(10)
    assert jobId.startswith('job_')
    logger.info("Got JobId %s successfully" % jobId)

    logger.info("Try to get the attempt id")
    attemptId = MAPRED.getAttemptIdsForJobId(jobId)
    assert attemptId.startswith("attempt_")

    logger.info("Try to get a TaskTracker")
    taskTrackersList = Hadoop.getTasktrackers()
    taskTracker = taskTrackersList[0].rstrip("\n")
    logger.info("TaskTracker running the map task is " + taskTracker)
    time.sleep(40)

    logFileDirList = getLocalDirInfo(taskTracker)
    logger.info("Log file list: " + logFileDirList)
    logFileDirList = logFileDirList.split(',')
    isExistedTempFile = False
    for logFileDir in logFileDirList:
        logger.info("Directory of log file: " + logFileDir)
        isExistedTempFile = checkJobCreatedTempFileInTT(
            logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
        if isExistedTempFile:
            break
    assert isExistedTempFile

    logger.info("Check whether the job is completed")
    for i in range(1, 10):
        isJobCompleted = MAPRED.checkForJobCompletion(jobId)
        if isJobCompleted:
            break
        time.sleep(20)
    assert isJobCompleted
    logger.info("Job is completed!")

    # Now check that the file is cleared off after the job is complete
    isExistedTempFile = checkJobCreatedTempFileInTT(
        logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
    if isExistedTempFile:
        logger.info("The test case to check that files are cleared after job completion failed")
        logger.info("The file created by the job still exists even after the job completed")
    assert not isExistedTempFile