def insertFileIntoHdfs(fileName):
    # Copy a local data file into the HDFS home directory of the test user,
    # skipping the copy if the target file already exists in HDFS.
    pathFileName = '/user/' + HADOOPQA_USER + '/' + fileName
    if not HDFS.fileExists(pathFileName):
        sourceFile = DATA_PATH + '/' + fileName
        destFile = '/user/' + HADOOPQA_USER + '/' + fileName
        putCmd = "dfs -put " + sourceFile + ' ' + destFile
        out = Hadoop.run(putCmd)
        return out
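# Hedged usage sketch (assumptions: HADOOPQA_USER and DATA_PATH are configured
# by the surrounding test framework, a module-level logger exists as elsewhere
# in this suite, and 'input.txt' is an illustrative file name under DATA_PATH).
out = insertFileIntoHdfs('input.txt')
if out is not None:
    logger.info("dfs -put output: %s" % str(out))
else:
    logger.info("File already present in HDFS; nothing copied")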
def verifyLongRunningJob(cls):
    '''
    Verify long running background job after it finishes
    :return:
    '''
    ruAssert("Tez", YARN.getAppFinalStateFromID(cls._background_job_appId) == 'SUCCEEDED')
    for output in cls._hdfsOutputList:
        ruAssert("Tez", HDFS.fileExists(output + '/part*'))
    logger.info("**** Verified long running job for Tez ****")
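# Standalone sketch of the same verification pattern (assumption: the
# application ID and output directory below are illustrative placeholders,
# not values produced by this suite).
app_id = 'application_1700000000000_0001'
output_dir = '/user/' + HADOOPQA_USER + '/tez-longrun-output'
ruAssert("Tez", YARN.getAppFinalStateFromID(app_id) == 'SUCCEEDED')
ruAssert("Tez", HDFS.fileExists(output_dir + '/part*'))
logger.info("**** Verified illustrative Tez application %s ****" % app_id)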
def verifyLongRunningJob(cls):
    '''
    Validate long running background job after end of all component upgrades
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("### Verifying Long running job for Pig ####")
    # Check that the long running process handle exists
    if cls._process is not None:
        # poll() returns None while the process is still running
        if cls._process.poll() is None:
            UpgradePerNode.reportProgress(
                "### Long running job for Pig has not Finished yet. Waiting for it to complete ####")
            # Wait up to _long_running_job_timeout_secs for the job to complete
            starttime = time.time()
            while (time.time() - starttime < cls._long_running_job_timeout_secs) and cls._process.poll() is None:
                time.sleep(5)
        exit_code = cls._process.poll()
        if exit_code is None:
            logger.info("Killing Pig Long running job process '%d'" % cls._process.pid)
            Machine.killProcess(cls._process.pid)
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] Long running job for Pig Failed to finish ####")
        elif exit_code != 0:
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] Long running job for Pig Failed and Exited with '%d' ####" % exit_code)
        else:
            UpgradePerNode.reportProgress("### Long running job for Pig Finished ####")
        # Check for the _SUCCESS file in the HDFS output path
        if HDFS.fileExists(cls._hdfs_success_filepath, cls._job_user):
            UpgradePerNode.reportProgress(
                "### [PASSED][Pig][BGJob] Found _SUCCESS file in HDFS for Pig Long running job ####")
        else:
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] _SUCCESS file not found in HDFS for Pig Long running job ####")
    else:
        UpgradePerNode.reportProgress(
            "### [FAILED][Pig][BGJob] Long Running Pig Job Failed. No Process found ####")
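# Hedged sketch of the same poll-with-timeout pattern using only the Python
# standard library; the command, timeout, and messages are illustrative and do
# not come from the Pig test itself.
import subprocess
import time

proc = subprocess.Popen(["sleep", "5"])          # stand-in for the long running job
timeout_secs = 30                                # illustrative timeout
start = time.time()
while time.time() - start < timeout_secs and proc.poll() is None:
    time.sleep(1)
exit_code = proc.poll()
if exit_code is None:
    proc.kill()                                  # mirrors Machine.killProcess above
    print("job did not finish within %d seconds" % timeout_secs)
elif exit_code != 0:
    print("job failed with exit code %d" % exit_code)
else:
    print("job finished successfully")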
def insertCSVDataViaCSVBuildLoad(cls,
                                 csvFile,
                                 tableName,
                                 putIntoHDFS=True,
                                 deleteAfterExec=True,
                                 runInBackground=False,
                                 user=None,
                                 config=None,
                                 optionAndParameter="",
                                 env=None,
                                 delimiter=",",
                                 arrayDelimiter=None,
                                 schema=None):
    """
    Bulk-load a CSV file into a table via the Phoenix CsvBulkLoadTool.
    By default, the file is staged under the /tmp/ folder in HDFS.
    """
    global ZK_ZPARENT
    if Slider.isSlider():
        ZK_ZPARENT = util.getPropertyValueFromConfigXMLFile(
            os.path.join(Config.get('hbase', 'HBASE_CONF_DIR'), 'hbase-site.xml'),
            "zookeeper.znode.parent")

    # Locate the Phoenix client jar on the local machine.
    if Machine.isLinux():
        clientjar = Machine.find(
            user=Machine.getAdminUser(),
            host="localhost",
            filepath=PHOENIX_HOME,
            searchstr="phoenix-*[0-9]-client.jar",
            passwd=Machine.getAdminPasswd())
    else:
        clientjar = Machine.find(
            user=Machine.getAdminUser(),
            host="localhost",
            filepath=PHOENIX_HOME,
            searchstr="phoenix-*-client.jar",
            passwd=Machine.getAdminPasswd())

    if Machine.isWindows():
        clientjar = (clientjar[0].strip("\\localhost")).replace("$", ":")
        fileName = csvFile.split('\\')[-1]
    else:
        clientjar = clientjar[0]
        fileName = csvFile.split('/')[-1]

    # If we need to, we insert the file into HDFS, since the library will take it from there.
    executingUser = HADOOPQA_USER if user is None else user
    if putIntoHDFS:
        if not HDFS.fileExists('/tmp/'):
            HDFS.mkdir('/tmp/')
        HDFS.copyFromLocal(csvFile, '/tmp/', executingUser, config, optionAndParameter)

    hbaseConfDir = HBASE_CONF_DIR
    if Slider.isSlider():
        hbaseConfDir = Config.get('hbase', 'HBASE_CONF_DIR')
    classpath = hbaseConfDir

    finalCommand = "%s jar %s org.apache.phoenix.mapreduce.CsvBulkLoadTool --table %s --input %s" \
                   % (HADOOP_CMD, clientjar, tableName, '/tmp/%s' % fileName)
    if schema is not None:
        finalCommand = '%s -schema %s' % (finalCommand, schema)

    if Machine.isWindows():
        os.environ['HADOOP_USER_CLASSPATH_FIRST'] = 'true'
        os.environ['HADOOP_CLASSPATH'] = classpath
        # delimiter options
        if delimiter != "," or arrayDelimiter is not None:
            finalCommand = "%s -d `\\\"`%s`\\\" -a `\\\"`%s`\\\"" \
                           % (finalCommand, delimiter, arrayDelimiter.strip("'"))
        finalCommand = "%s --zookeeper %s" % (finalCommand, ZK_HOST)
        if runInBackground:
            exit_code = 0
            stdout = ''
            Machine.runinbackground(
                finalCommand,
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
        else:
            exit_code, stdout = Machine.run(
                finalCommand,
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
    else:
        # delimiter options
        if delimiter != "," or arrayDelimiter is not None:
            finalCommand = "%s --delimiter %s --array-delimiter %s" % (
                finalCommand, delimiter, arrayDelimiter)
        # ZKHosts options
        finalCommand = "%s --zookeeper %s" % (finalCommand, cls.getZKConnectString())
        if runInBackground:
            exit_code = 0
            stdout = ''
            Machine.runinbackground(
                "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
        else:
            exit_code, stdout = Machine.run(
                "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))

    # If selected, after insertion into HBase we delete the csvFile from HDFS.
    # This does not work for the "run in background" option.
    if deleteAfterExec and not runInBackground:
        HDFS.deleteFile('/tmp/%s' % fileName, executingUser)

    return exit_code, stdout
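# Hedged usage sketch (assumptions: this classmethod lives on the Phoenix test
# helper class, here called Phoenix, and the local CSV path and table name are
# illustrative; on a real cluster the call shells out to CsvBulkLoadTool).
exit_code, stdout = Phoenix.insertCSVDataViaCSVBuildLoad(
    '/grid/0/tmp/sample_rows.csv',   # local CSV produced by the test
    'SAMPLE_TABLE',                  # target Phoenix table
    putIntoHDFS=True,                # stage the file under /tmp/ in HDFS first
    deleteAfterExec=True,            # remove the staged copy after the load
    delimiter=',')
assert exit_code == 0, "CsvBulkLoadTool failed:\n%s" % stdout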