def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Create 4 input datasets for TestOrderedWordCount
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    logger.info("*** Start background job setup for Tez ***")
    Machine.rm(user=HADOOPQA_USER, host=None, filepath=LOCAL_WORK_DIR, isdir=True)
    os.mkdir(LOCAL_WORK_DIR)
    for i in range(0, 4, 1):
        inputDirName = "HDFS_INPUT%d" % i
        inputDirPath = os.path.join(LOCAL_WORK_DIR, inputDirName)
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, inputDirPath, 10, 400, 10000)
        hdfsInputDir = "/user/%s/Input%d" % (HADOOPQA_USER, i)
        hdfsOutputDir = "/user/%s/output%d" % (HADOOPQA_USER, i)
        # In case they are already present, delete the input and output directories
        HDFS.deleteDirectory(hdfsInputDir)
        HDFS.createDirectory(hdfsInputDir)
        HDFS.deleteDirectory(hdfsOutputDir)
        HDFS.copyFromLocal(inputDirPath, hdfsInputDir)
        cls._hdfsInputList.append(hdfsInputDir + "/" + inputDirName)
        cls._hdfsOutputList.append(hdfsOutputDir)
        logger.info("Created data for input %d", i)
    logger.info("*** End background job setup for Tez ***")
def run_background_job(cls, runSmokeTestSetup=True, config=None):
    '''
    Runs background long running Flume Job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: expected configuration location
    :return: Total number of long running jobs started
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    HDFS.createDirectory(cls._hdfs_test_dir, perm="777", force=True)
    UpgradePerNode.reportProgress("[INFO][FLUME][BGJob] Long running job for Flume component started")
    logger.info("Starting the Flume Agent Topology")
    addlParams = "-Dflume.log.dir=%s -Dflume.log.file=agent2.log" % cls._local_work_dir
    agent2.start("agent2", cls._flume_test_src, addlParams=addlParams, enableDebugLogOnConsole=False)
    logger.info("Sleeping for 10 seconds before starting the other Flume agent")
    time.sleep(10)
    addlParams = "-Dflume.log.dir=%s -Dflume.log.file=agent.log" % cls._local_work_dir
    agent1.start("agent", cls._flume_test_src, addlParams=addlParams, enableDebugLogOnConsole=False)
    time.sleep(5)
    return 1
def setupAcidDataset(testsuite, LOCAL_DIR):
    ddl_location = None
    if testsuite == 'acid':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-tablesetup.sql")
    elif testsuite == 'unbucketed':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-unbucketed-tablesetup.sql")
    else:
        assert 1 == 0, "The testsuite passed in is not correct. Please use value 'acid' or 'unbucketed'"

    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download TPCH acid data
    tpch_newdata_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
    Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the acid tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/lineitem_acid", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(tpch_newdata_dir, "lineitem*"), "/tmp/lineitem_acid", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_acid", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        ddl_location, hivevar={'HDFS_LOCATION': '/tmp'}, logoutput=True, queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH acid data in Hive"
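# Illustrative usage only (not part of the original suite): setupAcidDataset accepts
# exactly two suite names, 'acid' and 'unbucketed'; anything else trips the assertion
# above. A hypothetical driver exercising both variants, assuming local_dir points at
# a checkout that already contains the ddl/ folder, could look like this:
def example_setup_acid_suites(local_dir):
    for suite in ('acid', 'unbucketed'):
        setupAcidDataset(suite, local_dir)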
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')

    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Unable to deploy foodmart dataset"
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    logger.info("runSmokeTestSetup = %s, config = %s", runSmokeTestSetup, config)
    HDFS.createDirectory(cls.HDFS_CLUSTER_INPUT_DIR)
def checkClasspathVersion(cls, Version_Num, config=None):
    Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn")
    Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
    Mapper = "data/versionVerifyMapper.py"
    Reducer = "data/versionVerifyReducer.py"
    Verify_File_Name = "test.txt"
    Verify_Test_File = os.path.join(Multi_Version_App_Dir, Verify_File_Name)

    # Set up env
    mapred_app_path = MAPRED.getConfigValue("mapreduce.application.framework.path", None)
    mapred_classpath = MAPRED.getConfigValue("mapreduce.application.classpath", None)
    env = {
        "mapreduce.application.framework.path": mapred_app_path,
        "mapreduce.application.classpath": mapred_classpath
    }
    verifyInput = cls._hdfs_input + "/verify"
    HDFS.createDirectory(verifyInput, None, "777", False)

    # Copy template files for the verifier streaming job
    templateFile = open(Verify_Test_File, 'w')
    templateFile.write(Version_Num)
    templateFile.close()
    HDFS.copyFromLocal(Verify_Test_File, verifyInput, user=Config.get('hadoop', 'HADOOPQA_USER'))

    # Submit the special streaming job
    shortStreamingId = HadoopJobHelper.runStreamJob(
        Mapper,
        Reducer,
        verifyInput,
        cls._hdfs_output_verify,
        files=Multi_Version_App_Dir,
        config=config,
        extraJobArg=cls._jobArgs,
        env=env,
        proposedJobName=cls._shortStreamingName
    )
    MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)
    # Make sure task succeeded
    #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'

    # Check result content
    retVal, checkContent = HDFS.cat(cls._hdfs_output_verify + '/part-00000')
    logger.info("CHECK CLASSPATH VERSION OUTPUT")
    logger.info(retVal)
    logger.info(checkContent)
    ruAssert("YARN", retVal == 0)
    ruAssert("YARN", 'True' in checkContent, "[VersionVerify] Stream job returns false: " + checkContent)
    #assert retVal == 0
    #assert 'True' in checkContent, "Stream job returns false: " + checkContent
    #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
    HDFS.deleteDirectory(cls._hdfs_output_verify, user=Config.get('hadoop', 'HADOOPQA_USER'))
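# Illustrative stand-in (not the file shipped with the tests): checkClasspathVersion
# relies on data/versionVerifyMapper.py emitting 'True' or 'False' depending on whether
# the expected version written into test.txt is actually visible to the streaming task.
# The real mapper's detection logic is not reproduced here; this sketch only shows the
# shape of that contract, and the HADOOP_VERSION environment variable it consults is a
# placeholder assumption for the example.
import os
import sys


def hypothetical_version_verify_mapper():
    # value the task would compare against; the real mapper may inspect its classpath instead
    observed = os.environ.get("HADOOP_VERSION", "")
    for line in sys.stdin:
        expected = line.strip()
        if expected:
            # emit the verdict that the reducer and the assertions above look for
            print("True" if expected in observed else "False")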
def generate_test_data(cls, hdfs_test_dir, num_of_rows):
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "sqooptest.dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        f.write("%d,%d\n" % (userid + i, random.randint(10, 80)))
    f.close()
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.copyFromLocal(test_data_file, hdfs_test_dir)
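# Illustrative sketch (not part of the suite): generate_test_data produces a plain
# two-column CSV, one "<userid>,<age>" record per line starting at userid 100000,
# before copying it to HDFS. The standalone helper below reproduces just that local
# file format so the expected input shape is easy to inspect; the file name and row
# count are arbitrary choices for the example.
def write_sample_user_age_file(path="sqooptest_sample.dat", num_of_rows=5):
    import random
    userid = 100000
    with open(path, 'w') as sample:
        for i in range(num_of_rows):
            # ages are drawn from the same 10-80 range used above
            sample.write("%d,%d\n" % (userid + i, random.randint(10, 80)))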
def stopYarnLongRunningJob(cls):
    '''
    Stop Long running Yarn Dshell Job
    '''
    logger.info("**** Touch the file ****")
    HDFS.createDirectory(cls._multi_version_signal_file_dir, user=None, perm="777", force=False)
    multi_version_signal_file_path = cls._multi_version_signal_file_dir + "/signal"
    HDFS.touchz(multi_version_signal_file_path)
    #YARN.waitForApplicationFinish(cls._background_job_appId)
    time.sleep(2)
    logger.info("**** Done checking status ****")
def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    logger.info("Generating test table dataset with %d rows" % num_of_rows)
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), tbl_name + ".dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        for j in range(random.randint(3, 8)):
            f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))
    f.close()

    hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
    logger.info("Copying the test dataset to HDFS directory '%s'" % hdfs_tbl_dir)
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.createDirectory(hdfs_tbl_dir, perm='777')
    HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
    HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)

    logger.info("Creating table '%s' and verification tables" % tbl_name)
    query = "drop table if exists %s;\n" % tbl_name
    query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
        tbl_name, hdfs_tbl_dir)
    query += "drop table if exists %s_hive_verify;\n" % tbl_name
    query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
    if type == "Long running":
        for i in range(cls._num_of_webhcat_bgj):
            query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
            query += "create table %s_wh_%d (userid string, age int);\n" % (tbl_name, i + 1)
    hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hivesetup.sql")
    util.writeToFile(query, hivesetupfile)
    exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
    if type:
        msg = "%s job setup for Hive component" % type
        if exit_code != 0:
            UpgradePerNode.reportProgress("[FAILED][Hive][Setup] %s failed due to exitcode = %d" % (msg, exit_code))
        else:
            UpgradePerNode.reportProgress("[PASSED][Hive][Setup] %s finished successfully" % msg)
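# Illustrative sketch (not part of the suite): for a hypothetical table name
# "ru_hive_tbl" staged under /tmp/ru_hive, the hivesetup.sql that doSetup writes out
# renders roughly to the statements below; the "Long running" variant appends one
# extra ru_hive_tbl_wh_N table per WebHCat background job on top of this.
EXAMPLE_HIVE_SETUP_SQL = """\
drop table if exists ru_hive_tbl;
create external table ru_hive_tbl (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '/tmp/ru_hive/ru_hive_tbl';
drop table if exists ru_hive_tbl_hive_verify;
create table ru_hive_tbl_hive_verify (userid string, age int);
"""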
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    cls.run_JHS_test(config=config)
    logger.info("**** Run Yarn long running application setup ****")
    HDFS.createDirectory(cls._hdfs_input, None, "777", False)
    # touch a fake file to trick hadoop streaming
    HDFS.touchz(cls._hdfs_input + "/input.txt")
    HDFS.deleteDirectory(cls._hdfs_output, user=Config.get('hadoop', 'HADOOPQA_USER'))
    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()
def setupSchemaEvolutionDataset():
    logger.info("Setup Schema Evolution dataset")
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    hiveServer2Url = str(Hive.getHiveServer2Url())
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-schemaevolution", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table " % tbl)
        # TODO: modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
def setupMergeScaleDataset(LOCAL_DIR):
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCH dataset if not there
    tpch_data_dir = os.path.join(LOCAL_DIR, "data")
    TPCH_DATA_TGZ = os.path.join(LOCAL_DIR, "tpch_data.tgz")
    if not os.path.isfile(TPCH_DATA_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_DNLD_URL'), TPCH_DATA_TGZ)
    Machine.tarExtractAll(TPCH_DATA_TGZ, LOCAL_DIR)

    # Load the tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/tpch", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(tpch_data_dir, "/tmp/tpch", user=HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/tpch", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp/tpch/data'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH data in Hive"

    # Download TPCH staging data
    tpch_stage_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
    Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the staged tables in Hive
    HDFS.createDirectory(
        "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", user=HADOOPQA_USER, perm='777', force=True
    )
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "lineitem*"), "/tmp/lineitem_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "order*"), "/tmp/orders_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "delete*"), "/tmp/delete_stage", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-staged-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH staging data in Hive"
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as it is a server setting and
    # not something that the user should/can set in a session. In the case of Ranger
    # and SQL std authorization, set hive.support.concurrency to true and restart HS2.
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)

    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def downloadDataset(dataDir, dataTgz, downloadUrl, hdfsLocalCopy, textDataDir):
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()
    # Download the TPCDS dataset if not there
    if not os.path.isfile(dataTgz):
        assert util.downloadUrl(downloadUrl, dataTgz)
    Machine.tarExtractAll(dataTgz, dataDir)
    os.makedirs(hdfsLocalCopy)
    for filename in os.listdir(textDataDir):
        # strip the 4-character file extension to form the per-table directory name
        hdfs_localcopy_table_dir = os.path.join(hdfsLocalCopy, filename[:-4])
        os.mkdir(hdfs_localcopy_table_dir)
        shutil.copy(os.path.join(textDataDir, filename), hdfs_localcopy_table_dir)
    HDFS.copyFromLocal(hdfsLocalCopy, HDFS_TEST_DIR)
    HDFS.chmod(None, '777', HDFS_TEST_DIR, recursive=True)
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Upload Data to HDFS before Upgrade starts
    Creates /user/hrt_qa/test_rollingupgrade dir on HDFS
    Upload 20 files to /user/hrt_qa/test_rollingupgrade
    '''
    if not cls._base_hdfs_dir:
        cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get('hadoop', 'HADOOPQA_USER')
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, force=True)
    ruAssert("HDFS", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'HDFS_RU_TEST')
    localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
    HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1, 20, 40, 1000)
    HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"), cls._base_hdfs_dir)

    # set up for loadGenerator
    cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
    cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
    cls._lgStructureDir = Machine.getTempDir() + "/structure"

    # test dir setup
    HDFS.deleteDirectory(cls._lgTestDataDir)
    HDFS.deleteDirectory(cls._lgTestOutputDir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(Machine.getAdminUser(), command, None, None, None, "True", Machine.getAdminPasswd())
    command = "mkdir " + cls._lgStructureDir
    stdout = Machine.runas(None, command, None, None, None, "True", None)
    Machine.chmod("777", cls._lgStructureDir, "True", Machine.getAdminUser(), None, Machine.getAdminPasswd())

    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')

    # structure generator
    jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] StructureGenerator failed")

    # data generator
    jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")

    query = """drop table if exists student_txt;
create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter_txt;
create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists student;
create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
drop table if exists voter;
create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
Insert into table student select * from student_txt;
Insert into table voter select * from voter_txt;"""
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def doBackgroundJobSetup(cls, hdfs_test_dir):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    logger.info("Preparing the test setup for Hive background job")
    udfjar = os.path.join(Config.getEnv('WORKSPACE'), "tests", "hive", "hive-udf", "hive-udfs-0.1.jar")
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.copyFromLocal(udfjar, hdfs_test_dir)
    query = "drop function sleep; create function sleep as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';" % hdfs_test_dir
    exit_code, stdout = Hive.runQuery(query)
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Hive][Setup] Long running failed due to exitcode = %d" % exit_code)
    else:
        UpgradePerNode.reportProgress("[PASSED][Hive][Setup] Long running finished successfully")
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    Upload Data to HDFS before Upgrade starts
    Creates /user/hrt_qa/ru-pig dir on HDFS
    Creates and Upload large data file to /user/hrt_qa/ru-pig/input/
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("### Running Pig BackGround Job Setup ####")
    HDFS.deleteDirectory(cls._base_hdfs_dir)
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, user=cls._job_user, perm=777, force=True)
    ruAssert("Pig", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    HDFS.createDirectory(cls._hdfs_input_dir, force=True)

    srcFile = os.path.join(cls._artifacts_dir, 'pig-ru-input.txt')
    if os.path.exists(srcFile):
        os.remove(srcFile)
    tmpFile = os.path.join(cls._artifacts_dir, 'pig-ru-tmp-input.txt')
    if os.path.exists(tmpFile):
        os.remove(tmpFile)
    util.copyFileToAnotherFile(cls._golden_src_file, srcFile)
    util.copyFileToAnotherFile(srcFile, tmpFile)
    itr = 12
    if Machine.isFlubber():
        itr = 16
    # repeated copy passes between srcFile and tmpFile to build up the large input file
    for i in range(itr):
        util.copyFileToAnotherFile(srcFile, tmpFile)
        util.copyFileToAnotherFile(tmpFile, srcFile)
    exit_code, stdout = HDFS.copyFromLocal(srcFile, cls._hdfs_input_path)
    ruAssert("Pig", exit_code == 0, '[BGJobSetup] Data Load failed')
    if runSmokeTestSetup:
        cls.smoke_test_setup()
def setupTestData(stdauth=True):
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hive-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hive-simple-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HIVE_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    HDFS.createDirectory("/tmp/hs2data", user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def setup_storm_hdfs_topology(cls, useStandaloneCmd):
    storm_version = Storm.getVersion(useStandaloneCmd=True)
    try:
        file_obj = open(HDFS_CONFIG_FILE, 'w')
        if Hadoop.isSecure():
            file_obj.write('hdfs.keytab.file: ' + Machine.getHeadlessUserKeytab(user=HADOOPQA_USER) + '\n')
            file_obj.write('hdfs.kerberos.principal: ' + Machine.get_user_principal(user=HADOOPQA_USER) + '\n')
    finally:
        file_obj.close()

    HDFS.createDirectory("/tmp/mySeqTopology", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest2", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/foo", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident-seq", HDFS_USER, "777", False)

    Machine.copy(JAVA_HDFS_SRC_DIR, LOCAL_HDFS_WORK_DIR, user=None, passwd=None)
    if not Machine.isWindows():
        (exit_code, _) = Maven.run(
            'package',
            cwd=LOCAL_HDFS_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl()
            }
        )
    else:
        filepath = os.path.join(MOD_CONF_PATH, "core-site.xml")
        (exit_code, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' % (
                HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION, STORM_VERSION_MAVEN_PARAMETER, storm_version,
                HADOOP_CONF_MAVEN_PARAMETER, HADOOP_CONF, HDFS_FILE_MAVEN_PARAMETER, HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF, CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl()
            ),
            cwd=LOCAL_HDFS_WORK_DIR
        )
    ruAssert("Storm", exit_code == 0, "[StormHDFSSetup] maven package command failed")
def setupOozieDataDir(cls, directory):
    HDFS.deleteDirectory(directory)
    HDFS.createDirectory(directory)
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Upload Data to HDFS before Upgrade starts
    Creates /user/hrt_qa/falcon/ dir on HDFS
    Upload demo files to /user/hrt_qa/falcon
    '''
    logger.info("Falcon - starting background job setup")
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Falcon][BGJobSetup] starting Falcon background job setup")
    from beaver.component.hadoop import HDFS
    if not cls._base_falcon_dir:
        cls._base_falcon_dir = '/user/%s/falcon' % cls._job_user
    exit_code, stdout = HDFS.createDirectory(cls._base_falcon_dir, user=cls._job_user, perm=777, force=True)
    ruAssert("Falcon", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    HDFS.copyFromLocal(
        os.path.join(cls._local_workspace, "tests", "rolling_upgrade", "falcon", "demo"),
        cls._base_falcon_dir,
        user=cls._job_user
    )

    ## Create dirs for falcon clusters
    exit_code, stdout = HDFS.createDirectory(
        "/apps/falcon/primaryCluster/staging", user=cls._falcon_user, perm=777, force=True
    )
    ruAssert("Falcon", exit_code == 0, '[BGJobSetup] could not create staging dir on hdfs.')
    exit_code, stdout = HDFS.createDirectory(
        "/apps/falcon/primaryCluster/working", user=cls._falcon_user, perm=755, force=True
    )
    ruAssert("Falcon", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    exit_code, stdout = HDFS.createDirectory(
        "/apps/falcon/backupCluster/staging", user=cls._falcon_user, perm=777, force=True
    )
    ruAssert("Falcon", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    exit_code, stdout = HDFS.createDirectory(
        "/apps/falcon/backupCluster/working", user=cls._falcon_user, perm=755, force=True
    )
    ruAssert("Falcon", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')

    ## Create cluster entities.
    cls.createClusterEntities("USWestOregon", "oregonHadoopCluster", "primaryCluster")
    cls.createClusterEntities("USEastVirginia", "virginiaHadoopCluster", "backupCluster")

    if runSmokeTestSetup:
        logger.info("**** Running Falcon Smoke Test Setup ****")
        cls.smoke_test_setup()
    logger.info("Falcon - completed background job setup")
    return