def updateJobProperties(cls, propFile, properties=None, haEnabled=False, debug=False):
    fileSystemName = Hadoop.getFSDefaultValue()
    jobTrackerIP = MAPRED.getJobtrackerAddress()
    jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]
    if not properties:
        properties = {}
    if 'nameNode' not in properties:
        properties['nameNode'] = fileSystemName
    if 'jobTracker' not in properties:
        properties['jobTracker'] = jobTracker

    if "hcatalog" in propFile:
        if Hadoop.isSecure():
            kerberosPrincipal = Hive.getConfigValue("hive.metastore.kerberos.principal")
            properties['hive.metastore.kerberos.principal'] = kerberosPrincipal
        logger.info("Updating for hcatalog workflow")
        hcatNode = Hive.getConfigValue("hive.metastore.uris").replace('thrift', 'hcat')
        logger.info("Hcat node is " + hcatNode)
        properties['hcatNode'] = hcatNode

    if Hadoop.isSecure():
        # determine the namenode and the jobtracker principal
        nnPrincipal = None
        if haEnabled:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeByState('active'))
        else:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeHttpAddress()[0])
        jtPrincipal = MAPRED.getMasterPrincipal().replace('_HOST', jobTrackerIP[0])
        properties['dfs.namenode.kerberos.principal'] = nnPrincipal
        properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal

    wfPath = util.getPropertyValueFromFile(propFile, "oozie.wf.application.path")
    if wfPath is not None and wfPath.find("hdfs://localhost:9000") != -1:
        wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
        logger.info("Value of replaced oozie.wf.application.path is " + wfPath)
        properties['oozie.wf.application.path'] = wfPath

    util.writePropertiesToFile(propFile, propFile, properties)

    if debug:
        logger.info('Content of properties file %s' % propFile)
        f = open(propFile, 'r')
        # print the file to the console
        logger.info(f.read())
        f.close()
def background_job_when_master_upgrade(cls):
    '''
    Start a background application which runs while the component master service gets upgraded
    :return:
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started")

    logger.info("Creating hive tables for short background jobs")
    query = "drop table if exists shortlr_hive_verify;\n"
    query += "create table shortlr_hive_verify (userid string, age int);\n"
    query += "drop table if exists shortlr_bline_verify;\n"
    query += "create table shortlr_bline_verify (userid string, age int);\n"
    short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'shortlrsetup.sql')
    util.writeToFile(query, short_bgjob_setupfile)

    exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d" % exit_code
        )

    logger.info("Running the Background Job when upgrading Hive")
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Long running job for Hive component upgrades started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

    # Sleep for 10 seconds to make sure the query initializes before the Metastore is restarted
    time.sleep(10)

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished")
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting and not
    # something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2.
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)

    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def verifyLogMessageInServiceLog(text, service, timestamp=0, dateTimeFormat=None):
    '''
    Returns true when the given log message appears in the service log
    '''
    hiveLog = Hive.getServiceLog(service)
    if not hiveLog or not text:
        return None
    hiveHost = Hive.getHiveHost(service)
    destlog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d.log' % int(999999 * random.random()))
    Machine.copyToLocal(None, hiveHost, hiveLog, destlog)
    return util.findMatchingPatternInFileAfterTimestamp(destlog, text, timestamp, dateTimeFormat=dateTimeFormat)
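
# A minimal usage sketch for verifyLogMessageInServiceLog (illustrative only; the service name,
# message text, and timestamp format below are hypothetical and not taken from this suite):
#
#   start_ts = time.time()
#   # ... perform the action expected to produce the log message ...
#   found = verifyLogMessageInServiceLog(
#       "Initialized metastore connection", "metastore",
#       timestamp=start_ts, dateTimeFormat="%Y-%m-%d %H:%M:%S"
#   )
#   assert found, "expected log line did not appear after start_ts"
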
def startLLAPWithChaosMonkey(interval='300'):
    hive_changes = {'tez-site.xml': {'tez.am.task.max.failed.attempts': '0'}}
    Hive.modifyConfig(hive_changes, services=['hiveserver2'])

    AMBARI_AGENT_TMP_DIR = '/var/lib/ambari-agent/tmp'
    ARTIFACTS_DIR = Config.getEnv('ARTIFACTS_DIR')
    LLAP_START_USER = Config.get('hive', 'HIVE_USER')
    dirs = [
        name for name in os.listdir(AMBARI_AGENT_TMP_DIR)
        if os.path.isdir(os.path.join(AMBARI_AGENT_TMP_DIR, name))
    ]
    llap_dirs = []
    for dir in dirs:
        if dir.startswith('llap-slider'):
            llap_dirs.append(dir)

    if len(llap_dirs) < 1:
        logger.info("Could not find llap dir under %s" % AMBARI_AGENT_TMP_DIR)
        Hive.startService(services=['hiveserver2'])
    else:
        llap_dir = llap_dirs[-1]

        resourceConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'resources.json')
        tmpResourceConfig = os.path.join(ARTIFACTS_DIR, 'resources.json')
        propertyMap = [(["components", "LLAP"], {"yarn.container.failure.threshold": "1000"})]
        util.writePropertiesToConfigJSONFileMulti(resourceConfig, tmpResourceConfig, propertyMap)
        Machine.copy(tmpResourceConfig, resourceConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        appConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'appConfig.json')
        tmpAppConfig = os.path.join(ARTIFACTS_DIR, 'appConfig.json')
        propertyMap = [
            (
                ["global"], {
                    "internal.chaos.monkey.probability.containerfailure": "10000",
                    "internal.chaos.monkey.interval.seconds": interval,
                    "internal.chaos.monkey.enabled": "True"
                }
            )
        ]
        util.writePropertiesToConfigJSONFileMulti(appConfig, tmpAppConfig, propertyMap)
        Machine.copy(tmpAppConfig, appConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        llapShellScript = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'run.sh')
        exit_code, stdout = Machine.runas(LLAP_START_USER, llapShellScript)
        if exit_code != 0:
            logger.info("LLAP shell script failed to run successfully with %d" % exit_code)

        for i in range(10):
            time.sleep(30)
            logger.info("@%d: Check if LLAP cluster is successfully deployed" % i)
            exit_code, stdout = Machine.runas(LLAP_START_USER, 'slider status llap0')
            if exit_code == 0:
                break
            elif i == 9:
                logger.info("LLAP cluster failed to deploy")
def perform_post_upgrade_steps(self):
    if Config.getEnv("HDP_STACK_INSTALLED").lower() == "true":
        from beaver.component.hadoop import Hadoop, HDFS
        from beaver.component.hive import Hive
        COMPONENT = str(self.COMPONENT)
        HDFS_USER = Config.get('hadoop', 'HDFS_USER')
        if 'experiment' in COMPONENT and Hive.isInstalled():
            HIVE_WAREHOUSE_DIR = Hive.getConfigValue(
                "hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse"
            )
            HDFS.chmod(HDFS_USER, 777, HIVE_WAREHOUSE_DIR, True)
        else:
            UpgradeLogger.reportProgress("No additional post-upgrade steps defined for EU", True)
    else:
        logger.info("No additional post-upgrade steps defined for EU on HDF")
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")

    query = """drop table if exists student_txt;
create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter_txt;
create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists student;
create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
drop table if exists voter;
create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
Insert into table student select * from student_txt;
Insert into table voter select * from voter_txt;"""

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def setupAcidDataset(testsuite, LOCAL_DIR):
    ddl_location = None
    if testsuite == 'acid':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-tablesetup.sql")
    elif testsuite == 'unbucketed':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-unbucketed-tablesetup.sql")
    else:
        assert False, "The testsuite passed in is not correct. Please use value 'acid' or 'unbucketed'"

    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download TPCH acid data
    tpch_newdata_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
    Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the acid tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/lineitem_acid", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(tpch_newdata_dir, "lineitem*"), "/tmp/lineitem_acid", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_acid", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        ddl_location, hivevar={'HDFS_LOCATION': '/tmp'}, logoutput=True, queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH acid data in Hive"
def getJobAndAppIds(text):
    '''
    getJobAndAppIds
      text - Text from which to get the application and the job id
    '''
    ids = []
    # pattern to look for is different when tez is enabled.
    if Hive.isTezEnabled():
        # For this method to be backward compatible, we need to check for 2 patterns.
        # The following pattern is applicable for pre-champlain releases.
        pattern = 'Status: Running \(application id: (.*)\)'
        for line in re.finditer(pattern, text):
            # with tez we only get the application id
            ids.append({'application': line.group(1)})
        # The following pattern is applicable for champlain and above releases.
        if len(ids) == 0:
            pattern = 'Status: Running \(Executing on YARN cluster with App id (.*)\)'
            for line in re.finditer(pattern, text):
                # with tez we only get the application id
                ids.append({'application': line.group(1)})
    else:
        pattern = 'Starting Job = (.*), Tracking URL = h.*://.*:?\d+?/proxy/(.*)/'
        for line in re.finditer(pattern, text):
            ids.append({'job': line.group(1), 'application': line.group(2)})
    return ids
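
# A minimal usage sketch for getJobAndAppIds (the sample CLI text below is fabricated to match
# the "champlain and above" Tez pattern; real text would come from Hive CLI output):
#
#   sample = "Status: Running (Executing on YARN cluster with App id application_1000_0007)"
#   ids = getJobAndAppIds(sample)
#   # when Hive.isTezEnabled() is True, ids == [{'application': 'application_1000_0007'}]
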
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')

    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Unable to deploy foodmart dataset"
def getHiveQueryOutput(cls, query, willRunMR=True, delim=",", useStandaloneCmd=True):
    from beaver.component.hive import Hive
    hiveconf = {}
    if willRunMR:
        hiveconf = {
            'hive.input.format': 'org.apache.hadoop.hive.ql.io.HiveInputFormat',
            'hive.vectorized.execution.enabled': 'false',
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true'
        }
    exit_code, stdout, stderr = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) + query, hiveconf=hiveconf, stderr_as_stdout=False
    )
    ruAssert("Storm", exit_code == 0, "[HiveQueryOutput] Failed to run Hive query [%s]" % query)
    return stdout.replace('\t', delim)
def background_job_teardown(cls):
    '''
    Cleanup for long running Hive jobs
    '''
    from beaver.component.hive import Hive
    logger.info("Make sure to switch the HiveServer2 to use the default port")
    adminUser = Machine.getAdminUser()
    hiveHost = Hive.getHiveHost()
    for port in cls._hs2_live_ports:
        pid = Machine.getPIDByPort(port, host=hiveHost, user=adminUser)
        if pid:
            Machine.killProcessRemote(pid, host=hiveHost, user=adminUser)
            time.sleep(2)
    if len(cls._hs2_live_ports) > 0:
        Hive.startService(services=["hiveserver2"])
def setupMergeScaleDataset(LOCAL_DIR):
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCH dataset if not there
    tpch_data_dir = os.path.join(LOCAL_DIR, "data")
    TPCH_DATA_TGZ = os.path.join(LOCAL_DIR, "tpch_data.tgz")
    if not os.path.isfile(TPCH_DATA_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_DNLD_URL'), TPCH_DATA_TGZ)
    Machine.tarExtractAll(TPCH_DATA_TGZ, LOCAL_DIR)

    # Load the tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/tpch", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(tpch_data_dir, "/tmp/tpch", user=HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/tpch", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp/tpch/data'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH data in Hive"

    # Download TPCH staging data
    tpch_stage_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
    Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the staged tables in Hive
    HDFS.createDirectory(
        "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", user=HADOOPQA_USER, perm='777', force=True
    )
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "lineitem*"), "/tmp/lineitem_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "order*"), "/tmp/orders_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "delete*"), "/tmp/delete_stage", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-staged-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH staging data in Hive"
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table " % tbl)
        # TODO: Modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
def grantPrivilegesToUsersOnTable(users, tableName, privilege="all"):
    query = ""
    for user in users:
        query += "grant %s on table %s to user %s with grant option;\n" % (privilege, tableName, user)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Failed to grant privilege [%s] on table [%s] to users [%s]" % (
        privilege, tableName, ",".join(users)
    )
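
# A minimal usage sketch for grantPrivilegesToUsersOnTable (the user and table names are hypothetical):
#
#   grantPrivilegesToUsersOnTable(['hrt_1', 'hrt_2'], 'student', privilege='SELECT')
#
# which issues "grant SELECT on table student to user hrt_1 with grant option;" (and the same for
# hrt_2) through a single Beeline session.
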
def setupTPCDSOriginalDataset(CURR_DIR):
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_original.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tpcds_original', 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')
    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_ORIGINAL_DNLD_URL'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\windows.conf'

    query_file_1 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_text.sql')
    query_file_2 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_orc.sql')
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_1,
        hivevar={
            'LOCATION': HDFS_TEST_DIR + '/data',
            'DB': 'tpcds_src'
        },
        cwd=CURR_DIR,
        logoutput=True,
        queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for text tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_2,
        hivevar={
            'FILE': 'ORC',
            'SOURCE': 'tpcds_src'
        },
        cwd=CURR_DIR,
        logoutput=True,
        queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for ORC tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"
def Hive_getHiveLogDir(cls, logoutput=True):
    try:
        from beaver.component.hive import Hive
        return Hive.getHiveLogDir(logoutput)
    except Exception:
        if logoutput:
            logger.error("Exception occurred during Hive_getHiveLogDir() call")
            logger.error(traceback.format_exc())
        return None
def getDatabaseFlavor(cls):
    dbdriver = Hive.getConfigValue("javax.jdo.option.ConnectionDriverName")
    if "oracle" in dbdriver:
        return "oracle"
    elif "postgresql" in dbdriver:
        dbUrl = Hive.getConfigValue("javax.jdo.option.ConnectionURL")
        m = re.search('jdbc:postgresql://(.*):.*', dbUrl)
        dbHost = Machine.getfqdn()
        if m and m.group(1):
            dbHost = m.group(1)
        dbVersion = Machine.getDBVersion('postgres', host=dbHost)
        if dbVersion:
            return "postgres-%s" % dbVersion
        else:
            return "postgres"
    elif "derby" in dbdriver:
        return "derby"
    elif "mysql" in dbdriver:
        return "mysql"
    return ""
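
# A minimal usage sketch for getDatabaseFlavor (return values follow the branches above; the
# "postgres-<version>" suffix appears only when Machine.getDBVersion resolves a version; the
# call site below is hypothetical, invoked on whichever class owns this classmethod):
#
#   flavor = cls.getDatabaseFlavor()   # e.g. "mysql", "oracle", "derby", "postgres" or "postgres-9.6"
#   if flavor.startswith("postgres"):
#       pass  # apply postgres-specific metastore handling
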
def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    logger.info("Generating test table dataset with %d rows" % num_of_rows)
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), tbl_name + ".dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        for j in range(random.randint(3, 8)):
            f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))
    f.close()

    hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
    logger.info("Copying the test dataset to HDFS directory '%s'" % hdfs_tbl_dir)
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.createDirectory(hdfs_tbl_dir, perm='777')
    HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
    HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)

    logger.info("Creating table '%s' and verification tables" % tbl_name)
    query = "drop table if exists %s;\n" % tbl_name
    query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
        tbl_name, hdfs_tbl_dir
    )
    query += "drop table if exists %s_hive_verify;\n" % tbl_name
    query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
    if type == "Long running":
        for i in range(cls._num_of_webhcat_bgj):
            query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
            query += "create table %s_wh_%d (userid string, age int);\n" % (tbl_name, i + 1)
    hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hivesetup.sql")
    util.writeToFile(query, hivesetupfile)
    exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
    if type:
        msg = "%s job setup for Hive component" % type
        if exit_code != 0:
            UpgradePerNode.reportProgress("[FAILED][Hive][Setup] %s failed due to exitcode = %d" % (msg, exit_code))
        else:
            UpgradePerNode.reportProgress("[PASSED][Hive][Setup] %s finished successfully" % msg)
def setupSchemaEvolutionDataset():
    logger.info("Setup Schema Evolution dataset")
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    hiveServer2Url = str(Hive.getHiveServer2Url())
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-schemaevolution", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
def get_set_queue_cmd(cls, useStandaloneCmd):
    # For https://hortonworks.jira.com/browse/BUG-27221
    from beaver.component.hive import Hive
    if useStandaloneCmd:
        YARN_QUEUE = "storm"
    else:
        YARN_QUEUE = "storm-slider"

    if Hive.isTezEnabled():
        # This won't work because when the Hive CLI starts, Hive does not know about queues
        # that are not set in hive-site.xml. See Deepesh's email on 10/14/2014.
        setqueue = "set tez.queue.name=%s; " % YARN_QUEUE
    else:
        setqueue = "set mapred.job.queue.name=%s; " % YARN_QUEUE
    return setqueue
def setupTestData(stdauth=True):
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hive-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hive-simple-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HIVE_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    HDFS.createDirectory("/tmp/hs2data", user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def tear_down_hive_topology(cls, topologyName, useStandaloneCmd):
    """
    Tear down the Hive topology.
    """
    from beaver.component.hive import Hive
    Machine.rm(user=None, host="localhost", filepath=LOCAL_HIVE_WORK_DIR, isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    # Hive.restoreConfig(services=['metastore'])
    drop_table_q = "use %s; drop table if exists %s; " % (DATABASE_NAME, HIVE_TABLE_NAME)
    exit_code, stdout = Hive.runQuery(cls.get_set_queue_cmd(useStandaloneCmd) + drop_table_q)
    ruAssert("Storm", exit_code == 0)
def doBackgroundJobSetup(cls, hdfs_test_dir):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    logger.info("Preparing the test setup for Hive background job")
    udfjar = os.path.join(Config.getEnv('WORKSPACE'), "tests", "hive", "hive-udf", "hive-udfs-0.1.jar")
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.copyFromLocal(udfjar, hdfs_test_dir)
    query = "drop function sleep; create function sleep as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';" % hdfs_test_dir
    exit_code, stdout = Hive.runQuery(query)
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Hive][Setup] Long running failed due to exitcode = %d" % exit_code)
    else:
        UpgradePerNode.reportProgress("[PASSED][Hive][Setup] Long running finished successfully")
def setupTPCDSDataset():
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_data.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')
    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_DNLD_URL_HDP3'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\windows.conf'

    hiveServer2Url = str(Hive.getHiveServer2Url())

    # generate data
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-orc", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0

    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0

    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-parquet", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
def run_client_smoketest(cls, config=None, env=None):
    '''
    Run Smoke test after upgrading Client
    :param config: Configuration location
    :param env: Set Environment variables
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Hive][Smoke] Smoke test for Hive component started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table %s_hive_verify select userid, avg(age) from %s group by userid order by userid; " % (
        cls._smoketest_tbl, cls._smoketest_tbl
    )
    query += "select count(*) from %s_hive_verify;" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQuery(query, stderr_as_stdout=False)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed with exit code '%d'" % exit_code
        )
        logger.error("Smoke test for Hive failed with the following error: " + stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed to verify number of rows in output"
        )
        logger.error("Smoke test for Hive failed to find [%d] in output [%s]" % (cls._num_of_rows_smoke, stdout))
    else:
        UpgradePerNode.reportProgress("[PASSED][Hive][Smoke] Smoke test for Hive Metastore succeeded")
        logger.info("Smoke test for Hive Metastore succeeded")

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ndrop table if exists %s_bline_verify;\n" % cls._smoketest_tbl
    query += "create table %s_bline_verify (userid string, age int);\n" % cls._smoketest_tbl
    query += "insert overwrite table %s_bline_verify select userid, avg(age) from %s group by userid order by userid;\n" % (
        cls._smoketest_tbl, cls._smoketest_tbl
    )
    query += "select count(*) from %s_bline_verify;\n" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed with exit code '%d'" % exit_code
        )
        logger.error("Smoke test for HiveServer2 failed with the following error: " + stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed to verify number of rows in output"
        )
        logger.error("Smoke test for HiveServer2 failed to find [%d] in output [%s]" % (cls._num_of_rows_smoke, stdout))
    else:
        logger.info("Smoke test for HiveServer2 succeeded")

    logger.info("**** Running WebHCat Smoke Test ****")
    query = "show tables;"
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/ddl" % (webhcatHost, webhcatPort)
    params = {'exec': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for WebHCat failed due to status code = %d" % status_code
        )
    else:
        logger.info("Smoke test for WebHCat succeeded")

    UpgradePerNode.reportProgress("[INFO][Hive][Smoke] Smoke test for Hive component finished")
def switch_master_version(cls, action, version, config=None):
    '''
    Switches Hive master services' version
    :param action: Whether to "upgrade" or "downgrade"
    :param version: Version to be switched to
    :param config: Configuration location
    '''
    from beaver.component.rollingupgrade.ruCommon import hdpSelect
    from beaver.component.hive import Hive

    currentHiveVersion = Hive.getVersion()

    if action == 'upgrade':
        # Backup the database used by the Hive Metastore
        logger.info("Performing backup of the Hive Metastore DB before starting the upgrade")
        Hive.backupMetastoreDB(cls._metastore_backup_file)

    node = Hive.getHiveHost()

    # Stop the old Hive Metastore
    logger.info("Stopping the Hive Metastore")
    Hive.stopService(services=["metastore"])

    # Upgrade Hive Metastore servers to the new version
    hdpSelect.changeVersion("hive-metastore", version, node)

    if action == 'upgrade':
        logger.info("Upgrading the Hive metastore schema")
        Hive.upgradeSchema()

    # Restart Hive Metastore servers one at a time
    logger.info("Restarting the Hive Metastore")
    Hive.startService(services=["metastore"])

    # Start a new HiveServer2 instance
    confHS2Port = Hive.getHiveserver2ThriftPort()
    hs2port = util.getNextAvailablePort(node, confHS2Port)

    hdpSelect.changeVersion("hive-server2", version, node)

    Hive.modifyConfig(config, services=['hiveserver2'], restartService=False)
    logger.info("Starting a new HiveServer2 at port '%d' for assisting rolling-upgrade" % hs2port)
    if hs2port != confHS2Port:
        changes = {'hive-site.xml': {'hive.server2.thrift.port': hs2port}}
        Hive.modifyConfig(changes, services=["hiveserver2"], restartService=False)

    Hive.startService(services=["hiveserver2"])
    cls._hs2_live_ports = [Hive.getHiveserver2ThriftPort(), hs2port]

    # Deregister the old HiveServer2 instances
    logger.info("Deregistering the HiveServer2 on version '%s'" % currentHiveVersion)
    Hive.deregisterHiveServer2(version=currentHiveVersion)

    from beaver.component.hcatalog import Hcatalog

    # Stop the old WebHCat server
    node = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    logger.info("Stopping the WebHCat server")
    Hcatalog.stop(node)

    # Upgrade WebHCat to the new version
    hdpSelect.changeVersion("hive-webhcat", version, node)

    # Start the WebHCat server
    logger.info("Restarting the WebHCat server")
    newConfDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'localWebhcatConf')
    if os.path.exists(newConfDir):
        Hcatalog.start(node, hcat_confdir=newConfDir)
    else:
        Hcatalog.start(node)
def runJdbcMultiSessionDriver(
        testDir,
        addlClasspath=[],
        connectionUrl=None,
        skippedTests=[],
        addlArgs=[],
        reuseConnections=False,
        testFilter=None,
        logsDir=None,
        queryTimeout=3600
):
    '''
    Run the Hive JDBC MultiSession Test Driver
    '''
    harnessDir = os.path.join(Config.getEnv('WORKSPACE'), 'datateamtest', 'hive_jdbc_multisession')
    logger.info("Build the TestDriver to run tests")
    exit_code, stdout = Maven.run("clean package", cwd=harnessDir)
    assert exit_code == 0, "Failed to build the test driver"

    classpath = [
        os.path.join(harnessDir, "target", "hive-multisession-test-0.1.jar"),
        Config.get('hadoop', 'HADOOP_CONF')
    ]
    if len(addlClasspath) == 0:
        hiveJdbcDriver = getStandaloneHiveJdbcJar()
        classpath.insert(0, hiveJdbcDriver)
    else:
        classpath = addlClasspath + classpath

    cobert_tool_version = "cobertura-2.1.1"
    COBERTURA_CLASSPATH = os.path.join(
        tempfile.gettempdir(), "coverage-tmp", cobert_tool_version, cobert_tool_version + ".jar"
    )
    if Machine.pathExists(Machine.getAdminUser(), None, COBERTURA_CLASSPATH, Machine.getAdminPasswd()):
        classpath.append(COBERTURA_CLASSPATH)

    args = ["-t " + testDir]
    if connectionUrl is None:
        connectionUrl = Hive.getHiveServer2Url()
    args.append("-c \"%s\"" % connectionUrl)
    if Hadoop.isSecure():
        args.append("-k " + Config.get('machine', 'KEYTAB_FILES_DIR'))
        if Config.hasOption('machine', 'USER_REALM'):
            USER_REALM = Config.get('machine', 'USER_REALM', '')
            args.append("-e USER_REALM=%s" % (USER_REALM))
    args.extend(["--skip %s" % t for t in skippedTests])
    if reuseConnections:
        args.append("--reuseConnections")
    if testFilter:
        args.append("-f " + testFilter)
    from beaver.marker import getMarkerCondition
    markerCondition = getMarkerCondition()
    if markerCondition:
        args.append("-e 'marker=%s'" % markerCondition)
    if not logsDir:
        logsDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "logs_%d" % int(999999 * random.random()))
    args.append("-l " + logsDir)
    if queryTimeout > 0:
        args.append("--queryTimeout %d" % queryTimeout)
    args.extend(addlArgs)
    return Java.runJava(
        Config.getEnv('ARTIFACTS_DIR'),
        "org.apache.hive.jdbc.TestDriver",
        classPath=(os.pathsep).join(classpath),
        cmdArgs=args
    )
def run_background_job(cls, runSmokeTestSetup=False, config=None):
    '''
    Runs background long running Hive Job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: expected configuration location
    :return: Total number of long running jobs started
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Long running job for Hive component started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " create table if not exists hive_cli_lr (a string); select sleep(%d, 2000, 'hdfs://%s/hive_cli_lr', 'hdfs://%s/END') from (select count(*) from hive_cli_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir
    )
    Hive.runQuery(query, background=True)

    logger.info("**** Running Beeline CLI Test ****")
    # Create the sleep function within the same Beeline session.
    # Functions created outside of the HS2 instance are not picked up.
    query = setqueue + "\n"
    query += "drop function sleep2;\n"
    query += "create function sleep2 as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';\n" % cls._hdfs_bgjtest_dir
    query += "create table if not exists bline_cli_lr (a string);\n"
    query += "select sleep2(%d, 2000, 'hdfs://%s/bline_cli_lr', 'hdfs://%s/END') from (select count(*) from bline_cli_lr) a;\n" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir
    )
    Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    logger.info("**** Running WebHCat Test ****")
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/hive" % (webhcatHost, webhcatPort)
    query = setqueue + " set mapred.task.timeout=0; create table if not exists whcat_rest_lr (a string); select sleep(%d, 2000, 'hdfs://%s/whcat_rest_lr', 'hdfs://%s/END') from (select count(*) from whcat_rest_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir
    )
    params = {'execute': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    retry = 0
    while status_code == 404 and retry < 3:
        time.sleep(15)
        status_code, stdout = util.curl(url, method='POST', params=params)
        retry += 1
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJobSetup] Long running job for WebHCat failed due to status code = %d" % status_code
        )
        logger.error("Webhcat request failed with the following error: %s\n" % stdout)

    if runSmokeTestSetup:
        logger.info("**** Running Hive Smoke Test Setup ****")
        cls.smoke_test_setup()
    return 3
def setup_storm_hive_topology(cls, useStandaloneCmd):
    from beaver.component.hive import Hive

    global HIVE_METASTORE_URI
    global HIVE_HOST
    global HIVE_PORT
    global HIVE_WAREHOUSE_DIR

    storm_version = Storm.getVersion(useStandaloneCmd=True)
    hive_version = Hive.getVersion()
    HIVE_METASTORE_URI = Hive.getConfigValue("hive.metastore.uris", defaultValue="thrift://localhost:9083")
    HIVE_WAREHOUSE_DIR = Hive.getConfigValue("hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse")
    HIVE_HOST = Hive.getHiveHost()
    HIVE_PORT = Hive.getMetastoreThriftPort()

    if Storm.isDalorBeyond():
        JAVA_HIVE_SRC_DIR = os.path.join(
            Config.getEnv('WORKSPACE'), 'tests', 'rolling_upgrade', 'Storm', '2_3', 'storm-hive', 'java'
        )
    else:
        JAVA_HIVE_SRC_DIR = os.path.join(
            Config.getEnv('WORKSPACE'), 'tests', 'rolling_upgrade', 'Storm', '2_2', 'storm-hive', 'java'
        )

    # hive.txn.manager and hive.support.concurrency are set through ambari as per bug-40500
    # logger.info("Restart Hive")
    # changes = {'hive-site.xml': {'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
    #                              'hive.support.concurrency': 'true'}}
    # Hive.modifyConfig(changes, services=['metastore'], restartService=True)

    logger.info("Create test database in Hive")
    exit_code, stdout = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) + " drop database if exists stormdb cascade; \
        create database stormdb;"
    )
    ruAssert("Storm", exit_code == 0, "[StormHiveSetup] Failed to create test database" + stdout)
    HDFS.chmod(runasUser=HDFS.getHDFSUser(), perm=777, directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db")

    # copy tests/storm/storm-hive/java to artifacts/storm-hive-tests
    logger.info("JAVA_SRC_DIR " + JAVA_HIVE_SRC_DIR)
    logger.info("LOCAL_WORK_DIR " + LOCAL_HIVE_WORK_DIR)
    Machine.copy(JAVA_HIVE_SRC_DIR, LOCAL_HIVE_WORK_DIR, user=None, passwd=None)

    # mvn package
    if Machine.isWindows():
        (_, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' % (
                HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION, STORM_VERSION_MAVEN_PARAMETER, storm_version,
                HIVE_VERSION_MAVEN_PARAMETER, hive_version, PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl(),
                CORE_FILE_MAVEN_PARAMETER, CORE_FILE, HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
                HIVE_CORE_MAVEN_PARAMETER, HIVE_CORE_DIR, HIVE_FILE_MAVEN_PARAMETER, HIVE_FILE
            ),
            cwd=LOCAL_HIVE_WORK_DIR
        )
    else:
        (_, _) = Maven.run(
            'package',
            cwd=LOCAL_HIVE_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HIVE_VERSION_MAVEN_PARAMETER: hive_version,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl(),
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                HIVE_CORE_MAVEN_PARAMETER: HIVE_CORE_DIR,
                HIVE_FILE_MAVEN_PARAMETER: HIVE_FILE
            }
        )

    create_table_q = "use %s; \
        drop table if exists %s; \
        create table %s (id int, name string, phone string, street string) \
        partitioned by (city string, state string) \
        clustered by (id) into %s buckets \
        stored as orc \
        tblproperties ('transactional'='true');" % (DATABASE_NAME, HIVE_TABLE_NAME, HIVE_TABLE_NAME, "5")
    exit_code, stdout = Hive.runQuery(cls.get_set_queue_cmd(useStandaloneCmd) + create_table_q)
    ruAssert("Storm", exit_code == 0, "[StormHiveSetup] Failed to create test table userdata_partitioned")
    HDFS.chmod(
        runasUser=HDFS.getHDFSUser(),
        perm=777,
        directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db/" + HIVE_TABLE_NAME
    )