def getHiveQueryOutput(cls, query, willRunMR=True, delim=",", useStandaloneCmd=True):
    from beaver.component.hive import Hive

    hiveconf = {}
    if willRunMR:
        hiveconf = {
            'hive.input.format': 'org.apache.hadoop.hive.ql.io.HiveInputFormat',
            'hive.vectorized.execution.enabled': 'false',
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true'
        }

    exit_code, stdout, stderr = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) + query,
        hiveconf=hiveconf,
        stderr_as_stdout=False)
    ruAssert("Storm", exit_code == 0,
             "[HiveQueryOutput] Failed to run Hive query [%s]" % query)
    return stdout.replace('\t', delim)
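# --- Illustrative sketch (not part of the original suite) ---
# getHiveQueryOutput returns stdout with tabs replaced by `delim`, one row per
# line. A caller typically splits that into rows and columns; the helper below
# is a hypothetical example of doing so (the name parse_delimited_output is
# ours, not beaver's).
def parse_delimited_output(output, delim=","):
    """Split delimited query output into a list of column lists, skipping blank lines."""
    return [line.split(delim) for line in output.splitlines() if line.strip()]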
def background_job_when_master_upgrade(cls):
    '''
    Start background applications that run while the component master service gets upgraded
    :return:
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started")

    logger.info("Creating hive tables for short background jobs")
    query = "drop table if exists shortlr_hive_verify;\n"
    query += "create table shortlr_hive_verify (userid string, age int);\n"
    query += "drop table if exists shortlr_bline_verify;\n"
    query += "create table shortlr_bline_verify (userid string, age int);\n"
    short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'shortlrsetup.sql')
    util.writeToFile(query, short_bgjob_setupfile)

    exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d"
            % exit_code)

    logger.info("Running the Background Job when upgrading Hive")
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Long running job for Hive component upgrades started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

    # Sleep for 10 seconds to make sure the query initializes before the Metastore is restarted
    time.sleep(10)

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished")
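# --- Hedged sketch (assumption, not part of the original suite) ---
# The short background jobs above are kept in cls._shortbgj_hive_process and
# cls._shortbgj_bline_process. Assuming those handles follow the
# subprocess.Popen interface (poll()/returncode), a caller could wait for them
# like this after the master upgrade completes; the helper name is hypothetical.
def wait_for_short_background_jobs(processes, timeout=600, interval=5):
    import time
    deadline = time.time() + timeout
    results = []
    for proc in processes:
        # poll() returns None while the process is still running
        while proc.poll() is None and time.time() < deadline:
            time.sleep(interval)
        results.append(proc.returncode)
    return results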
def tear_down_hive_topology(cls, topologyName, useStandaloneCmd):
    """
    Tear down the Storm-Hive topology and drop its test table.
    """
    from beaver.component.hive import Hive

    Machine.rm(user=None, host="localhost", filepath=LOCAL_HIVE_WORK_DIR,
               isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    # Hive config is managed through Ambari; no restore needed here.
    #Hive.restoreConfig(services=['metastore'])
    drop_table_q = "use %s; drop table if exists %s; " % (DATABASE_NAME, HIVE_TABLE_NAME)
    exit_code, stdout = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) + drop_table_q)
    ruAssert("Storm", exit_code == 0)
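# --- Usage sketch (hypothetical) ---
# A typical test pairs setup and teardown around the topology run, e.g.:
#
#   setup_storm_hive_topology(useStandaloneCmd=True)
#   ...submit the topology and verify the rows it writes...
#   tear_down_hive_topology("storm-hive-topology", useStandaloneCmd=True)
#
# The topology name "storm-hive-topology" is an assumption for illustration;
# the real name comes from the test that submitted it.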
def doBackgroundJobSetup(cls, hdfs_test_dir):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    logger.info("Preparing the test setup for Hive background job")
    udfjar = os.path.join(Config.getEnv('WORKSPACE'), "tests", "hive",
                          "hive-udf", "hive-udfs-0.1.jar")
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.copyFromLocal(udfjar, hdfs_test_dir)
    query = "drop function sleep; create function sleep as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';" % hdfs_test_dir
    exit_code, stdout = Hive.runQuery(query)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Setup] Long running job setup failed due to exitcode = %d" % exit_code)
    else:
        UpgradePerNode.reportProgress(
            "[PASSED][Hive][Setup] Long running job setup finished successfully")
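# --- Hedged sketch (assumption, not part of the original suite) ---
# The inverse of doBackgroundJobSetup: drop the sleep UDF and remove the HDFS
# test directory. Hive.runQuery is used as above; the HDFS deletion helper and
# the function name are assumptions for illustration and may differ from the
# actual beaver API.
def doBackgroundJobCleanup(cls, hdfs_test_dir):
    from beaver.component.hive import Hive
    # Drop the function registered during setup; "if exists" keeps this
    # idempotent if the upgrade already removed it.
    Hive.runQuery("drop function if exists sleep;")
    # Assumed helper; beaver's HDFS module may name this differently.
    HDFS.deleteDirectory(hdfs_test_dir, user=cls._hdfs_user)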
def run_client_smoketest(cls, config=None, env=None):
    '''
    Run smoke test after upgrading the client
    :param config: Configuration location
    :param env: Set environment variables
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    UpgradePerNode.reportProgress(
        "[INFO][Hive][Smoke] Smoke test for Hive component started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table %s_hive_verify select userid, avg(age) from %s group by userid order by userid; " % (
        cls._smoketest_tbl, cls._smoketest_tbl)
    query += "select count(*) from %s_hive_verify;" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQuery(query, stderr_as_stdout=False)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed with exit code '%d'" % exit_code)
        logger.error("Smoke test for Hive failed with the following error: " + stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed to verify number of rows in output")
        logger.error("Smoke test for Hive failed to find [%d] in output [%s]" %
                     (cls._num_of_rows_smoke, stdout))
    else:
        UpgradePerNode.reportProgress(
            "[PASSED][Hive][Smoke] Smoke test for Hive Metastore succeeded")
        logger.info("Smoke test for Hive Metastore succeeded")

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ndrop table if exists %s_bline_verify;\n" % cls._smoketest_tbl
    query += "create table %s_bline_verify (userid string, age int);\n" % cls._smoketest_tbl
    query += "insert overwrite table %s_bline_verify select userid, avg(age) from %s group by userid order by userid;\n" % (
        cls._smoketest_tbl, cls._smoketest_tbl)
    query += "select count(*) from %s_bline_verify;\n" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed with exit code '%d'" % exit_code)
        logger.error("Smoke test for HiveServer2 failed with the following error: " + stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed to verify number of rows in output")
        logger.error("Smoke test for HiveServer2 failed to find [%d] in output [%s]" %
                     (cls._num_of_rows_smoke, stdout))
    else:
        logger.info("Smoke test for HiveServer2 succeeded")

    logger.info("**** Running WebHCat Smoke Test ****")
    query = "show tables;"
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/ddl" % (webhcatHost, webhcatPort)
    params = {'exec': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for WebHCat failed due to status code = %d" % status_code)
    else:
        logger.info("Smoke test for WebHCat succeeded")

    UpgradePerNode.reportProgress(
        "[INFO][Hive][Smoke] Smoke test for Hive component finished")
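# --- Hedged sketch (hypothetical helper) ---
# The WebHCat check above can be factored into a reusable helper. It mirrors
# the util.curl call already used in this module; the helper name webhcat_ddl
# is an assumption for illustration.
def webhcat_ddl(query, host, port="50111"):
    """POST a DDL statement to WebHCat's ddl endpoint; return (status_code, body)."""
    url = "http://%s:%s/templeton/v1/ddl" % (host, port)
    return util.curl(url, method='POST', params={'exec': query})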
def run_background_job(cls, runSmokeTestSetup=False, config=None):
    '''
    Runs long running background Hive jobs
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: Expected configuration location
    :return: Total number of long running jobs started
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Long running job for Hive component started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " create table if not exists hive_cli_lr (a string); select sleep(%d, 2000, 'hdfs://%s/hive_cli_lr', 'hdfs://%s/END') from (select count(*) from hive_cli_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    Hive.runQuery(query, background=True)

    logger.info("**** Running Beeline CLI Test ****")
    # Create the sleep function within the same Beeline session:
    # functions created outside of the HS2 instance are not picked up.
    query = setqueue + "\n"
    query += "drop function sleep2;\n"
    query += "create function sleep2 as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';\n" % cls._hdfs_bgjtest_dir
    query += "create table if not exists bline_cli_lr (a string);\n"
    query += "select sleep2(%d, 2000, 'hdfs://%s/bline_cli_lr', 'hdfs://%s/END') from (select count(*) from bline_cli_lr) a;\n" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    logger.info("**** Running WebHCat Test ****")
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/hive" % (webhcatHost, webhcatPort)
    query = setqueue + " set mapred.task.timeout=0; create table if not exists whcat_rest_lr (a string); select sleep(%d, 2000, 'hdfs://%s/whcat_rest_lr', 'hdfs://%s/END') from (select count(*) from whcat_rest_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    params = {'execute': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    # WebHCat may briefly return 404 during the upgrade; retry a few times.
    retry = 0
    while status_code == 404 and retry < 3:
        time.sleep(15)
        status_code, stdout = util.curl(url, method='POST', params=params)
        retry += 1
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJobSetup] Long running job for WebHCat failed due to status code = %d" % status_code)
        logger.error("WebHCat request failed with the following error: %s\n" % stdout)

    if runSmokeTestSetup:
        logger.info("**** Running Hive Smoke Test Setup ****")
        cls.smoke_test_setup()
    return 3
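# --- Hedged sketch (assumption, not part of the original suite) ---
# The sleep()/sleep2() UDFs above write an 'hdfs://<dir>/END' marker when they
# finish, so completion can be detected by polling for it. The existence check
# below assumes an HDFS.fileExists helper; the exact beaver API may differ.
# The helper name is hypothetical.
def wait_for_end_marker(hdfs_bgjtest_dir, timeout=3600, interval=30):
    import time
    deadline = time.time() + timeout
    marker = "hdfs://%s/END" % hdfs_bgjtest_dir
    while time.time() < deadline:
        if HDFS.fileExists(marker):  # assumed helper; see note above
            return True
        time.sleep(interval)
    return False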
def setup_storm_hive_topology(cls, useStandaloneCmd):
    from beaver.component.hive import Hive

    storm_version = Storm.getVersion(useStandaloneCmd=True)
    hive_version = Hive.getVersion()

    # Globals must be declared before they are assigned in this scope,
    # otherwise Python raises a SyntaxError.
    global HIVE_METASTORE_URI
    global HIVE_HOST
    global HIVE_PORT
    global HIVE_WAREHOUSE_DIR
    HIVE_METASTORE_URI = Hive.getConfigValue(
        "hive.metastore.uris", defaultValue="thrift://localhost:9083")
    HIVE_WAREHOUSE_DIR = Hive.getConfigValue(
        "hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse")
    HIVE_HOST = Hive.getHiveHost()
    HIVE_PORT = Hive.getMetastoreThriftPort()

    if Storm.isDalorBeyond():
        JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'tests',
                                         'rolling_upgrade', 'Storm', '2_3',
                                         'storm-hive', 'java')
    else:
        JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'tests',
                                         'rolling_upgrade', 'Storm', '2_2',
                                         'storm-hive', 'java')

    # hive.txn.manager and hive.support.concurrency are set through Ambari as per bug-40500
    #logger.info("Restart Hive")
    #changes = {'hive-site.xml': {'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
    #                             'hive.support.concurrency': 'true'}}
    #Hive.modifyConfig(changes, services=['metastore'], restartService=True)

    logger.info("Create test database in Hive")
    exit_code, stdout = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) +
        " drop database if exists stormdb cascade; create database stormdb;")
    ruAssert("Storm", exit_code == 0,
             "[StormHiveSetup] Failed to create test database" + stdout)
    HDFS.chmod(runasUser=HDFS.getHDFSUser(), perm=777,
               directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db")

    # Copy tests/storm/storm-hive/java to artifacts/storm-hive-tests
    logger.info("JAVA_SRC_DIR " + JAVA_HIVE_SRC_DIR)
    logger.info("LOCAL_WORK_DIR " + LOCAL_HIVE_WORK_DIR)
    Machine.copy(JAVA_HIVE_SRC_DIR, LOCAL_HIVE_WORK_DIR, user=None, passwd=None)

    # mvn package
    if Machine.isWindows():
        # Eight -D%s=%s pairs to match the sixteen format arguments below.
        (_, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' %
            (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
             STORM_VERSION_MAVEN_PARAMETER, storm_version,
             HIVE_VERSION_MAVEN_PARAMETER, hive_version,
             PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl(),
             CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
             HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
             HIVE_CORE_MAVEN_PARAMETER, HIVE_CORE_DIR,
             HIVE_FILE_MAVEN_PARAMETER, HIVE_FILE),
            cwd=LOCAL_HIVE_WORK_DIR)
    else:
        (_, _) = Maven.run('package',
                           cwd=LOCAL_HIVE_WORK_DIR,
                           env={
                               HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                               STORM_VERSION_MAVEN_PARAMETER: storm_version,
                               HIVE_VERSION_MAVEN_PARAMETER: hive_version,
                               PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl(),
                               CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                               HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                               HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                               HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                               HIVE_CORE_MAVEN_PARAMETER: HIVE_CORE_DIR,
                               HIVE_FILE_MAVEN_PARAMETER: HIVE_FILE
                           })

    create_table_q = "use %s; " \
                     "drop table if exists %s; " \
                     "create table %s (id int, name string, phone string, street string) " \
                     "partitioned by (city string, state string) " \
                     "clustered by (id) into %s buckets " \
                     "stored as orc " \
                     "tblproperties ('transactional'='true');" % (
                         DATABASE_NAME, HIVE_TABLE_NAME, HIVE_TABLE_NAME, "5")

    exit_code, stdout = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) + create_table_q)
    ruAssert("Storm", exit_code == 0,
             "[StormHiveSetup] Failed to create test table %s" % HIVE_TABLE_NAME)
    HDFS.chmod(runasUser=HDFS.getHDFSUser(), perm=777,
               directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db/" + HIVE_TABLE_NAME)
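# --- Usage sketch (hypothetical) ---
# After setup, the transactional table can be sanity-checked before the
# topology writes to it, e.g. by confirming it is empty:
#
#   out = getHiveQueryOutput(cls, "select count(*) from %s.%s;" %
#                            (DATABASE_NAME, HIVE_TABLE_NAME))
#   ruAssert("Storm", out.strip() == "0")
#
# getHiveQueryOutput is defined earlier in this module; calling it this way
# assumes it is reachable as shown, which may differ from how the class
# actually exposes it.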