def submit_storm_hive_topology(cls, tcId, className, args, useStandaloneCmd):
    if Hadoop.isSecure():
        user_realm = None
        if Config.hasOption('machine', 'USER_REALM'):
            user_realm = Config.get('machine', 'USER_REALM', '')
        else:
            nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
            atloc = nnKerbPrincipal.find("@")
            if atloc != -1:
                # Strip everything up to and including '@' to get the bare realm.
                user_realm = nnKerbPrincipal[atloc + 1:]
        if user_realm is not None:
            args += " " + Machine.getHeadlessUserKeytab(Config.getEnv('USER')) \
                    + " " + Config.getEnv('USER') + '@' + user_realm

    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HIVE_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    logger.info(exit_code)
    ruAssert("Storm", exit_code == 0, "[StormHiveSubmit] %s Failed" % tcId)
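# Illustrative usage sketch (not part of the original suite): submit the Hive
# topology for one test case. 'storm_hive_suite' stands in for the class that
# defines submit_storm_hive_topology(); the topology class name and argument
# string below are placeholders, not values taken from the real tests.
def _example_submit_storm_hive_smoke(storm_hive_suite):
    storm_hive_suite.submit_storm_hive_topology(
        tcId="storm_hive_smoke",
        className="org.example.StormHiveTopology",  # hypothetical topology class
        args="stormdb stormtable",                  # hypothetical topology arguments
        useStandaloneCmd=True)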
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student_txt;
create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter_txt;
create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists student;
create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
drop table if exists voter;
create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
Insert into table student select * from student_txt;
Insert into table voter select * from voter_txt;"""
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def get_log_aggregation_Dir_Locations(cls):
    '''
    Gets the base directories used for log aggregation.
    Returns a tuple of (str, str, str, str):
    (cluster name, temporary cluster dir for logs, temporary dir for application logs, test component).
    '''
    config = ConfigParser()
    reportconf = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'test_report.conf')
    SECTION = "HW-QE-PUBLISH-REPORT"
    config.optionxform = str
    config.read(reportconf)
    CLUSTER_NAME = config.get(SECTION, "CLUSTER_NAME")
    logUtilCloudbreak.LOCAL_TMP_CLUSTER_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), CLUSTER_NAME)
    logUtilCloudbreak.LOCAL_TMP_APP_STORAGE = os.path.join(cls.LOCAL_TMP_CLUSTER_DIR, "Application-logs")
    logUtilCloudbreak.COMPONENT = ''
    if config.has_option(SECTION, 'TESTSUITE_COMPONENT'):
        logUtilCloudbreak.COMPONENT = config.get(SECTION, 'TESTSUITE_COMPONENT')
        logger.info("Set logUtilCloudbreak.COMPONENT to %s", logUtilCloudbreak.COMPONENT)
    return (CLUSTER_NAME, logUtilCloudbreak.LOCAL_TMP_CLUSTER_DIR, logUtilCloudbreak.LOCAL_TMP_APP_STORAGE,
            logUtilCloudbreak.COMPONENT)
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)
    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True)
    assert exit_code == 0, "Unable to deploy foodmart dataset"
def switchDirectory(currentDirectory, component):
    if isCurrentDirectoryAPIFramework(currentDirectory):
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), component)
    else:
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), component, 'apitestframework')
    logger.info('Switched current dir to: %s' % path)
    return path
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting
    # and not something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2.
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def __init__(self):
    super(RollingUpgrade, self).__init__()
    self.AMBARI_PROP_FILE = os.path.join(
        Config.getEnv('WORKSPACE'), '..', 'ambari_deploy', 'uifrm', 'ambari.properties')
    self.AMBARI_OLD_PROP_FILE = os.path.join(
        Config.getEnv('WORKSPACE'), '..', 'ambari_deploy', 'uifrm_old', 'uifrm', 'ambari.properties')
    self.COMPONENTS_TO_TEST = []
    self.DO_DOWNGRADE = False
def get_local_job_summary_logs(cls, component):
    """
    Copy job summary logs to local dirs [artifacts/job_summary_local.log].
    parameter:
        component : component name for which log collection is taking place
    return:
        list of local copies of the job summary logs
    Note: some components (such as HA and Falcon) need special handling because
    there are multiple job summary log files.
    """
    LocalJobSummaryLogs = []
    try:
        if component == FALCON_STR:
            from beaver.component.falcon import Falcon  # pylint: disable=redefined-outer-name
            host1 = Falcon.get_cluster_1_masters()['rm']
            host2 = Falcon.get_cluster_2_masters()['rm']
            host3 = Falcon.get_cluster_3_masters()['rm']
            for host in [host1, host2, host3]:
                JobSummaryLog = ComponentLogUtil.MAPRED_getJobSummaryLogFile(host)
                LocalJobSummaryLog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "jobsummary_" + host + ".log")
                Machine.copyToLocal(None, host, JobSummaryLog, LocalJobSummaryLog, None)
                if Machine.pathExists(None, None, LocalJobSummaryLog, passwd=None):
                    LocalJobSummaryLogs.append(LocalJobSummaryLog)
        else:
            for host in ComponentLogUtil.YARN_getRMHANodes():
                JobSummaryLog = ComponentLogUtil.MAPRED_getJobSummaryLogFile(host)
                LocalJobSummaryLog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "jobsummary_" + host + ".log")
                Machine.copyToLocal(Machine.getAdminUser(), host, JobSummaryLog, LocalJobSummaryLog,
                                    Machine.getAdminPasswd())
                Machine.chmod("777", LocalJobSummaryLog, user=Machine.getAdminUser(),
                              passwd=Machine.getAdminPasswd())
                if Machine.pathExists(Machine.getAdminUser(), None, LocalJobSummaryLog,
                                      passwd=Machine.getAdminPasswd()):
                    LocalJobSummaryLogs.append(LocalJobSummaryLog)
        return LocalJobSummaryLogs
    except Exception as e:
        logger.info("Exception occurred during job summary log collection: %s", e)
        tb = traceback.format_exc()
        logger.info(tb)
    return LocalJobSummaryLogs
def setup_capacity_scheduler(cls, components):
    """
    Set up the YARN capacity scheduler based on components.
    This API is not called during setup_module.
    :param components: list of components
    :type components: list of str
    :return: path to the modified capacity-scheduler.xml
    """
    if RuSetup._defaultQueue:
        components.append("default")
    logger.info("*** setup_capacity_scheduler ***")
    if RuSetup._skipQueue is not None:
        logger.info("Components that do not get a queue: " + str(RuSetup._skipQueue))
        components = list(set(components) - RuSetup._skipQueue)
    logger.info("components = %s" % components)
    numComponents = len(components)
    percentPerQueue = 100.0 / numComponents
    percentPerQueueStr = "{0:0.2f}".format(percentPerQueue)
    xmlDict = {}
    rootQueues = ",".join(components)
    xmlDict["yarn.scheduler.capacity.root.queues"] = rootQueues
    for component in components:
        xmlDict["yarn.scheduler.capacity.root.%s.capacity" % component] = percentPerQueueStr
        xmlDict["yarn.scheduler.capacity.root.%s.user-limit-factor" % component] = 1
        xmlDict["yarn.scheduler.capacity.root.%s.maximum-capacity" % component] = percentPerQueueStr
        xmlDict["yarn.scheduler.capacity.root.%s.state" % component] = "RUNNING"
        xmlDict["yarn.scheduler.capacity.root.%s.acl_submit_jobs" % component] = "*"
        xmlDict["yarn.scheduler.capacity.root.%s.acl_administer_jobs" % component] = "*"
    util.dumpTextString(xmlDict, "====== PLANNED QUEUES ======", "==================")
    master_capacity_file = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn", "data",
                                        "capacity-scheduler.xml")
    modified_capacity_file = os.path.join(Config.getEnv("ARTIFACTS_DIR"), "capacity-scheduler.xml")
    Machine.copy(master_capacity_file, modified_capacity_file)
    util.writePropertiesToConfigXMLFile(modified_capacity_file, modified_capacity_file, xmlDict)
    #util.dumpText(modified_capacity_file, "====== capacity-scheduler.xml ======", "==================")
    if RuSetup._defaultQueue:
        components.remove("default")
    return modified_capacity_file
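# Illustrative usage sketch (not part of the original suite): split the root
# queue evenly across two hypothetical component queues. 'ru_setup' stands in
# for the class that defines setup_capacity_scheduler(); with ["hdfs", "yarn"]
# each queue would get capacity and maximum-capacity of 50.00.
def _example_even_queue_split(ru_setup):
    modified_xml = ru_setup.setup_capacity_scheduler(["hdfs", "yarn"])
    logger.info("Modified capacity-scheduler.xml written to %s", modified_xml)
    return modified_xml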
def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    logger.info("Generating test table dataset with %d rows" % num_of_rows)
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), tbl_name + ".dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        for j in range(random.randint(3, 8)):
            f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))
    f.close()
    hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
    logger.info("Copying the test dataset to HDFS directory '%s'" % hdfs_tbl_dir)
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.createDirectory(hdfs_tbl_dir, perm='777')
    HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
    HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)
    logger.info("Creating table '%s' and verification tables" % tbl_name)
    query = "drop table if exists %s;\n" % tbl_name
    query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
        tbl_name, hdfs_tbl_dir)
    query += "drop table if exists %s_hive_verify;\n" % tbl_name
    query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
    if type == "Long running":
        for i in range(cls._num_of_webhcat_bgj):
            query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
            query += "create table %s_wh_%d (userid string, age int);\n" % (tbl_name, i + 1)
    hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hivesetup.sql")
    util.writeToFile(query, hivesetupfile)
    exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
    if type:
        msg = "%s job setup for Hive component" % type
        if exit_code != 0:
            UpgradePerNode.reportProgress("[FAILED][Hive][Setup] %s failed due to exitcode = %d" % (msg, exit_code))
        else:
            UpgradePerNode.reportProgress("[PASSED][Hive][Setup] %s finished successfully" % msg)
def validate_apps(self, local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
    '''
    Validate that the small apps passed.
    :param local_dir_name:
    :return:
    '''
    local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), self.local_dir_name)
    appIds = []
    for root, _dirs, filenames in os.walk(local_dir):
        for f in filenames:
            logfile = open(os.path.join(root, f), 'r')
            stdout = logfile.read()
            logfile.close()
            appId = YARN.getApplicationIDFromStdout(stdout, logoutput=False)
            appIds.append(appId)
    # Sleep for 30 seconds before checking app status
    time.sleep(30)
    status, d = YARN.checkAppsSucceeded(appIds, logPrefix=None, useWS=True, localDir=None)
    for app, status in d.items():
        if status != "SUCCEEDED":
            appInfo = YARN.getApplicationInfo(app)
            logger.info(appInfo)
            if appInfo:
                assert appInfo['state'] == 'ACCEPTED', "app is neither in ACCEPTED nor SUCCEEDED state"
def getZipFile(cls, version=HBase.getVersionFromBuild(), isRU=False):
    # download for linux, no download for windows
    HBASE_VER_BUILD = version
    if Machine.isWindows():
        zipFile = os.path.join(
            Config.get('slider', 'SLIDER_HOME'), "app-packages",
            "slider-hbase-app-win-package-%s.zip" % HBASE_VER_BUILD)
        return zipFile
    pkg_list = "pkg-list_qe.txt"
    path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), pkg_list)
    #pkgUrl = Config.get('slider','APP_PKG_LIST')
    pkgUrl = Slider.getAppPackageBaseUrl(isRU) + "/slider-app-packages/" + pkg_list
    util.downloadUrl(pkgUrl, path)
    with open(path, 'r') as f:
        for line in f:
            if line.startswith("hbase_pkg_url="):
                # strip the 14-character "hbase_pkg_url=" prefix to get the download URL
                url = line.strip()[14:]
                break
    zipFile = os.path.join(os.getcwd(), "slider-hbase-app-package-%s.zip" % HBASE_VER_BUILD)
    logger.info("downloading " + url)
    util.downloadUrl(url, zipFile)
    return zipFile
def runas(cls, user, cmd, cwd=None, env=None, logoutput=True, runInBackground=False):
    runCmd = Config.get('pig', 'PIG_CMD') + " " + cmd
    # initialize env
    if not env:
        env = {}
    # get kerberos ticket
    if Hadoop.isSecure():
        if user is None:
            user = Config.getEnv('USER')
        kerbTicket = Machine.getKerberosTicket(user)
        env['KRB5CCNAME'] = kerbTicket
        user = None
    if runInBackground:
        return Machine.runinbackgroundAs(user, runCmd, cwd=cwd, env=env)
    else:
        return Machine.runas(user, runCmd, cwd=cwd, env=env, logoutput=logoutput)
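# Illustrative usage sketch (not part of the original suite): run a Pig script
# as the current user. 'pig' stands in for the class that defines runas() above,
# and the script path is a placeholder.
def _example_run_pig_script(pig):
    exit_code, stdout = pig.runas(None, "-f /tmp/wordcount.pig", logoutput=True)
    assert exit_code == 0, "Pig script failed"
    return stdout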
def gather_cloudbreakartifacts_log(  # pylint: disable=unused-argument
        cls, destHost, destUser, destPath, passwd, cleanupDirFirst=False, logoutput=False):
    '''
    Gather artifacts to the destination host with rsync.
    Returns None.
    '''
    try:
        srcPath = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "*")
        cls.createDirInternal(destHost, destUser, destPath, passwd, logoutput=logoutput)
        from beaver.rsync import RSync
        RSync.rsyncToRemoteHost(
            user=None,
            host=None,
            passwd=None,
            srcPath=srcPath,
            destPath=destPath,
            destHost=destHost,
            destUser=destUser,
            logoutput=logoutput,
            flag="-rhp --chmod=u=rwx,g=rwx,o=r")
    except Exception as e:
        logger.info("Exception occurred in gather_cloudbreakartifacts_log. %s", e)
        tb = traceback.format_exc()
        logger.info(tb)
def createPoliciesFromJson(cls,
                           file,
                           serviceType,
                           sourceHiveServiceName="mycluster0_hive",
                           sourceHdfsServiceName="mycluster0_hadoop",
                           targetServiceName=None,
                           ambariWeburl=source_weburl,
                           updateIfExists=False,
                           polResource=None,
                           isOverRideTrue=True):
    if Xa.isArgusInstalled():
        servicesMapJson = Config.getEnv('ARTIFACTS_DIR') + '/' + \
            datetime.datetime.now().strftime("%Y%m%d%H%M%S") + 'service_mapping.json'
        serviceName = "hadoop" if serviceType == "hdfs" else serviceType
        if targetServiceName is None:
            targetServiceName = Xa.findRepositories(
                nameRegex="^.*_" + serviceName + "$", type=serviceType, status=True,
                ambariWeburl=ambariWeburl)[0]['name']
        f = open(servicesMapJson, 'w')
        if serviceType == "hive":
            f.write('{"' + sourceHiveServiceName + '":"' + targetServiceName + '"}')
        elif serviceType == "hdfs":
            f.write('{"' + sourceHdfsServiceName + '":"' + targetServiceName + '"}')
        f.close()
        Xa.importPoliciesInJsonFile(
            file,
            serviceType,
            servicesMapJson=servicesMapJson,
            ambariWeburl=ambariWeburl,
            updateIfExists=updateIfExists,
            polResource=polResource,
            isOverRideTrue=isOverRideTrue)
def kinitas(cls, keytabUser, principal=None, keytabFile=None, flag="-f", logoutput=True, host=None):
    '''
    Runs kinit as the specified keytab user.
    Returns (exit_code, stdout).
    '''
    # Build common variables and cache them.
    cls._buildCommonVar()
    # If keytabUser is None, use the current user.
    if keytabUser is None:
        keytabUser = Config.getEnv('USER')
    # Get the kerberos ticket location, e.g. /grid/0/hadoopqe/artifacts/kerberosTickets/hrt_qa.kerberos.ticket
    kerbTicket = Machine.getKerberosTicket(user=keytabUser)
    # If keytab is unset, use the default keytab path, e.g. /home/hrt_qa/hadoopqa/keytabs/hrt_qa.headless.keytab
    if keytabFile is None:
        keytabFile = Machine.getHeadlessUserKeytab(keytabUser)
    # If principal is not set, use the keytab user.
    if principal is None:
        principal = keytabUser
    # Build the command.
    cmd = "%s -c %s -k -t %s %s %s" % (cls._kinitloc, kerbTicket, keytabFile, flag,
                                       Machine.get_user_principal(principal))
    # kinit always runs with the credentials of the current user.
    return Machine.runas(user=None, cmd=cmd, env=None, logoutput=logoutput, host=host)
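# Illustrative usage sketch (not part of the original suite): kinit as the
# headless test user before a secure operation. 'machine_cls' stands in for the
# class that defines kinitas() above.
def _example_kinit_headless_user(machine_cls):
    exit_code, stdout = machine_cls.kinitas(Config.getEnv('USER'))
    assert exit_code == 0, "kinit failed: %s" % stdout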
def verifyLongRunningQuery(cls, file_to_verify):
    lfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), file_to_verify)
    exit_code, stdout = HDFS.copyToLocal(cls._hdfs_bgjtest_dir + "/" + file_to_verify, lfile)
    if exit_code != 0:
        logger.info("Error fetching the timestamp file from HDFS")
        return False
    lines = open(lfile, 'r').readlines()
    if len(lines) == 0:
        logger.info("Empty timestamp file")
        return False
    try:
        ts = int(lines[-1])
        # -1 indicates a graceful shutdown
        if ts == -1:
            return True
        # The time gap should be less than 5 minutes, which indicates that the
        # UDF wrote something at least once in the last 5 minutes.
        timegap = time.time() - (ts / 1000)
        if timegap > 300:
            logger.info("Time gap is %d seconds, last line in the timestamp file was '%d'" % (timegap, ts))
            return False
    except ValueError:
        logger.info("Error parsing last line in the timestamp file => '" + lines[-1] + "'")
        return False
    return True
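# Writer-side convention assumed by verifyLongRunningQuery() above (inferred from
# the check, not taken from the original suite): the long-running UDF appends the
# current time in epoch milliseconds to the timestamp file, and writes -1 on
# graceful shutdown. A minimal local sketch of that heartbeat:
def _example_append_heartbeat(local_ts_file):
    with open(local_ts_file, 'a') as f:
        f.write("%d\n" % int(time.time() * 1000))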
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'
    logger.info("Setup Tableau dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)
    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)
    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)
        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl
        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl
        logger.info("create %s table" % tbl)
        # TODO: modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile),
            readFromFile=True,
            hivevar={'HDFS_LOCATION': hdfsDir},
            logoutput=True)
        assert exit_code == 0, '%s table creation failed' % tbl
def get_testcase_status_txt(cls):
    """
    Get the test_case_status.txt location.
    :return: full path to test_case_status.txt under ARTIFACTS_DIR
    """
    return os.path.join(Config.getEnv('ARTIFACTS_DIR'), "test_case_status.txt")
def __init__(self):
    self.COMPONENT = Config.get('ambari', 'COMPONENT')
    self.TESTSUITE_FILE = Config.get('ambari', 'TESTSUITE_FILE')
    self.PYTEST_FILE = os.path.join(
        Config.getEnv('WORKSPACE'), 'beaver', 'component', 'upgrades', 'upgradecommon.py')
    self.SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'uifrm')
    if 'preupgrade' in self.COMPONENT or 'installold' in self.COMPONENT:
        self.SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'uifrm_old', 'uifrm')
    self.LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), self.COMPONENT)
    self.LOCAL_WORK_DIR_UIFRM = os.path.join(Config.getEnv('ARTIFACTS_DIR'), self.COMPONENT)
    self.DEPLOY_CODE_DIR = os.path.join(Config.getEnv('WORKSPACE'), '..', 'ambari_deploy')
    self.JAVA_HOME = Config.get('machine', 'JAVA_HOME')
    self.DISPLAY = Config.getEnv('DISPLAY')
    self.RUN_MARKER_VERSION = Config.get('ambari', 'RUN_MARKER_VERSION')
    self.RUN_MARKER_LIST = Config.get('ambari', 'RUN_MARKER_LIST')
    self.STACK_TYPE = Config.get('ambari', 'STACK_TYPE')
    self.TEST_RESULT = {}
    self.TESTCASES = []
    self.env = {}
    self.step_counter = 0
    self.source_stack_version = ''
    self.target_stack_version = Config.get('ambari', 'STACK_UPGRADE_TO')
    self.isambari_upgrade_success = False
    self.is_ambari_upgrade = len(Config.get('ambari', 'UPGRADE_TO').strip()) > 0
def setup_storm_slider_app(cls):
    sys.path.insert(
        0, os.path.join(Config.getEnv('WORKSPACE'), 'tests', 'nonnightly', 'storm-slider-ru', 'aaaa'))
    from test_startStorm import setupStorm
    setupStorm()  # This kills any currently running storm app and creates a new one.
class Ant(object):
    # set the ant version
    _version = '1.8.4'
    # determine java home
    _java_home = Config.get('machine', 'JAVA_HOME')
    _worksapce = Config.getEnv('WORKSPACE')
    # determine tools path
    _tools_path = os.path.join(_worksapce, 'tools')
    # determine ant home
    _ant_home = os.path.join(_tools_path, 'apache-ant-' + _version)
    _ant_cmd = os.path.join(_ant_home, 'bin', 'ant')
    # what url to download ant from
    # need to find a solution for windows; until then the url will live here
    _ant_download_url = Config.get('machine', 'ANT_URL')

    def __init__(self):
        pass

    # method to run an ant cmd
    @classmethod
    def run(cls, cmd, cwd=None, env=None, logoutput=True, user=None):
        # make sure ant is set up before it is run
        cls.setupAnt()
        # initialize env
        if not env:
            env = {}
        # if running on a secure cluster, get a kerberos ticket
        if Hadoop.isSecure():
            env['KRB5CCNAME'] = Machine.getKerberosTicket(user)
        env['JAVA_HOME'] = cls._java_home
        env['ANT_HOME'] = cls._ant_home
        run_cmd = "%s -Dbuild.compiler=javac1.7 %s" % (cls._ant_cmd, cmd)
        exit_code, stdout = Machine.run(run_cmd, cwd=cwd, env=env, logoutput=logoutput)
        return exit_code, stdout

    # method to set up ant
    @classmethod
    def setupAnt(cls):
        # if the dir exists, return as it is already set up
        if os.path.isdir(cls._ant_home):
            return
        # check if the tarball exists before downloading it
        tarName = "apache-ant-" + cls._version + ".tar.gz"
        tarballPath = os.path.join(cls._worksapce, tarName)
        if not os.path.isfile(tarballPath):
            # download ant
            assert util.downloadUrl(cls._ant_download_url, tarballPath)
        # now untar ant; on windows you have to run the tar cmd from the dir where
        # the file exists, else it fails
        Machine.tarExtractAll(filepath=tarballPath, outpath=cls._tools_path, mode='r:gz')
        assert os.path.isfile(cls._ant_cmd)
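# Illustrative usage sketch (not part of the original module): build a
# hypothetical project checkout with the bundled Ant. setupAnt() downloads and
# unpacks Ant on first use, so no separate setup call is needed.
def _example_ant_build(project_dir):
    exit_code, stdout = Ant.run("clean compile", cwd=project_dir, logoutput=True)
    assert exit_code == 0, "ant build failed"
    return stdout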
def background_job_when_master_upgrade(cls):
    '''
    Start a background application which runs while the component master service gets upgraded.
    :return:
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started")
    logger.info("Creating hive tables for short background jobs")
    query = "drop table if exists shortlr_hive_verify;\n"
    query += "create table shortlr_hive_verify (userid string, age int);\n"
    query += "drop table if exists shortlr_bline_verify;\n"
    query += "create table shortlr_bline_verify (userid string, age int);\n"
    short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'shortlrsetup.sql')
    util.writeToFile(query, short_bgjob_setupfile)
    exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d" %
            exit_code)
    logger.info("Running the Background Job when upgrading Hive")
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Long running job for Hive component upgrades started")
    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue
    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_hive_process = Hive.runQuery(query, background=True)
    # Sleep for 10 seconds to make sure that the query initializes before the Metastore is restarted
    time.sleep(10)
    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query, readFromFile=True, background=True)
    UpgradePerNode.reportProgress("[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished")
def checkClasspathVersion(cls, Version_Num, config=None):
    Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn")
    Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
    Mapper = "data/versionVerifyMapper.py"
    Reducer = "data/versionVerifyReducer.py"
    Verify_File_Name = "test.txt"
    Verify_Test_File = os.path.join(Multi_Version_App_Dir, Verify_File_Name)
    # Set up env
    mapred_app_path = MAPRED.getConfigValue("mapreduce.application.framework.path", None)
    mapred_classpath = MAPRED.getConfigValue("mapreduce.application.classpath", None)
    env = {
        "mapreduce.application.framework.path": mapred_app_path,
        "mapreduce.application.classpath": mapred_classpath
    }
    verifyInput = cls._hdfs_input + "/verify"
    HDFS.createDirectory(verifyInput, None, "777", False)
    # Copy template files for the verifier streaming job
    templateFile = open(Verify_Test_File, 'w')
    templateFile.write(Version_Num)
    templateFile.close()
    HDFS.copyFromLocal(Verify_Test_File, verifyInput, user=Config.get('hadoop', 'HADOOPQA_USER'))
    # Submit the special streaming job
    shortStreamingId = HadoopJobHelper.runStreamJob(
        Mapper,
        Reducer,
        verifyInput,
        cls._hdfs_output_verify,
        files=Multi_Version_App_Dir,
        config=config,
        extraJobArg=cls._jobArgs,
        env=env,
        proposedJobName=cls._shortStreamingName)
    MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)
    # Make sure the task succeeded
    #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'
    # Check the result content
    retVal, checkContent = HDFS.cat(cls._hdfs_output_verify + '/part-00000')
    logger.info("CHECK CLASSPATH VERSION OUTPUT")
    logger.info(retVal)
    logger.info(checkContent)
    ruAssert("YARN", retVal == 0)
    ruAssert("YARN", 'True' in checkContent, "[VersionVerify] Stream job returns false: " + checkContent)
    #assert retVal == 0
    #assert 'True' in checkContent, "Stream job returns false: " + checkContent
    #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
    HDFS.deleteDirectory(cls._hdfs_output_verify, user=Config.get('hadoop', 'HADOOPQA_USER'))
def run(self):
    """
    Move files to the HDFS input dir after each interval period, n times.
    """
    for count in range(0, self.times):
        text = "hello world \n Testing HDFS Word count Spark application"
        random_name = ''.join(random.choice(string.lowercase) for i in range(5))
        filename = os.path.join(Config.getEnv('ARTIFACTS_DIR'), random_name)
        util.writeToFile(text, filename, isAppend=False)
        max_retry = 3
        retry_count = 0
        while retry_count < max_retry:
            try:
                if "hdfs://ns2" in self.hdfs_input_dir:
                    cp_status = HDFS.copyFromLocal(filename, "hdfs://ns2/tmp", enableDebug=True)
                else:
                    cp_status = HDFS.copyFromLocal(filename, "/tmp", enableDebug=True)
                assert cp_status[0] == 0, "Failed to copy file to HDFS 'tmp'"
                logger.info("copyFromLocal command finished for %s" % filename)
                if "hdfs://ns2" in self.hdfs_input_dir:
                    mv_status = HDFS.mv(None, "hdfs://ns2/tmp/" + random_name, self.hdfs_input_dir, config=None)
                else:
                    mv_status = HDFS.mv(None, "/tmp/" + random_name, self.hdfs_input_dir, config=None)
                assert mv_status[0] == 0, "Failed to move file from 'tmp' to test directory"
            except:
                if retry_count < max_retry:
                    retry_count = retry_count + 1
                    logger.info(
                        "File copy into HDFS test directory failed after %s attempts, retrying after 120s sleep interval"
                        % retry_count)
                    time.sleep(120)
                else:
                    logger.error("Failed to copy file into HDFS test directory, expect failures in HDFSWordCount")
            else:
                break
        logger.info("%s moved to %s" % (filename, self.hdfs_input_dir))
        logger.info("sleeping for %s seconds" % self.interval)
        time.sleep(self.interval)
def runas(cls, user, cmd, env=None, logoutput=True, runKinit=True):
    if Ambari.is_cluster_secure() and runKinit:
        if user is None:
            user = Config.getEnv('USER')
        kerbTicket = Machine.getKerberosTicket(user=user, rmIfExists=True)
        if not env:
            env = {}
        env['KRB5CCNAME'] = kerbTicket
        user = None
    return Machine.runas(user, cmd, env=env, logoutput=logoutput)
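# Illustrative usage sketch (not part of the original suite): run a command with
# a fresh Kerberos ticket when the cluster is secure. 'runner' stands in for the
# class that defines runas() above; the command itself is a placeholder.
def _example_list_agent_logs(runner):
    exit_code, stdout = runner.runas(None, "ls /var/log/ambari-agent", logoutput=True)
    assert exit_code == 0
    return stdout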
def verifyLogMessageInServiceLog(text, service, timestamp=0, dateTimeFormat=None):
    '''
    Returns true when the given log message appears in the service log.
    '''
    hiveLog = Hive.getServiceLog(service)
    if not hiveLog or not text:
        return None
    hiveHost = Hive.getHiveHost(service)
    destlog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d.log' % int(999999 * random.random()))
    Machine.copyToLocal(None, hiveHost, hiveLog, destlog)
    return util.findMatchingPatternInFileAfterTimestamp(destlog, text, timestamp, dateTimeFormat=dateTimeFormat)
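# Illustrative usage sketch (not part of the original suite): check whether a
# Hive service logged a given message after a recorded point in time. The
# service name and message text below are examples only.
def _example_check_metastore_log(start_timestamp):
    return verifyLogMessageInServiceLog(
        "Starting Hive Metastore Server", "metastore", timestamp=start_timestamp)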
def getMiscTestLogPaths(cls, logoutput=False):
    HADOOPQE_TESTS_DIR = Config.getEnv("WORKSPACE")
    miscTestLogPaths = [
        os.path.join(HADOOPQE_TESTS_DIR, "templeton", "src", "test", "e2e", "templeton", "testdist",
                     "test_harnesss_*")
    ]
    if logoutput:
        logger.info("Hcatalog.getMiscTestLogPaths returns %s" % str(miscTestLogPaths))
    return miscTestLogPaths
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Upload data to HDFS before the upgrade starts:
    creates the /user/hrt_qa/test_rollingupgrade dir on HDFS and uploads 20 files to it.
    '''
    if not cls._base_hdfs_dir:
        cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get('hadoop', 'HADOOPQA_USER')
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, force=True)
    ruAssert("HDFS", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'HDFS_RU_TEST')
    localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
    HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1, 20, 40, 1000)
    HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"), cls._base_hdfs_dir)
    # set up for loadGenerator
    cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
    cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
    cls._lgStructureDir = Machine.getTempDir() + "/structure"
    # test dir setup
    HDFS.deleteDirectory(cls._lgTestDataDir)
    HDFS.deleteDirectory(cls._lgTestOutputDir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(Machine.getAdminUser(), command, None, None, None, "True",
                                      Machine.getAdminPasswd())
    command = "mkdir " + cls._lgStructureDir
    stdout = Machine.runas(None, command, None, None, None, "True", None)
    Machine.chmod("777", cls._lgStructureDir, "True", Machine.getAdminUser(), None, Machine.getAdminPasswd())
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    # structure generator
    jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] StructureGenerator failed")
    # data generator
    jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")
    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()
def take_screenshot(self, test_name):
    try:
        ts = time.time()
        currentTime = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
        # Add epoch time to get unique screenshot names, as it has been observed that
        # screenshots taken in the same second are replaced.
        epochTime = str(time.time() * 1000)
        filename = "%s-%s-%s.png" % (test_name, currentTime, epochTime)
        logger.info("------ capturing screenshot to file: %s" % filename)
        self.driver.get_screenshot_as_file(os.path.join(Config.getEnv('ARTIFACTS_DIR'), filename))
    except Exception as e:
        logger.error("%s" % e)
def getArtifactsDir(cls):
    return Config.getEnv('ARTIFACTS_DIR')
def getTempFilepath():
    return os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d' % int(999999 * random.random()))
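# Illustrative usage sketch (not part of the original module): write scratch
# output to a unique temp path under ARTIFACTS_DIR; the text written here is a
# placeholder, and util.writeToFile is used the same way elsewhere in this codebase.
def _example_write_scratch_file():
    tmp_path = getTempFilepath()
    util.writeToFile("scratch output", tmp_path)
    return tmp_path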