Example #1
0
    def submit_storm_hive_topology(cls, tcId, className, args,
                                   useStandaloneCmd):
        if Hadoop.isSecure():
            user_realm = None
            if Config.hasOption('machine', 'USER_REALM'):
                user_realm = Config.get('machine', 'USER_REALM', '')
            else:
                # Derive the realm from the NameNode principal when it is not configured explicitly
                nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
                atloc = nnKerbPrincipal.find("@")
                if atloc != -1:
                    user_realm = nnKerbPrincipal[atloc + 1:]
            if user_realm is not None:
                args += " " + Machine.getHeadlessUserKeytab(
                    Config.getEnv('USER')) + " " + Config.getEnv(
                        'USER') + '@' + user_realm

        exit_code, stdout = Storm.runStormHdfsTopology(
            TARGET_HIVE_STORM_JAR,
            className,
            args,
            None,
            logoutput=True,
            inBackground=False,
            useStandaloneCmd=useStandaloneCmd)
        logger.info(exit_code)

        ruAssert("Storm", exit_code == 0,
                 "[StormHiveSubmit] %s Failed" % (tcId))
Example #2
0
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student_txt;
        create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
        drop table if exists voter_txt;
        create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
        drop table if exists student;
        create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        drop table if exists voter;
        create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        Insert into table student select * from student_txt;
        Insert into table voter select * from voter_txt;"""

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example #3
0
 def get_log_aggregation_Dir_Locations(cls):
     '''
     Gets the base dir for log aggregation.
     Returns a tuple of (str, str, str, str):
     (cluster name, temporary cluster dir for logs,
      temporary dir for application logs, test component)
     '''
     config = ConfigParser()
     reportconf = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                               'test_report.conf')
     SECTION = "HW-QE-PUBLISH-REPORT"
     config.optionxform = str
     config.read(reportconf)
     CLUSTER_NAME = config.get(SECTION, "CLUSTER_NAME")
     logUtilCloudbreak.LOCAL_TMP_CLUSTER_DIR = os.path.join(
         Config.getEnv('ARTIFACTS_DIR'), CLUSTER_NAME)
     logUtilCloudbreak.LOCAL_TMP_APP_STORAGE = os.path.join(
         cls.LOCAL_TMP_CLUSTER_DIR, "Application-logs")
     logUtilCloudbreak.COMPONENT = ''
     if config.has_option(SECTION, 'TESTSUITE_COMPONENT'):
         logUtilCloudbreak.COMPONENT = config.get(SECTION,
                                                  'TESTSUITE_COMPONENT')
         logger.info("Set logUtilCloudbreak.COMPONENT to %s",
                     logUtilCloudbreak.COMPONENT)
     return (CLUSTER_NAME, logUtilCloudbreak.LOCAL_TMP_CLUSTER_DIR,
             logUtilCloudbreak.LOCAL_TMP_APP_STORAGE,
             logUtilCloudbreak.COMPONENT)
Example #4
0
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')

    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, '777', "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Unable to deploy foodmart dataset"
Example #5
0
def switchDirectory(currentDirectory, component):
    if isCurrentDirectoryAPIFramework(currentDirectory):
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), component)
    else:
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), component, 'apitestframework')
    logger.info('Switched current dir to: %s' % path)
    return path
Example #6
0
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting and not something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example #7
0
 def __init__(self):
     super(RollingUpgrade, self).__init__()
     self.AMBARI_PROP_FILE = os.path.join(Config.getEnv('WORKSPACE'), '..',
                                          'ambari_deploy', 'uifrm',
                                          'ambari.properties')
     self.AMBARI_OLD_PROP_FILE = os.path.join(Config.getEnv('WORKSPACE'),
                                              '..', 'ambari_deploy',
                                              'uifrm_old', 'uifrm',
                                              'ambari.properties')
     self.COMPONENTS_TO_TEST = []
     self.DO_DOWNGRADE = False
Example #8
0
 def get_local_job_summary_logs(cls, component):
     """
     Copy Job_summary Logs to local dirs [artifacts/job_summary_local.log]
     parameter: component : Component name for which log collection is taking place
     return: List of Local copies of Job summary log
     Note: Some components need special handling where there are multiple Job Summary Log files
           such as HA and Falcon
     """
     LocalJobSummaryLogs = []
     try:
         if component == FALCON_STR:
             from beaver.component.falcon import Falcon  # pylint: disable=redefined-outer-name
             host1 = Falcon.get_cluster_1_masters()['rm']
             host2 = Falcon.get_cluster_2_masters()['rm']
             host3 = Falcon.get_cluster_3_masters()['rm']
             for host in [host1, host2, host3]:
                 JobSummaryLog = ComponentLogUtil.MAPRED_getJobSummaryLogFile(
                     host)
                 LocalJobSummaryLog = os.path.join(
                     Config.getEnv('ARTIFACTS_DIR'),
                     "jobsummary_" + host + ".log")
                 Machine.copyToLocal(None, host, JobSummaryLog,
                                     LocalJobSummaryLog, None)
                 if Machine.pathExists(None,
                                       None,
                                       LocalJobSummaryLog,
                                       passwd=None):
                     LocalJobSummaryLogs.append(LocalJobSummaryLog)
         else:
             for host in ComponentLogUtil.YARN_getRMHANodes():
                 JobSummaryLog = ComponentLogUtil.MAPRED_getJobSummaryLogFile(
                     host)
                 LocalJobSummaryLog = os.path.join(
                     Config.getEnv('ARTIFACTS_DIR'),
                     "jobsummary_" + host + ".log")
                 Machine.copyToLocal(Machine.getAdminUser(), host,
                                     JobSummaryLog, LocalJobSummaryLog,
                                     Machine.getAdminPasswd())
                 Machine.chmod("777",
                               LocalJobSummaryLog,
                               user=Machine.getAdminUser(),
                               passwd=Machine.getAdminPasswd())
                 if Machine.pathExists(Machine.getAdminUser(),
                                       None,
                                       LocalJobSummaryLog,
                                       passwd=Machine.getAdminPasswd()):
                     LocalJobSummaryLogs.append(LocalJobSummaryLog)
         return LocalJobSummaryLogs
     except Exception as e:
         logger.info("Exception occurs at job_summary_log collection %s", e)
         tb = traceback.format_exc()
         logger.info(tb)
         return LocalJobSummaryLogs
Example #9
0
    def setup_capacity_scheduler(cls, components):
        """
        Setup yarn capacity scheduler based on components.
        This API is not called during setup_module.
        :param components: list of components
        :type components: list of str
        :return: None
        """
        if RuSetup._defaultQueue:
            components.append("default")

        logger.info("*** setup_capacity_scheduler ***")
        if RuSetup._skipQueue is not None:
            logger.info("Components that will not get a queue: " +
                        str(RuSetup._skipQueue))
            components = list(set(components) - RuSetup._skipQueue)
        logger.info("components = %s" % components)
        numComponents = len(components)
        percentPerQueue = 100.0 / numComponents
        percentPerQueueStr = "{0:0.2f}".format(percentPerQueue)
        xmlDict = {}
        rootQueues = ",".join(components)
        xmlDict["yarn.scheduler.capacity.root.queues"] = rootQueues
        for component in components:
            xmlDict["yarn.scheduler.capacity.root.%s.capacity" %
                    component] = percentPerQueueStr
            xmlDict["yarn.scheduler.capacity.root.%s.user-limit-factor" %
                    component] = 1
            xmlDict["yarn.scheduler.capacity.root.%s.maximum-capacity" %
                    component] = percentPerQueueStr
            xmlDict["yarn.scheduler.capacity.root.%s.state" %
                    component] = "RUNNING"
            xmlDict["yarn.scheduler.capacity.root.%s.acl_submit_jobs" %
                    component] = "*"
            xmlDict["yarn.scheduler.capacity.root.%s.acl_administer_jobs" %
                    component] = "*"
        util.dumpTextString(xmlDict, "====== PLANNED QUEUES ======",
                            "==================")
        master_capacity_file = os.path.join(Config.getEnv("WORKSPACE"),
                                            "tests", "rolling_upgrade", "yarn",
                                            "data", "capacity-scheduler.xml")
        modified_capacity_file = os.path.join(Config.getEnv("ARTIFACTS_DIR"),
                                              "capacity-scheduler.xml")
        Machine.copy(master_capacity_file, modified_capacity_file)
        util.writePropertiesToConfigXMLFile(modified_capacity_file,
                                            modified_capacity_file, xmlDict)
        #util.dumpText(modified_capacity_file, "====== capacity-scheduler.xml ======", "==================")
        if RuSetup._defaultQueue:
            components.remove("default")
        return modified_capacity_file
Example #10
0
    def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):

        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        logger.info("Generating test table dataset with %d rows" % num_of_rows)
        test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                      tbl_name + ".dat")
        f = open(test_data_file, 'w')
        userid = 100000
        for i in xrange(num_of_rows):
            for j in range(random.randint(3, 8)):
                f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))
        f.close()

        hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
        logger.info("Copying the test dataset to HDFS directory '%s'" %
                    hdfs_tbl_dir)
        HDFS.createDirectory(hdfs_test_dir,
                             user=cls._hdfs_user,
                             perm='777',
                             force=True)
        HDFS.createDirectory(hdfs_tbl_dir, perm='777')
        HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
        HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)

        logger.info("Creating table '%s' and verification tables" % tbl_name)
        query = "drop table if exists %s;\n" % tbl_name
        query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
            tbl_name, hdfs_tbl_dir)
        query += "drop table if exists %s_hive_verify;\n" % tbl_name
        query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
        if type == "Long running":
            for i in range(cls._num_of_webhcat_bgj):
                query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
                query += "create table %s_wh_%d (userid string, age int);\n" % (
                    tbl_name, i + 1)
        hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                     "hivesetup.sql")
        util.writeToFile(query, hivesetupfile)
        exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
        if type:
            msg = "%s job setup for Hive component" % type
            if exit_code != 0:
                UpgradePerNode.reportProgress(
                    "[FAILED][Hive][Setup] %s failed due to exitcode = %d" %
                    (msg, exit_code))
            else:
                UpgradePerNode.reportProgress(
                    "[PASSED][Hive][Setup] %s finished successfully" % msg)
Example #11
0
 def validate_apps(self, local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
     '''
     Validate small apps passed
     :param local_dir_name:
     :return:
     '''
     local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                              self.local_dir_name)
     appIds = []
     for root, _dirs, filenames in os.walk(local_dir):
         for f in filenames:
             logfile = open(os.path.join(root, f), 'r')
             stdout = logfile.read()
             appId = YARN.getApplicationIDFromStdout(stdout,
                                                     logoutput=False)
             appIds.append(appId)
     # Sleep for 30 seconds before checking App status
     time.sleep(30)
     status, d = YARN.checkAppsSucceeded(appIds,
                                         logPrefix=None,
                                         useWS=True,
                                         localDir=None)
     for app, app_status in d.items():
         if app_status != "SUCCEEDED":
             appInfo = YARN.getApplicationInfo(app)
             logger.info(appInfo)
             if appInfo:
                 assert appInfo[
                     'state'] == 'ACCEPTED', "app is neither in ACCEPTED nor SUCCEEDED state"
Example #12
0
    def getZipFile(cls, version=HBase.getVersionFromBuild(), isRU=False):
        # download for linux, no download for windows
        HBASE_VER_BUILD = version

        if Machine.isWindows():
            zipFile = os.path.join(
                Config.get('slider', 'SLIDER_HOME'), "app-packages",
                "slider-hbase-app-win-package-%s.zip" % HBASE_VER_BUILD)
            return zipFile

        pkg_list = "pkg-list_qe.txt"
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), pkg_list)
        #pkgUrl = Config.get('slider','APP_PKG_LIST')
        pkgUrl = Slider.getAppPackageBaseUrl(
            isRU) + "/slider-app-packages/" + pkg_list
        util.downloadUrl(pkgUrl, path)
        url = None
        with open(path, 'r') as f:
            for line in f:
                if line.startswith("hbase_pkg_url="):
                    url = line.strip()[len("hbase_pkg_url="):]
                    break
        assert url, "hbase_pkg_url entry not found in %s" % pkg_list
        zipFile = os.path.join(
            os.getcwd(), "slider-hbase-app-package-%s.zip" % HBASE_VER_BUILD)
        logger.info("downloading " + url)
        util.downloadUrl(url, zipFile)
        return zipFile
Example #13
0
    def runas(cls,
              user,
              cmd,
              cwd=None,
              env=None,
              logoutput=True,
              runInBackground=False):
        runCmd = Config.get('pig', 'PIG_CMD') + " " + cmd
        # initialize env
        if not env:
            env = {}
        # get kerberos ticket
        if Hadoop.isSecure():
            if user is None:
                user = Config.getEnv('USER')
            kerbTicket = Machine.getKerberosTicket(user)
            env['KRB5CCNAME'] = kerbTicket
            user = None

        if runInBackground:
            return Machine.runinbackgroundAs(user, runCmd, cwd=cwd, env=env)
        else:
            return Machine.runas(user,
                                 runCmd,
                                 cwd=cwd,
                                 env=env,
                                 logoutput=logoutput)
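A minimal usage sketch for the wrapper above. The owning class name (assumed here to be Pig) and the script arguments are illustrative, not taken from the example; only the runas signature comes from the snippet.

# hypothetical usage; the class name 'Pig' and the script path are placeholders
exit_code, stdout = Pig.runas(None, "-x tez /tmp/wordcount.pig", logoutput=True)
assert exit_code == 0, "pig script failed"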
Example #14
0
 def gather_cloudbreakartifacts_log(  # pylint: disable=unused-argument
         cls,
         destHost,
         destUser,
         destPath,
         passwd,
         cleanupDirFirst=False,
         logoutput=False):
     '''
     Gather artifacts to destination host with rsync.
     returns None
     '''
     try:
         srcPath = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "*")
         cls.createDirInternal(destHost,
                               destUser,
                               destPath,
                               passwd,
                               logoutput=logoutput)
         from beaver.rsync import RSync
         RSync.rsyncToRemoteHost(user=None,
                                 host=None,
                                 passwd=None,
                                 srcPath=srcPath,
                                 destPath=destPath,
                                 destHost=destHost,
                                 destUser=destUser,
                                 logoutput=logoutput,
                                 flag="-rhp --chmod=u=rwx,g=rwx,o=r")
     except Exception as e:
         logger.info("Exception occurs at gather_artifacts_log. %s", e)
         tb = traceback.format_exc()
         logger.info(tb)
Example #15
0
 def createPoliciesFromJson(cls,
                            file,
                            serviceType,
                            sourceHiveServiceName="mycluster0_hive",
                            sourceHdfsServiceName="mycluster0_hadoop",
                            targetServiceName=None,
                            ambariWeburl=source_weburl,
                            updateIfExists=False,
                            polResource=None,
                            isOverRideTrue=True):
     if Xa.isArgusInstalled():
         servicesMapJson = Config.getEnv(
             'ARTIFACTS_DIR') + '/' + datetime.datetime.now().strftime(
                 "%Y%m%d%H%M%S") + 'service_mapping.json'
         serviceName = "hadoop" if serviceType == "hdfs" else serviceType
         if targetServiceName is None:
             targetServiceName = \
             Xa.findRepositories(nameRegex="^.*_" + serviceName + "$", type=serviceType, status=True,
                                 ambariWeburl=ambariWeburl)[0]['name']
         f = open(servicesMapJson, 'w')
         if serviceType == "hive":
             f.write('{"' + sourceHiveServiceName + '":"' +
                     targetServiceName + '"}')
         elif serviceType == "hdfs":
             f.write('{"' + sourceHdfsServiceName + '":"' +
                     targetServiceName + '"}')
         f.close()
         Xa.importPoliciesInJsonFile(file,
                                     serviceType,
                                     servicesMapJson=servicesMapJson,
                                     ambariWeburl=ambariWeburl,
                                     updateIfExists=updateIfExists,
                                     polResource=polResource,
                                     isOverRideTrue=isOverRideTrue)
Example #16
0
 def kinitas(cls,
             keytabUser,
             principal=None,
             keytabFile=None,
             flag="-f",
             logoutput=True,
             host=None):
     '''
     Runs kinit as specified keytab user.
     Returns (exit_code, stdout).
     '''
     # Build common variables and cache them.
     cls._buildCommonVar()
     # If keytabUser is None, use current user
     if keytabUser is None:
         keytabUser = Config.getEnv('USER')
     # Get kerberos ticket location e.g. /grid/0/hadoopqe/artifacts/kerberosTickets/hrt_qa.kerberos.ticket
     kerbTicket = Machine.getKerberosTicket(user=keytabUser)
     # If keytab is unset, use default keytab path e.g. /home/hrt_qa/hadoopqa/keytabs/hrt_qa.headless.keytab
     if keytabFile is None:
         keytabFile = Machine.getHeadlessUserKeytab(keytabUser)
     # If principal is not set, use keytab user.
     if principal is None:
         principal = keytabUser
     # Build command
     cmd = "%s -c %s -k -t %s %s %s" % (
         cls._kinitloc, kerbTicket, keytabFile, flag,
         Machine.get_user_principal(principal))
     # kinit always runs with credential of current user.
     return Machine.runas(user=None,
                          cmd=cmd,
                          env=None,
                          logoutput=logoutput,
                          host=host)
Example #17
0
 def verifyLongRunningQuery(cls, file_to_verify):
     lfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), file_to_verify)
     exit_code, stdout = HDFS.copyToLocal(
         cls._hdfs_bgjtest_dir + "/" + file_to_verify, lfile)
     if exit_code != 0:
         logger.info("Error fetching the timestamp file from HDFS")
         return False
     lines = open(lfile, 'r').readlines()
     if len(lines) == 0:
         logger.info("Empty timestamp file")
         return False
     try:
         ts = int(lines[-1])
         # Shutdown gracefully
         if ts == -1:
             return True
         # Timestamp should be less than 5 minutes old, which indicates
         # the UDF wrote something at least once in the last 5 minutes
         timegap = time.time() - (ts / 1000)
         if timegap > 300:
             logger.info(
                 "Time gap is %d seconds, last line in the timestamp file was '%d'"
                 % (timegap, ts))
             return False
     except ValueError:
         logger.info("Error parsing last line in the timestamp file => '" +
                     lines[-1] + "'")
         return False
     return True
Example #18
0
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")

    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table " % tbl)
        # TODO: Modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
Example #19
0
 def get_testcase_status_txt(cls):
     """
     Get test_case_status.txt location
     :return:
     """
     return os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                         "test_case_status.txt")
Example #20
0
    def __init__(self):
        self.COMPONENT = Config.get('ambari', 'COMPONENT')
        self.TESTSUITE_FILE = Config.get('ambari', 'TESTSUITE_FILE')
        self.PYTEST_FILE = os.path.join(Config.getEnv('WORKSPACE'), 'beaver',
                                        'component', 'upgrades',
                                        'upgradecommon.py')
        self.SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'uifrm')
        if 'preupgrade' in self.COMPONENT or 'installold' in self.COMPONENT:
            self.SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'),
                                        'uifrm_old', 'uifrm')
        self.LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                           self.COMPONENT)
        self.LOCAL_WORK_DIR_UIFRM = os.path.join(
            Config.getEnv('ARTIFACTS_DIR'), self.COMPONENT)
        self.DEPLOY_CODE_DIR = os.path.join(Config.getEnv('WORKSPACE'), '..',
                                            'ambari_deploy')
        self.JAVA_HOME = Config.get('machine', 'JAVA_HOME')
        self.DISPLAY = Config.getEnv('DISPLAY')
        self.RUN_MARKER_VERSION = Config.get('ambari', 'RUN_MARKER_VERSION')
        self.RUN_MARKER_LIST = Config.get('ambari', 'RUN_MARKER_LIST')
        self.STACK_TYPE = Config.get('ambari', 'STACK_TYPE')
        self.TEST_RESULT = {}
        self.TESTCASES = []
        self.env = {}
        self.step_counter = 0
        self.source_stack_version = ''
        self.target_stack_version = Config.get('ambari', 'STACK_UPGRADE_TO')

        self.isambari_upgrade_success = False

        self.is_ambari_upgrade = len(Config.get('ambari', 'UPGRADE_TO').strip()) > 0
Example #21
0
    def setup_storm_slider_app(cls):
        sys.path.insert(
            0,
            os.path.join(Config.getEnv('WORKSPACE'), 'tests', 'nonnightly',
                         'storm-slider-ru', 'aaaa'))

        from test_startStorm import setupStorm
        # This kills any currently running Storm app and creates a new one.
        setupStorm()
Example #22
0
class Ant(object):
    # set the ant version
    _version = '1.8.4'
    # determine java home
    _java_home = Config.get('machine', 'JAVA_HOME')
    _worksapce = Config.getEnv('WORKSPACE')
    # determine tools path
    _tools_path = os.path.join(_worksapce, 'tools')
    # determine ant home
    _ant_home = os.path.join(_tools_path, 'apache-ant-' + _version)
    _ant_cmd = os.path.join(_ant_home, 'bin', 'ant')
    # what url to download ant from
    # need to find a solution for windows until then the url will live here
    _ant_download_url = Config.get('machine', 'ANT_URL')

    def __init__(self):
        pass

    # method to run the ant cmd
    @classmethod
    def run(cls, cmd, cwd=None, env=None, logoutput=True, user=None):
        # make sure ant is set up before it is run
        cls.setupAnt()

        # initialize env
        if not env:
            env = {}

        # if running a secure cluster get a kerb ticket
        if Hadoop.isSecure():
            env['KRB5CCNAME'] = Machine.getKerberosTicket(user)

        env['JAVA_HOME'] = cls._java_home
        env['ANT_HOME'] = cls._ant_home
        run_cmd = "%s -Dbuild.compiler=javac1.7 %s" % (cls._ant_cmd, cmd)
        exit_code, stdout = Machine.run(run_cmd, cwd=cwd, env=env, logoutput=logoutput)
        return exit_code, stdout

    # method to set up ant
    @classmethod
    def setupAnt(cls):
        # if dir exists return as its already setup
        if os.path.isdir(cls._ant_home):
            return

        # check if tarball exists or not before downloading it
        tarName = "apache-ant-" + cls._version + ".tar.gz"
        tarballPath = os.path.join(cls._worksapce, tarName)
        if not os.path.isfile(tarballPath):
            # download ant
            assert util.downloadUrl(cls._ant_download_url, tarballPath)

        # now untar ant; on windows you have to run the tar cmd from the dir
        # where the file exists, else it fails
        Machine.tarExtractAll(filepath=tarballPath, outpath=cls._tools_path, mode='r:gz')
        assert os.path.isfile(cls._ant_cmd)
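A usage sketch for the Ant wrapper above; the ant targets and working directory are illustrative assumptions.

# hypothetical usage; targets and cwd are placeholders
exit_code, stdout = Ant.run("clean compile", cwd="/tmp/my-ant-project", logoutput=True)
assert exit_code == 0, "ant build failed"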
Example #23
0
    def background_job_when_master_upgrade(cls):
        '''
        Start a background application which runs while component master service gets upgraded
        :return:
        '''
        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started"
        )

        logger.info("Creating hive tables for short background jobs")
        query = "drop table if exists shortlr_hive_verify;\n"
        query += "create table shortlr_hive_verify (userid string, age int);\n"
        query += "drop table if exists shortlr_bline_verify;\n"
        query += "create table shortlr_bline_verify (userid string, age int);\n"
        query += "drop table if exists shortlr_bline_verify;\n"
        query += "create table shortlr_bline_verify (userid string, age int);\n"
        short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                             'shortlrsetup.sql')
        util.writeToFile(query, short_bgjob_setupfile)

        exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d"
                % exit_code)

        logger.info("Running the Background Job when upgrading Hive")
        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Long running job for Hive component upgrades started"
        )

        setqueue = ""
        if Hive.isTezEnabled():
            setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
        else:
            setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

        logger.info("**** Running Hive CLI Test ****")
        query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
        cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

        # Sleeping for 10 seconds to make sure that query initializes before Metastore is restarted
        time.sleep(10)

        logger.info("**** Running Beeline CLI Test ****")
        query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
        cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query,
                                                             readFromFile=True,
                                                             background=True)

        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished"
        )
Example #24
0
    def checkClasspathVersion(cls, Version_Num, config=None):
        Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests",
                                      "rolling_upgrade", "yarn")
        Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
        Mapper = "data/versionVerifyMapper.py"
        Reducer = "data/versionVerifyReducer.py"
        Verify_File_Name = "test.txt"
        Verify_Test_File = os.path.join(Multi_Version_App_Dir,
                                        Verify_File_Name)
        # Set up env
        mapred_app_path = MAPRED.getConfigValue(
            "mapreduce.application.framework.path", None)
        mapred_classpath = MAPRED.getConfigValue(
            "mapreduce.application.classpath", None)
        env = {
            "mapreduce.application.framework.path": mapred_app_path,
            "mapreduce.application.classpath": mapred_classpath
        }
        verifyInput = cls._hdfs_input + "/verify"
        HDFS.createDirectory(verifyInput, None, "777", False)
        # Copy template files for the verifier streaming job
        templateFile = open(Verify_Test_File, 'w')
        templateFile.write(Version_Num)
        templateFile.close()
        HDFS.copyFromLocal(Verify_Test_File,
                           verifyInput,
                           user=Config.get('hadoop', 'HADOOPQA_USER'))
        # Submit the special streaming job
        shortStreamingId = HadoopJobHelper.runStreamJob(
            Mapper,
            Reducer,
            verifyInput,
            cls._hdfs_output_verify,
            files=Multi_Version_App_Dir,
            config=config,
            extraJobArg=cls._jobArgs,
            env=env,
            proposedJobName=cls._shortStreamingName)
        MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)
        # Make sure task succeeded
        #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'

        # Check result content
        retVal, checkContent = HDFS.cat(cls._hdfs_output_verify +
                                        '/part-00000')
        logger.info("CHECK CLASSPATH VERSION OUTPUT")
        logger.info(retVal)
        logger.info(checkContent)
        ruAssert("YARN", retVal == 0)
        ruAssert("YARN", 'True' in checkContent,
                 "[VersionVerify] Stream job returns false: " + checkContent)
        #assert retVal == 0
        #assert 'True' in checkContent, "Stream job returns false: " + checkContent
        #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
        HDFS.deleteDirectory(cls._hdfs_output_verify,
                             user=Config.get('hadoop', 'HADOOPQA_USER'))
Example #25
0
    def run(self):
        """
        Move files to HDFS Input Dir after each interval period for n times.
        """
        for count in range(0, self.times):
            text = "hello world \n Testing HDFS Word count Spark application"
            random_name = ''.join(
                random.choice(string.lowercase) for i in range(5))
            filename = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                    random_name)
            util.writeToFile(text, filename, isAppend=False)
            max_retry = 3
            retry_count = 0
            while retry_count < max_retry:
                try:
                    if "hdfs://ns2" in self.hdfs_input_dir:
                        cp_status = HDFS.copyFromLocal(filename,
                                                       "hdfs://ns2/tmp",
                                                       enableDebug=True)
                    else:
                        cp_status = HDFS.copyFromLocal(filename,
                                                       "/tmp",
                                                       enableDebug=True)
                    assert cp_status[
                        0] == 0, "Failed to copy file to HDFS 'tmp'"
                    logger.info("copyFromLocal command finished for %s" %
                                filename)
                    if "hdfs://ns2" in self.hdfs_input_dir:
                        mv_status = HDFS.mv(None,
                                            "hdfs://ns2/tmp/" + random_name,
                                            self.hdfs_input_dir,
                                            config=None)
                    else:
                        mv_status = HDFS.mv(None,
                                            "/tmp/" + random_name,
                                            self.hdfs_input_dir,
                                            config=None)
                    assert mv_status[
                        0] == 0, "Failed to move file from 'tmp' to test directory"
                except Exception:
                    if retry_count < max_retry:
                        retry_count += 1
                        logger.info(
                            "File copy into HDFS test directory failed after %s attempts, retrying after 120s sleep interval"
                            % retry_count)
                        time.sleep(120)
                    else:
                        logger.error(
                            "Failed to copy file into HDFS test directory, expect failures in HDFSWordCOunt"
                        )
                else:
                    break

            logger.info("%s moved to %s" % (filename, self.hdfs_input_dir))
            logger.info("sleeping for %s seconds" % self.interval)
            time.sleep(self.interval)
Example #26
0
 def runas(cls, user, cmd, env=None, logoutput=True, runKinit=True):
     if Ambari.is_cluster_secure() and runKinit:
         if user is None:
             user = Config.getEnv('USER')
         kerbTicket = Machine.getKerberosTicket(user=user, rmIfExists=True)
         if not env:
             env = {}
         env['KRB5CCNAME'] = kerbTicket
         user = None
     return Machine.runas(user, cmd, env=env, logoutput=logoutput)
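A usage sketch, assuming the runas above is a classmethod on a wrapper class (named AmbariShell here purely for illustration); the command string is also a placeholder.

# hypothetical usage; 'AmbariShell' and the command are placeholders
exit_code, stdout = AmbariShell.runas("hrt_qa", "hdfs dfs -ls /tmp", logoutput=True)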
Example #27
0
def verifyLogMessageInServiceLog(text, service, timestamp=0, dateTimeFormat=None):
    '''
    Returns True when the given log message appears in the service log
    '''
    hiveLog = Hive.getServiceLog(service)
    if not hiveLog or not text:
        return None
    hiveHost = Hive.getHiveHost(service)
    destlog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d.log' % int(999999 * random.random()))
    Machine.copyToLocal(None, hiveHost, hiveLog, destlog)
    return util.findMatchingPatternInFileAfterTimestamp(destlog, text, timestamp, dateTimeFormat=dateTimeFormat)
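A usage sketch for the helper above; the message text and service name are illustrative assumptions rather than values taken from the examples.

# hypothetical call; returns the match result, or None if the log or text is unavailable
found = verifyLogMessageInServiceLog("Completed compaction", "hiveserver2", timestamp=0)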
Example #28
0
    def getMiscTestLogPaths(cls, logoutput=False):
        HADOOPQE_TESTS_DIR = Config.getEnv("WORKSPACE")
        miscTestLogPaths = [
            os.path.join(HADOOPQE_TESTS_DIR, "templeton", "src", "test", "e2e",
                         "templeton", "testdist", "test_harnesss_*")
        ]

        if logoutput:
            logger.info("Hcatalog.getMiscTestLogPaths returns %s" %
                        str(miscTestLogPaths))
        return miscTestLogPaths
Example #29
0
    def background_job_setup(cls, runSmokeTestSetup=True, config=None):
        '''
        Upload Data to HDFS before Upgrade starts
        Creates /user/hrt_qa/test_rollingupgrade dir on HDFS
        Upload 20 files to /user/hrt_qa/test_rollingupgrade
        '''
        if not cls._base_hdfs_dir:
            cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get(
                'hadoop', 'HADOOPQA_USER')
        exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir,
                                                 force=True)
        ruAssert("HDFS", exit_code == 0,
                 '[BGJobSetup] could not create dir on hdfs.')
        LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                      'HDFS_RU_TEST')
        localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1,
                                            20, 40, 1000)
        HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"),
                           cls._base_hdfs_dir)

        # set up for loadGenerator
        cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
        cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
        cls._lgStructureDir = Machine.getTempDir() + "/structure"
        # test dir setup
        HDFS.deleteDirectory(cls._lgTestDataDir)
        HDFS.deleteDirectory(cls._lgTestOutputDir)
        command = "rm -rf " + cls._lgStructureDir
        exit_code, stdout = Machine.runas(Machine.getAdminUser(), command,
                                          None, None, None, "True",
                                          Machine.getAdminPasswd())
        command = "mkdir " + cls._lgStructureDir
        exit_code, stdout = Machine.runas(None, command, None, None, None, "True", None)
        Machine.chmod("777", cls._lgStructureDir, "True",
                      Machine.getAdminUser(), None, Machine.getAdminPasswd())

        HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        # structure generator
        jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0,
                 "[BGJobSetup] StructureGenerator failed")
        # data generator
        jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

        if runSmokeTestSetup:
            logger.info("**** Running HDFS Smoke Test Setup ****")
            cls.smoke_test_setup()
Example #30
0
 def take_screenshot(self, test_name):
     try:
         ts = time.time()
         currentTime = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
         # Adding epoch time to get unique screenshot names, since screenshots
         # taken in the same second would otherwise overwrite each other
         epochTime = str(time.time() * 1000)
         filename = "%s-%s-%s.png" % (test_name, currentTime, epochTime)
         logger.info("------ capturing screenshot to file: %s" % (filename))
         self.driver.get_screenshot_as_file(os.path.join(Config.getEnv('ARTIFACTS_DIR'), filename))
     except Exception as e:
         logger.error("%s" % e)
Example #31
0
 def getArtifactsDir(cls):
     return Config.getEnv('ARTIFACTS_DIR')
Example #32
0
def getTempFilepath():
    return os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d' % int(999999*random.random()))
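A brief usage sketch combining the helper above with util.writeToFile, whose signature appears in earlier examples; the file contents are illustrative.

# hypothetical usage; writes scratch data to a unique temp file under ARTIFACTS_DIR
tmp_file = getTempFilepath()
util.writeToFile("scratch data", tmp_file, isAppend=False)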