Example #1
def verifyOozieAppsAndJobsSucceeded(workflowIds,
                                    logPrefix,
                                    localDir,
                                    testMap,
                                    action_name='wc',
                                    checkJob=True):
    '''
    Verifies that all apps and jobs submitted/created via Oozie have passed all the validations.
    :param workflowIds: List of workflow ids to verify.
    :param logPrefix: Log prefix for YARN app logs.
    :param localDir: Path to the local log dir.
    :param testMap: Map of the service(s) restarted in the test and their kwargs.
    :param action_name: Oozie action name to look up job info for; pass 'None' to query the workflow id directly.
    :param checkJob: Whether to also verify that the corresponding MapReduce jobs succeeded.
    :return: Bool status indicating if validation succeeded.
    '''
    appIds = []
    jobIds = []
    dLog = {}
    appStatus = True
    jobStatus = True
    wprStatus = True

    # check the job and app status for each workflow we launched.
    if Hadoop.isHadoop2():
        # get all the app and job ids
        for workflowId in workflowIds:
            if action_name != 'None':
                stdout = Oozie.getJobInfo('%s@%s' % (workflowId, action_name),
                                          verbose=True,
                                          retry=True)
            else:
                stdout = Oozie.getJobInfo('%s' % (workflowId),
                                          verbose=True,
                                          retry=True)
            ids = Oozie.getJobAndAppIds(stdout)
            for id in ids:
                appIds.append(id['application'])
                jobIds.append(id['job'])
        # get the app and job status for all the jobs we found
        appStatus, appLog = YARN.checkAppsSucceeded(appIds,
                                                    logPrefix=logPrefix,
                                                    localDir=localDir)
        dLog.update(appLog)
        if checkJob:
            jobStatus, jobLog = YARN.checkJobsSucceeded(jobIds)
            dLog.update(jobLog)
        for key, value in dLog.items():
            logger.info("%s -> %s" % (key, value))

        wprStatus, d = verifyWorkPreservingRMRestart(jobIds, testMap)
        for k, v in d.items():
            logger.info("%s -> %s" % (k, v))

    logger.info("appStatus: %s jobStatus: %s wprStatus: %s" %
                (appStatus, jobStatus, wprStatus))
    return appStatus and jobStatus and wprStatus
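
A minimal usage sketch for the helper above; the workflow id, log prefix, local directory, and testMap contents are placeholders, and the exact shape expected for testMap is not shown in this excerpt:

# Hypothetical values: a single Oozie workflow id and an empty testMap
# (i.e. no service restarts to validate in this sketch).
workflowIds = ['0000001-230101000000000-oozie-oozi-W']
ok = verifyOozieAppsAndJobsSucceeded(workflowIds,
                                     logPrefix='oozie_wc_test',
                                     localDir='/tmp/oozie_test_logs',
                                     testMap={},
                                     action_name='wc',
                                     checkJob=True)
assert ok, "Oozie workflow validation failed"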
Example #2
def resetYarn(skip_check=False):
    # updates for Hadoop 2
    if YARN.isHAEnabled():
        logger.info("Resetting YARN...")
        # only do this on Nano, since these services are unreliable on Nano
        tasktrackers = MAPRED.getTasktrackers()
        if skip_check or (Hadoop.isHadoop2() and Machine.isLinux()
                          and Machine.isNano()):
            YARN.restartHARMNodes()
            # add sleep to give RM enough time to register all the nodes
            # and be ready
            MAPRED.waitForNMToRegister(len(tasktrackers))

        logger.info("Resetting YARN Completed.")
Example #3
def setupSchemaEvolutionDataset():
    logger.info("Setup Schema Evolution dataset")
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    hiveServer2Url = str(Hive.getHiveServer2Url())
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-schemaevolution", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
Example #4
def verifyAppsAndJobsSucceeded(appLogSearchPrefix,
                               stdout,
                               localDir,
                               testMap=None,
                               user=None):
    '''
    Verifies that all the apps and jobs found in the given stdout succeeded and that the WPR validations passed.
    :param appLogSearchPrefix: Prefix used to search the app logs.
    :param stdout: stdout from the app.
    :param localDir: Path to the local dir used when checking the app logs.
    :param testMap: Map containing the service(s) names and the kwargs of the services being restarted in the test.
    :param user: User passed to the job status check (optional).
    :return: Success status and a dict with the relevant info.
    '''
    d = {}
    status = True

    if appLogSearchPrefix is None or localDir is None:
        status = False

    # Check if all the Jobs and the apps succeeded.
    if Hadoop.isHadoop2():
        dLog = jobLog = wprdLog = {}
        appStatus = jobStatus = True
        appIds, jobIds = YARN.getAppAndJobIdsFromConsole(stdout)
        appStatus, dLog = YARN.checkAppsSucceeded(appIds,
                                                  logPrefix=appLogSearchPrefix,
                                                  localDir=localDir)
        jobStatus, jobLog = YARN.checkJobsSucceeded(jobIds, user)
        d.update(dLog)
        d.update(jobLog)

        jobIds = cleanseJobIds(jobIds)

        # Performing WPR validations.
        wprStatus, wprdLog = verifyWorkPreservingRMRestart(jobIds, testMap)
        d.update(wprdLog)

        # Check if all the validations succeeded.
        if appStatus is False or jobStatus is False or wprStatus is False:
            d[appLogSearchPrefix] = "appStatus: %s jobStatus: %s wprStatus: %s" % (
                appStatus, jobStatus, wprStatus)
            status = False

    # Prepend the method names to all the keys in the dict.
    tempd = {}
    for k, v in d.items():
        tempd["%s: %s" % ("verifyAppsAndJobsSucceeded", k)] = v

    return status, tempd
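
A hedged usage sketch; the job-submission helper, log prefix, local directory, and user below are placeholders rather than part of the excerpt:

# Hypothetical inputs: stdout captured from a job submission plus placeholder values.
stdout = run_sleep_job()  # hypothetical helper that submits a job and returns its console output
status, info = verifyAppsAndJobsSucceeded('sleep_job_ha',
                                          stdout,
                                          '/tmp/test_artifacts',
                                          testMap=None,
                                          user='testuser')
for key, value in info.items():
    logger.info("%s -> %s" % (key, value))
assert status, "App/job/WPR validation failed"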
Example #5
def getClasspathForJdbcClient():
    jdbc_standalone_jar = getStandaloneHiveJdbcJar()
    assert jdbc_standalone_jar, "No JDBC standalone jar found"
    classpath = [jdbc_standalone_jar]
    hadoop_home = Config.get('hadoop', 'HADOOP_HOME')
    if Hadoop.isHadoop2() and Machine.type() == 'Windows':
        hadoop_home = os.path.join(hadoop_home, "share", "hadoop", "common")
    hadoop_common_jar = util.findMatchingFiles(hadoop_home, "hadoop-common-*[!(tests)].jar", depth=1)
    assert len(hadoop_common_jar) > 0, "No hadoop-common.jar found"
    classpath.append(hadoop_common_jar[0])
    if Hadoop.isSecure():
        hadoop_auth_jar = util.findMatchingFiles(hadoop_home, "hadoop-auth-*[!(tests)].jar", depth=1)
        assert len(hadoop_auth_jar) > 0, "No hadoop-auth.jar found"
        classpath.append(hadoop_auth_jar[0])
    classpath.append(Config.get('hadoop', 'HADOOP_CONF'))
    return (os.pathsep).join(classpath)
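
The joined classpath can feed a java invocation directly. A sketch, where the client main class, the subprocess call, and the assumption that java is on PATH are illustrative and not part of the excerpt:

import subprocess

classpath = getClasspathForJdbcClient()
# 'org.example.SimpleJdbcClient' is a placeholder main class.
cmd = 'java -cp "%s" org.example.SimpleJdbcClient "%s"' % (classpath, Hive.getHiveServer2Url())
exit_code = subprocess.call(cmd, shell=True)
assert exit_code == 0, "JDBC client run failed"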
Example #6
def resetHdfs(skip_check=False):
    # updates for Hadoop 2
    # only do this on Nano, since these services are unreliable on Nano
    if HDFS.isHAEnabled():
        logger.info("Resetting HDFS...")
        if skip_check or (Hadoop.isHadoop2() and Machine.isLinux()
                          and Machine.isNano()):
            HDFS.resetHANamenodes('stop')
            HDFS.resetJournalNodes('stop')
            HDFS.resetZkfc('stop')
            time.sleep(10)
            HDFS.resetJournalNodes('start')
            HDFS.resetZkfc('start')
            HDFS.resetHANamenodes('start')
            time.sleep(10)

        # make sure we are out of safemode after every test
        HDFS.waitForActiveAndStandbyNNOutOfSafemode()
        logger.info("Resetting HDFS Completed.")
Example #7
def setupTPCDSOriginalDataset(CURR_DIR):
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_original.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tpcds_original', 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')

    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_ORIGINAL_DNLD_URL'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )

    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\\windows.conf'

    query_file_1 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_text.sql')
    query_file_2 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_orc.sql')
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_1,
        hivevar={
            'LOCATION': HDFS_TEST_DIR + '/data',
            'DB': 'tpcds_src'
        },
        cwd=CURR_DIR,
        logoutput=True,
        queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for text tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_2, hivevar={
            'FILE': 'ORC',
            'SOURCE': 'tpcds_src'
        }, cwd=CURR_DIR, logoutput=True, queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for ORC tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"
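
A usage sketch: the helper expects CURR_DIR to contain a ddl_queries directory with alltables_text.sql and alltables_orc.sql (as used above); deriving it from the calling file's location is illustrative:

import os

# Assumes the calling script sits next to the 'ddl_queries' directory used above.
CURR_DIR = os.path.dirname(os.path.abspath(__file__))
setupTPCDSOriginalDataset(CURR_DIR)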
Example #8
def setupTPCDSDataset():
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_data.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')

    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_DNLD_URL_HDP3'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )

    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\\windows.conf'

    hiveServer2Url = str(Hive.getHiveServer2Url())

    # generate data
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-orc", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0

    exit_code, stdout = Ant.run(HIVE_TEST_CMD + " deploy-tpcds", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url})
    assert exit_code == 0

    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-parquet", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0