Ejemplo n.º 1
0
def setupSchemaEvolutionDataset():
    '''
    Deploy the schema evolution dataset by running the
    "deploy-schemaevolution" Ant target from SRC_DIR.

    Creates HCAT_TEST_DIR and HDFS_TEST_DIR on HDFS (perm 777, forced)
    first, then invokes Ant with the component home directories passed as
    -D properties and the HiveServer2 URL exported as HIVE_SERVER2_URL.

    Raises AssertionError when Ant reports a non-zero exit code.
    '''
    logger.info("Setup Schema Evolution dataset")
    for test_dir in (HCAT_TEST_DIR, HDFS_TEST_DIR):
        HDFS.createDirectory(test_dir, user=HDFS_USER, perm='777', force=True)

    # NOTE(review): hbase.home is filled with HIVE_HOME (last slot) -- kept
    # as found; confirm whether that is intentional.
    home_dirs = (HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME)
    ant_parts = [
        "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" %
        home_dirs
    ]
    if Hadoop.isHadoop2():
        mapred_home = Config.get('hadoop', 'MAPRED_HOME')
        ant_parts.append(" -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (mapred_home, HADOOP_CONF))

    ant_env = {"HIVE_SERVER2_URL": str(Hive.getHiveServer2Url())}
    ant_parts.append(" deploy-schemaevolution")
    # Only the exit code is checked; the captured output is discarded.
    exit_code, _ = Ant.run("".join(ant_parts), cwd=SRC_DIR, env=ant_env)
    assert exit_code == 0
Ejemplo n.º 2
0
def setupTPCDSDataset():
    '''
    Fetch the TPC-DS dataset and deploy it through the Ant harness.

    Calls downloadDataset() with the TPC-DS directories under SRC_DIR, the
    URL read from the 'TPCDS_DNLD_URL_HDP3' option of the 'hive' config
    section and a local copy directory under ARTIFACTS_DIR. Then runs the
    Ant targets "deploy-tpcds-orc", "deploy-tpcds" and
    "deploy-tpcds-parquet" (in that order) from SRC_DIR, with the
    HiveServer2 URL exported as HIVE_SERVER2_URL.

    Raises AssertionError as soon as one Ant target exits non-zero.
    '''
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_data.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')

    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_DNLD_URL_HDP3'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    # NOTE(review): hbase.home is filled with HIVE_HOME (last slot) -- kept
    # as found; confirm whether that is intentional.
    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )

    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    if Machine.type() == 'Windows':
        # The backslash is escaped explicitly: '\w' is not a valid escape
        # sequence and newer interpreters warn about it. The resulting
        # string is unchanged.
        HIVE_TEST_CMD += ' -Dharness.conf=conf\\windows.conf'

    hiveServer2Url = str(Hive.getHiveServer2Url())

    # generate data: one Ant run per storage format, stopping at the first
    # failure. A fresh env dict is passed to every run.
    for target in ("deploy-tpcds-orc", "deploy-tpcds", "deploy-tpcds-parquet"):
        exit_code, _ = Ant.run(
            HIVE_TEST_CMD + " " + target, cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
        )
        assert exit_code == 0, "Ant target '%s' failed with exit code %s" % (target, exit_code)
Ejemplo n.º 3
0
def runJdbcMultiSessionDriver(
        testDir,
        addlClasspath=None,
        connectionUrl=None,
        skippedTests=None,
        addlArgs=None,
        reuseConnections=False,
        testFilter=None,
        logsDir=None,
        queryTimeout=3600
):
    '''
    Run the Hive Jdbc MultiSession Test Driver.

    Builds the driver with "mvn clean package" in
    $WORKSPACE/datateamtest/hive_jdbc_multisession, assembles the classpath
    and command-line arguments, and launches
    org.apache.hive.jdbc.TestDriver through Java.runJava from ARTIFACTS_DIR.

    Args:
        testDir: value passed to the driver with "-t".
        addlClasspath: list of classpath entries placed before the driver
            jar and HADOOP_CONF. When empty or None, the jar returned by
            getStandaloneHiveJdbcJar() is placed first instead.
        connectionUrl: value passed with "-c"; defaults to
            Hive.getHiveServer2Url() when None.
        skippedTests: iterable of test names, each passed as "--skip <name>".
        addlArgs: extra driver arguments appended after all others.
        reuseConnections: when true, adds "--reuseConnections".
        testFilter: when set, passed with "-f".
        logsDir: value passed with "-l"; when not set, a randomly numbered
            "logs_<n>" directory under ARTIFACTS_DIR is used.
        queryTimeout: passed as "--queryTimeout <n>" only when greater
            than 0.

    Returns:
        Whatever Java.runJava returns, unchanged.

    Raises:
        AssertionError: if the Maven build exits with a non-zero code.
    '''
    # None replaces the former mutable list defaults so that no list object
    # is shared between calls; callers passing lists see no difference.
    addlClasspath = addlClasspath or []
    skippedTests = skippedTests or []
    addlArgs = addlArgs or []

    harnessDir = os.path.join(Config.getEnv('WORKSPACE'), 'datateamtest', 'hive_jdbc_multisession')
    logger.info("Build the TestDriver to run tests")
    exit_code, _ = Maven.run("clean package", cwd=harnessDir)
    assert exit_code == 0, "Failed to build the test driver"
    classpath = [
        os.path.join(harnessDir, "target", "hive-multisession-test-0.1.jar"),
        Config.get('hadoop', 'HADOOP_CONF')
    ]
    if not addlClasspath:
        # No caller-supplied entries: put the standalone Hive JDBC jar first.
        hiveJdbcDriver = getStandaloneHiveJdbcJar()
        classpath.insert(0, hiveJdbcDriver)
    else:
        classpath = addlClasspath + classpath

    # Append the Cobertura jar only when it is present at the expected
    # location under the system temp directory.
    cobert_tool_version = "cobertura-2.1.1"
    COBERTURA_CLASSPTH = os.path.join(
        tempfile.gettempdir(), "coverage-tmp", cobert_tool_version, cobert_tool_version + ".jar"
    )
    if Machine.pathExists(Machine.getAdminUser(), None, COBERTURA_CLASSPTH, Machine.getAdminPasswd()):
        classpath.append(COBERTURA_CLASSPTH)

    args = ["-t " + testDir]
    if connectionUrl is None:
        connectionUrl = Hive.getHiveServer2Url()
    args.append("-c \"%s\"" % connectionUrl)
    if Hadoop.isSecure():
        args.append("-k " + Config.get('machine', 'KEYTAB_FILES_DIR'))
        if Config.hasOption('machine', 'USER_REALM'):
            USER_REALM = Config.get('machine', 'USER_REALM', '')
            args.append("-e USER_REALM=%s" % (USER_REALM))
    args.extend(["--skip %s" % t for t in skippedTests])
    if reuseConnections:
        args.append("--reuseConnections")
    if testFilter:
        args.append("-f " + testFilter)
    # Imported here, as in the original, rather than at module level.
    from beaver.marker import getMarkerCondition
    markerCondition = getMarkerCondition()
    if markerCondition:
        args.append("-e 'marker=%s'" % markerCondition)
    if not logsDir:
        logsDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "logs_%d" % int(999999 * random.random()))
    args.append("-l " + logsDir)
    if queryTimeout > 0:
        args.append("--queryTimeout %d" % queryTimeout)
    args.extend(addlArgs)
    return Java.runJava(
        Config.getEnv('ARTIFACTS_DIR'),
        "org.apache.hive.jdbc.TestDriver",
        classPath=(os.pathsep).join(classpath),
        cmdArgs=args
    )