Example #1
    def resetNode(cls, nodename, hostlist, action):
        HADOOP_HOME = Config.get('hadoop', 'HADOOP_HOME')
        HADOOP_BIN = os.path.join(HADOOP_HOME, "bin")
        # Map the daemon name to the master/slave control script.
        nodetype = {'namenode': 'master',
                    'datanode': 'slave',
                    'jobtracker': 'master',
                    'tasktracker': 'slave'}[nodename]

        MAPRED_USER = Config.get('hadoop', 'MAPRED_USER')
        HDFS_USER = Config.get('hadoop', 'HDFS_USER')

        # Map the daemon name to the user that runs it.
        user = {'namenode': HDFS_USER,
                'datanode': MAPRED_USER,
                'jobtracker': HDFS_USER,
                'tasktracker': MAPRED_USER}[nodename]
        cmd = os.path.join(HADOOP_BIN, action + "-" + nodetype + ".cmd")

        # Resolve each host entry to an IPv4 address before issuing the command.
        pattern = r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
        for host in hostlist:
            if not re.search(pattern, host):
                host = host.split(':')[0]
                host = socket.gethostbyname(host.replace('\n', '').strip())

            host = re.findall(pattern, host)[0]
            logger.info(action + " " + nodename + " on host " + host)
            Machine.runinbackgroundAs(user, cmd, host, HADOOP_BIN)
Example #2
    def upgrade_master(cls, version, config=None):
        '''
        Upgrades Master services:
        :param version: Version to be upgraded to
        :param config: Config location
        '''
        from beaver.component.rollingupgrade.ruCommon import hdpSelect
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress("### Knox Upgrade started ####")
        # Stop the Knox gateway and ApacheDS
        Knox.stopKnox()
        Knox.stopLdap()

        node = Config.get('knox', 'KNOX_HOST').split(',')[0]

        # Switch to the new version
        hdpSelect.changeVersion("knox-server", version, node)

        # Start ApacheDS and the Knox gateway service
        Knox.startLdap()
        Knox.startKnox()

        time.sleep(10)

        UpgradePerNode.reportProgress("### Knox Upgrade Finished ####")
Example #3
 def deregisterHiveServer2(cls, version=None):
     if cls.isDynamicServiceDiscoveryOn():
         hiveHome = cls.getHiveHome(service="hiveserver2")
         hiveUser = Config.get('hive', 'HIVE_USER')
         if not version: version = cls.getVersion()
         deregCmd = os.path.join(hiveHome, "bin", "hive") + " --service hiveserver2 --deregister %s" % version
         return Machine.runas(hiveUser, deregCmd)
Example #4
def getClasspathForJdbcClient():
    jdbc_standalone_jar = getStandaloneHiveJdbcJar()
    assert jdbc_standalone_jar, "No JDBC standalone jar found"
    classpath = [jdbc_standalone_jar]
    hadoop_home = Config.get('hadoop', 'HADOOP_HOME')
    if Hadoop.isHadoop2() and Machine.type() == 'Windows':
        hadoop_home = os.path.join(hadoop_home, "share", "hadoop", "common")
    hadoop_common_jar = util.findMatchingFiles(hadoop_home, "hadoop-common-*[!(tests)].jar", depth=1)
    assert len(hadoop_common_jar) > 0, "No hadoop-common.jar found"
    classpath.append(hadoop_common_jar[0])
    if Hadoop.isSecure():
        hadoop_auth_jar = util.findMatchingFiles(hadoop_home, "hadoop-auth-*[!(tests)].jar", depth=1)
        assert len(hadoop_auth_jar) > 0, "No hadoop-auth.jar found"
        classpath.append(hadoop_auth_jar[0])
    classpath.append(Config.get('hadoop', 'HADOOP_CONF'))
    return os.pathsep.join(classpath)
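A minimal usage sketch for the helper above; the main class name is purely illustrative, and it assumes Machine.run returns an (exit_code, output) pair like the runas variants shown elsewhere on this page:

classpath = getClasspathForJdbcClient()
# Hypothetical JDBC smoke-test driver; only the classpath handling comes from the example above.
java_cmd = "java -cp %s org.example.JdbcSmokeTest" % classpath
exit_code, stdout = Machine.run(java_cmd)
assert exit_code == 0, "JDBC client run failed"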
Example #5
 def queryserver(cls,
                 user,
                 host,
                 action,
                 config=None,
                 homePath=None,
                 binFolder='bin'):
     if Machine.type() == 'Windows':
         return Machine.service("queryserver", action, host=host)
     else:
         if homePath is None:
             homePath = Config.get('phoenix', 'PHOENIX_HOME')
         cmd = os.path.join(homePath, binFolder, 'queryserver.py')
         env = None
         if config:
             env = {}
             env['HBASE_CONF_DIR'] = config
         cmd = "%s %s" % (cmd, action)
         exit_code, stdout = Machine.runas(user, cmd, host=host, env=env)
         # The command exited abnormally, don't run any follow-on checks
         if exit_code != 0:
             logger.warn("Failed to execute queryserver command. %d, %s",
                         exit_code, stdout)
             return exit_code, stdout
         # Check if PQS is accepting HTTP request before returning
         if action == 'start':
             logger.info('Verifying that PQS is running')
             if Phoenix.verify_pqs_running(host, QUERYSERVER_PORT):
                 return exit_code, stdout
             # Failed to verify PQS is running, bail out.
             raise Exception('Failed to connect to PQS on %s' % host)
         return exit_code, stdout
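A hedged usage sketch for the query server helper above; the user lookup and the host are placeholders, not values from the original code:

user = Config.get('hbase', 'HBASE_USER')   # assumed config key, not shown in the source
host = "pqs-host.example.com"              # placeholder host
exit_code, stdout = Phoenix.queryserver(user, host, 'start')
# ... run tests against PQS here ...
Phoenix.queryserver(user, host, 'stop')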
Example #6
def getJMXData(url,
               modelerType,
               metric,
               defaultValue=None,
               tries=5,
               wait_time=15,
               user=None):
    count = 0
    my_data = 0
    if not user:
        user = Config.get('hadoop', 'YARN_USER', 'yarn')
    while (my_data is None or my_data == 0 or my_data == '') and count < tries:
        JMX_DATA = util.getJSONContent(url, user=user)
        for data in JMX_DATA['beans']:
            # check for None
            if data is not None and data['modelerType'] is not None and data[
                    'modelerType'] == modelerType:
                my_data = data[str(metric)]
                break
        count += 1
        time.sleep(wait_time)

    if my_data is None or my_data == 0 or my_data == '':
        return defaultValue
    else:
        return my_data
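An illustrative call to getJMXData; the NameNode host, port, bean name and metric are assumptions for this sketch, not values from the source:

nn_host = "nn-host.example.com"   # placeholder
jmx_url = "http://%s:50070/jmx" % nn_host
missing_blocks = getJMXData(jmx_url,
                            "FSNamesystem",
                            "MissingBlocks",
                            defaultValue=0,
                            tries=3,
                            wait_time=10)
logger.info("MissingBlocks = %s", missing_blocks)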
Example #7
 def HDFS_getNNLogDir(cls, logoutput=True):
     try:
         nnHostList = cls.HDFS_getNamenodes()
         if nnHostList:
             for nnHost in nnHostList:
                 lines = Machine.find(user=Machine.getAdminUser(),
                                      host=nnHost,
                                      filepath=Config.get(
                                          'hadoop', 'HADOOP_LOG_DIR'),
                                      searchstr="hadoop*-namenode-*.log",
                                      passwd=Machine.getAdminPasswd())
                 lines = util.prune_output(lines, Machine.STRINGS_TO_IGNORE)
                 if lines:
                     nnLogFilePath = lines[0]
                     if nnLogFilePath is None:
                         continue
                     else:
                         return util.getDir(nnLogFilePath)
         return None
     except Exception:
         if logoutput:
             logger.error(
                 "Exception occured during HDFS_getNNLogDir() call")
             logger.error(traceback.format_exc())
         return None
Example #8
 def importDmp(cls, dmpfile, database, host=None):
     pg_cmd = "psql -U%s" % Config.get(
         'machine', 'POSTGRES_ROOT_USER', default='postgres')
     if host:
         pg_cmd += " -h" + host
     pg_cmd += " -d%s -f %s" % (database, dmpfile)
     return Machine.run(pg_cmd)
Example #9
 def getDatanodeCount(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -report")
     if exit_code == 0:
         m = re.match(r".*Datanodes available: (\d+) \(", output, re.DOTALL)
         if m:
             return int(m.group(1))
     return 0
Example #10
    def getDatabaseFlavor(cls):
        SQOOP2_HOME = Config.get("sqoop2", "SQOOP2_HOME")
        SQOOP2_LIB = os.path.join(SQOOP2_HOME, 'sqoop-server', 'lib')
        db = ''
        if config.find('/usr/share/java', "mysql-connector-java*.jar"):
            db = "mysql"

        # special handling for postgres version
        if config.find('/usr/share/java', "postgresql91-jdbc.jar"):
            db = 'postgres-9.1'
        elif config.find(SQOOP2_LIB, "postgresql93-jdbc.jar"):
            db = "postgres-9.3"
        elif config.find('/usr/share/java', "postgresql*.jar"):
            db = "postgres-8"

        if config.find(SQOOP2_LIB, "ojdbc*.jar"):
            db = "oracle"

        if config.find(SQOOP2_LIB, "nzjdbc*.jar"):
            if db == '':
                db = "netezza"
            else:
                db += ", netezza"

        return db
Example #11
    def submit_storm_hive_topology(cls, tcId, className, args,
                                   useStandaloneCmd):
        if Hadoop.isSecure():
            user_realm = None
            if Config.hasOption('machine', 'USER_REALM'):
                user_realm = Config.get('machine', 'USER_REALM', '')
            else:
                nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
                atloc = nnKerbPrincipal.find("@")
                if atloc != -1:
                    user_realm = nnKerbPrincipal[atloc:]
            # Guard against user_realm never being set (e.g. principal without a realm).
            if user_realm is not None:
                args += " " + Machine.getHeadlessUserKeytab(
                    Config.getEnv('USER')) + " " + Config.getEnv(
                        'USER') + '@' + user_realm

        exit_code, stdout = Storm.runStormHdfsTopology(
            TARGET_HIVE_STORM_JAR,
            className,
            args,
            None,
            logoutput=True,
            inBackground=False,
            useStandaloneCmd=useStandaloneCmd)
        logger.info(exit_code)

        ruAssert("Storm", exit_code == 0,
                 "[StormHiveSubmit] %s Failed" % (tcId))
Example #12
    def runas(cls,
              user,
              cmd,
              cwd=None,
              env=None,
              logoutput=True,
              runInBackground=False):
        runCmd = Config.get('pig', 'PIG_CMD') + " " + cmd
        # initialize env
        if not env:
            env = {}
        # get kerberos ticket
        if Hadoop.isSecure():
            if user is None:
                user = Config.getEnv('USER')
            kerbTicket = Machine.getKerberosTicket(user)
            env['KRB5CCNAME'] = kerbTicket
            user = None

        if runInBackground:
            return Machine.runinbackgroundAs(user, runCmd, cwd=cwd, env=env)
        else:
            return Machine.runas(user,
                                 runCmd,
                                 cwd=cwd,
                                 env=env,
                                 logoutput=logoutput)
Example #13
 def run_background_job(cls,
                        runSmokeTestSetup=True,
                        config=None,
                        flagFile="/tmp/flagFile"):
     '''
     Uploads Files to HDFS before upgrade starts and runs long running sleep job in background
      :return: number of applications started
     '''
     # start long running application which performs I/O operations (BUG-23838)
     #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
     #jobArgs = {"mapred.job.queue.name" : cls._queue}
     #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
     #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)
     # load generator
     HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
     TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
     HDFS.deleteDirectory(flagFile)
     slavelist = HDFS.getDatanodes()
     jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
         HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir,
         cls._lgTestDataDir, flagFile)
     Hadoop.runInBackground(jobCmd)
     time.sleep(15)
     return 1
Example #14
    def ru_rollback_state(cls):
        '''
        Saved state is rolled back - upgrade is abandoned
        NOTE: this command will not return until namenode shuts down
        :return:
        '''
        logger.info("[INFO][HDFS][Upgrade] HA Journal Node Upgrade Started ")
        hdfs_user = Config.get('hadoop', 'HDFS_USER')

        nodes = []
        nodes.append(HDFS.getNamenodeByState('standby'))
        nodes.append(HDFS.getNamenodeByState('active'))
        logger.info("[INFO][HDFS][Upgrade] HA Namenode Upgrade Started")
        for node in nodes:
            HDFS.resetZkfc('stop', hosts=node.split())
            HDFS.resetNamenode('stop', host=node)

        HDFS.resetNamenode('start',
                           config=None,
                           host=nodes[0],
                           option=" -rollingUpgrade rollback")
        HDFS.resetZkfc('start', hosts=nodes[0].split())
        # let's make sure the NN is out of safemode before we proceed to the next namenode
        HDFS.waitForNNOutOfSafemode(options='-fs hdfs://%s:8020' % nodes[0])

        command = "sudo su - -c 'hadoop namenode -bootstrapStandby -force' hdfs"
        (exitcode, stdout) = Machine.runas(Machine.getAdminUser(), command,
                                           nodes[1], None, None, "True",
                                           Machine.getAdminPasswd())
        ruAssert("HDFS", exitcode == 0,
                 "hadoop namenode -bootstrapStandby -force")
        HDFS.resetNamenode('start', config=None, host=nodes[1], option="")
        HDFS.resetZkfc('start', hosts=nodes[1].split())
        # let's make sure the NN is out of safemode before we proceed to the next namenode
        HDFS.waitForNNOutOfSafemode(options='-fs hdfs://%s:8020' % nodes[1])
Example #15
    def getZipFile(cls, version=HBase.getVersionFromBuild(), isRU=False):
        # download for linux, no download for windows
        HBASE_VER_BUILD = version

        if Machine.isWindows():
            zipFile = os.path.join(
                Config.get('slider', 'SLIDER_HOME'), "app-packages",
                "slider-hbase-app-win-package-%s.zip" % HBASE_VER_BUILD)
            return zipFile

        pkg_list = "pkg-list_qe.txt"
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), pkg_list)
        #pkgUrl = Config.get('slider','APP_PKG_LIST')
        pkgUrl = Slider.getAppPackageBaseUrl(
            isRU) + "/slider-app-packages/" + pkg_list
        util.downloadUrl(pkgUrl, path)
        with open(path, 'r') as f:
            for line in f:
                if line.startswith("hbase_pkg_url="):
                    url = line.strip()[14:]
                    break
        zipFile = os.path.join(
            os.getcwd(), "slider-hbase-app-package-%s.zip" % HBASE_VER_BUILD)
        logger.info("downloading " + url)
        util.downloadUrl(url, zipFile)
        return zipFile
Example #16
 def get_hadoop_test_jar(cls):
     return Machine.find(
         user=Machine.getAdminUser(),
         host='',
         filepath=Config.get('hadoop', 'MAPRED_HOME'),
         searchstr='hadoop-mapreduce-client-jobclient-*-tests.jar',
         passwd=Machine.getAdminPasswd())[0]
Example #17
    def upgrade_slave(cls, version, node, config=None, logText="Upgrade"):
        '''
        Upgrade HDFS slave service: Datanode
        :param version: latestVersion.
        :param node: The node name where DN is running
        '''
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][%s] Datanode %s for node %s to version %s started " %
            (logText, logText, node, version))
        ipcPort = HDFS.getDataNodeIPCPort()
        cmd = "dfsadmin -shutdownDatanode %s:%s upgrade" % (node, ipcPort)
        exitcode, stdout = HDFS.runas(Config.get('hadoop', 'HDFS_USER'),
                                      cmd,
                                      env=None,
                                      logoutput=True,
                                      config=None,
                                      host=None,
                                      skipAuth=False)
        if exitcode != 0:
            UpgradePerNode.reportProgress(
                "[INFO][HDFS][%s] Datanode shutdownDatanode command failed for %s "
                % (logText, node))

        HDFS.waitForDNDown(node, ipcPort, "ipc.client.connect.max.retries=1")
        from beaver.component.rollingupgrade.ruCommon import hdpSelect
        hdpSelect.changeVersion("hadoop-hdfs-datanode", version, node)
        HDFS.startDatanodes(config=config, nodes=[node])
        cls.wait4DNLive(node)
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][%s] Datanode %s for node %s to version %s finished "
            % (logText, logText, node, version))
Example #18
    def ru_prepare_save_state_for_upgrade(cls):
        '''
        Prepare Namenode to save State for Upgrade
        '''
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Prepare] Preparing state for HDFS upgrade")
        # BUG-26726: we need to be in safemode only in non HA cluster
        if not HDFS.isHAEnabled():
            exit_code, output = HDFS.runasAdmin("dfsadmin -safemode enter")
            ruAssert("HDFS", exit_code == 0,
                     '[Preparation] enter safemode failed')

        exit_code, output = HDFS.runas(
            Config.get('hadoop', 'HDFS_USER'),
            "dfsadmin -Ddfs.client.test.drop.namenode.response.number=0 -rollingUpgrade prepare"
        )
        ruAssert("HDFS", exit_code == 0,
                 '[Preparation] -rollingUpgrade prepare failed')
        if not HDFS.isHAEnabled():
            exit_code, output = HDFS.runasAdmin("dfsadmin -safemode leave")
            ruAssert("HDFS", exit_code == 0,
                     '[Preparation] leave safemode failed')
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Prepare] Preparing state for HDFS upgrade finished ")
Example #19
 def backupDatabase(cls, database, backupfile, host=None):
     pg_cmd = "pg_dump -U%s" % Config.get(
         'machine', 'POSTGRES_ROOT_USER', default='postgres')
     if host:
         pg_cmd += " -h" + host
     pg_cmd += " %s > %s" % (database, backupfile)
     return Machine.run(pg_cmd)
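A round-trip sketch combining backupDatabase above with importDmp from Example #8, assuming both classmethods live on the same Postgres helper class; the class name, database names, host and path are placeholders:

dump_path = "/tmp/hive_metastore.dmp"    # placeholder path
Postgres.backupDatabase("hive", dump_path, host="db-host.example.com")
Postgres.importDmp(dump_path, "hive_restored", host="db-host.example.com")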
Example #20
 def getTTHostForAttemptId(cls, attemptID):
     HADOOP_JOBTRACKER_LOG = Config.get('hadoop', 'HADOOP_JOBTRACKER_LOG')
     # Scan the JobTracker log for the tracker host that ran the given attempt.
     with open(HADOOP_JOBTRACKER_LOG, "r") as f:
         for line in f:
             searchFor = re.search(".*" + attemptID + r".*tracker_.*/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*", line)
             if searchFor is not None:
                 return searchFor.group(1)
Example #21
 def runSource(cls, file_, host, database, cwd=None):
     return Machine.run(
         "psql -h%s -U%s -d%s -f%s" %
         (host,
          Config.get('machine', 'POSTGRES_ROOT_USER',
                     default='postgres'), database, file_),
         cwd=cwd)
Example #22
    def log_test_properties(cls, propChanges):
        splitNumStr = str(propChanges['SPLIT_NUM'])
        ambariTestClass = str(propChanges['AMBARI_TESTSNAMES'])

        print "splitNumStr is: ", splitNumStr
        print "Ambari tests to run: ", ambariTestClass
        print "Additional properties for run :", Config.get('ambari', 'ADDITIONAL_AMBARI_PROPS')
Example #23
 def smoke_test_setup(cls):
     '''
     Setup function for HDFS smoke test
     '''
     if not cls._SmokeInputDir:
         cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
     HDFS.deleteDirectory(cls._SmokeInputDir,
                          Config.get('hadoop', 'HADOOPQA_USER'))
     jobCmd = 'jar %s randomtextwriter \"-D%s=%s\" \"-D%s=%s\" %s' % (
         Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'),
         "mapreduce.randomtextwriter.totalbytes", "4096",
         "mapred.job.queue.name", cls._queue, cls._SmokeInputDir)
     exit_code, stdout = Hadoop.run(jobCmd)
     ruAssert(
         "HDFS", exit_code == 0,
         '[SmokeSetup] Randomtextwriter job failed and could not create data on hdfs'
     )
Example #24
 def Knox_getLogDir(cls, logoutput=True):
     try:
         return Config.get('knox', 'KNOX_LOG', 'knoxlogdirnotfound')
     except Exception:
         if logoutput:
             logger.error("Exception occured during Knox_getLogDir() call")
             logger.error(traceback.format_exc())
         return None
Example #25
 def getOozieLogDir(cls, logoutput=False):
     '''
     Returns oozie log dir.
     '''
     returnValue = Config.get("oozie", "OOZIE_LOG_DIR")
     if logoutput:
         logger.info("Oozie.getOozieLogDir returns %s" % returnValue)
     return returnValue
Example #26
 def print_environment_details(self):
     UpgradeLogger.reportProgress("=====Environment Details=====\n ", True)
     is_hdp = self.STACK_TYPE.lower() == "hdp"
     UpgradeLogger.reportProgress(
         "Ambari URL : " + Ambari.getWebUrl(is_hdp=is_hdp), True)
     UpgradeLogger.reportProgress("Ambari OS : " + Ambari.getOS(), True)
     UpgradeLogger.reportProgress(
         "Stack Type : " + Config.get('ambari', 'STACK_TYPE'), True)
     UpgradeLogger.reportProgress(
         "Ambari DB : " + Config.get('ambari', 'AMBARI_DB'), True)
     UpgradeLogger.reportProgress(
         "Kerberos : " + Config.get('machine', 'IS_SECURE'), True)
     UpgradeLogger.reportProgress(
         "HA : " + Config.get('ambari', 'IS_HA_TEST'), True)
     UpgradeLogger.reportProgress(
         "Wire Encryption : " + Config.get('ambari', 'WIRE_ENCRYPTION'),
         True)
Example #27
 def runAvroClient(cls, host, port, conffile):
     flume_conf = Config.get('flume-ng', 'FLUME_CONF')
     cmd = "avro-client -H %s -p %s" % (host, port)
     if Machine.type() == 'Linux':
         cmd += " -c %s -F %s" % (flume_conf, conffile)
     else:
         cmd += " -conf %s -filename %s" % (flume_conf, conffile)
     return cls.run(cmd)
Example #28
 def upgradeSchema(cls):
     hiveHome = cls.getHiveHome(service="metastore")
     hiveUser = Config.get('hive', 'HIVE_USER')
     hiveHost = cls.getHiveHost()
     dbflavor = cls.getDatabaseFlavor()
     if dbflavor.startswith("postgres"): dbflavor = "postgres"
     upCmd = os.path.join(hiveHome, "bin", "hive") + " --service schemaTool -dbType %s -upgradeSchema" % dbflavor
     return Machine.runas(hiveUser, upCmd, host=hiveHost)
Example #29
def setupMergeScaleDataset(LOCAL_DIR):
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCH dataset if not there
    tpch_data_dir = os.path.join(LOCAL_DIR, "data")
    TPCH_DATA_TGZ = os.path.join(LOCAL_DIR, "tpch_data.tgz")
    if not os.path.isfile(TPCH_DATA_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_DNLD_URL'), TPCH_DATA_TGZ)
        Machine.tarExtractAll(TPCH_DATA_TGZ, LOCAL_DIR)

    # Load the tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/tpch", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(tpch_data_dir, "/tmp/tpch", user=HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/tpch", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp/tpch/data'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH data in Hive"

    # Download TPCH staging data
    tpch_stage_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
        Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the staged tables in Hive
    HDFS.createDirectory(
        "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", user=HADOOPQA_USER, perm='777', force=True
    )
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "lineitem*"), "/tmp/lineitem_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "order*"), "/tmp/orders_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "delete*"), "/tmp/delete_stage", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-staged-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH staging data in Hive"
Example #30
 def getTempletonLogDir(cls, logoutput=False):
      '''
      Returns Templeton log directory (String).
      '''
     returnValue = Config.get('templeton', 'TEMPLETON_LOG_DIR', default='')
     if logoutput:
         logger.info("Hcatalog.getTempletonLogDir returns %s" % returnValue)
     return returnValue
Example #31
def getLLAPDaemonPidsHosts():
    hosts = []
    llapdaemon_pids = []
    if not Machine.isHumboldt():
        nodes = Hadoop.getAllNodes()
    else:
        nodes = HDFS.getDatanodes()
    for node in nodes:
        pids = Machine.getProcessListRemote(
            node, format="%U %p %P %a", filter="org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon", logoutput=True
        )
        if pids:
            if Hadoop.isSecure():
                pid = Machine.getPidFromString(pids[0], Config.get('hive', 'HIVE_USER'))
            else:
                pid = Machine.getPidFromString(pids[0], Config.get('hadoop', 'YARN_USER'))
            llapdaemon_pids.append(pid)
            hosts.append(node)
    return llapdaemon_pids, hosts
Example #32
 def runAsRoot(cls, cmd, host="", database=""):
     passwd = Config.get('machine', 'ORACLE_ROOT_PASSWD',
                         default='root').strip()
     return cls.runJdbcCmd(cmd,
                           user="******",
                           passwd=passwd,
                           host=host,
                           database=database)
Example #33
 def runCmd(cls, cmd, host=None, database=None, user=None, passwd=None):
     if not user:
         user = Config.get('machine',
                           'POSTGRES_ROOT_USER',
                           default='postgres')
     psql_cmd = Machine.echocmd(cmd) + " | psql -U%s" % user
     if host:
         psql_cmd += " -h" + host
     if database:
         psql_cmd += " -d" + database
     if not passwd:
         passwd = Config.get('machine', 'POSTGRES_ROOT_PASSWD',
                             default='').strip()
     env = {}
     if passwd != "":
         env['PGPASSWORD'] = passwd
     return Machine.run(psql_cmd, env=env)
Example #34
 def triggerSleepJob(cls, numOfMaps, numOfReduce, mapsleeptime, reducesleeptime, numOfJobs, queue='', background=False):
     jobCounter = 0
     while jobCounter < numOfJobs:
         sleepCmd = " jar " + Config.get('hadoop', 'HADOOP_EXAMPLES_JAR') + " sleep " + queue + " -m " + numOfMaps + " -r " + numOfReduce + " -mt " + mapsleeptime + " -rt " + reducesleeptime
         if background:
             Hadoop.runInBackground(sleepCmd)
         else:
             Hadoop.run(sleepCmd)
         jobCounter = jobCounter + 1
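An illustrative call matching the commented-out invocation in Example #13; map/reduce counts and sleep times are passed as strings because they are concatenated straight into the command line above, and the queue flag format is an assumption:

MAPRED.triggerSleepJob("1", "1", "1000", "1000", 2,
                       queue="-Dmapred.job.queue.name=default",   # assumed flag format
                       background=True)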
Example #35
def generateTestReportConf(infile, outfile, results):
    config = ConfigParser()
    config.optionxform = str
    config.read(infile)
    if config.has_section(SECTION):
        for option, value in config.items(SECTION):
            if value != "": continue
            elif option == "BUILD_ID" and config.has_option(SECTION, "REPO_URL"):
                config.set(SECTION, option, getBuildId(config.get(SECTION, "REPO_URL")))
                config.remove_option(SECTION, "REPO_URL")
            elif option == "HOSTNAME":
                config.set(SECTION, option, socket.getfqdn())
            elif option == "COMPONENT_VERSION":
                if not config.has_option(SECTION, "COMPONENT") or config.get(SECTION, "COMPONENT") == "":
                    config.set(SECTION, "COMPONENT", "Hadoop")
                config.set(SECTION, option, getComponentVersion(config.get(SECTION, "COMPONENT")))
            elif option == "OS":
                config.set(SECTION, option, platform.platform())
            elif option == "SECURE" and Config.hasOption('hadoop', 'IS_SECURE'):
                config.set(SECTION, option, Config.get('hadoop', 'IS_SECURE').lower())
            elif option == "BLOB":
                pass
            elif option == "RAN":
                config.set(SECTION, option, results[0] + len(results[1]))
            elif option == "PASS":
                config.set(SECTION, option, results[0])
            elif option == "FAIL":
                config.set(SECTION, option, len(results[1]))
            elif option == "SKIPPED":
                config.set(SECTION, option, results[2])
            elif option == "ABORTED":
                config.set(SECTION, option, results[3])
            elif option == "FAILED_TESTS":
                config.set(SECTION, option, ",".join(results[1]))
            elif option == "SINGLE_NODE":
                from beaver.component.hadoop import HDFS
                if HDFS.getDatanodeCount() > 1:
                    config.set(SECTION, option, "false")
                else:
                    config.set(SECTION, option, "true")
        config.write(open(outfile, 'w'))
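For reference, the shape of the results argument implied by the branches above is (pass count, failed-test list, skipped count, aborted count); the values below are illustrative:

results = (42, ["test_foo", "test_bar"], 3, 0)
generateTestReportConf("report.conf.in", "report.conf", results)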
Example #36
 def runAdminReport(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -report")
     return output
Example #37
 def refreshDatanodes(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -refreshNodes")
     return output
Example #38
 def exitSafemode(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -safemode leave")
     return re.search(".*Safe mode is OFF", output) is not None
Example #39
 def waitForNNOutOfSafemode(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -safemode get")
     while re.search(".*Safe mode is ON", output) is not None:
         exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -safemode get")
         time.sleep(20)
     return True
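A bounded variant of the polling loop above, as a sketch only: it gives up after max_tries attempts instead of waiting indefinitely for the NameNode to leave safemode.

def wait_for_nn_out_of_safemode_bounded(max_tries=30, wait=20):
    # Poll "dfsadmin -safemode get" until safemode is off or the retry budget runs out.
    for _ in range(max_tries):
        exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "dfsadmin -safemode get")
        if re.search(".*Safe mode is ON", output) is None:
            return True
        time.sleep(wait)
    return False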
Example #40
# Get user from config file
HADOOPQA_USER = CommonHadoopEnv.getHadoopQAUser()
HDFS_USER = CommonHadoopEnv.getHDFSUser()
MAPRED_USER = CommonHadoopEnv.getMapredUser()

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(SCRIPT_PATH, "data")
CREATE_FILE = "CreateFile.py"
CREATE_FILE_PATH_IN_HADOOP = '/user/' + HADOOPQA_USER + '/' + CREATE_FILE
CREATE_FILE_PATH_IN_LOCAL = os.path.join(SCRIPT_PATH, "data", CREATE_FILE)
CREATE_FILE_2 = "CreateFile2.py"
CREATE_FILE_2_PATH_IN_HADOOP = '/user/' + HADOOPQA_USER + '/' + CREATE_FILE_2
CREATE_FILE_2_PATH_IN_LOCAL = os.path.join(SCRIPT_PATH, "data", CREATE_FILE_2)
OUT_PATH_IN_HADOOP = '/user/' + HADOOPQA_USER + '/out1'
HADOOP_STREAMING_JAR = Config.get('hadoop', 'HADOOP_STREAMING_JAR')

logger = logging.getLogger(__name__)
  
def validateJobId(jobId):
    jobId = jobId + ""
    return jobId.startswith('job_')

def getLocalDirInfo(host):            
    return util.getPropertyValueFromConfigXMLFile(os.path.join(Config.get('hadoop', 'HADOOP_CONF'), "mapred-site.xml"), "mapred.local.dir")

def checkJobCreatedTempFileInTT(logFileDir, currentUser, currentJobId, currentAttemptId, logfile, taskTrackerHost):      
    pathFile = os.path.join(logFileDir, 'taskTracker', currentUser, 'jobcache', currentJobId, currentAttemptId, 'work', logfile)
    logger.info("path file: " + pathFile)
    result = False
    if platform.system() == 'Windows':
Example #41
 def getHadoopExamplesJar(cls):
     return Config.get('hadoop', 'HADOOP_EXAMPLES_JAR')
Example #42
 def refreshTasktrackers(cls):
     exit_code, output = Hadoop.runas(Config.get('hadoop', 'HDFS_USER'), "mradmin -refreshNodes")
     return output
Example #43
 def getTasktrackers(cls):
     slaveFile = os.path.join(Config.get('hadoop', 'HADOOP_CONF'), "slaves")
     with open(slaveFile, "r") as f:
         tasktrackers = f.readlines()
     return tasktrackers
Example #44
 def run(cls, cmd, logoutput=True):
     return Machine.run(Config.get('hbase', 'HBASE_CMD') + " " + cmd, logoutput=logoutput)
Example #45
 def runInBackgroundAs(cls, user, cmd):
     hadoop_cmd = Config.get('hadoop', 'HADOOP_CMD')
     if Config.get('hadoop', 'HADOOP_CONF_EXCLUDE') == 'False':
         hadoop_cmd += " --config " + Config.get('hadoop', 'HADOOP_CONF')
     hadoop_cmd += " " + cmd
     return Machine.runinbackgroundAs(user, hadoop_cmd)
Example #46
 def getHadoopQAUser(cls):
     return Config.get('hadoop', 'HADOOPQA_USER')
Example #47
 def getCluster(cls):
     return Config.get('hadoop', 'CLUSTER')
Example #48
 def getMapredUser(cls):
     return Config.get('hadoop', 'MAPRED_USER')
Example #49
 def getHDFSUser(cls):
     return Config.get('hadoop', 'HDFS_USER')
Example #50
 def getHadoopConfDir(cls):
     return Config.get('hadoop', 'HADOOP_CONF')
Example #51
def getLocalDirInfo(host):
    return util.getPropertyValueFromConfigXMLFile(os.path.join(Config.get('hadoop', 'HADOOP_CONF'), "mapred-site.xml"), "mapred.local.dir")
Example #52
 def runAgent(cls, name, conffile, user=None, cwd=None, env=None):
     flume_conf = Config.get('flume-ng', 'FLUME_CONF')
     cmd = "agent -n %s -c %s -f %s" % (name, flume_conf, conffile)
     return cls.runInBackgroundAs(user, cmd, cwd=cwd, env=env)
Example #53
 def getConfigValue(cls, propertyValue, defaultValue=None):
     return util.getPropertyValueFromConfigXMLFile(os.path.join(Config.get('hadoop', 'HADOOP_CONF'), "core-site.xml"), propertyValue, defaultValue=defaultValue)
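An example lookup through the helper above; fs.defaultFS is a standard core-site.xml key, the fallback value is illustrative, and the owning class name is assumed:

default_fs = Hadoop.getConfigValue("fs.defaultFS", defaultValue="hdfs://localhost:8020")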
Example #54
 def runInBackgroundAs(cls, user, cmd, cwd=None, env=None):
     flume_cmd = Config.get('flume-ng', 'FLUME_CMD')
     flume_cmd += " " + cmd
     return Machine.runinbackgroundAs(user, flume_cmd, cwd=cwd, env=env)
Example #55
 def getDatanodes(cls):
     slaveFile = os.path.join(Config.get('hadoop', 'HADOOP_CONF'), "slaves")
     with open(slaveFile, "r") as f:
         datanodes = f.readlines()
     return datanodes
Example #56
 def runas(cls, user, cmd, cwd=None, env=None, logoutput=True):
     flume_cmd = Config.get('flume-ng', 'FLUME_CMD')
     flume_cmd += " " + cmd
     return Machine.runas(user, flume_cmd, cwd=cwd, env=env, logoutput=logoutput)
Example #57
 def runas(cls, user, cmd, logoutput=True):
     hadoop_cmd = Config.get('hadoop', 'HADOOP_CMD')
     if Config.get('hadoop', 'HADOOP_CONF_EXCLUDE') == 'False':
         hadoop_cmd += " --config " + Config.get('hadoop', 'HADOOP_CONF')
     hadoop_cmd += " " + cmd
     return Machine.runas(user, hadoop_cmd, logoutput=logoutput)
Example #58
 def getVersion(cls):
     return Config.get('hadoop', 'VERSION')