Example #1
 def run_background_job(cls,
                        runSmokeTestSetup=True,
                        config=None,
                        flagFile="/tmp/flagFile"):
     '''
     Uploads files to HDFS before the upgrade starts and runs a long-running load-generator job in the background.
     :return: number of applications started
     '''
     # start long running application which performs I/O operations (BUG-23838)
     #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
     #jobArgs = {"mapred.job.queue.name" : cls._queue}
     #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
     #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)
     # load generator
     HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
     TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
     HDFS.deleteDirectory(flagFile)
     slavelist = HDFS.getDatanodes()
     jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
         HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir,
         cls._lgTestDataDir, flagFile)
     Hadoop.runInBackground(jobCmd)
     time.sleep(15)
     return 1
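A minimal invocation sketch for the helper above. The owning class is not shown in the snippet, so the name ruHDFS below is purely illustrative, and the flag-file path simply reuses the default:

# Hypothetical usage; ruHDFS is an assumed name for the class that defines
# run_background_job(), which is the only part actually shown above.
num_apps = ruHDFS.run_background_job(runSmokeTestSetup=False,
                                     config=None,
                                     flagFile="/tmp/flagFile")
assert num_apps == 1  # the helper starts one background NNloadGenerator job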
Example #2
    def updateJobProperties(cls,
                            propFile,
                            properties=None,
                            haEnabled=False,
                            debug=False):
        fileSystemName = Hadoop.getFSDefaultValue()
        jobTrackerIP = MAPRED.getJobtrackerAddress()
        jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]

        if not properties:
            properties = {}
        if 'nameNode' not in properties:
            properties['nameNode'] = fileSystemName
        if 'jobTracker' not in properties:
            properties['jobTracker'] = jobTracker

        if "hcatalog" in propFile:
            if Hadoop.isSecure():
                kerberosPrincipal = Hive.getConfigValue(
                    "hive.metastore.kerberos.principal")
                properties[
                    'hive.metastore.kerberos.principal'] = kerberosPrincipal

            logger.info("Updating for hcatalog workflow")
            hcatNode = Hive.getConfigValue("hive.metastore.uris").replace(
                'thrift', 'hcat')
            logger.info("Hcat node is " + hcatNode)
            properties['hcatNode'] = hcatNode

        if Hadoop.isSecure():
            # determine the namenode and the jobtracker principal
            nnPrincipal = None
            if haEnabled:
                nnPrincipal = HDFS.getNameNodePrincipal().replace(
                    '_HOST', HDFS.getNamenodeByState('active'))
            else:
                nnPrincipal = HDFS.getNameNodePrincipal().replace(
                    '_HOST',
                    HDFS.getNamenodeHttpAddress()[0])
            jtPrincipal = MAPRED.getMasterPrincipal().replace(
                '_HOST', jobTrackerIP[0])
            properties['dfs.namenode.kerberos.principal'] = nnPrincipal
            properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal

        wfPath = util.getPropertyValueFromFile(propFile,
                                               "oozie.wf.application.path")
        if wfPath is not None and "hdfs://localhost:9000" in wfPath:
            wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
            logger.info("Value of replaced oozie.wf.application.path is " +
                        wfPath)
            properties['oozie.wf.application.path'] = wfPath

        util.writePropertiesToFile(propFile, propFile, properties)

        if debug:
            logger.info('Content of properties file %s' % propFile)
            # print the file to the console
            with open(propFile, 'r') as f:
                logger.info(f.read())
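A usage sketch for updateJobProperties. The owning class is not part of the snippet, so OozieJobHelper is an illustrative name and the property file path is a placeholder; HDFS.isHAEnabled() is taken from the other examples on this page:

# Hypothetical call; only updateJobProperties() itself is defined above.
props = {'queueName': 'default'}
OozieJobHelper.updateJobProperties('/tmp/oozie-wc/job.properties',
                                   properties=props,
                                   haEnabled=HDFS.isHAEnabled(),
                                   debug=True)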
Example #3
    def background_job_setup(cls, runSmokeTestSetup=True, config=None):
        '''
        Uploads data to HDFS before the upgrade starts.
        Creates the /user/hrt_qa/test_rollingupgrade dir on HDFS
        and uploads 20 files into it.
        '''
        if not cls._base_hdfs_dir:
            cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get(
                'hadoop', 'HADOOPQA_USER')
        exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir,
                                                 force=True)
        ruAssert("HDFS", exit_code == 0,
                 '[BGJobSetup] could not create dir on hdfs.')
        LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                      'HDFS_RU_TEST')
        localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1,
                                            20, 40, 1000)
        HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"),
                           cls._base_hdfs_dir)

        # set up for loadGenerator
        cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
        cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
        cls._lgStructureDir = Machine.getTempDir() + "/structure"
        # test dir setup
        HDFS.deleteDirectory(cls._lgTestDataDir)
        HDFS.deleteDirectory(cls._lgTestOutputDir)
        command = "rm -rf " + cls._lgStructureDir
        exit_code, stdout = Machine.runas(Machine.getAdminUser(), command,
                                          None, None, None, "True",
                                          Machine.getAdminPasswd())
        command = "mkdir " + cls._lgStructureDir
        stdout = Machine.runas(None, command, None, None, None, "True", None)
        Machine.chmod("777", cls._lgStructureDir, "True",
                      Machine.getAdminUser(), None, Machine.getAdminPasswd())

        HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        # structure generator
        jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0,
                 "[BGJobSetup] StructureGenerator failed")
        # data generator
        jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

        if runSmokeTestSetup:
            logger.info("**** Running HDFS Smoke Test Setup ****")
            cls.smoke_test_setup()
Example #4
    def getComponnetsToTest(cls, compFile, depFile):
        '''
        Get the components that are being tested according to depFile
        '''
        # read in the config file
        conf = RuSetup.readJson(compFile)
        isStandalone = conf[RuSetup.CONF_STANDALONE]
        RuSetup._skipQueue = set(conf[RuSetup.CONF_SKIP_QUEUE])
        RuSetup._defaultQueue = conf[RuSetup.CONF_DEFAULT_QUEUE]
        returnSet = None
        if isStandalone:
            # get the components to test
            returnSet = set(conf[RuSetup.CONF_COMPONENTS_TEST])
        else:
            returnSet = set(RuSetup.getComponentsAffected(compFile, depFile))

        # skip tests according to cluster settings
        if not HDFS.isHAEnabled():
            logger.info("Skip HDFS since HA is not enabled")
            returnSet.discard("hdfs")

        # as discussed in Ru standup for 11/13, enabling storm-slider for non HA cluster and storm standalone for HA cluster
        if YARN.isHAEnabled():
            returnSet.discard("storm-slider")
        else:
            returnSet.discard("storm")

        if Hadoop.isEncrypted():
            returnSet.discard("knox")
            returnSet.discard("falcon")

        if Hadoop.isTez():
            logger.info("Add tez since Hadoop.isTez()")
            returnSet.add("tez")
        else:
            logger.info(
                "Make sure tez is not in the list since Hadoop.isTez() is false"
            )
            returnSet.discard("tez")
        # Note: component.xa is always available, even if xa is not installed
        # So this line should work even if the cluster does not have xa installed
        from beaver.component.xa import Xa
        if Xa.isArgusInstalled():
            logger.info("Add argus since argus is there")
            returnSet.add("argus")
        else:
            logger.info(
                "Make sure argus is not in the list since it's not available")
            returnSet.discard("argus")

        return list(returnSet)
Example #5
    def restartRemoteActiveNN(cls, wait=10, host=None):

        if host is None:
            host = Config.get("falcon", "HOST2")

        active_namenode = cls.getActiveNN(host, "NAMENODE")
        logger.info("Current Active NameNode Before Killing %s" %
                    active_namenode)
        Hadoop.killService('namenode', hdfs_user, active_namenode)
        cls.stopComponent(host, "NAMENODE", active_namenode)
        time.sleep(wait)
        cls.startComponent(host, "NAMENODE", active_namenode)
        logger.info("Active NameNode After Restart %s" % active_namenode)
        return
Example #6
def getAttemptIdsForJobIdAndStoreInFile(jobId, myTask="map"):
    artifactsDir = CommonHadoopEnv.getArtifactsDir()
    saveFilePath = os.path.join(artifactsDir, "AttemptIdFile")
    listAttemptCmd = " job -list-attempt-ids " + jobId + " " + myTask + " running "
    out = Hadoop.run(listAttemptCmd)
    # write the returned attempt ids to a file under the artifacts dir
    util.writeToFile(out[1], saveFilePath)
Example #7
    def submit_storm_hive_topology(cls, tcId, className, args,
                                   useStandaloneCmd):
        if Hadoop.isSecure():
            user_realm = None
            if Config.hasOption('machine', 'USER_REALM'):
                user_realm = Config.get('machine', 'USER_REALM', '')
            else:
                nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
                atloc = nnKerbPrincipal.find("@")
                if atloc != -1:
                    user_realm = nnKerbPrincipal[atloc:]
            if user_realm is not None:
                args += " " + Machine.getHeadlessUserKeytab(
                    Config.getEnv('USER')) + " " + Config.getEnv(
                        'USER') + '@' + user_realm

        exit_code, stdout = Storm.runStormHdfsTopology(
            TARGET_HIVE_STORM_JAR,
            className,
            args,
            None,
            logoutput=True,
            inBackground=False,
            useStandaloneCmd=useStandaloneCmd)
        logger.info(exit_code)

        ruAssert("Storm", exit_code == 0,
                 "[StormHiveSubmit] %s Failed" % (tcId))
Example #8
 def _postStopAction(cls, service):
     if service == 'hiveserver2':
         logger.info("Hard kill Tez sessions")
         yarn_user = YARN.getYarnUser()
         apps = YARN.getApplicationIDList(state='NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING')
         if len(apps) > 0:
             for app in apps:
                 YARN.killApplicationAs(app, user=yarn_user)
                 time.sleep(5)
         logger.info("Hard kill the HS2 application if still running")
         admin_user = Machine.getAdminUser()
         hosts = cls.getServiceHosts(service)
         port = cls.getHiveserver2ThriftPort()
         for host in hosts:
             pid = Machine.getPIDByPort(port, host=host, user=admin_user)
             if pid:
                 logger.info("Found process for '%s' with PID %d" % (service, pid))
                 Machine.killProcessRemote(pid, host=host, user=admin_user)
                 time.sleep(2)
         logger.info("Hard Kill proc_llap daemon due to BUG-62657")
         allnodes = util.getAllNodes() if Machine.isHumboldt() else Hadoop.getAllNodes()
         for node in allnodes:
             proc_llap_pids = Machine.getProcessListRemote(
                 node, format="%U %p %P %a", filter="proc_llap", logoutput=True
             )
             if len(proc_llap_pids) != 0:
                 proc_llap_pid = Machine.getPidFromString(proc_llap_pids[0], yarn_user)
                 if proc_llap_pid:
                     logger.info("Found proc_llap process with PID %d on %s" % (proc_llap_pid, node))
                     Machine.killProcessRemote(proc_llap_pid, host=node, user=admin_user)
                     time.sleep(2)
Example #9
    def getHSIInstances(cls):
        """
        Returns the list of active and passive Hive Server Interactive instances.
        """

        hsi_dict = {'active': [], 'passive': []}
        zkhosts = Zookeeper.getZKHosts()
        if Hadoop.isSecure():
            zNode = 'hs2ActivePassiveHA-sasl'
        else:
            zNode = 'hs2ActivePassiveHA-unsecure'
        exit_code, stdout = Zookeeper.runZKCli("ls /%s/instances" % zNode, server=zkhosts[0])
        instances = stdout.split('\n')[-1]
        logger.info(instances)
        if instances == '[]':
            return hsi_dict
        for each_bracket in ['[', ']']:
            instances = instances.replace(each_bracket, '')
        instances_list = instances.split(', ')
        for each_instance in instances_list:
            exit_code, out = Zookeeper.runZKCli("get /%s/instances/%s" % (zNode, each_instance), server=zkhosts[0])
            for line in out.split("\n"):
                match = re.search(".*JSONServiceRecord.*", line)
                if match:
                    json_data = line
                    break
            instance_dict = ast.literal_eval(json_data)
            instance_host = instance_dict['hive.server2.thrift.bind.host']
            if instance_dict['internal'][0]['api'] == 'passiveEndpoint':
                hsi_dict['passive'].append(instance_host)
            else:
                hsi_dict['active'].append(instance_host)
        logger.info(hsi_dict)
        return hsi_dict
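A short usage sketch; HiveHA is an assumed name for the class that carries this classmethod:

instances = HiveHA.getHSIInstances()  # HiveHA is an illustrative class name
logger.info("Active HSI hosts: %s" % instances['active'])
logger.info("Passive HSI hosts: %s" % instances['passive'])
if not instances['active']:
    logger.warning("No active Hive Server Interactive instance registered in ZooKeeper")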
Example #10
def getClasspathForJdbcClient():
    jdbc_standalone_jar = getStandaloneHiveJdbcJar()
    assert jdbc_standalone_jar, "No JDBC standalone jar found"
    classpath = [jdbc_standalone_jar]
    hadoop_home = Config.get('hadoop', 'HADOOP_HOME')
    if Hadoop.isHadoop2() and Machine.type() == 'Windows':
        hadoop_home = os.path.join(hadoop_home, "share", "hadoop", "common")
    hadoop_common_jar = util.findMatchingFiles(hadoop_home, "hadoop-common-*[!(tests)].jar", depth=1)
    assert len(hadoop_common_jar) > 0, "No hadoop-common.jar found"
    classpath.append(hadoop_common_jar[0])
    if Hadoop.isSecure():
        hadoop_auth_jar = util.findMatchingFiles(hadoop_home, "hadoop-auth-*[!(tests)].jar", depth=1)
        assert len(hadoop_auth_jar) > 0, "No hadoop-auth.jar found"
        classpath.append(hadoop_auth_jar[0])
    classpath.append(Config.get('hadoop', 'HADOOP_CONF'))
    return (os.pathsep).join(classpath)
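A sketch of how the returned classpath might be used to launch a JDBC client. The main class and connection string are placeholders; Config.get and Machine.run are used as they appear elsewhere on this page:

java_home = Config.get('machine', 'JAVA_HOME')
classpath = getClasspathForJdbcClient()
# 'HiveJdbcClient' and the JDBC URL below are illustrative placeholders.
java_cmd = '%s -cp %s HiveJdbcClient "jdbc:hive2://localhost:10000/default"' % (
    os.path.join(java_home, 'bin', 'java'), classpath)
Machine.run(java_cmd, logoutput=True)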
Example #11
    def __init__(self, host='localhost', port=None, isHttps=False):
        if port is None:
            if Hadoop.isEncrypted() or (Ambari.is_ambari_encrypted() and not Machine.isHumboldt()):
                port = 8443
                isHttps = True
            else:
                port = 8080
        if isHttps or self.isCloudbreak():
            self.baseUrl = 'https://' + host
        else:
            self.baseUrl = 'http://' + host

        if self.isCloudbreak():
            self.baseUrl = self.baseUrl + '/ambari'
        else:
            self.baseUrl = self.baseUrl + ':' + str(port)

        if Machine.isHumboldt():
            self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin') + ':HdpCli123!'
            ambari_gateway = Config.get('machine', 'GATEWAY').replace("-ssh", "")
            self.baseUrl = 'https://%s' % ambari_gateway
        elif Machine.getInstaller() == 'cloudbreak':
            self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin') + ':cloudbreak1'
        else:
            self.username_password = Config.get('ambari', 'AMBARI_USERNAME', 'admin'
                                                ) + ':' + Config.get('ambari', 'AMBARI_PASSWORD', 'admin')

        self.urlLogin = self.baseUrl + '#/login'
        self.urlGetClusters = self.baseUrl + '/api/v1/clusters'
        self.urlGetAmbClusters = self.baseUrl + '/api/v1/services'
        self.urlConfig = '/configurations'
        self.backupDataJson = dict()
        self.logger = logging.getLogger(__name__)
Example #12
 def run_client_smoketest(cls, config=None, env=None):
     '''
     Run a wordcount job, passing environment variables.
     :param config: Configuration location
     :param env: Environment variables to set
     '''
     logger.info("**** Running HDFS CLI Test ****")
     from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     UpgradePerNode.reportProgress(
         "[INFO][HDFS][ClientSmoke] CLI test for HDFS started ")
     if not cls._SmokeInputDir:
         cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
     SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput_cli'
     HDFS.deleteDirectory(SmokeOutputDir,
                          Config.get('hadoop', 'HADOOPQA_USER'))
     jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
         Config.get('hadoop',
                    'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name",
         cls._queue, cls._SmokeInputDir, SmokeOutputDir)
     exit_code, stdout = Hadoop.run(jobCmd, env=env)
     ruAssert("HDFS", exit_code == 0, "[ClientSmoke] Hdfs smoketest failed")
     exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
     ruAssert("HDFS", exit_code == 0,
              "[ClientSmoke] could not delete: " + SmokeOutputDir)
     UpgradePerNode.reportProgress(
         "[INFO][HDFS][ClientSmoke] CLI test for HDFS Finished ")
Example #13
    def runas(cls,
              user,
              cmd,
              cwd=None,
              env=None,
              logoutput=True,
              runInBackground=False):
        runCmd = Config.get('pig', 'PIG_CMD') + " " + cmd
        # initialize env
        if not env:
            env = {}
        # get kerberos ticket
        if Hadoop.isSecure():
            if user is None:
                user = Config.getEnv('USER')
            kerbTicket = Machine.getKerberosTicket(user)
            env['KRB5CCNAME'] = kerbTicket
            user = None

        if runInBackground:
            return Machine.runinbackgroundAs(user, runCmd, cwd=cwd, env=env)
        else:
            return Machine.runas(user,
                                 runCmd,
                                 cwd=cwd,
                                 env=env,
                                 logoutput=logoutput)
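The Kerberos handling above is a pattern that recurs throughout these snippets: resolve the target user, export that user's ticket cache through KRB5CCNAME, then run the command as the current account. A stripped-down sketch of the same pattern, using only helpers that already appear in these examples:

def run_with_kerberos(cmd, user=None, env=None):
    env = env or {}
    if Hadoop.isSecure():
        if user is None:
            user = Config.getEnv('USER')
        # point the child process at the user's ticket cache, then drop the
        # explicit user so the command runs under the current account
        env['KRB5CCNAME'] = Machine.getKerberosTicket(user)
        user = None
    return Machine.runas(user, cmd, env=env, logoutput=True)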
Example #14
 def share_lib_update(cls, fs=None):
     # only need to do this on windows
     oozie_server = cls.getOozieUrl()
     cmd = "  admin -oozie " + oozie_server + "  -sharelibupdate"
     if Hadoop.isSecure():
         cmd = " -Doozie.auth.token.cache=false " + cmd
     return cls.runas(cls._oozie_user, cmd)
Example #15
 def getIsSecure(cls):
     _security_prop_value = Hadoop.getConfigValue("hadoop.security.authentication", "kerberos")
     # secure clusters report "kerberos" for hadoop.security.authentication
     return _security_prop_value == "kerberos"
Example #16
 def setup(cls, S3_AWS_ACCESS_KEY=None, S3_AWS_SECRET=None):
     Machine.installPackageWithPip(packages="awscli",
                                   hosts=Hadoop.getAllNodes(),
                                   logoutput=True)
     aws_home = "/root/.aws"
     if not os.path.exists(aws_home):
         Machine.makedirs(ADMIN_USER, GATEWAY_NODE, "/root/.aws", ADMIN_PWD)
         util.writeToFile("[default]\nregion = us-west-2\noutput=json",
                          os.path.join(ARTIFACTS_DIR, "config"))
         if S3_AWS_ACCESS_KEY:
             cls._aws_access_key_id = S3_AWS_ACCESS_KEY
         else:
             cls._aws_access_key_id = Config.get('machine',
                                                 'S3_AWS_ACCESS_KEY')
         if S3_AWS_SECRET:
             cls._aws_secret_access_key = S3_AWS_SECRET
         else:
             cls._aws_secret_access_key = Config.get(
                 'machine', 'S3_AWS_SECRET')
         util.writeToFile(
             "[default]\naws_access_key_id = %s\naws_secret_access_key = %s"
             % (cls._aws_access_key_id, cls._aws_secret_access_key),
             os.path.join(ARTIFACTS_DIR, "credentials"))
         Machine.runas(
             ADMIN_USER,
             "chown  %s '%s/config'" % (ADMIN_USER, ARTIFACTS_DIR),
             GATEWAY_NODE, ADMIN_PWD)
         Machine.runas(
             ADMIN_USER,
             "chown  %s '%s/credentials'" % (ADMIN_USER, ARTIFACTS_DIR),
             GATEWAY_NODE, ADMIN_PWD)
         Machine.copy(os.path.join(ARTIFACTS_DIR, "config"), aws_home,
                      ADMIN_USER, ADMIN_PWD)
         Machine.copy(os.path.join(ARTIFACTS_DIR, "credentials"), aws_home,
                      ADMIN_USER, ADMIN_PWD)
Example #17
def getNameNodeURL(nameservice2=False):
    if Hadoop.isEncrypted():
        baseUrl = "https://%s" % (HDFS.getNamenodeHttpsAddress(nameservice2))
    else:
        baseUrl = "http://%s" % (HDFS.getNamenodeHttpAddress(nameservice2))
    logger.info("URL being returned is - %s" % baseUrl)
    return baseUrl
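For illustration, the URL returned above can be fed to the standard WebHDFS REST API. The requests dependency and the disabled certificate verification are assumptions about the test environment, not something shown in the snippet:

import requests  # assumed to be available on the test gateway

base_url = getNameNodeURL()
resp = requests.get("%s/webhdfs/v1/tmp?op=LISTSTATUS" % base_url, verify=False)
logger.info("WebHDFS LISTSTATUS returned HTTP %d" % resp.status_code)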
Example #18
def balancerModifyConfig(duReservedValue):
    """
    Create /tmp/hadoopConf in all nodes
    :param duReservedValue:
    :return:
    """
    Hadoop.modifyConfig(
        {
            'hdfs-site.xml': {
                'dfs.datanode.du.reserved': duReservedValue,
                'dfs.replication': 1,
                'dfs.namenode.heartbeat.recheck-interval': 5000,
                'dfs.namenode.stale.datanode.interval': 1000,
                'dfs.namenode.replication.interval': 1
            },
        }, {'services': ['all']})
Example #19
 def verifyLongRunningJob(cls):
     '''
     Validate the long-running background job after all component upgrades have finished.
     '''
     from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     logger.info("Stop the Flume agents before verification")
     open(cls._data_stop, 'a').close()
     time.sleep(60)
     agent1.stop()
     agent2.stop()
     time.sleep(60)
     logger.info("Verifying the sinked data from Flume agent")
     exit_code, stdout, stderr = Hadoop.runas(cls._hdfs_user,
                                              "dfs -cat %s/*" %
                                              cls._hdfs_test_dir,
                                              logoutput=False,
                                              stderr_as_stdout=False)
     if exit_code != 0:
         logger.error(
             "Following error during the HDFS cat while fetching Flume data: %s"
             % stderr)
     if not util.compareOutputToFileIgnoreDupsAndOrder(
             stdout, cls._data_file):
         UpgradePerNode.reportProgress(
             "[FAILED][FLUME][BGJob] Long running test for Flume failed while verifying data"
         )
     else:
         UpgradePerNode.reportProgress(
             "### [PASSED][FLUME][BGJob] Long running test validation for Flume passed ####"
         )
Example #20
    def createClusterEntities(cls, colo, desc, name):
        try:
            from beaver.component.falcon import Falcon
        except ImportError:
            ## Import fails when Falcon is not installed on this machine. Nothing to do
            return

        from beaver.component.hadoop import Hadoop, HDFS, YARN
        write_endpoint = Hadoop.getFSDefaultValue()
        webhdfs_scheme = 'webhdfs'
        if HDFS.isHttpsEnabled():
            webhdfs_scheme = 'swebhdfs'
        read_endpoint = '%s://%s:%s' % (
            webhdfs_scheme, write_endpoint.split('/')[2].split(':')[0],
            HDFS.getNNWebPort())
        execute_endpoint = YARN.getResourceManager()
        falconNode = Falcon.get_falcon_server()

        from beaver.component.oozie import Oozie
        oozieUrl = Oozie.getOozieUrl()
        entityText = "<?xml version=\"1.0\"?>" \
                     "<cluster colo=\"" + colo + "\" description=\"" + desc + "\" name=\"" + name + "\" " \
                     "xmlns=\"uri:falcon:cluster:0.1\"> " \
                        "<interfaces> " \
                            "<interface type=\"readonly\" endpoint=\""+read_endpoint+"\" version=\"0.20.2\"/> " \
                            "<interface type=\"write\" endpoint=\""+write_endpoint+"\" version=\"0.20.2\"/> " \
                            "<interface type=\"execute\" endpoint=\"" + execute_endpoint + "\" version=\"0.20.2\"/> " \
                            "<interface type=\"workflow\" endpoint=\"" + oozieUrl + "\" version=\"3.1\"/>" \
                            "<interface type=\"messaging\" endpoint=\"" \
                                "tcp://" + falconNode + ":61616?daemon=true\" version=\"5.1.6\"/>" \
                        "</interfaces>" \
                        "<locations>" \
                            "<location name=\"staging\" path=\"/apps/falcon/" + name + "/staging\" />" \
                            "<location name=\"temp\" path=\"/tmp\" />" \
                            "<location name=\"working\" path=\"/apps/falcon/" + name + "/working\" />" \
                        "</locations>" \
                        "<ACL owner=\"" + cls._job_user + "\" group=\"users\" permission=\"0755\"/>"
        if Hadoop.isSecure():
            realm = HDFS.getConfigValue(
                'dfs.namenode.kerberos.principal').split('@')[1]
            entityText += "<properties> <property name=\"dfs.namenode.kerberos.principal\" value=\"nn/_HOST@" + realm + "\"/> </properties>"
        entityText += "</cluster>"
        textFile = open(os.path.join(cls._local_workspace, name + ".xml"), "w")
        textFile.write("%s" % entityText)
        textFile.close()

        return
Example #21
def getLLAPDaemonPidsHosts():
    hosts = []
    llapdaemon_pids = []
    if not Machine.isHumboldt():
        nodes = Hadoop.getAllNodes()
    else:
        nodes = HDFS.getDatanodes()
    for node in nodes:
        pids = Machine.getProcessListRemote(
            node, format="%U %p %P %a", filter="org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon", logoutput=True
        )
        if pids:
            if Hadoop.isSecure():
                pid = Machine.getPidFromString(pids[0], Config.get('hive', 'HIVE_USER'))
            else:
                pid = Machine.getPidFromString(pids[0], Config.get('hadoop', 'YARN_USER'))
            llapdaemon_pids.append(pid)
            hosts.append(node)
    return llapdaemon_pids, hosts
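A usage sketch for the helper above; the commented-out kill step mirrors the _postStopAction snippet in Example #8:

pids, hosts = getLLAPDaemonPidsHosts()
admin_user = Machine.getAdminUser()
for pid, host in zip(pids, hosts):
    logger.info("LLAP daemon PID %s running on %s" % (pid, host))
    # To hard-kill each daemon, as done in _postStopAction above:
    # Machine.killProcessRemote(pid, host=host, user=admin_user)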
Example #22
def insertFileIntoHdfs(fileName):
    pathFileName = '/user/' + HADOOPQA_USER + '/' + fileName
    if not HDFS.fileExists(pathFileName):
        sourceFile = DATA_PATH + '/' + fileName
        putCmd = "dfs -put " + sourceFile + ' ' + pathFileName
        out = Hadoop.run(putCmd)
        return out
Example #23
def getAmbariURL():
    ambariHost = socket.getfqdn()
    if Hadoop.isEncrypted():
        baseUrl = "https://%s:8443" % (ambariHost)
    else:
        baseUrl = "http://%s:8080" % (ambariHost)
    logger.info("URL being returned is - %s" % baseUrl)
    return baseUrl
Example #24
    def setup_storm_hdfs_topology(cls, useStandaloneCmd):
        storm_version = Storm.getVersion(useStandaloneCmd=True)
        with open(HDFS_CONFIG_FILE, 'w') as file_obj:
            if Hadoop.isSecure():
                file_obj.write('hdfs.keytab.file: ' +
                               Machine.getHeadlessUserKeytab(user=HADOOPQA_USER) + '\n')
                file_obj.write('hdfs.kerberos.principal: ' +
                               Machine.get_user_principal(user=HADOOPQA_USER) + '\n')

        HDFS.createDirectory("/tmp/mySeqTopology", HDFS_USER, "777", False)
        HDFS.createDirectory("/tmp/dest", HDFS_USER, "777", False)
        HDFS.createDirectory("/tmp/dest2", HDFS_USER, "777", False)
        HDFS.createDirectory("/tmp/foo", HDFS_USER, "777", False)
        HDFS.createDirectory("/tmp/trident", HDFS_USER, "777", False)
        HDFS.createDirectory("/tmp/trident-seq", HDFS_USER, "777", False)

        Machine.copy(JAVA_HDFS_SRC_DIR,
                     LOCAL_HDFS_WORK_DIR,
                     user=None,
                     passwd=None)
        if not Machine.isWindows():
            (exit_code, _) = Maven.run('package',
                                       cwd=LOCAL_HDFS_WORK_DIR,
                                       env={
                                           HADOOP_VERSION_MAVEN_PARAMETER:
                                           HADOOP_VERSION,
                                           STORM_VERSION_MAVEN_PARAMETER:
                                           storm_version,
                                           HADOOP_CONF_MAVEN_PARAMETER:
                                           HADOOP_CONF,
                                           HDFS_FILE_MAVEN_PARAMETER:
                                           HDFS_FILE,
                                           HADOOP_CORE_MAVEN_PARAMETER:
                                           HADOOP_CONF,
                                           CORE_FILE_MAVEN_PARAMETER:
                                           CORE_FILE,
                                           PUBLIC_REPO_MAVEN_PARAMETER:
                                           Maven.getPublicRepoUrl()
                                       })
        else:
            filepath = os.path.join(MOD_CONF_PATH, "core-site.xml")
            (exit_code, _) = Maven.run(
                'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s'
                % (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
                   STORM_VERSION_MAVEN_PARAMETER, storm_version,
                   HADOOP_CONF_MAVEN_PARAMETER, HADOOP_CONF,
                   HDFS_FILE_MAVEN_PARAMETER, HDFS_FILE,
                   HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
                   CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
                   PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl()),
                cwd=LOCAL_HDFS_WORK_DIR)
        ruAssert("Storm", exit_code == 0,
                 "[StormHDFSSetup] maven package command failed")
Example #25
 def getAllNodes(cls):
     '''
     Get all nodes according to the platform used
     '''
     if Machine.isHumboldt():
         allNodes = util.getAllNodes()
     else:
         allNodes = Hadoop.getAllNodes()
     return allNodes
Example #26
 def getBaseUrl(self):
     from beaver.component.ambari import Ambari
     GRAFANA_HOST = Ambari.getHostsForComponent('METRICS_GRAFANA')[0]
     if Hadoop.isEncrypted() or Machine.isHumboldt():
         GRAFANA_URL = "https://%s:3000/dashboard/db/hbase-tuning" % (GRAFANA_HOST)
     else:
         GRAFANA_URL = "http://%s:3000/dashboard/db/hbase-tuning" % (GRAFANA_HOST)
     self.base_url = GRAFANA_URL
     return self.base_url
Example #27
 def background_job_setup(cls, runSmokeTestSetup=True, config=None):
     '''
     Setup for background long running job
     :param runSmokeTestSetup: Runs smoke test setup if set to true
     '''
     from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     UpgradePerNode.reportProgress(
         "[INFO][FLUME][BGJobSetup] Long running job setup for Flume component started"
     )
     from beaver.component.flume import Agent
     global agent1
     global agent2
     if not os.path.exists(cls._local_work_dir):
         os.mkdir(cls._local_work_dir)
     shutil.copy(cls._flume_datagen_src, cls._local_work_dir)
     agent1 = Agent(cls._local_work_dir)
     agent2 = Agent(cls._local_work_dir)
     for outdir in (cls._agent1_chkpt_dir, cls._agent1_data_dir,
                    cls._agent2_chkpt_dir, cls._agent2_data_dir):
         os.mkdir(outdir)
     logger.info("Preparing the Flume configs for long running test")
     propertyMap = {}
     namenode = Hadoop.getFSDefaultValue()
     propertyMap['agent2.sinks.hdfsSink.hdfs.path'] = "%s%s" % (
         namenode, cls._hdfs_test_dir)
     if Hadoop.isSecure():
         if Config.hasOption('machine', 'USER_REALM'):
             user_realm = Config.get('machine', 'USER_REALM', '')
         else:
             nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
             atloc = nnKerbPrincipal.find("@")
             if atloc != -1:
                 user_realm = nnKerbPrincipal[atloc:]
         if user_realm:
             propertyMap[
                 'agent2.sinks.hdfsSink.hdfs.kerberosPrincipal'] = cls._test_user + '@' + user_realm
         propertyMap[
             'agent2.sinks.hdfsSink.hdfs.kerberosKeytab'] = Machine.getHeadlessUserKeytab(
                 cls._test_user)
     util.writePropertiesToFile(
         os.path.join(cls._flume_test_conf, 'longrunning.properties'),
         cls._flume_test_src, propertyMap)
Example #28
def verifyOozieAppsAndJobsSucceeded(workflowIds,
                                    logPrefix,
                                    localDir,
                                    testMap,
                                    action_name='wc',
                                    checkJob=True):
    '''
    Verifies that all apps and jobs submitted/created via Oozie passed all the validations.
    :param workflowIds: List of workflow ids to verify.
    :param logPrefix: Log prefix for YARN app logs.
    :param localDir: Path to the local log dir.
    :return: Bool status indicating whether validation succeeded.
    '''
    appIds = []
    jobIds = []
    dLog = {}
    appStatus = True
    jobStatus = True
    wprStatus = True

    # check the job and app status for each workflow we launched.
    if Hadoop.isHadoop2():
        # get all the app and job ids
        for workflowId in workflowIds:
            if action_name != 'None':
                stdout = Oozie.getJobInfo('%s@%s' % (workflowId, action_name),
                                          verbose=True,
                                          retry=True)
            else:
                stdout = Oozie.getJobInfo('%s' % (workflowId),
                                          verbose=True,
                                          retry=True)
            ids = Oozie.getJobAndAppIds(stdout)
            for id in ids:
                appIds.append(id['application'])
                jobIds.append(id['job'])
        # get the app and job status for all the jobs we found
        appStatus, appLog = YARN.checkAppsSucceeded(appIds,
                                                    logPrefix=logPrefix,
                                                    localDir=localDir)
        dLog.update(appLog)
        if checkJob:
            jobStatus, jobLog = YARN.checkJobsSucceeded(jobIds)
            dLog.update(jobLog)
        for key, value in dLog.items():
            logger.info("%s -> %s" % (key, value))

        wprStatus, d = verifyWorkPreservingRMRestart(jobIds, testMap)
        for k, v in d.items():
            logger.info("%s -> %s" % (k, v))

    logger.info("appStatus: %s jobStatus: %s wprStatus: %s" %
                (appStatus, jobStatus, wprStatus))
    return appStatus and jobStatus and wprStatus
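A hedged invocation sketch for the verifier above. The workflow id is a placeholder, and passing testMap=None assumes the work-preserving-restart check tolerates an empty map:

workflow_ids = ['0000001-000000000000000-oozie-oozi-W']  # placeholder workflow id
ok = verifyOozieAppsAndJobsSucceeded(workflow_ids,
                                     logPrefix='oozie_wc',
                                     localDir=Config.getEnv('ARTIFACTS_DIR'),
                                     testMap=None,
                                     action_name='wc')
assert ok, "Oozie workflow validation failed"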
Example #29
    def runOozieJobsCmdAs(cls,
                          user,
                          cmd,
                          cwd=None,
                          env=None,
                          logoutput=True,
                          retry=False,
                          num_of_retries=5,
                          wait=30,
                          oozie_server=None):
        if not env:
            env = {}
        if Hadoop.isSecure():
            if user is None: user = Config.getEnv('USER')
            kerbTicket = Machine.getKerberosTicket(user)
            env['KRB5CCNAME'] = kerbTicket
            user = None

        # if the oozie server was not provided, look it up
        logger.info("OOZIE SERVER:%s" % oozie_server)
        if not oozie_server:
            oozie_server = cls.getOozieUrl()
        logger.info("OOZIE SERVER:%s" % oozie_server)
        env['JAVA_HOME'] = Config.get("machine", "JAVA_HOME")
        if Machine.type() == 'Windows':
            paramsList = cmd.split()
            escapedCmd = ""
            for param in paramsList:
                if param[0] != '"' and param[-1] != '"':
                    escapedCmd = escapedCmd + "\"" + param + "\"" + " "
                else:
                    escapedCmd = escapedCmd + param + " "

            oozie_cmd = OOZIE_CMD + " jobs -oozie " + oozie_server + "  " + escapedCmd
        else:
            oozie_cmd = OOZIE_CMD + " jobs -oozie " + oozie_server + "  " + cmd
        exit_code, output = Machine.runas(user,
                                          oozie_cmd,
                                          cwd=cwd,
                                          env=env,
                                          logoutput=logoutput)
        count = 1
        # Handle retries if the user selected retry
        while retry and exit_code != 0 and count < num_of_retries:
            exit_code, output = Machine.runas(user,
                                              oozie_cmd,
                                              cwd=cwd,
                                              env=env,
                                              logoutput=logoutput)
            count += 1
            time.sleep(wait)
        return exit_code, output
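The retry handling in runOozieJobsCmdAs reduces to a simple run-then-retry loop: execute once, and while the exit code is non-zero and attempts remain, wait and re-run. A standalone, framework-free sketch of that pattern:

import subprocess
import time

def run_with_retries(cmd, num_of_retries=5, wait=30):
    """Run a shell command, retrying on non-zero exit codes."""
    exit_code = subprocess.call(cmd, shell=True)
    count = 1
    while exit_code != 0 and count < num_of_retries:
        time.sleep(wait)
        exit_code = subprocess.call(cmd, shell=True)
        count += 1
    return exit_code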
Example #30
    def ensure_jns_have_new_txn(cls, nodes, last_tx_id):
        num_of_jns = len(nodes)
        actual_tx_ids = {}
        jns_updated = 0
        protocol = 'http'
        jn_port = '8480'
        if Hadoop.isEncrypted():
            protocol = 'https'
            jn_port = '8481'

        # time out after 3 minutes
        time_out = 3 * 60
        # poll every 10 seconds
        step_time = 10

        itr = int(time_out / step_time)

        for i in range(itr):
            logger.info(
                '******************** Check if all Journal Nodes are updated: Iteration %s of %s *************************'
                % (i + 1, itr))
            for node in nodes:
                # if all JNS are updated break
                if jns_updated == num_of_jns:
                    return

                try:
                    # if JN is already ahead skip it
                    if actual_tx_ids[node] and int(
                            actual_tx_ids[node]) >= last_tx_id:
                        continue
                except KeyError:
                    pass

                # otherwise get the data and compare it
                url = '%s://%s:%s/jmx' % (protocol, node, jn_port)
                actual_tx_ids[node] = util.getJMXData(
                    url, 'Hadoop:service=JournalNode,name=Journal-',
                    'LastWrittenTxId')
                logger.info(
                    '******************** JN: %s LAST TX ID: %s *************************'
                    % (node, last_tx_id))
                if int(actual_tx_ids[node]) >= last_tx_id:
                    jns_updated += 1

            # if all JNS are updated break
            if jns_updated == num_of_jns:
                return

            time.sleep(step_time)

        ruAssert("HDFS", jns_updated == num_of_jns)
Example #31
    def share_lib_setup(cls, fs=None):
        # only need to do this on windows
        if not Machine.isWindows():
            # return exit code of 0 and empty stdout
            return 0, ''

        oozie_setup_cmd = os.path.join(OOZIE_HOME, 'bin', 'oozie-setup.cmd')
        oozie_sharelib_location = os.path.join(OOZIE_HOME, "share")
        if not fs:
            fs = Hadoop.getFSDefaultValue()
        cmd = 'sharelib create -fs %s -locallib %s' % (fs,
                                                       oozie_sharelib_location)
        return cls.runas(cls._oozie_user, cmd, base_cmd=oozie_setup_cmd)
Example #32
    def runLoadGenerator(cls, numOfNodes=1, elapsedTime=100):
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        flagFile = UpgradePerNode._HDFS_FLAG_FILE
        # get the jar again as we don't know what version we will be running this job with.
        HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')

        # load generator
        jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=hdfs -mr %s %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime %s -flagFile %s' % (
            HADOOP_TEST_JAR, numOfNodes, cls._lgTestOutputDir,
            cls._lgTestDataDir, elapsedTime, flagFile)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0, "[BGJobSetup] LoadGenerator failed")
Example #33
    def serverStop(cls, cwd=None, env=None, logoutput=True, user=None):
        cmd = SQOOP2_SERVER + "stop"  # Config.get('sqoop2', 'SQOOP2_CMD')
        osenv = {"JAVA_HOME": Config.get('machine', 'JAVA_HOME')}
        if Hadoop.isSecure():
            if user is None:
                user = Config.getEnv('USER')
            kerbTicket = Machine.getKerberosTicket(user)
            osenv['KRB5CCNAME'] = kerbTicket
            user = None

        if env:
            for key, value in env.items():
                osenv[key] = value
        return Machine.run(cmd, cwd=cwd, env=osenv, logoutput=logoutput)
Example #34
def test_CleanUpOfFilesAfterFailedJob():
    testCaseDescription="testCleanUpOfFilesAfterFailedJob"
    testCaseId = "cleanup03"
    util.displayTestCaseMessage(testCaseDescription,testCaseId)
    fileCreated="FileCreatedByJob.log"
    out=insertFileIntoHdfs(CREATE_FILE)
    assert out[0] == 0
        
    logger.info("Try to get Job Tracker")
    JOBTRACKER = MAPRED.getJobTracker()
    assert JOBTRACKER is not None
    
    logger.info( "Submitting a streaming job that will create a file ")
    localFilePath = getFullPathOfFile(CREATE_FILE_PATH_IN_LOCAL)
    
    hadoopStreamingCmdFormat = 'jar %s -files %s -input %s -output %s -mapper "python %s" -reducer NONE'
    jobJarHadoopStreamingCmd = hadoopStreamingCmdFormat % (HADOOP_STREAMING_JAR, localFilePath, CREATE_FILE_PATH_IN_HADOOP, OUT_PATH_IN_HADOOP, CREATE_FILE)
    logger.info(jobJarHadoopStreamingCmd)    
    out = Hadoop.runInBackground(jobJarHadoopStreamingCmd)    
    time.sleep(20)
    
    logger.info("Try to get job id.....")    
    for i in range(1, 5):
        jobId = MAPRED.getJobID()        
        if validateJobId(jobId):
            break
        time.sleep(10)
    assert jobId.startswith('job_')
    logger.info(" Get JobId: " + jobId + " successfully")
    
    logger.info("Try to get Attempt ID....")   
    attemptId = MAPRED.getAttemptIdsForJobId(jobId)                
    assert attemptId.startswith("attempt_") == True        
    
    logger.info("Try to get Task Tracker...")    
    taskTrackersList = Hadoop.getTasktrackers()
    taskTracker = taskTrackersList[0].rstrip("\n")
    logger.info(" Task Tracker running the map task is " + taskTracker)
    time.sleep(20)
    
    logFileDirList = getLocalDirInfo(taskTracker)
    logger.info("Log file list: " + logFileDirList)
    logFileDirList = logFileDirList.split(',')
    isExistedTempFile = False
    for logFileDir in logFileDirList:        
        logger.info("Directory of log file: " + logFileDir)
        isExistedTempFile = checkJobCreatedTempFileInTT(logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
        if isExistedTempFile:
            break
    assert isExistedTempFile
    # Now fail the job
    getAttemptIdsForJobIdAndStoreInFile(jobId)
    attemptIdCount = MAPRED.checkForNewAttemptIds(jobId)
    assert len(attemptIdCount) != 0
    while len(attemptIdCount) != 0:
        logger.info("Since there are attempt ids, proceeding to fail them")
        MAPRED.failAttempts(attemptIdCount)
        attemptIdCount = MAPRED.checkForNewAttemptIds(jobId)
        
    logger.info("Check job status")    
    isJobFailed = MAPRED.isJobFailed(jobId)
    if not isJobFailed:
        logger.info("The job could not be failed successfully; unable to proceed with the tests")
    assert isJobFailed
    
    isExistedTempFile = checkJobCreatedTempFileInTT(logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
    if isExistedTempFile:
        logger.info("The test case that checks files are cleared after killing the job failed")
        logger.info("The file created by the job still exists even after the job was successfully killed")
    assert not isExistedTempFile
Example #35
def test_CleanUpOfFilesAfterJobCompletionForFilesWithSymLink():
    testCaseDescription="test_CleanUpOfFilesAfterJobCompletionForFilesWithSymLink"
    testCaseId = "cleanup04"
    util.displayTestCaseMessage(testCaseDescription,testCaseId)
    fileCreated = "mysymlink.txt"    
    
    out = insertFileIntoHdfs(CREATE_FILE_2)    
    assert out[0] == 0
    time.sleep(15)
    
    logger.info("Try to get Job Tracker")
    JOBTRACKER = MAPRED.getJobTracker()
    assert JOBTRACKER is not None
    
    localFilePath = getFullPathOfFile(CREATE_FILE_2_PATH_IN_LOCAL)
    
    hadoopStreamingCmdFormat = 'jar %s -files %s -input %s -output %s -mapper "python %s" -reducer NONE'
    jobJarHadoopStreamingCmd = hadoopStreamingCmdFormat % (HADOOP_STREAMING_JAR, localFilePath, CREATE_FILE_2_PATH_IN_HADOOP, OUT_PATH_IN_HADOOP, CREATE_FILE_2)
    logger.info(jobJarHadoopStreamingCmd)    
    out = Hadoop.runInBackground(jobJarHadoopStreamingCmd)    
    time.sleep(15)
    
    logger.info("Try to get job id.....")    
    for i in range(1, 5):
        jobId = MAPRED.getJobID()        
        if validateJobId(jobId):
            break
        time.sleep(10)
    assert jobId.startswith('job_')
    logger.info(" Get JobId: " + jobId + " successfully")
    
    logger.info("Try to get Attempt ID....")   
    attemptId = MAPRED.getAttemptIdsForJobId(jobId)                
    assert attemptId.startswith("attempt_") == True
    
    logger.info("Try to get Task Tracker...")    
    taskTrackersList = Hadoop.getTasktrackers()
    taskTracker = taskTrackersList[0].rstrip("\n")
    logger.info(" Task Tracker running the map task is " + taskTracker)
    time.sleep(40)
    
    logFileDirList = getLocalDirInfo(taskTracker)
    logger.info("Log file list: " + logFileDirList)
    logFileDirList = logFileDirList.split(',')
    isExistedTempFile = False
    for logFileDir in logFileDirList:        
        logger.info("Directory of log file: " + logFileDir)
        isExistedTempFile = checkJobCreatedTempFileInTT(logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
        if isExistedTempFile:
            break
    assert isExistedTempFile
    
    logger.info("Check job is completed or not")
    for i in range(1, 10):
        isJobCompleted = MAPRED.checkForJobCompletion(jobId)
        if isJobCompleted:
            break
        time.sleep(20)
    assert isJobCompleted
    logger.info("Job is completed!")
    
    #  Now check for the file to be cleared off  after the job is complete
    isExistedTempFile = checkJobCreatedTempFileInTT(logFileDir, HADOOPQA_USER, jobId, attemptId, fileCreated, taskTracker)
    if isExistedTempFile:
        logger.info("The test case that checks files are cleared after job completion failed")
        logger.info("The file created by the job still exists even after the job completed successfully")
    assert not isExistedTempFile