Example 1
    def validate_HDFS_stream_job(cls,
                                 appId,
                                 mode,
                                 patterns,
                                 expected_count,
                                 clientfile=None):
        '''
        Count the occurrences of each word in the YARN logs:
            -> check clientfile for yarn-client mode
            -> check yarn logs for yarn-cluster mode

        appId : application Id
        mode : mode of execution
        patterns : list of words to check in the log
        expected_count : the expected number of occurrences for each word in patterns
        clientfile : jobclient output for the app
        '''
        if mode == "yarn-client":
            # yarn-client mode: the driver output is already in the jobclient file
            file_to_read = clientfile
        else:
            # yarn-cluster mode: pull the aggregated YARN logs into a temp file
            file_to_read = Spark.createTmpClientFile(appId + ".log")
            YARN.getLogsApplicationID(appId,
                                      appOwner=None,
                                      nodeAddress=None,
                                      containerId=None,
                                      logoutput=False,
                                      grepFilter=None,
                                      pipeToFileOutput=file_to_read,
                                      config=None)

        # initialize the word_count dictionary with a zero count per pattern
        word_count = {p: 0 for p in patterns}
        with open(file_to_read) as f:
            for line in f:
                words = line.split()
                for word in words:
                    if word in word_count:
                        word_count[word] += 1

        logger.info(word_count)
        for key, value in word_count.items():
            assert value >= expected_count, "%s wordcount is %s. expected_count is %s" % (
                key, value, expected_count)
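
The tallying loop above can be exercised on its own. Below is a minimal, self-contained sketch of the same count-then-assert logic built on collections.Counter from the standard library; count_patterns and the log path in the usage comment are hypothetical stand-ins, not part of the framework code above.

from collections import Counter

def count_patterns(log_path, patterns, expected_count):
    # Tally every whitespace-separated token in the log file.
    counts = Counter()
    with open(log_path) as f:
        for line in f:
            counts.update(line.split())
    # Keep only the requested words, defaulting to a count of 0.
    word_count = {p: counts[p] for p in patterns}
    for key, value in word_count.items():
        assert value >= expected_count, "%s wordcount is %s. expected_count is %s" % (
            key, value, expected_count)
    return word_count

# hypothetical usage: each marker must appear at least 5 times
# count_patterns("/tmp/application_0001.log", ["SUCCESS", "saved"], 5)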
Example 2
    def collect_application_log_locally(cls, appId, user):
        '''
        Collects the application log and saves it in a local dir with an <appId>.log filename
        :param appId: Application Id
        :param user: Application Id owner
        '''
        try:
            from beaver.component.hadoop import YARN
            filename = os.path.join(cls.LOCAL_TMP_APP_STORAGE, appId + ".log")
            if not Machine.pathExists(None, None, filename, None):
                logger.info("Storing syslog of %s in %s", appId, filename)
                # keyword form of the call in Example 1: fetch the aggregated
                # logs for appId (owned by user) and pipe them into filename
                YARN.getLogsApplicationID(appId, appOwner=user, nodeAddress=None,
                                          containerId=None, logoutput=False,
                                          grepFilter=None, pipeToFileOutput=filename)
            else:
                logger.info("%s already present at %s", appId, filename)
        except Exception:
            logger.error(
                "Exception occurred during collect_application_log_locally() call")
            logger.error(traceback.format_exc())
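
Outside the beaver framework, the same fetch-once pattern can be reproduced with the stock yarn logs CLI. The sketch below is an assumed stand-in for YARN.getLogsApplicationID, not the framework call itself; LOCAL_TMP_APP_STORAGE here is a hypothetical local directory.

import os
import subprocess

LOCAL_TMP_APP_STORAGE = "/tmp/app_logs"  # hypothetical storage dir

def collect_application_log(app_id, user):
    os.makedirs(LOCAL_TMP_APP_STORAGE, exist_ok=True)
    filename = os.path.join(LOCAL_TMP_APP_STORAGE, app_id + ".log")
    if os.path.exists(filename):
        # same short-circuit as above: never re-fetch a log we already hold
        return filename
    # `yarn logs -applicationId <id> -appOwner <user>` prints the aggregated
    # container logs to stdout; redirect that stream into the local file
    with open(filename, "w") as out:
        subprocess.run(
            ["yarn", "logs", "-applicationId", app_id, "-appOwner", user],
            stdout=out, check=True)
    return filename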