def validate_HDFS_stream_job(cls, appId, mode, patterns, expected_count, clientfile=None):
    '''
    Count the occurrences of the given words in the application's YARN logs
    and assert each appears at least expected_count times.

    -> check clientfile for yarn-client mode
    -> check yarn logs for yarn-cluster mode

    appId : application Id
    mode : mode of execution ("yarn-client" or anything else for cluster mode)
    patterns : list of words to check in log
    expected_count : the expected minimum number of occurrences for each word in patterns
    clientfile : jobclient output for app (required when mode == "yarn-client")

    Raises AssertionError if any pattern occurs fewer than expected_count times.
    '''
    if mode == "yarn-client":
        # Client mode: the driver output was already captured in clientfile.
        file_to_read = clientfile
    else:
        # Cluster mode: fetch the aggregated YARN logs into a temp file first.
        file_to_read = Spark.createTmpClientFile(appId + ".log")
        YARN.getLogsApplicationID(
            appId,
            appOwner=None,
            nodeAddress=None,
            containerId=None,
            logoutput=False,
            grepFilter=None,
            pipeToFileOutput=file_to_read,
            config=None)

    # Initialize the word_count dictionary with one zeroed counter per pattern.
    # (Removed the unused `count` local from the original.)
    word_count = {p: 0 for p in patterns}
    with open(file_to_read) as f:
        for line in f:
            for word in line.split():
                # Direct dict membership — no need for .keys().
                if word in word_count:
                    word_count[word] += 1
    logger.info(word_count)
    # BUGFIX: dict.iteritems() is Python 2 only and raises AttributeError on
    # Python 3; .items() works on both code paths we care about.
    for key, value in word_count.items():
        assert value >= expected_count, "%s wordcount is %s. expected_count is %s" % (
            key, value, expected_count)
def collect_application_log_locally(cls, appId, user):
    '''
    Collects application log and save it in Local Dir with <appId>.log filename
    :param appId: Application Id
    :param user: Application Id owner
    '''
    try:
        from beaver.component.hadoop import YARN

        target = os.path.join(cls.LOCAL_TMP_APP_STORAGE, appId + ".log")
        # Skip the (slow) log fetch when a previous run already saved it.
        if Machine.pathExists(None, None, target, None):
            logger.info("%s already present at %s", appId, target)
        else:
            logger.info("Storing syslog of %s in %s", appId, target)
            YARN.getLogsApplicationID(appId, user, None, None, False, None, target)
    except Exception:
        # Best-effort collection: never let log gathering fail the caller,
        # but record the full traceback for later diagnosis.
        logger.error(
            "Exception occured during collect_application_log_locally() call"
        )
        logger.error(traceback.format_exc())