def run(self, messages=None, host="localhost", port="9999"):  # pylint: disable=unused-argument
    if not messages:
        messages = ["hello world"]
    # Iterate over the local "messages" (with its default), not self.messages,
    # otherwise the default assignment above is dead code.
    for message in messages:
        Spark.startNetcatServerinBackground(message=message)
        time.sleep(4)
        Spark.stopNetcatServer(message=message)
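# Each message gets its own short-lived netcat server: start it, give the
# socket stream ~4 seconds to be consumed, then tear it down before moving
# on to the next message.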
@classmethod
def start_LongRunning_Federation_HDFS_stream_job(
        cls,
        inputDir,
        outputDir,
        num_executor,
        mode="yarn-client",
        inBackground=True,
        clientfile=None,
        pythonFile="federation_hdfs_wordcount.py",
        srcDir=None,
        keytab=None,
        principal=None):
    """
    Starts the Spark-HDFS streaming application using a python file
    :param inputDir:
    :param outputDir:
    :param num_executor:
    :param mode:
    :param inBackground:
    :param clientfile:
    :param pythonFile: Python file which needs to be run as the spark streaming application
    :param srcDir: Path of the Python file
    :return: (application ID, local client log)
    """
    if clientfile is None:
        Local_clientlog = Spark.createTmpClientFile(pythonFile + "_" + mode)
    else:
        Local_clientlog = Spark.createTmpClientFile(clientfile)
    if pythonFile == "federation_hdfs_wordcount.py":
        srcDir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "spark", "examples", "streaming")
    arg = " %s %s 2>&1 | tee %s" % (inputDir, outputDir, Local_clientlog)
    Spark.submitSparkPyApplication(
        pythonFile,
        mode,
        arg,
        num_executor=num_executor,
        inBackground=inBackground,
        srcDir=srcDir,
        timeout=120,
        clientfile=clientfile,
        conf=None,
        keytab=keytab,
        principal=principal
    )
    with open(Local_clientlog, "r") as f:
        stdout = f.read()
    appId = YARN.getApplicationIDFromStdout(stdout)
    return appId, Local_clientlog
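# Note on the "tee" in arg above: spark-submit's stdout is duplicated into
# Local_clientlog so that the YARN application ID can be scraped from that
# file even when the job is submitted inBackground.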
@classmethod
def run_smoke_test(cls, config=None):
    '''
    Run smoke test for spark
    '''
    logger.info("config = %s", config)
    from beaver.component.spark import Spark
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Spark][Smoke] Smoke test for Spark started ")
    exit_code, _ = Spark.submitSparkApplication("org.apache.spark.examples.SparkPi", "yarn-cluster", "3")
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-cluster mode")
        return
    exit_code, stdout2 = Spark.submitSparkApplication("org.apache.spark.examples.SparkPi", "yarn-client", "3")
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-client mode")
        return
    if Machine.isWindows():
        appName_pi = "SparkPi"
    else:
        appName_pi = "Spark Pi"
    HADOOP_QA = Config.get('hadoop', 'HADOOPQA_USER')
    appId = YARN.getApplicationIDFromStdout(stdout2).strip()
    logger.info("Validate http://<host>:<port>/ws/v1/timeline/spark_event_v01/<appId>")
    Spark.getSparkATSAppUrl(appId)
    time.sleep(30)
    # Spark-ATS check. We will enable it once Ambari enables Spark-ATS by default
    #cls.validate_ApplicationEntry(appId, appName_pi, HADOOP_QA, mode="yarn-client", url=url)
    Spark.hitSparkURL()
    time.sleep(50)
    result_HS_completeApp = Spark.validateSparkHSCompletedApps(appId, appName_pi, HADOOP_QA)
    if not result_HS_completeApp:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Spark HS complete App Validation failed")
        return
    result_HS_Jobs = Spark.validateSparkHSJobs(appId, "1/1", "3/3")
    if not result_HS_Jobs:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Spark HS Job page validation failed")
        return
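# Smoke-test flow at a glance (comments only; timings mirror the sleeps above):
#   1. Submit SparkPi in yarn-cluster mode, then in yarn-client mode.
#   2. Resolve the ATS URL for the yarn-client run and allow ~30s for ATS indexing.
#   3. Hit the Spark web UI, then verify the completed app on the Spark History
#      Server, including the "1/1" and "3/3" progress values on the HS jobs page.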
@classmethod
def Spark_getSparkLogDir(cls, logoutput=True):
    try:
        from beaver.component.spark import Spark
        return Spark.getSparkLogDir()
    except Exception as e:
        if logoutput:
            logger.error("Exception occurred during Spark_getSparkLogDir() call: %s", str(e))
        return None
@classmethod
def start_LongRunning_HDFS_stream_job(cls, inputDir, num_executor, mode="yarn-client", inBackground=True, clientfile=None):
    '''
    Start Spark-HDFS Streaming application
    '''
    className = "org.apache.spark.examples.streaming.HdfsWordCount"
    # The LZO jar is only attached for yarn-client mode on non-ASV clusters
    if mode == "yarn-client" and not HDFS.isASV():
        jars = Spark.getLzoJar()
    else:
        jars = None
    if clientfile is None:
        Local_clientlog = Spark.createTmpClientFile(className + "_" + mode)
    else:
        Local_clientlog = Spark.createTmpClientFile(clientfile)
    arg = " %s 2>&1 | tee %s" % (inputDir, Local_clientlog)
    if Hadoop.isSecure():
        keytab = Machine.getHeadlessUserKeytab(Config.get('hadoop', 'HADOOPQA_USER'))
        principal = Machine.get_user_principal(Config.get('hadoop', 'HADOOPQA_USER'))
    else:
        keytab = None
        principal = None
    Spark.submitSparkApplication(
        className,
        mode,
        arg,
        jars=jars,
        num_executor=num_executor,
        inBackground=inBackground,
        timeout=120,
        keytab=keytab,
        principal=principal
    )
    with open(Local_clientlog, "r") as f:
        stdout = f.read()
    appId = YARN.getApplicationIDFromStdout(stdout)
    return appId, Local_clientlog
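# Typical lifecycle (an illustrative sketch; the HDFS paths and the local word
# file are placeholders, not fixtures defined in this module):
#   appId, clientlog = cls.start_LongRunning_HDFS_stream_job("/user/hrt_qa/stream-in", num_executor=2)
#   HDFS.copyFromLocal(localWordFile, "/user/hrt_qa/stream-in/")   # feed the stream
#   cls.validate_HDFS_stream_job(appId, "yarn-client", patterns=["hello"],
#                                expected_count=1, clientfile=clientlog)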
@classmethod
def getdnjars(cls):
    # Workaround BUG-58287 org.datanucleus.api.jdo.JDOPersistenceManagerFactory
    spark_lib_dir = os.path.join(Spark.getSparkHome(), "lib")
    dn_jars = util.findMatchingFiles(spark_lib_dir, "datanucleus*.jar")
    # Build a comma-separated list of the datanucleus jars
    jars = ",".join(dn_jars)
    return jars
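# The returned string is shaped for spark-submit's --jars option, e.g. (sketch,
# reusing the submit helper seen elsewhere in this module):
#   jars = cls.getdnjars()
#   Spark.submitSparkApplication(className, mode, arg, jars=jars, ...)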
def __init__(self, is_proxy=False):
    self.is_proxy = is_proxy
    self.ambari_connector = ZeppelinAmbariAPIUtil()
    self.driver = SparkUIClientSession.__instantiate_webdriver()
    assert self.driver, "Could not initialize selenium webdriver"
    if self.is_proxy:
        self.shs_proxy_url = self.get_shs_proxy_url()
        assert self.shs_proxy_url, "Failed to find SHS knox proxy URL"
    self.shs_direct_url = Spark.getSparkHistoryServerUrl()
    assert self.shs_direct_url, "Failed to find SHS direct URL"
    self.ambari_url = self.get_ambari_url()
    assert self.ambari_url, "Failed to find ambari web URL"
@classmethod
def validate_HDFS_stream_job(cls, appId, mode, patterns, expected_count, clientfile=None):
    '''
    Count the occurrences of each word in the yarn logs.
      -> check clientfile for yarn-client mode
      -> check yarn logs for yarn-cluster mode
    appId : application Id
    mode : mode of execution
    patterns : list of words to check in the log
    expected_count : the expected number of occurrences of each word in patterns
    clientfile : jobclient output for the app
    '''
    if mode == "yarn-client":
        file_to_read = clientfile
    else:
        # yarn-cluster mode: pull the aggregated yarn logs into a tmp file
        file_to_read = Spark.createTmpClientFile(appId + ".log")
        YARN.getLogsApplicationID(
            appId,
            appOwner=None,
            nodeAddress=None,
            containerId=None,
            logoutput=False,
            grepFilter=None,
            pipeToFileOutput=file_to_read,
            config=None
        )
    # initialize the word_count dictionary
    word_count = {}
    for p in patterns:
        word_count[p] = 0
    with open(file_to_read) as f:
        for line in f:
            for word in line.split():
                if word in word_count:
                    word_count[word] += 1
    logger.info(word_count)
    for key, value in word_count.iteritems():
        assert value >= expected_count, "%s wordcount is %s. expected_count is %s" % (key, value, expected_count)
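# Example (sketch): after HdfsWordCount has processed input containing the
# token "hello" three times, the following should pass for a yarn-cluster run:
#   cls.validate_HDFS_stream_job(appId, "yarn-cluster",
#                                patterns=["hello"], expected_count=3)
# Note the match is on whitespace-split tokens, so "hello," would not count.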
@classmethod
def validate_ApplicationEntry(cls, appId, appName, appUser, mode="yarn-client", url=None):
    '''
    Validate the Application entry in ATS
    :param appId: Application Id
    :param appName: Application name
    :param appUser: Application user
    :param url: optional ATS URL; entities come from getCorrectApplicationJsonData
    :return:
    '''
    from beaver.component.spark import Spark
    if not url:
        entities = Spark.getCorrectApplicationJsonData(appId)
    else:
        entities = Spark.getCorrectApplicationJsonData(appId, url, gatherAppSpecificJson=False)
    logger.info("***** entities *****")
    logger.info(entities)
    logger.info("********************")
    if mode == "yarn-cluster":
        ruAssert(
            "Spark", entities["entity"] == YARN.createAttemptIdFromAppId(appId, "1"),
            "[Smoke] attemptid entity not found in ATS"
        )
    else:
        ruAssert("Spark", entities["entity"] == appId, "[Smoke] appid entity not found in ATS")
    ruAssert("Spark", entities["domain"] == "DEFAULT", "[Smoke] domain is not default")
    ruAssert("Spark", entities["entitytype"] == "spark_event_v01", "[Smoke] entitytype is not spark_event_v01")
    ruAssert(
        "Spark", entities["primaryfilters"]["endApp"] == ['SparkListenerApplicationEnd'],
        "[Smoke] endApp event missing from ATS"
    )
    ruAssert(
        "Spark", entities["primaryfilters"]["startApp"] == ['SparkListenerApplicationStart'],
        "[Smoke] startApp event missing from ATS"
    )
    if not Machine.isLinux() and appName == "Spark Pi":
        ruAssert("Spark", entities["otherinfo"]["appName"] == "SparkPi", "[Smoke] otherinfo -> appName is missing from ATS")
    else:
        ruAssert("Spark", entities["otherinfo"]["appName"] == appName, "[Smoke] otherinfo -> appName is missing from ATS")
    ruAssert("Spark", entities["otherinfo"]["appUser"] == appUser, "[Smoke] otherinfo -> appUser is missing from ATS")
    ruAssert(
        "Spark", Spark.matchparamater(entities["otherinfo"]["startTime"], "[0-9]{13}"),
        "[Smoke] otherinfo -> startTime is missing from ATS"
    )
    ruAssert(
        "Spark", Spark.matchparamater(entities["otherinfo"]["endTime"], "[0-9]{13}"),
        "[Smoke] otherinfo -> endTime is missing from ATS"
    )
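# Shape of the ATS entity this validates, abridged and illustrative only
# (field values below are made-up examples derived from the assertions above):
#   {
#     "entity": "application_1400000000000_0001",   # attempt id in yarn-cluster mode
#     "entitytype": "spark_event_v01",
#     "domain": "DEFAULT",
#     "primaryfilters": {"startApp": ["SparkListenerApplicationStart"],
#                        "endApp": ["SparkListenerApplicationEnd"]},
#     "otherinfo": {"appName": "Spark Pi", "appUser": "hrt_qa",
#                   "startTime": 1400000000000, "endTime": 1400000000500}
#   }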