def validate_apps(self, local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
    '''
    Validate that the small read/write apps submitted earlier passed
    :param local_dir_name:
    :return:
    '''
    local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), self.local_dir_name)
    appIds = []
    # Collect the application IDs from every client log written under local_dir
    for root, _dirs, filenames in os.walk(local_dir):
        for f in filenames:
            with open(os.path.join(root, f), 'r') as logfile:
                stdout = logfile.read()
            appId = YARN.getApplicationIDFromStdout(stdout, logoutput=False)
            appIds.append(appId)
    # Sleep for 30 seconds before checking app status
    time.sleep(30)
    status, d = YARN.checkAppsSucceeded(appIds, logPrefix=None, useWS=True, localDir=None)
    for app, appStatus in d.items():
        if appStatus != "SUCCEEDED":
            appInfo = YARN.getApplicationInfo(app)
            logger.info(appInfo)
            if appInfo:
                assert appInfo['state'] == 'ACCEPTED', "app is neither in ACCEPTED nor SUCCEEDED state"
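# Illustrative usage sketch only (not part of the original suite): validate_apps is meant
# to run after background jobs have written their client logs under
# ARTIFACTS_DIR/<local_dir_name>. The object name below is hypothetical.
#
#   rw_jobs.local_dir_name = "small_rw_jobs"   # directory walked by validate_apps
#   rw_jobs.validate_apps()                    # asserts each app is SUCCEEDED (or still ACCEPTED)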
def run_smoke_test(cls, config=None):
    '''
    Run smoke test for Spark
    '''
    logger.info("config = %s", config)
    from beaver.component.spark import Spark
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][Spark][Smoke] Smoke test for Spark started ")
    # Run SparkPi in yarn-cluster mode
    exit_code, _ = Spark.submitSparkApplication("org.apache.spark.examples.SparkPi", "yarn-cluster", "3")
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-cluster mode")
        return
    # Run SparkPi in yarn-client mode and keep its stdout for application-ID lookup
    exit_code, stdout2 = Spark.submitSparkApplication("org.apache.spark.examples.SparkPi", "yarn-client", "3")
    if exit_code != 0:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-client mode")
        return
    if Machine.isWindows():
        appName_pi = "SparkPi"
    else:
        appName_pi = "Spark Pi"
    HADOOP_QA = Config.get('hadoop', 'HADOOPQA_USER')
    appId = YARN.getApplicationIDFromStdout(stdout2).strip()
    logger.info("Validate http://<host>:<port>/ws/v1/timeline/spark_event_v01/<appId>")
    Spark.getSparkATSAppUrl(appId)
    time.sleep(30)
    # Spark-ATS check. We will enable it once Ambari enables Spark-ATS by default
    #cls.validate_ApplicationEntry(appId, appName_pi, HADOOP_QA, mode="yarn-client", url=url)
    Spark.hitSparkURL()
    time.sleep(50)
    result_HS_completeApp = Spark.validateSparkHSCompletedApps(appId, appName_pi, HADOOP_QA)
    if not result_HS_completeApp:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Spark HS complete App Validation failed")
        return
    result_HS_Jobs = Spark.validateSparkHSJobs(appId, "1/1", "3/3")
    if not result_HS_Jobs:
        UpgradePerNode.reportProgress("[FAILED][Spark][Smoke] SparkPi Spark HS Job page validation failed")
        return
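# Illustrative call only (hypothetical wiring; `ruSpark` is a placeholder for the enclosing
# class): the smoke test submits SparkPi in yarn-cluster and yarn-client modes, then checks
# the yarn-client run in the Spark History Server.
#
#   ruSpark.run_smoke_test(config=None)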
def start_LongRunning_Federation_HDFS_stream_job(
        cls,
        inputDir,
        outputDir,
        num_executor,
        mode="yarn-client",
        inBackground=True,
        clientfile=None,
        pythonFile="federation_hdfs_wordcount.py",
        srcDir=None,
        keytab=None,
        principal=None):
    """
    Start the Spark-HDFS streaming application implemented in a Python file
    :param inputDir:
    :param outputDir:
    :param num_executor:
    :param mode:
    :param inBackground:
    :param clientfile:
    :param pythonFile: Python file which needs to be run as the Spark streaming application
    :param srcDir: path of the Python file
    :return: (application ID, local client log)
    """
    if clientfile is None:
        Local_clientlog = Spark.createTmpClientFile(pythonFile + "_" + mode)
    else:
        Local_clientlog = Spark.createTmpClientFile(clientfile)
    if pythonFile == "federation_hdfs_wordcount.py":
        srcDir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "spark", "examples", "streaming")
    # Tee the driver output to a local client log so the application ID can be parsed from it
    arg = " %s %s 2>&1 | tee %s" % (inputDir, outputDir, Local_clientlog)
    Spark.submitSparkPyApplication(
        pythonFile,
        mode,
        arg,
        num_executor=num_executor,
        inBackground=inBackground,
        srcDir=srcDir,
        timeout=120,
        clientfile=clientfile,
        conf=None,
        keytab=keytab,
        principal=principal)
    with open(Local_clientlog, "r") as f:
        stdout = f.read()
    appId = YARN.getApplicationIDFromStdout(stdout)
    return appId, Local_clientlog
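# Illustrative usage sketch (assumptions: the HDFS paths below are placeholders and
# `ruSpark` stands in for the enclosing class; neither is defined here):
#
#   app_id, client_log = ruSpark.start_LongRunning_Federation_HDFS_stream_job(
#       inputDir="hdfs://ns1/user/hrt_qa/stream_input",
#       outputDir="hdfs://ns1/user/hrt_qa/stream_output",
#       num_executor=2)
#   # app_id can later be passed to YARN.getApplicationInfo / YARN.checkAppsSucceeded,
#   # and client_log points at the tee'd driver output on the local machine.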
def start_LongRunning_HDFS_stream_job(cls, inputDir, num_executor, mode="yarn-client", inBackground=True, clientfile=None):
    '''
    Start the Spark-HDFS streaming application
    '''
    className = "org.apache.spark.examples.streaming.HdfsWordCount"
    # Include the LZO jar only for yarn-client mode on non-ASV clusters
    if mode == "yarn-client" and not HDFS.isASV():
        jars = Spark.getLzoJar()
    else:
        jars = None
    if clientfile is None:
        Local_clientlog = Spark.createTmpClientFile(className + "_" + mode)
    else:
        Local_clientlog = Spark.createTmpClientFile(clientfile)
    # Tee the driver output to a local client log so the application ID can be parsed from it
    arg = " %s 2>&1 | tee %s" % (inputDir, Local_clientlog)
    if Hadoop.isSecure():
        keytab = Machine.getHeadlessUserKeytab(Config.get('hadoop', 'HADOOPQA_USER'))
        principal = Machine.get_user_principal(Config.get('hadoop', 'HADOOPQA_USER'))
    else:
        keytab = None
        principal = None
    Spark.submitSparkApplication(
        className,
        mode,
        arg,
        jars=jars,
        num_executor=num_executor,
        inBackground=inBackground,
        timeout=120,
        keytab=keytab,
        principal=principal)
    with open(Local_clientlog, "r") as f:
        stdout = f.read()
    appId = YARN.getApplicationIDFromStdout(stdout)
    return appId, Local_clientlog
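# Illustrative usage sketch (hypothetical input path; `ruSpark` stands in for the enclosing class):
#
#   app_id, client_log = ruSpark.start_LongRunning_HDFS_stream_job(
#       inputDir="/user/hrt_qa/hdfs_stream_input", num_executor=2, mode="yarn-client")
#   # On secure clusters the headless keytab/principal for HADOOPQA_USER are resolved
#   # inside the method, so the caller does not need to supply them.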