Example #1
 def validate_apps(self, local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
     '''
     Validate that the small apps submitted earlier succeeded
     :param local_dir_name: name of the local log directory (unused; self.local_dir_name is read instead)
     :return: None
     '''
     local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                              self.local_dir_name)
     appIds = []
     for root, _dirs, filenames in os.walk(local_dir):
         for f in filenames:
             # Read the client log to recover the YARN application ID
             with open(os.path.join(root, f), 'r') as logfile:
                 stdout = logfile.read()
             appId = YARN.getApplicationIDFromStdout(stdout,
                                                     logoutput=False)
             appIds.append(appId)
     # Sleep for 30 seconds before checking App status
     time.sleep(30)
     status, d = YARN.checkAppsSucceeded(appIds,
                                         logPrefix=None,
                                         useWS=True,
                                         localDir=None)
     for app, status in d.items():
         if status != "SUCCEEDED":
             appInfo = YARN.getApplicationInfo(app)
             logger.info(appInfo)
             if appInfo:
                 assert appInfo['state'] == 'ACCEPTED', \
                     "app is neither in ACCEPTED nor SUCCEEDED state"
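A minimal calling sketch for the validator above. The wrapper class name SmallRwJobsTest and its local_dir_name attribute are assumptions added for illustration; only validate_apps itself comes from the example:

    # Hypothetical harness: class name and attribute are assumptions, not part of the example.
    class SmallRwJobsTest(object):
        local_dir_name = "small_rw_jobs"  # client logs collected under ARTIFACTS_DIR/small_rw_jobs
        # validate_apps() from Example #1 would be defined here

    # Walks the log directory, extracts every application ID, waits 30 seconds,
    # then asserts each app is SUCCEEDED (or still ACCEPTED).
    SmallRwJobsTest().validate_apps()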
Example #2
    def run_smoke_test(cls, config=None):
        '''
        Run the smoke test for Spark: SparkPi in yarn-cluster and yarn-client modes
        '''
        logger.info("config = %s", config)
        from beaver.component.spark import Spark
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress(
            "[INFO][Spark][Smoke] Smoke test for Spark started ")
        exit_code, _ = Spark.submitSparkApplication(
            "org.apache.spark.examples.SparkPi", "yarn-cluster", "3")
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-cluster mode"
            )
            return

        exit_code, stdout2 = Spark.submitSparkApplication(
            "org.apache.spark.examples.SparkPi", "yarn-client", "3")
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in Yarn-client mode"
            )
            return

        if Machine.isWindows():
            appName_pi = "SparkPi"
        else:
            appName_pi = "Spark Pi"
        HADOOP_QA = Config.get('hadoop', 'HADOOPQA_USER')
        appId = YARN.getApplicationIDFromStdout(stdout2).strip()
        logger.info(
            "Validate http://<host>:<port>/ws/v1/timeline/spark_event_v01/<appId>"
        )
        Spark.getSparkATSAppUrl(appId)
        time.sleep(30)
        # Spark-ats check. We will enable it once Ambari enables Spark-ATS by default
        #cls.validate_ApplicationEntry(appId, appName_pi, HADOOP_QA, mode="yarn-client", url=url)
        Spark.hitSparkURL()
        time.sleep(50)
        result_HS_completeApp = Spark.validateSparkHSCompletedApps(
            appId, appName_pi, HADOOP_QA)
        if not result_HS_completeApp:
            UpgradePerNode.reportProgress(
                "[FAILED][Spark][Smoke] SparkPi Spark HS complete App Validation failed"
            )
            return
        result_HS_Jobs = Spark.validateSparkHSJobs(appId, "1/1", "3/3")
        if not result_HS_Jobs:
            UpgradePerNode.reportProgress(
                "[FAILED][Spark][Smoke] SparkPi Spark HS Job page validation failed"
            )
            return
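The smoke test repeats the same submit-and-report pattern for both deploy modes. A hedged refactoring sketch; the helper name submit_and_report is illustrative, while Spark.submitSparkApplication and UpgradePerNode.reportProgress are the calls already used above:

    from beaver.component.spark import Spark
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    def submit_and_report(class_name, mode, args):
        # Submit the example job and report a [FAILED] marker on a non-zero exit code.
        exit_code, stdout = Spark.submitSparkApplication(class_name, mode, args)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Spark][Smoke] SparkPi Smoke Test Failed in %s mode" % mode)
            return None
        return stdout

    stdout2 = submit_and_report("org.apache.spark.examples.SparkPi", "yarn-client", "3")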
Example #3
    def start_LongRunning_Federation_HDFS_stream_job(
            cls,
            inputDir,
            outputDir,
            num_executor,
            mode="yarn-client",
            inBackground=True,
            clientfile=None,
            pythonFile="federation_hdfs_wordcount.py",
            srcDir=None,
            keytab=None,
            principal=None):
        """
          Starts Spark-HDFS Streaming application using python file
          :param inputDir:
          :param outputDir:
          :param num_executor:
          :param mode:
          :param inBackground:
          :param clientfile:
          :param pythonFile: Python file which need to be run as spark streaming application
          :param srcDir: Path of the Python file
          :return: (application ID, Local client log)
          """
        if clientfile is None:
            Local_clientlog = Spark.createTmpClientFile(pythonFile + "_" +
                                                        mode)
        else:
            Local_clientlog = Spark.createTmpClientFile(clientfile)

        if pythonFile == "federation_hdfs_wordcount.py":
            srcDir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "spark",
                                  "examples", "streaming")

        arg = " %s %s 2>&1 | tee %s" % (inputDir, outputDir, Local_clientlog)

        Spark.submitSparkPyApplication(pythonFile,
                                       mode,
                                       arg,
                                       num_executor=num_executor,
                                       inBackground=inBackground,
                                       srcDir=srcDir,
                                       timeout=120,
                                       clientfile=clientfile,
                                       conf=None,
                                       keytab=keytab,
                                       principal=principal)

        # Read the captured client log to recover the YARN application ID
        with open(Local_clientlog, "r") as f:
            stdout = f.read()
        appId = YARN.getApplicationIDFromStdout(stdout)
        return appId, Local_clientlog
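A hedged call sketch for the federation streaming job. The owning class name SparkStreamingJobs and the HDFS paths are placeholders; the method signature and return value come from the example:

    # Illustrative invocation only; class name and HDFS paths are assumptions.
    appId, client_log = SparkStreamingJobs.start_LongRunning_Federation_HDFS_stream_job(
        inputDir="/user/hrt_qa/stream_input",
        outputDir="/user/hrt_qa/stream_output",
        num_executor=2,
        mode="yarn-client",
        inBackground=True)
    # appId identifies the long-running YARN application; client_log is the local
    # spark-submit output captured through "tee".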
Example #4
    def start_LongRunning_HDFS_stream_job(cls,
                                          inputDir,
                                          num_executor,
                                          mode="yarn-client",
                                          inBackground=True,
                                          clientfile=None):
        '''
        Start the Spark-HDFS streaming application (HdfsWordCount)
        :return: (application ID, local client log)
        '''
        className = "org.apache.spark.examples.streaming.HdfsWordCount"
        if mode == "yarn-client" and not HDFS.isASV():
            jars = Spark.getLzoJar()
        else:
            jars = None
        if clientfile is None:
            Local_clientlog = Spark.createTmpClientFile(className + "_" + mode)
        else:
            Local_clientlog = Spark.createTmpClientFile(clientfile)
        arg = " %s 2>&1 | tee %s" % (inputDir, Local_clientlog)
        if Hadoop.isSecure():
            keytab = Machine.getHeadlessUserKeytab(
                Config.get('hadoop', 'HADOOPQA_USER'))
            principal = Machine.get_user_principal(
                Config.get('hadoop', 'HADOOPQA_USER'))
        else:
            keytab = None
            principal = None

        Spark.submitSparkApplication(className,
                                     mode,
                                     arg,
                                     jars=jars,
                                     num_executor=num_executor,
                                     inBackground=inBackground,
                                     timeout=120,
                                     keytab=keytab,
                                     principal=principal)
        # Read the captured client log to recover the YARN application ID
        with open(Local_clientlog, "r") as f:
            stdout = f.read()
        appId = YARN.getApplicationIDFromStdout(stdout)
        return appId, Local_clientlog
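Example #4 picks a keytab and principal only when the cluster is secure. A small sketch that factors that choice out; Hadoop, Machine and Config are the framework classes already used above, while the helper name is an assumption:

    def get_submit_credentials():
        # Return (keytab, principal) for spark-submit on a secure cluster,
        # or (None, None) when Kerberos is not enabled.
        if Hadoop.isSecure():
            user = Config.get('hadoop', 'HADOOPQA_USER')
            return (Machine.getHeadlessUserKeytab(user),
                    Machine.get_user_principal(user))
        return (None, None)

    keytab, principal = get_submit_credentials()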