def spark_submit(self, job_params: SparkJobParameters, ui_port: int = None) -> subprocess.Popen:
    """Start ``spark-submit`` for ``job_params`` and return the child process.

    The command is assembled as an argument list and handed to
    ``subprocess.Popen`` without a shell, so each list element reaches the
    script verbatim (no quote stripping, no word splitting).

    Args:
        job_params: Provides the job name, optional class name, main file
            path and the trailing application arguments.
        ui_port: When truthy, forwarded as ``spark.ui.port``.

    Returns:
        The ``subprocess.Popen`` handle; this method does not wait on it.
    """
    submission_cmd = [
        self.spark_submit_script_path,
        "--master",
        self.master_url,
        "--name",
        job_params.get_name(),
    ]

    class_name = job_params.get_class_name()
    if class_name:
        submission_cmd.extend(["--class", class_name])

    if ui_port:
        submission_cmd.extend(["--conf", f"spark.ui.port={ui_port}"])

    # Workaround for https://github.com/apache/spark/pull/26552
    # Fix running spark job with bigquery connector (w/ shadowing) on JDK 9+
    java_options = (
        "-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"
        " -Duser.timezone=GMT"
    )
    submission_cmd.extend(
        [
            "--conf",
            f"spark.executor.extraJavaOptions={java_options}",
            "--conf",
            f"spark.driver.extraJavaOptions={java_options}",
            "--conf",
            "spark.sql.session.timeZone=UTC",  # ignore local timezone
            "--packages",
            f"com.google.cloud.spark:spark-bigquery-with-dependencies_{self.BQ_CONNECTOR_VERSION}",
            "--jars",
            "https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar,"
            "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar,"
            "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar",
            "--conf",
            "spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem",
            "--conf",
            "spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem",
        ]
    )

    if self.additional_options is not None:
        for option, value in self.additional_options.items():
            # No shell is involved, so wrapping key/value in double quotes
            # would pass the quote characters through as part of the
            # property name and value. Emit a plain ``key=value`` pair.
            submission_cmd.extend(["--conf", f"{option}={value}"])

    submission_cmd.append(job_params.get_main_file_path())
    submission_cmd.extend(job_params.get_arguments())

    return subprocess.Popen(submission_cmd)
def spark_submit(self, job_params: SparkJobParameters) -> subprocess.Popen:
    """Spawn the spark-submit script for the given job.

    The argument vector is, in order: the script path, ``--master`` with
    ``self.master_url``, ``--name`` with the job name, ``--class`` with the
    class name (only when the job reports a truthy one), the main file path,
    and finally the job's own arguments.

    Returns:
        The ``subprocess.Popen`` object for the started process.
    """
    argv = [self.spark_submit_script_path]
    argv += ["--master", self.master_url]
    argv += ["--name", job_params.get_name()]

    # The entry-point class is optional.
    if job_params.get_class_name():
        argv += ["--class", job_params.get_class_name()]

    # The application file precedes the application's own arguments.
    argv += [job_params.get_main_file_path(), *job_params.get_arguments()]

    return subprocess.Popen(argv)
def spark_submit(self, job_params: SparkJobParameters, ui_port: int = None) -> subprocess.Popen:
    """Start ``spark-submit`` for ``job_params`` and return the child process.

    The command is passed to ``subprocess.Popen`` as an argument list (no
    shell), so the free-form extra-options string has to be tokenised here.

    Args:
        job_params: Provides the job name, optional class name, optional
            extra spark-submit options (a single string), main file path
            and the trailing application arguments.
        ui_port: When truthy, forwarded as ``spark.ui.port``.

    Returns:
        The ``subprocess.Popen`` handle; this method does not wait on it.
    """
    import shlex

    submission_cmd = [
        self.spark_submit_script_path,
        "--master",
        self.master_url,
        "--name",
        job_params.get_name(),
    ]

    class_name = job_params.get_class_name()
    if class_name:
        submission_cmd.extend(["--class", class_name])

    if ui_port:
        submission_cmd.extend(["--conf", f"spark.ui.port={ui_port}"])

    extra_options = job_params.get_extra_options()
    if extra_options:
        # Shell-style tokenising: repeated/leading/trailing whitespace yields
        # no empty arguments, and a quoted value containing spaces stays a
        # single argument. A plain split(" ") got both of these wrong.
        submission_cmd.extend(shlex.split(extra_options))

    submission_cmd.append(job_params.get_main_file_path())
    submission_cmd.extend(job_params.get_arguments())

    return subprocess.Popen(submission_cmd)