class _BeamTask(Task):
    beam_config = parameter(config_path=from_task_env("beam_config"))[
        ApacheBeamConfig
    ]
    beam_engine = parameter(config_path=from_task_env("beam_engine"))[
        EngineConfig
    ]  # type: DataflowConfig

    def get_beam_task_options(self):
        """
        Extra options for the Beam pipeline being submitted.

        :return: dict
        """
        return {}

    def _get_job_ctrl(self):
        # type: (...) -> ApacheBeamJobCtrl
        return self.beam_engine.get_beam_ctrl(self.current_task_run)
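# A minimal usage sketch of the class above. `WordCountBeamTask`, the
# `input_path` parameter, and the "input" option key are illustrative
# assumptions, not part of this module; get_beam_task_options() is the
# documented extension point.
class WordCountBeamTask(_BeamTask):
    input_path = parameter(description="Input text file for the pipeline.")[str]

    def get_beam_task_options(self):
        # Assumed to be merged into the pipeline options by the
        # ApacheBeamJobCtrl returned from _get_job_ctrl().
        return {"input": self.input_path}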
class DockerRunTask(Task):
    # _conf__task_family = "docker_cmd"
    image = parameter(
        description="Docker image from which to create the container. "
        "If image tag is omitted, 'latest' will be used."
    )[str]
    command = parameter(
        description="Command to be run in the container. (templated)"
    )[str]
    docker_engine = parameter(config_path=from_task_env("docker_engine"))[
        ContainerEngineConfig
    ]

    docker_ctrl = None  # type: DockerRunCtrl

    def _task_submit(self):
        if hasattr(self.ctrl, "airflow_op"):
            # render Airflow template fields (e.g. {{ ds }}) in the command
            airflow_context = self.current_task_run.airflow_context
            self.command = self.ctrl.airflow_op.render_template(
                self.command, airflow_context
            )
        self.log_metric("docker command", self.command)

        self.docker_ctrl = self.docker_engine.get_docker_ctrl(
            self.current_task_run
        )  # type: DockerRunCtrl
        self.docker_ctrl.docker_run()

    def on_kill(self):
        if self.docker_ctrl is not None:
            logger.error("Killing submitted docker for %s", self.task_id)
            return self.docker_ctrl.on_kill()
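# A hypothetical invocation of DockerRunTask, assuming the standard dbnd
# keyword constructor for task parameters; the image and command values
# are illustrative only. Because `command` is templated, Airflow macros
# such as {{ ds }} are rendered in _task_submit when an airflow_op is
# attached to the ctrl.
echo_task = DockerRunTask(
    image="busybox:1.36",
    command="echo 'run date: {{ ds }}'",
)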
class _BaseSparkTask(Task):
    _conf__task_type_name = TaskType.spark

    # resolved from the task environment
    spark_config = parameter(config_path=from_task_env("spark_config"))[SparkConfig]
    spark_engine = parameter(config_path=from_task_env("spark_engine"))[
        SparkEngineConfig
    ]  # type: Union[EmrConfig]

    python_script = None
    main_class = None

    spark_conf_extension = parameter(
        default={},
        description="Extra entries for the SparkConfig.conf dict; "
        "every key added here is merged into spark_config.conf.",
        significant=False,
    )[dict]

    spark_resources = parameter.c(default=None, system=True)[Dict[str, FileTarget]]

    def band(self):
        result = super(_BaseSparkTask, self).band()

        if self.spark_config.include_user_project:
            fat_wheel_task = fat_wheel_building_task(
                task_version=try_get_databand_context().current_context_uid,
                task_target_date="today",
            )
            self.spark_resources = {"user_project": fat_wheel_task}

        if self.spark_engine.disable_task_band:
            logger.debug("Task band is disabled due to disable_task_band flag")
            self.task_band = None

        return result

    def get_py_files(self):
        py_files = self.spark_config.py_files.copy()
        if self.spark_resources and "user_project" in self.spark_resources:
            project_files = self.spark_resources["user_project"].load(str)
            py_files.append(project_files)
        return py_files

    def application_args(self):
        """
        Arguments for the application being submitted.

        :return: list
        """
        return []

    def _get_spark_ctrl(self):
        # type: () -> SparkCtrl
        return self.spark_engine.get_spark_ctrl(self.current_task_run)

    def _task_banner(self, banner, verbosity):
        b = banner
        b.new_section()
        try:
            spark_command_line = subprocess.list2cmdline(
                list_of_strings(self.application_args())
            )
            b.column("SPARK CMD LINE", spark_command_line)
        except Exception:
            logger.exception("Failed to get spark command line from %s", self)

    def get_root(self):
        return self.spark_engine.root or super(_BaseSparkTask, self).get_root()

    def _initialize(self):
        super(_BaseSparkTask, self)._initialize()
        if self.spark_conf_extension:
            # adds the last layer on top of SparkConfig.conf
            self.spark_config.conf.update(self.spark_conf_extension)
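# A sketch of how _BaseSparkTask is typically specialized: point
# python_script at a PySpark entry point and override application_args().
# The class name, script path, and arguments below are illustrative
# assumptions; spark_conf_extension is the parameter defined above.
class WordCountSparkTask(_BaseSparkTask):
    text_path = parameter(description="Input text to process.")[str]

    python_script = "scripts/word_count.py"  # hypothetical entry point

    def application_args(self):
        # Rendered via subprocess.list2cmdline in _task_banner and passed
        # to the submitted Spark application.
        return [self.text_path]


# Per-task Spark settings can be layered on via spark_conf_extension,
# which _initialize() merges into spark_config.conf, e.g.:
# WordCountSparkTask(text_path="...", spark_conf_extension={"spark.executor.memory": "2g"})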