def execute(self):
    """Run the single step ``self.step_key`` and yield the resulting events.

    The generator first yields the engine event returned by
    ``instance.report_engine_event`` (tagged with this process's pid and the
    step key), then yields every event produced by ``execute_plan_iterator``
    for a plan restricted to ``self.step_key``.
    """
    check.inst(self.executor_config, MultiprocessExecutorConfig)

    pipeline_def = self.executor_config.load_pipeline(self.pipeline_run)
    instance = DagsterInstance.from_ref(self.instance_ref)

    # NOTE(review): presumably lets the parent signal this process to stop
    # through term_event -- confirm against start_termination_thread.
    start_termination_thread(self.term_event)

    # Build the plan for the run, then narrow it to the one step we own.
    full_plan = create_execution_plan(
        pipeline_def, self.environment_dict, self.pipeline_run
    )
    step_plan = full_plan.build_subset_plan([self.step_key])

    # Announce the step before running it.
    message = 'Executing step {} in subprocess'.format(self.step_key)
    metadata_entries = [
        EventMetadataEntry.text(str(os.getpid()), 'pid'),
        EventMetadataEntry.text(self.step_key, 'step_key'),
    ]
    engine_event_data = EngineEventData(metadata_entries, marker_end=DELEGATE_MARKER)
    yield instance.report_engine_event(
        MultiprocessEngine,
        message,
        self.pipeline_run,
        engine_event_data,
        self.step_key,
    )

    step_events = execute_plan_iterator(
        step_plan,
        self.pipeline_run,
        environment_dict=self.environment_dict,
        retries=self.executor_config.retries.for_inner_plan(),
        instance=instance,
    )
    for step_event in step_events:
        yield step_event
def execute(self):
    """Run the single step ``self.step_key`` and yield the resulting events.

    A ``DagsterInstance`` built from ``self.instance_ref`` is used as a
    context manager for the whole execution. The generator yields one engine
    event announcing the step (with pid and step key metadata), followed by
    every event from ``execute_plan_iterator`` for the single-step subset plan.
    """
    recon_pipeline = self.recon_pipeline

    with DagsterInstance.from_ref(self.instance_ref) as instance:
        # NOTE(review): presumably lets the parent signal this process to
        # stop through term_event -- confirm against start_termination_thread.
        start_termination_thread(self.term_event)

        # Plan for the run's selected steps, then narrowed to our step only.
        run_plan = create_execution_plan(
            pipeline=recon_pipeline,
            run_config=self.run_config,
            mode=self.pipeline_run.mode,
            step_keys_to_execute=self.pipeline_run.step_keys_to_execute,
        )
        step_plan = run_plan.build_subset_plan([self.step_key])

        message = "Executing step {} in subprocess".format(self.step_key)
        metadata_entries = [
            EventMetadataEntry.text(str(os.getpid()), "pid"),
            EventMetadataEntry.text(self.step_key, "step_key"),
        ]
        yield instance.report_engine_event(
            message,
            self.pipeline_run,
            EngineEventData(metadata_entries, marker_end=DELEGATE_MARKER),
            MultiprocessExecutor,
            self.step_key,
        )

        step_events = execute_plan_iterator(
            step_plan,
            self.pipeline_run,
            run_config=self.run_config,
            retries=self.retries.for_inner_plan(),
            instance=instance,
        )
        for step_event in step_events:
            yield step_event
def execute(self):
    """Run the single step ``self.step_key`` and yield the resulting events.

    The execution plan is created directly for ``[self.step_key]`` using
    ``self.known_state``. The generator yields one engine event announcing
    the step (with pid and step key metadata) and then delegates to
    ``execute_plan_iterator``.
    """
    recon_pipeline = self.recon_pipeline

    with DagsterInstance.from_ref(self.instance_ref) as instance:
        # NOTE(review): presumably lets the parent signal this process to
        # stop through term_event -- confirm against start_termination_thread.
        start_termination_thread(self.term_event)

        step_plan = create_execution_plan(
            pipeline=recon_pipeline,
            run_config=self.run_config,
            mode=self.pipeline_run.mode,
            step_keys_to_execute=[self.step_key],
            known_state=self.known_state,
        )

        message = "Executing step {} in subprocess".format(self.step_key)
        metadata_entries = [
            MetadataEntry("pid", value=str(os.getpid())),
            MetadataEntry("step_key", value=self.step_key),
        ]
        yield instance.report_engine_event(
            message,
            self.pipeline_run,
            EngineEventData(metadata_entries, marker_end=DELEGATE_MARKER),
            MultiprocessExecutor,
            self.step_key,
        )

        inner_retry_mode = self.retry_mode.for_inner_plan()
        yield from execute_plan_iterator(
            step_plan,
            recon_pipeline,
            self.pipeline_run,
            run_config=self.run_config,
            retry_mode=inner_retry_mode,
            instance=instance,
        )
def _in_mp_process(handle, pipeline_run, instance_ref, term_event):
    """Load and execute the pipeline for ``pipeline_run`` in this process.

    Records a process-started event on the instance, loads the pipeline
    definition through ``handle``, runs it with ``execute_run_iterator`` and
    returns a ``PipelineExecutionResult`` holding the collected events.

    Returns ``None`` when loading or execution fails; in that case a
    synthetic pipeline error record is stored on the instance instead
    (except for subprocess errors made up solely of KeyboardInterrupts).
    """
    run_id = pipeline_run.run_id
    pipeline_name = pipeline_run.pipeline_name
    instance = DagsterInstance.from_ref(instance_ref)

    def record_error(exc_info):
        # Store a synthetic pipeline error record built from an exc_info triple.
        instance.handle_new_event(
            build_synthetic_pipeline_error_record(
                run_id,
                serializable_error_info_from_exc_info(exc_info),
                pipeline_name,
            )
        )

    started_event = build_process_started_event(run_id, pipeline_name, os.getpid())
    instance.handle_new_event(started_event)

    start_termination_thread(term_event)

    try:
        handle.build_repository_definition()
        named_handle = handle.with_pipeline_name(pipeline_name)
        pipeline_def = named_handle.build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        # NOTE(review): this early return sits outside the try/finally below,
        # so no process-exited event is recorded on a load failure -- confirm
        # that this is intended.
        record_error(sys.exc_info())
        return

    try:
        sub_pipeline = pipeline_def.build_sub_pipeline(pipeline_run.selector.solid_subset)
        event_list = list(execute_run_iterator(sub_pipeline, pipeline_run, instance))
        return PipelineExecutionResult(pipeline_def, run_id, event_list, lambda: None)

    # Add a DagsterEvent for unexpected exceptions
    # Explicitly ignore KeyboardInterrupts since they are used for termination
    except DagsterSubprocessError as err:
        has_real_failure = any(
            err_info.cls_name != 'KeyboardInterrupt'
            for err_info in err.subprocess_error_infos
        )
        if has_real_failure:
            record_error(sys.exc_info())
    except Exception:  # pylint: disable=broad-except
        record_error(sys.exc_info())
    finally:
        exited_event = build_process_exited_event(run_id, pipeline_name, os.getpid())
        instance.handle_new_event(exited_event)
def execute(self):
    """Run the single step ``self.step_key`` and yield its events.

    The pipeline definition comes from ``self.executor_config.handle``. The
    environment dict is copied and its ``execution`` entry replaced with
    ``{'in_process': {}}`` before planning and executing the step.
    """
    check.inst(self.executor_config, MultiprocessExecutorConfig)

    pipeline_def = self.executor_config.handle.build_pipeline_definition()

    # Work on a shallow copy so self.environment_dict is left untouched.
    environment_dict = dict(self.environment_dict)
    environment_dict['execution'] = {'in_process': {}}

    # NOTE(review): presumably lets the parent signal this process to stop
    # through term_event -- confirm against start_termination_thread.
    start_termination_thread(self.term_event)

    full_plan = create_execution_plan(pipeline_def, environment_dict, self.pipeline_run)
    step_plan = full_plan.build_subset_plan([self.step_key])

    step_events = execute_plan_iterator(
        step_plan,
        self.pipeline_run,
        environment_dict=environment_dict,
        instance=DagsterInstance.from_ref(self.instance_ref),
    )
    for step_event in step_events:
        yield step_event
def execute(self):
    """Run the single step ``self.step_key`` and yield its events.

    The plan is built from the unmodified ``self.environment_dict``; the step
    itself is executed with the environment returned by
    ``override_env_for_inner_executor``.
    """
    check.inst(self.executor_config, MultiprocessExecutorConfig)

    pipeline_def = self.executor_config.load_pipeline(self.pipeline_run)

    # NOTE(review): presumably lets the parent signal this process to stop
    # through term_event -- confirm against start_termination_thread.
    start_termination_thread(self.term_event)

    full_plan = create_execution_plan(
        pipeline_def, self.environment_dict, self.pipeline_run
    )
    step_plan = full_plan.build_subset_plan([self.step_key])

    inner_environment = override_env_for_inner_executor(
        self.environment_dict,
        self.executor_config.retries,
        self.step_key,
        DELEGATE_MARKER,
    )
    instance = DagsterInstance.from_ref(self.instance_ref)

    step_events = execute_plan_iterator(
        step_plan,
        self.pipeline_run,
        environment_dict=inner_environment,
        instance=instance,
    )
    for step_event in step_events:
        yield step_event
def in_mp_process(cls, handle, pipeline_run, instance_ref, term_event):
    """Load and execute the pipeline for ``pipeline_run`` in this process.

    Reports a "started process" engine event, loads the pipeline definition
    through ``handle``, runs it with ``execute_run_iterator`` and returns a
    ``PipelineExecutionResult`` holding the collected events.

    Args:
        cls: Passed through to ``instance.report_engine_event`` as the
            reporting class.
        handle: Object used to build the repository and pipeline definitions.
        pipeline_run: The run to execute; supplies run id, pipeline name and
            the solid subset selector.
        instance_ref: Reference from which the ``DagsterInstance`` is built.
        term_event: Event handed to ``start_termination_thread``.

    Returns:
        A ``PipelineExecutionResult`` on success, otherwise ``None`` (the
        failure is reported as an engine event instead of being raised).
    """
    run_id = pipeline_run.run_id
    pipeline_name = pipeline_run.pipeline_name
    instance = DagsterInstance.from_ref(instance_ref)
    pid = os.getpid()

    def report_framework_error():
        # Must be called from inside an ``except`` block: it serializes the
        # exception currently being handled via sys.exc_info().
        instance.report_engine_event(
            'An exception was thrown during execution that is likely a framework error, '
            'rather than an error in user code.',
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls,
        )

    instance.report_engine_event(
        'Started process for pipeline (pid: {pid}).'.format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end='dagit_subprocess_init'),
        cls,
    )

    start_termination_thread(term_event)

    try:
        handle.build_repository_definition()
        pipeline_def = handle.with_pipeline_name(pipeline_name).build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        # NOTE(review): this early return sits outside the try/finally below,
        # so no "process exited" event is reported on a load failure --
        # confirm that this is intended.
        instance.report_engine_event(
            'Failed attempting to load pipeline "{}"'.format(pipeline_name),
            pipeline_run,
            EngineEventData.engine_error(
                serializable_error_info_from_exc_info(sys.exc_info())
            ),
            cls,
        )
        return

    try:
        event_list = []
        for event in execute_run_iterator(
            pipeline_def.build_sub_pipeline(pipeline_run.selector.solid_subset),
            pipeline_run,
            instance,
        ):
            event_list.append(event)
        return PipelineExecutionResult(pipeline_def, run_id, event_list, lambda: None)

    # Add a DagsterEvent for unexpected exceptions
    # Explicitly ignore KeyboardInterrupts since they are used for termination
    except DagsterSubprocessError as err:
        if not all(
            err_info.cls_name == 'KeyboardInterrupt'
            for err_info in err.subprocess_error_infos
        ):
            report_framework_error()
    except Exception:  # pylint: disable=broad-except
        report_framework_error()
    finally:
        # The message must be a single-line literal; a raw line break inside
        # a single-quoted string is a syntax error.
        instance.report_engine_event(
            'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            cls=cls,
        )
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):
    """Execute a run in this process, forwarding status and events to handlers.

    Setup (deserializing the args, building the instance, loading the run) is
    reported through ``subprocess_status_handler``: an ``IPCErrorMessage`` plus
    ``RunInSubprocessComplete`` on failure, ``StartRunInSubprocessSuccessful``
    on success. Every event from ``_core_execute_run`` is then passed to
    ``run_event_handler``, and ``RunInSubprocessComplete`` is always sent at
    the end.
    """
    start_termination_thread(termination_event)

    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(serialized_execute_run_args)
        check.inst_param(execute_run_args, 'execute_run_args', ExecuteRunArgs)

        instance = DagsterInstance.from_ref(execute_run_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)

        pid = os.getpid()
    except:  # pylint: disable=bare-except
        setup_error = serializable_error_info_from_exc_info(sys.exc_info())
        subprocess_status_handler(
            IPCErrorMessage(
                serializable_error_info=setup_error,
                message='Error during RPC setup for ExecuteRun',
            )
        )
        subprocess_status_handler(RunInSubprocessComplete())
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    started_event = instance.report_engine_event(
        'Started process for pipeline (pid: {pid}).'.format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end='cli_api_subprocess_init'),
    )
    run_event_handler(started_event)

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    report_exit = True
    try:
        for run_event in _core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(run_event)
    except KeyboardInterrupt:
        interrupt_event = instance.report_engine_event(
            message='Pipeline execution terminated by interrupt',
            pipeline_run=pipeline_run,
        )
        run_event_handler(interrupt_event)
        raise
    except GeneratorExit:
        # Skip the "process exited" event when torn down by GeneratorExit.
        report_exit = False
        raise
    finally:
        if report_exit:
            exited_event = instance.report_engine_event(
                'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
                pipeline_run,
            )
            run_event_handler(exited_event)
        subprocess_status_handler(RunInSubprocessComplete())
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):
    """Execute a run in this process, forwarding status and events to handlers.

    Args:
        serialized_execute_run_args: JSON string that deserializes to an
            ``ExecuteExternalPipelineArgs``.
        recon_pipeline: Pipeline handed to ``_core_execute_run``.
        termination_event: Event handed to ``start_termination_thread``.
        subprocess_status_handler: Callable receiving lifecycle messages
            (``IPCErrorMessage``, ``StartRunInSubprocessSuccessful``,
            ``RunInSubprocessComplete``).
        run_event_handler: Callable receiving every event produced for the run.

    Setup failures (bad args, unreachable instance, missing run) are not
    raised: they are reported as an ``IPCErrorMessage`` followed by
    ``RunInSubprocessComplete``, and the function returns. Exceptions raised
    while executing the run propagate after the final status is reported.
    The instance is disposed on every path where it was created.
    """
    start_termination_thread(termination_event)

    # Bind the name before the try block: if deserialization or the type
    # check fails, the error path below would otherwise hit an
    # UnboundLocalError when deciding whether there is an instance to dispose.
    instance = None
    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(serialized_execute_run_args)
        check.inst_param(execute_run_args, "execute_run_args", ExecuteExternalPipelineArgs)

        instance = DagsterInstance.from_ref(execute_run_args.instance_ref)
        pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)

        if not pipeline_run:
            raise DagsterRunNotFoundError(
                "gRPC server could not load run {run_id} in order to execute it. Make sure that the gRPC server has access to your run storage.".format(
                    run_id=execute_run_args.pipeline_run_id
                ),
                invalid_run_id=execute_run_args.pipeline_run_id,
            )

        pid = os.getpid()

    except:  # pylint: disable=bare-except
        serializable_error_info = serializable_error_info_from_exc_info(sys.exc_info())
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info,
            message="Error during RPC setup for executing run: {message}".format(
                message=serializable_error_info.message
            ),
        )
        try:
            subprocess_status_handler(event)
            subprocess_status_handler(RunInSubprocessComplete())
        finally:
            # Dispose even if a status handler raises.
            if instance is not None:
                instance.dispose()
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    run_event_handler(
        instance.report_engine_event(
            "Started process for pipeline (pid: {pid}).".format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid, marker_end="cli_api_subprocess_init"),
        )
    )

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    closed = False
    try:
        for event in _core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except GeneratorExit:
        closed = True
        raise
    finally:
        try:
            if not closed:
                run_event_handler(
                    instance.report_engine_event(
                        "Process for pipeline exited (pid: {pid}).".format(pid=pid),
                        pipeline_run,
                    )
                )
            subprocess_status_handler(RunInSubprocessComplete())
        finally:
            # Dispose even if the final reporting above raises.
            instance.dispose()