def execute(self):
    """Run this command's single step and yield every event it produces.

    Opens the Dagster instance from ``self.instance_ref``, starts the
    termination thread for ``self.term_event``, builds an execution plan
    narrowed to ``self.step_key``, yields an engine event announcing the
    subprocess, and then yields each event from ``execute_plan_iterator``.
    """
    recon_pipeline = self.recon_pipeline

    with DagsterInstance.from_ref(self.instance_ref) as instance:
        start_termination_thread(self.term_event)

        run_plan = create_execution_plan(
            pipeline=recon_pipeline,
            run_config=self.run_config,
            mode=self.pipeline_run.mode,
            step_keys_to_execute=self.pipeline_run.step_keys_to_execute,
        )
        # Narrow the run-level plan down to the one step this command owns.
        step_plan = run_plan.build_subset_plan([self.step_key])

        subprocess_metadata = [
            EventMetadataEntry.text(str(os.getpid()), "pid"),
            EventMetadataEntry.text(self.step_key, "step_key"),
        ]
        yield instance.report_engine_event(
            "Executing step {} in subprocess".format(self.step_key),
            self.pipeline_run,
            EngineEventData(subprocess_metadata, marker_end=DELEGATE_MARKER),
            MultiprocessExecutor,
            self.step_key,
        )

        step_events = execute_plan_iterator(
            step_plan,
            self.pipeline_run,
            run_config=self.run_config,
            retries=self.retries.for_inner_plan(),
            instance=instance,
        )
        for step_event in step_events:
            yield step_event
def _execute_plan(self, execute_step_args_packed, executable_dict):
    """Execute the requested steps and return their serialized events.

    Args:
        execute_step_args_packed: dict that ``unpack_value`` turns into an
            ``ExecuteStepArgs``.
        executable_dict: dict from which the ``ReconstructablePipeline`` is
            rebuilt.

    Returns:
        A list holding one ``serialize_dagster_namedtuple`` result per event:
        the initial engine event followed by every event produced by
        ``execute_plan_iterator``.
    """
    packed_args = check.dict_param(
        execute_step_args_packed,
        "execute_step_args_packed",
    )
    execute_step_args = unpack_value(packed_args)
    check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)
    check.dict_param(executable_dict, "executable_dict")

    instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    retries = Retries.from_config(execute_step_args.retries_dict)

    run_id = execute_step_args.pipeline_run_id
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_keys = execute_step_args.step_keys_to_execute
    step_keys_str = ", ".join(step_keys)

    execution_plan = create_execution_plan(
        pipeline,
        pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=step_keys,
    )

    worker_metadata = [
        EventMetadataEntry.text(step_keys_str, "step_keys"),
        EventMetadataEntry.text(self.request.hostname, "Celery worker"),
    ]
    # The engine event is reported first so it leads the returned event list.
    events = [
        instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(worker_metadata, marker_end=DELEGATE_MARKER),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )
    ]
    events.extend(
        execute_plan_iterator(
            execution_plan,
            pipeline_run=pipeline_run,
            run_config=pipeline_run.run_config,
            instance=instance,
            retries=retries,
        )
    )

    return [serialize_dagster_namedtuple(event) for event in events]
def _execute_step_command_body(
    args: ExecuteStepArgs, instance: DagsterInstance, pipeline_run: PipelineRun
):
    """Yield the events produced by executing the steps named in ``args``.

    The generator returns early, without executing anything, when
    ``args.should_verify_step`` is set and ``verify_step`` reports failure.

    On ``KeyboardInterrupt`` / ``DagsterExecutionInterruptedError`` or any
    other ``Exception``, an engine event describing the failure is yielded
    and the original exception is re-raised.
    """
    # Engine events raised below are attributed to a step only when exactly
    # one step key was requested.
    single_step_key = None
    requested_keys = args.step_keys_to_execute
    if requested_keys and len(requested_keys) == 1:
        single_step_key = requested_keys[0]

    try:
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id
            ),
        )

        if args.should_verify_step:
            retry_state = check.not_none(args.known_state).get_retry_state()
            verified = verify_step(
                instance,
                pipeline_run,
                retry_state,
                args.step_keys_to_execute,
            )
            if not verified:
                return

        origin_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        recon_pipeline = origin_pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute
        )

        execution_plan = create_execution_plan(
            recon_pipeline,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
            known_state=args.known_state,
        )

        yield from execute_plan_iterator(
            execution_plan,
            recon_pipeline,
            pipeline_run,
            instance,
            run_config=pipeline_run.run_config,
            retry_mode=args.retry_mode,
        )
    except (KeyboardInterrupt, DagsterExecutionInterruptedError):
        yield instance.report_engine_event(
            message="Step execution terminated by interrupt",
            pipeline_run=pipeline_run,
            step_key=single_step_key,
        )
        raise
    except Exception:
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        yield instance.report_engine_event(
            "An exception was thrown during step execution that is likely a framework error, "
            "rather than an error in user code.",
            pipeline_run,
            EngineEventData.engine_error(error_info),
            step_key=single_step_key,
        )
        raise
def execute(self):
    """Yield the events from executing ``self.step_key`` in this process.

    The pipeline definition is built from the executor config's handle, the
    run config is tagged with the current pid, and the ``execution`` entry of
    the environment dict is replaced with an ``in_process`` config carrying
    the executor's ``raise_on_error`` setting.
    """
    executor_config = self.executor_config
    check.inst(executor_config, MultiprocessExecutorConfig)

    pipeline_def = executor_config.handle.build_pipeline_definition()
    run_config = self.run_config.with_tags(pid=str(os.getpid()))

    in_process_execution = {
        'in_process': {
            'config': {'raise_on_error': executor_config.raise_on_error},
        },
    }
    environment_dict = dict(self.environment_dict, execution=in_process_execution)

    execution_plan = create_execution_plan(pipeline_def, environment_dict, run_config)

    step_events = execute_plan_iterator(
        execution_plan,
        environment_dict,
        run_config,
        step_keys_to_execute=[self.step_key],
    )
    for step_event in step_events:
        yield step_event
def execute(self):
    """Yield an engine event for the subprocess, then the step's own events.

    Loads the pipeline through the executor config, opens the instance from
    ``self.instance_ref``, starts the termination thread, and executes a plan
    subset containing only ``self.step_key``.
    """
    executor_config = self.executor_config
    check.inst(executor_config, MultiprocessExecutorConfig)

    pipeline_def = executor_config.load_pipeline(self.pipeline_run)
    instance = DagsterInstance.from_ref(self.instance_ref)

    start_termination_thread(self.term_event)

    full_plan = create_execution_plan(
        pipeline_def, self.environment_dict, self.pipeline_run
    )
    execution_plan = full_plan.build_subset_plan([self.step_key])

    subprocess_metadata = [
        EventMetadataEntry.text(str(os.getpid()), 'pid'),
        EventMetadataEntry.text(self.step_key, 'step_key'),
    ]
    yield instance.report_engine_event(
        MultiprocessEngine,
        'Executing step {} in subprocess'.format(self.step_key),
        self.pipeline_run,
        EngineEventData(subprocess_metadata, marker_end=DELEGATE_MARKER),
        self.step_key,
    )

    step_events = execute_plan_iterator(
        execution_plan,
        self.pipeline_run,
        environment_dict=self.environment_dict,
        retries=executor_config.retries.for_inner_plan(),
        instance=instance,
    )
    for step_event in step_events:
        yield step_event
def execute_step_command(input_json):
    """Execute the steps described by ``input_json`` and echo their events.

    ``input_json`` is deserialized into an ``ExecuteStepArgs``. Events are
    serialized and collected while the plan runs, then written with
    ``click.echo`` one per line. A single serialized ``StepExecutionSkipped``
    is echoed instead when ``should_skip_step`` returns true, and nothing is
    echoed when step verification is requested and fails.
    """
    try:
        # Route SIGTERM to whatever handler is currently installed for SIGINT.
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        current_thread_name = threading.current_thread().name
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step) from threads "
                "other than the main thread."
            ).format(thread_name=current_thread_name)
        )

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    if args.instance_ref:
        instance_cm = DagsterInstance.from_ref(args.instance_ref)
    else:
        instance_cm = DagsterInstance.get()

    with instance_cm as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        check.inst(
            pipeline_run,
            PipelineRun,
            "Pipeline run with id '{}' not found for step execution".format(
                args.pipeline_run_id
            ),
        )

        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
        retries = Retries.from_config(args.retries_dict)

        if args.should_verify_step and not verify_step(
            instance, pipeline_run, retries, args.step_keys_to_execute
        ):
            return

        subset_pipeline = recon_pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute
        )
        execution_plan = create_execution_plan(
            subset_pipeline,
            run_config=pipeline_run.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=pipeline_run.mode,
        )

        # Flag that the step execution is skipped
        if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
            click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
            return

        # Collect every serialized event before echoing any of them.
        serialized_events = [
            serialize_dagster_namedtuple(event)
            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=pipeline_run.run_config,
                retries=retries,
            )
        ]

        for serialized_event in serialized_events:
            click.echo(serialized_event)
def execute(self):
    """Run this command's single step and yield every event it produces.

    Opens the instance from ``self.instance_ref``, starts the termination
    thread, builds a plan for ``self.step_key`` using ``self.known_state``,
    yields an engine event announcing the subprocess, and then delegates to
    ``execute_plan_iterator``.
    """
    recon_pipeline = self.recon_pipeline

    with DagsterInstance.from_ref(self.instance_ref) as instance:
        start_termination_thread(self.term_event)

        step_plan = create_execution_plan(
            pipeline=recon_pipeline,
            run_config=self.run_config,
            mode=self.pipeline_run.mode,
            step_keys_to_execute=[self.step_key],
            known_state=self.known_state,
        )

        subprocess_metadata = [
            MetadataEntry("pid", value=str(os.getpid())),
            MetadataEntry("step_key", value=self.step_key),
        ]
        yield instance.report_engine_event(
            "Executing step {} in subprocess".format(self.step_key),
            self.pipeline_run,
            EngineEventData(subprocess_metadata, marker_end=DELEGATE_MARKER),
            MultiprocessExecutor,
            self.step_key,
        )

        yield from execute_plan_iterator(
            step_plan,
            recon_pipeline,
            self.pipeline_run,
            run_config=self.run_config,
            retry_mode=self.retry_mode.for_inner_plan(),
            instance=instance,
        )
def test_execution_plan_for_composite_solid_with_config_mapping():
    """Executing the config-mapped composite pipeline emits the expected event types."""
    solid_config = {'foo': 'baz', 'bar': 3}
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': solid_config,
            },
        },
    }

    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )

    events = list(execute_plan_iterator(execution_plan, environment_dict=environment_dict))

    expected_event_types = [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
    assert [e.event_type_value for e in events] == expected_event_types
def execute_step_with_structured_logs_command(input_json):
    """Execute the steps described by ``input_json`` and echo their events.

    ``input_json`` is deserialized into an ``ExecuteStepArgs``. Every event
    produced by ``execute_plan_iterator`` is serialized and collected, and the
    collected lines are then written with ``click.echo``.

    ``signal.signal`` raises ``ValueError`` when called from a thread other
    than the main thread. That case is downgraded to a warning so the command
    still runs, only without the SIGTERM -> SIGINT handler mapping.
    """
    # Imported locally so this function does not depend on module-level
    # imports it did not previously need.
    import threading
    import warnings

    try:
        # Route SIGTERM to whatever handler is currently installed for SIGINT.
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step_with_structured_logs) from threads "
                "other than the main thread."
            ).format(thread_name=threading.current_thread().name)
        )

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    with (
        DagsterInstance.from_ref(args.instance_ref) if args.instance_ref else DagsterInstance.get()
    ) as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        # Collect every serialized event before echoing any of them.
        buff = []
        for event in execute_plan_iterator(
            execution_plan,
            pipeline_run,
            instance,
            run_config=args.run_config,
            retries=retries,
        ):
            buff.append(serialize_dagster_namedtuple(event))

        for line in buff:
            click.echo(line)
def execute_step_command(input_json):
    """Execute the steps described by ``input_json`` and echo their events.

    Everything runs inside ``capture_interrupts()``. ``input_json`` is
    deserialized into an ``ExecuteStepArgs``; events are serialized and
    collected while the plan runs, then written with ``click.echo``. A single
    serialized ``StepExecutionSkipped`` is echoed instead when
    ``should_skip_step`` returns true, and nothing is echoed when step
    verification is requested and fails.
    """
    with capture_interrupts():
        args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

        if args.instance_ref:
            instance_cm = DagsterInstance.from_ref(args.instance_ref)
        else:
            instance_cm = DagsterInstance.get()

        with instance_cm as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".format(
                    args.pipeline_run_id
                ),
            )

            if args.should_verify_step:
                verified = verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
                )
                if not verified:
                    return

            recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)
            subset_pipeline = recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

            execution_plan = create_execution_plan(
                subset_pipeline,
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )

            # Flag that the step execution is skipped
            if should_skip_step(execution_plan, instance=instance, run_id=pipeline_run.run_id):
                click.echo(serialize_dagster_namedtuple(StepExecutionSkipped()))
                return

            # Collect every serialized event before echoing any of them.
            serialized_events = [
                serialize_dagster_namedtuple(event)
                for event in execute_plan_iterator(
                    execution_plan,
                    pipeline_run,
                    instance,
                    run_config=pipeline_run.run_config,
                    retry_mode=args.retry_mode,
                )
            ]

            for serialized_event in serialized_events:
                click.echo(serialized_event)
def test_execute_plan_iterator():
    """Closing the plan iterator after the first STEP_START still logs both cleanup messages."""
    captured_records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured_records.append(record)

    instance = DagsterInstance.local_temp()

    mode_def = ModeDefinition(
        resource_defs={'a': resource_a, 'b': resource_b},
        logger_defs={'callback': construct_event_logger(event_callback)},
    )
    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[mode_def],
    )

    environment_dict = {'loggers': {'callback': {}}}

    run_to_create = PipelineRun(
        pipeline_name=pipeline.name,
        run_id=make_new_run_id(),
        environment_dict={'loggers': {'callback': {}}},
        mode='default',
        status=PipelineRunStatus.NOT_STARTED,
    )
    pipeline_run = instance.create_run(run_to_create)

    execution_plan = create_execution_plan(pipeline, environment_dict)
    iterator = execute_plan_iterator(
        execution_plan, pipeline_run, instance, environment_dict=environment_dict
    )

    # Advance until the first step starts, then abandon the iterator.
    while True:
        event = next(iterator)
        if event.event_type_value == 'STEP_START':
            break

    iterator.close()

    messages = [
        record.user_message for record in captured_records if not record.is_dagster_event
    ]
    assert 'CLEANING A' in messages
    assert 'CLEANING B' in messages
def test_execute_plan_iterator():
    """Closing the plan iterator after the first STEP_START still logs both cleanup messages."""
    captured_records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured_records.append(record)

    with instance_for_test() as instance:
        mode_def = ModeDefinition(
            resource_defs={"a": resource_a, "b": resource_b},
            logger_defs={"callback": construct_event_logger(event_callback)},
        )
        pipeline = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[mode_def],
        )

        run_config = {"loggers": {"callback": {}}}

        execution_plan = create_execution_plan(pipeline, run_config=run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline,
            run_config={"loggers": {"callback": {}}},
            execution_plan=execution_plan,
        )

        iterator = execute_plan_iterator(
            execution_plan,
            InMemoryPipeline(pipeline),
            pipeline_run,
            instance,
            run_config=run_config,
        )

        # Advance until the first step starts, then abandon the iterator.
        while True:
            event = next(iterator)
            if event.event_type_value == "STEP_START":
                break

        iterator.close()

        messages = [
            record.user_message
            for record in captured_records
            if not record.is_dagster_event
        ]
        assert "CLEANING A" in messages
        assert "CLEANING B" in messages
def _execute_plan(_self, instance_ref_dict, handle_dict, run_id, step_keys, retries_dict):
    """Execute ``step_keys`` of run ``run_id`` and return the serialized events.

    Args:
        instance_ref_dict: dict form of an ``InstanceRef``.
        handle_dict: dict form of an ``ExecutionTargetHandle``.
        run_id: id of the pipeline run to load from the instance.
        step_keys: list of step key strings to execute.
        retries_dict: config dict passed to ``Retries.from_config``.

    Returns:
        A list holding one ``serialize_dagster_namedtuple`` result per event:
        the initial engine event followed by every event produced by
        ``execute_plan_iterator``.
    """
    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.dict_param(handle_dict, 'handle_dict')
    check.str_param(run_id, 'run_id')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.dict_param(retries_dict, 'retries_dict')

    instance = DagsterInstance.from_ref(InstanceRef.from_dict(instance_ref_dict))
    handle = ExecutionTargetHandle.from_dict(handle_dict)
    retries = Retries.from_config(retries_dict)

    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    full_pipeline_def = handle.build_pipeline_definition()
    pipeline_def = full_pipeline_def.build_sub_pipeline(pipeline_run.selector.solid_subset)

    step_keys_str = ", ".join(step_keys)

    run_plan = create_execution_plan(
        pipeline_def,
        pipeline_run.environment_dict,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )
    # Narrow the run-level plan to just the steps handed to this task.
    execution_plan = run_plan.build_subset_plan(step_keys)

    worker_metadata = [
        EventMetadataEntry.text(step_keys_str, 'step_keys'),
    ]
    # The engine event is reported first so it leads the returned event list.
    events = [
        instance.report_engine_event(
            'Executing steps {} in celery worker'.format(step_keys_str),
            pipeline_run,
            EngineEventData(worker_metadata, marker_end=DELEGATE_MARKER),
            CeleryEngine,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )
    ]
    events.extend(
        execute_plan_iterator(
            execution_plan,
            pipeline_run=pipeline_run,
            environment_dict=pipeline_run.environment_dict,
            instance=instance,
            retries=retries,
        )
    )

    return [serialize_dagster_namedtuple(event) for event in events]
def test_execute_plan_iterator():
    """Closing the plan iterator after the first STEP_START still logs both cleanup messages."""
    captured_records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        captured_records.append(record)

    instance = DagsterInstance.local_temp()

    mode_def = ModeDefinition(
        resource_defs={'a': resource_a, 'b': resource_b},
        logger_defs={'callback': construct_event_logger(event_callback)},
    )
    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[mode_def],
    )

    run_config = {'loggers': {'callback': {}}}

    execution_plan = create_execution_plan(pipeline, run_config=run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline,
        run_config={'loggers': {'callback': {}}},
        execution_plan=execution_plan,
    )

    iterator = execute_plan_iterator(
        execution_plan, pipeline_run, instance, run_config=run_config
    )

    # Advance until the first step starts, then abandon the iterator.
    while True:
        event = next(iterator)
        if event.event_type_value == 'STEP_START':
            break

    iterator.close()

    messages = [
        record.user_message for record in captured_records if not record.is_dagster_event
    ]
    assert 'CLEANING A' in messages
    assert 'CLEANING B' in messages
def _execute_plan(_self, instance_ref_dict, executable_dict, run_id, step_keys, retries_dict):
    """Execute ``step_keys`` of run ``run_id`` and return the serialized events.

    Args:
        instance_ref_dict: dict form of an ``InstanceRef``.
        executable_dict: dict from which the ``ReconstructablePipeline`` is
            rebuilt.
        run_id: id of the pipeline run to load from the instance.
        step_keys: list of step key strings to execute.
        retries_dict: config dict passed to ``Retries.from_config``.

    Returns:
        A list holding one ``serialize_dagster_namedtuple`` result per event:
        the initial engine event followed by every event produced by
        ``execute_plan_iterator``.
    """
    check.dict_param(instance_ref_dict, "instance_ref_dict")
    check.dict_param(executable_dict, "executable_dict")
    check.str_param(run_id, "run_id")
    check.list_param(step_keys, "step_keys", of_type=str)
    check.dict_param(retries_dict, "retries_dict")

    instance = DagsterInstance.from_ref(InstanceRef.from_dict(instance_ref_dict))
    pipeline = ReconstructablePipeline.from_dict(executable_dict)
    retries = Retries.from_config(retries_dict)

    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_keys_str = ", ".join(step_keys)

    run_plan = create_execution_plan(
        pipeline,
        pipeline_run.run_config,
        mode=pipeline_run.mode,
        step_keys_to_execute=pipeline_run.step_keys_to_execute,
    )
    # Narrow the run-level plan to just the steps handed to this task.
    execution_plan = run_plan.build_subset_plan(step_keys)

    worker_metadata = [
        EventMetadataEntry.text(step_keys_str, "step_keys"),
    ]
    # The engine event is reported first so it leads the returned event list.
    events = [
        instance.report_engine_event(
            "Executing steps {} in celery worker".format(step_keys_str),
            pipeline_run,
            EngineEventData(worker_metadata, marker_end=DELEGATE_MARKER),
            CeleryExecutor,
            step_key=execution_plan.step_key_for_single_step_plans(),
        )
    ]
    events.extend(
        execute_plan_iterator(
            execution_plan,
            pipeline_run=pipeline_run,
            run_config=pipeline_run.run_config,
            instance=instance,
            retries=retries,
        )
    )

    return [serialize_dagster_namedtuple(event) for event in events]
def execute(self):
    """Yield the events from executing only ``self.step_key``.

    The ``execution`` entry of the environment dict is replaced with an empty
    ``in_process`` config before the plan is created and executed.
    """
    check.inst(self.executor_config, MultiprocessExecutorConfig)

    pipeline_def = self.executor_config.handle.build_pipeline_definition()

    in_process_execution = {'in_process': {}}
    environment_dict = dict(self.environment_dict, execution=in_process_execution)

    full_plan = create_execution_plan(pipeline_def, environment_dict, self.pipeline_run)
    execution_plan = full_plan.build_subset_plan([self.step_key])

    step_events = execute_plan_iterator(
        execution_plan,
        self.pipeline_run,
        environment_dict=environment_dict,
        instance=DagsterInstance.from_ref(self.instance_ref),
    )
    for step_event in step_events:
        yield step_event
def execute(self):
    """Yield the events from executing only ``self.step_key``.

    Loads the pipeline through the executor config, starts the termination
    thread, and runs the single-step plan with an environment dict produced by
    ``override_env_for_inner_executor``.
    """
    executor_config = self.executor_config
    check.inst(executor_config, MultiprocessExecutorConfig)

    pipeline_def = executor_config.load_pipeline(self.pipeline_run)

    start_termination_thread(self.term_event)

    full_plan = create_execution_plan(
        pipeline_def, self.environment_dict, self.pipeline_run
    )
    execution_plan = full_plan.build_subset_plan([self.step_key])

    step_events = execute_plan_iterator(
        execution_plan,
        self.pipeline_run,
        environment_dict=override_env_for_inner_executor(
            self.environment_dict,
            executor_config.retries,
            self.step_key,
            DELEGATE_MARKER,
        ),
        instance=DagsterInstance.from_ref(self.instance_ref),
    )
    for step_event in step_events:
        yield step_event
def execute(self):
    """Yield the events from executing ``self.step_key`` in this process.

    The run config is tagged with the current pid, and the ``execution`` entry
    of the environment dict is replaced with an empty ``in_process`` config
    before the plan is created and executed.
    """
    check.inst(self.executor_config, MultiprocessExecutorConfig)

    pipeline_def = self.executor_config.handle.build_pipeline_definition()
    run_config = self.run_config.with_tags(pid=str(os.getpid()))

    in_process_execution = {'in_process': {}}
    environment_dict = dict(self.environment_dict, execution=in_process_execution)

    execution_plan = create_execution_plan(pipeline_def, environment_dict, run_config)

    step_events = execute_plan_iterator(
        execution_plan,
        environment_dict,
        run_config,
        step_keys_to_execute=[self.step_key],
        instance=DagsterInstance.from_ref(self.instance_ref),
    )
    for step_event in step_events:
        yield step_event
def execute_step_with_structured_logs_command(input_json):
    """Execute the steps described by ``input_json`` and echo their events.

    ``input_json`` is deserialized into an ``ExecuteStepArgs``. Every event
    produced by ``execute_plan_iterator`` is serialized and collected, and the
    collected lines are then written with ``click.echo``. A ``ValueError``
    from installing the signal handler is reported as a warning rather than
    raised.
    """
    try:
        # Route SIGTERM to whatever handler is currently installed for SIGINT.
        signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
    except ValueError:
        current_thread_name = threading.current_thread().name
        warnings.warn(
            (
                "Unexpected error attempting to manage signal handling on thread {thread_name}. "
                "You should not invoke this API (execute_step_with_structured_logs) from threads "
                "other than the main thread."
            ).format(thread_name=current_thread_name)
        )

    args = check.inst(deserialize_json_to_dagster_namedtuple(input_json), ExecuteStepArgs)

    if args.instance_ref:
        instance_cm = DagsterInstance.from_ref(args.instance_ref)
    else:
        instance_cm = DagsterInstance.get()

    with instance_cm as instance:
        pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        subset_pipeline = recon_pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute
        )
        execution_plan = create_execution_plan(
            subset_pipeline,
            run_config=args.run_config,
            step_keys_to_execute=args.step_keys_to_execute,
            mode=args.mode,
        )

        retries = Retries.from_config(args.retries_dict)

        # Collect every serialized event before echoing any of them.
        serialized_events = [
            serialize_dagster_namedtuple(event)
            for event in execute_plan_iterator(
                execution_plan,
                pipeline_run,
                instance,
                run_config=args.run_config,
                retries=retries,
            )
        ]

        for serialized_event in serialized_events:
            click.echo(serialized_event)
def test_execution_plan_for_composite_solid():
    """Executing the composite pipeline emits the expected sequence of event types."""
    nested_solid_config = {
        'node_a': {'config': {'foo': 'baz'}},
        'node_b': {'config': {'bar': 3}},
    }
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {'solids': nested_solid_config},
        },
    }

    execution_plan = create_execution_plan(composite_pipeline, environment_dict=environment_dict)

    events = list(
        execute_plan_iterator(
            execution_plan,
            environment_dict=environment_dict,
            instance=DagsterInstance.ephemeral(),
        )
    )

    expected_event_types = [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
    assert [e.event_type_value for e in events] == expected_event_types