def launch_run(self, instance, run, external_pipeline):
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)
    recon_pipeline = recon_pipeline_from_pipeline_handle(external_pipeline.handle)
    execute_run(recon_pipeline, run, instance)
    return run
def test_execute_run_bad_state():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED".format(
                pipeline_run.run_id
            ),
        ):
            execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
def test_event_callback_logging():
    events = defaultdict(list)

    def _event_callback(record):
        assert isinstance(record, EventRecord)
        if record.is_dagster_event:
            events[record.dagster_event.event_type].append(record)

    pipeline = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository",
        "define_hello_logging_pipeline",
    )
    pipeline_def = pipeline.get_definition()
    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(pipeline_def)
    instance.watch_event_logs(pipeline_run.run_id, -1, _event_callback)

    execute_run(pipeline, pipeline_run, instance)

    passed_before_timeout = False
    retries = 5
    while retries > 0:
        time.sleep(0.333)
        if DagsterEventType.PIPELINE_FAILURE in events.keys():
            break
        if DagsterEventType.PIPELINE_SUCCESS in events.keys():
            passed_before_timeout = True
            break
        retries -= 1

    assert passed_before_timeout
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    execute_run(recon_pipeline, run, self._instance)
    return run
def test_reexecution_fs_storage():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(
        pipeline_def, environment_dict=environment_dict, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(pipeline_def, pipeline_run, instance)

    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 2
    assert reexecution_result.result_for_solid('return_one').output_value() == 1
    assert reexecution_result.result_for_solid('add_one').output_value() == 2

    reexecution_run = instance.get_run_by_id(reexecution_result.run_id)
    assert reexecution_run.parent_run_id == pipeline_result.run_id
    assert reexecution_run.root_run_id == pipeline_result.run_id

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    grandchild_result = execute_run(pipeline_def, pipeline_run, instance)

    assert grandchild_result.success
    assert len(grandchild_result.solid_result_list) == 2
    assert grandchild_result.result_for_solid('return_one').output_value() == 1
    assert grandchild_result.result_for_solid('add_one').output_value() == 2

    grandchild_run = instance.get_run_by_id(grandchild_result.run_id)
    assert grandchild_run.parent_run_id == reexecution_result.run_id
    assert grandchild_run.root_run_id == pipeline_result.run_id
def test_execute_canceled_state():
    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)

        logs = instance.all_logs(pipeline_run.run_id)
        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message
        )

        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def), iter_run, instance=instance)
        )
        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message
        )
def _synchronously_execute_run_within_hosted_user_process(graphene_info, run_id):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    instance = graphene_info.context.instance
    pipeline_run = instance.get_run_by_id(run_id)
    if not pipeline_run:
        return graphene_info.schema.type_named('PipelineRunNotFoundError')(run_id)

    external_pipeline = legacy_get_external_pipeline_or_raise(
        graphene_info, pipeline_run.pipeline_name, pipeline_run.solid_subset
    )

    validated_config = validate_config_from_snap(
        external_pipeline.config_schema_snapshot,
        external_pipeline.root_config_key_for_mode(pipeline_run.mode),
        pipeline_run.environment_dict,
    )

    if not validated_config.success:
        # If the config is invalid, we construct a DagsterInvalidConfigError exception and
        # insert it into the event log. We also return a PipelineConfigValidationInvalid
        # user-facing graphql error.

        # We currently re-use the engine events machinery to add the error to the event log, but
        # may need to create a new event type and instance method to handle these errors.
        invalid_config_exception = DagsterInvalidConfigError(
            'Error in config for pipeline {}'.format(external_pipeline.name),
            validated_config.errors,
            pipeline_run.environment_dict,
        )

        instance.report_engine_event(
            str(invalid_config_exception.message),
            pipeline_run,
            EngineEventData.engine_error(
                SerializableErrorInfo(
                    invalid_config_exception.message,
                    [],
                    # Note: the original passed DagsterInvalidConfigError.__class__.__name__,
                    # which evaluates to 'type'; the class's own name is what belongs here.
                    DagsterInvalidConfigError.__name__,
                    None,
                )
            ),
        )

        instance.report_run_failed(pipeline_run)

        return DauphinPipelineConfigValidationInvalid.for_validation_errors(
            external_pipeline, validated_config.errors
        )

    pipeline_def = pipeline_def_from_pipeline_handle(external_pipeline.handle)
    execute_run(pipeline_def, pipeline_run, instance)
    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run)
    )
def _synthesize_events(solids_fn, run_id=None, check_success=True):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            a_pipe, run_id=run_id, run_config={"loggers": {"callback": {}, "console": {}}}
        )

        result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

        if check_success:
            assert result.success

        return events, result
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe, run_id=run_id, environment_dict={'loggers': {'callback': {}, 'console': {}}}
    )

    result = execute_run(InMemoryExecutablePipeline(a_pipe), pipeline_run, instance)

    assert result.success

    return events, result
def synthesize_events(solids_fn, run_id=None):
    events = []

    def _append_event(event):
        events.append(event)

    @pipeline(mode_defs=[mode_def(_append_event)])
    def a_pipe():
        solids_fn()

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        a_pipe, run_id=run_id, run_config={"loggers": {"callback": {}, "console": {}}}
    )

    result = execute_run(InMemoryPipeline(a_pipe), pipeline_run, instance)

    assert result.success

    return events, result
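# A minimal end-to-end sketch distilled from the snippets above, showing the shared
# create-run-then-execute pattern in isolation. The solid/pipeline names are
# illustrative, and the import paths are assumptions that shifted across
# dagster 0.x releases; treat this as a sketch, not a canonical example.
from dagster import pipeline, solid
from dagster.core.definitions.pipeline_base import InMemoryPipeline
from dagster.core.execution.api import execute_run
from dagster.core.test_utils import instance_for_test


@solid
def emit_one(_):
    return 1


@pipeline
def minimal_pipeline():
    emit_one()


def execute_minimal_pipeline():
    # Create a run record on a throwaway instance, then execute it in process.
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(minimal_pipeline)
        result = execute_run(InMemoryPipeline(minimal_pipeline), pipeline_run, instance)
        assert result.success
        return result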
def test_run_group():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline('foo_pipeline')
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
            )
            execute_run(InMemoryExecutablePipeline(foo_pipeline), run, instance)
            runs.append(run)

        context_at_time_1 = define_context_for_file(__file__, 'get_repo_at_time_1', instance)

        result_one = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': root_run_id},
        )
        assert result_one.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_one.data['runGroupOrError']['runs']) == 4

        result_two = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={'runId': runs[-1].run_id},
        )
        # The original rechecked result_one's typename here; result_two is what
        # this query just produced.
        assert result_two.data['runGroupOrError']['__typename'] == 'RunGroup'
        assert len(result_two.data['runGroupOrError']['runs']) == 4

        assert (
            result_one.data['runGroupOrError']['rootRunId']
            == result_two.data['runGroupOrError']['rootRunId']
        )
        assert (
            result_one.data['runGroupOrError']['runs']
            == result_two.data['runGroupOrError']['runs']
        )
def test_single_step_resource_event_logs():
    # Test to attribute logs for single-step plans which are often the representation of
    # sub-plans in a multiprocessing execution environment. Most likely will need to be rewritten
    # with the refactor detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = 'I AM A SOLID'
    USER_RESOURCE_MESSAGE = 'I AM A RESOURCE'
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={'a'})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return 'A'

    pipeline = PipelineDefinition(
        name='resource_logging_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a},
                logger_defs={'callback': construct_event_logger(event_callback)},
            )
        ],
    )

    instance = DagsterInstance.local_temp()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline,
        run_config={'loggers': {'callback': {}}},
        step_keys_to_execute=['resource_solid.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline), pipeline_run, instance)

    assert result.success
    log_messages = [event for event in events if isinstance(event, LogMessageRecord)]
    assert len(log_messages) == 2

    resource_log_message = next(
        iter(
            [message for message in log_messages if message.user_message == USER_RESOURCE_MESSAGE]
        )
    )
    assert resource_log_message.step_key == 'resource_solid.compute'
def test_pipeline_step_key_subset_execution():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    environment_dict = env_with_fs({'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}})
    result = execute_pipeline(pipeline_def, environment_dict=environment_dict, instance=instance)

    assert result.success

    store = build_fs_intermediate_store(instance.intermediates_directory, result.run_id)
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    ## re-execute add_two

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_two.compute'],
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

    pipeline_reexecution_result = execute_run(pipeline_def, pipeline_run, instance)

    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    store = build_fs_intermediate_store(
        instance.intermediates_directory, pipeline_reexecution_result.run_id
    )
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 4
    assert store.get_intermediate(None, 'add_two.compute', Int).obj == 6

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    with pytest.raises(
        DagsterExecutionStepNotFoundError, match='Execution plan does not contain step'
    ):
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            step_keys_to_execute=['nope.compute'],
            parent_run_id=result.run_id,
            root_run_id=result.run_id,
        )
def test_single_step_resource_event_logs():
    # Test to attribute logs for single-step plans which are often the representation of
    # sub-plans in a multiprocessing execution environment. Most likely will need to be rewritten
    # with the refactor detailed in https://github.com/dagster-io/dagster/issues/2239
    USER_SOLID_MESSAGE = "I AM A SOLID"
    USER_RESOURCE_MESSAGE = "I AM A RESOURCE"
    events = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        events.append(record)

    @solid(required_resource_keys={"a"})
    def resource_solid(context):
        context.log.info(USER_SOLID_MESSAGE)

    @resource
    def resource_a(context):
        context.log.info(USER_RESOURCE_MESSAGE)
        return "A"

    the_pipeline = PipelineDefinition(
        name="resource_logging_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={"a": resource_a},
                logger_defs={"callback": construct_event_logger(event_callback)},
            )
        ],
    )

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            the_pipeline,
            run_config={"loggers": {"callback": {}}},
            step_keys_to_execute=["resource_solid"],
        )

        result = execute_run(InMemoryPipeline(the_pipeline), pipeline_run, instance)

        assert result.success
        log_messages = [
            event
            for event in events
            if isinstance(event, EventRecord) and event.level == coerce_valid_log_level("INFO")
        ]
        assert len(log_messages) == 2

        resource_log_message = next(
            iter(
                [
                    message
                    for message in log_messages
                    if message.user_message == USER_RESOURCE_MESSAGE
                ]
            )
        )
        assert resource_log_message.step_key == "resource_solid"
def _synchronously_execute_run_within_hosted_user_process(
    graphene_info,
    repository_location_name,
    repository_name,
    run_id,
):
    run_info_or_error = get_run_execution_info_for_created_run_or_error(
        graphene_info, repository_location_name, repository_name, run_id
    )

    if not isinstance(run_info_or_error, RunExecutionInfo):
        # if it is not a success the return value is the dauphin error
        return run_info_or_error

    external_pipeline, pipeline_run = run_info_or_error
    recon_pipeline = recon_pipeline_from_origin(external_pipeline.get_origin())
    execute_run(recon_pipeline, pipeline_run, graphene_info.context.instance)
    return graphene_info.schema.type_named("ExecuteRunInProcessSuccess")(
        run=graphene_info.schema.type_named("PipelineRun")(pipeline_run)
    )
def test_run_group():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={PARENT_RUN_ID_TAG: root_run_id, ROOT_RUN_ID_TAG: root_run_id},
            )
            execute_run(InMemoryPipeline(foo_pipeline), run, instance)
            runs.append(run)

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result_one = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": root_run_id},
            )
            assert result_one.data["runGroupOrError"]["__typename"] == "RunGroup"
            assert len(result_one.data["runGroupOrError"]["runs"]) == 4

            result_two = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": runs[-1].run_id},
            )
            # The original rechecked result_one's typename here; result_two is what
            # this query just produced.
            assert result_two.data["runGroupOrError"]["__typename"] == "RunGroup"
            assert len(result_two.data["runGroupOrError"]["runs"]) == 4

            assert (
                result_one.data["runGroupOrError"]["rootRunId"]
                == result_two.data["runGroupOrError"]["rootRunId"]
            )
            assert (
                result_one.data["runGroupOrError"]["runs"]
                == result_two.data["runGroupOrError"]["runs"]
            )
def execute_pipeline(self, pipeline, pipeline_run, instance):
    check.inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)
    check.inst_param(instance, 'instance', DagsterInstance)

    # Mark the run id active for the duration of the synchronous execution.
    self._active.add(pipeline_run.run_id)
    result = execute_run(pipeline, pipeline_run, instance)
    self._active.remove(pipeline_run.run_id)
    return result
def execute_pipeline(
    self,
    instance,
    external_pipeline,
    pipeline_run,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)

    pipeline = self.get_reconstructable_pipeline(
        external_pipeline.name
    ).subset_for_execution_from_existing_pipeline(external_pipeline.solids_to_execute)

    execution_result = execute_run(pipeline, pipeline_run, instance)

    return ExternalPipelineExecutionResult(event_list=execution_result.event_list)
def test_execution_plan_subset_with_aliases():
    resources_initted = {}

    @resource
    def resource_a(_):
        resources_initted["a"] = True
        yield "A"

    @resource
    def resource_b(_):
        resources_initted["b"] = True
        yield "B"

    @solid(required_resource_keys={"a"})
    def consumes_resource_a(context):
        assert context.resources.a == "A"

    @solid(required_resource_keys={"b"})
    def consumes_resource_b(context):
        assert context.resources.b == "B"

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={"a": resource_a, "b": resource_b})],
    )
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias("b_alias")()

    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(
        selective_init_test_pipeline_with_alias, step_keys_to_execute=["b_alias"]
    )

    pipeline_run = instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        execution_plan=execution_plan,
    )

    result = execute_run(
        InMemoryPipeline(selective_init_test_pipeline_with_alias), pipeline_run, instance
    )

    assert result.success
    assert set(resources_initted.keys()) == {"b"}
def test_execution_plan_snapshot_backcompat():
    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir) if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir, snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline, run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline, run, instance, raise_on_error=True)
                assert result.success
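# A hedged sketch of the snapshot round-trip exercised by the backcompat test above:
# build a plan, snapshot it, rebuild from the snapshot, and compare step keys. The
# placeholder snapshot id, the import paths, and the assumption that an
# ExecutionPlan exposes a `steps` list are all unverified conventions from the
# dagster 0.x era, not a canonical recipe.
from dagster.core.execution.api import create_execution_plan
from dagster.core.execution.plan.plan import ExecutionPlan
from dagster.core.snap import snapshot_from_execution_plan


def roundtrip_plan(pipeline_def):
    # Build a fresh plan directly from the definition.
    plan = create_execution_plan(pipeline_def)
    # Snapshot it; the second argument is only recorded, so a placeholder works here.
    snap = snapshot_from_execution_plan(plan, "placeholder-snapshot-id")
    # Rebuild from the snapshot, as the test does for historical runs.
    rebuilt = ExecutionPlan.rebuild_from_snapshot(pipeline_def.name, snap)
    # The rebuilt plan should cover the same step keys as the original.
    assert {step.key for step in rebuilt.steps} == {step.key for step in plan.steps}
    return rebuilt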
def test_execution_plan_subset_strict_resources_within_composite():
    resources_initted = {}

    pipeline_def = create_composite_solid_pipeline(resources_initted)

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        step_keys_to_execute=['wraps_b.consumes_resource_b.compute'],
    )

    result = execute_run(InMemoryExecutablePipeline(pipeline_def), pipeline_run, instance)

    assert result.success
    assert set(resources_initted.keys()) == {'b'}
def test_execution_plan_subset_strict_resources():
    resources_initted = {}

    instance = DagsterInstance.ephemeral()

    pipeline_def = get_resource_init_pipeline(resources_initted)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        step_keys_to_execute=['consumes_resource_b.compute'],
    )

    result = execute_run(pipeline_def, pipeline_run, instance)

    assert result.success
    assert set(resources_initted.keys()) == {'b'}
def test_execution_plan_subset_with_aliases():
    resources_initted = {}

    @resource
    def resource_a(_):
        resources_initted['a'] = True
        yield 'A'

    @resource
    def resource_b(_):
        resources_initted['b'] = True
        yield 'B'

    @solid(required_resource_keys={'a'})
    def consumes_resource_a(context):
        assert context.resources.a == 'A'

    @solid(required_resource_keys={'b'})
    def consumes_resource_b(context):
        assert context.resources.b == 'B'

    @pipeline(
        mode_defs=[ModeDefinition(resource_defs={'a': resource_a, 'b': resource_b})],
    )
    def selective_init_test_pipeline_with_alias():
        consumes_resource_a()
        consumes_resource_b.alias('b_alias')()

    instance = DagsterInstance.ephemeral()

    pipeline_run = instance.create_run_for_pipeline(
        selective_init_test_pipeline_with_alias,
        step_keys_to_execute=['b_alias.compute'],
    )

    result = execute_run(
        InMemoryExecutablePipeline(selective_init_test_pipeline_with_alias), pipeline_run, instance
    )

    assert result.success
    assert set(resources_initted.keys()) == {'b'}
def test_execution_plan_subset_strict_resources():
    resources_initted = {}

    instance = DagsterInstance.ephemeral()

    pipeline_def = get_resource_init_pipeline(resources_initted)

    execution_plan = create_execution_plan(
        pipeline_def, step_keys_to_execute=["consumes_resource_b"]
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        execution_plan=execution_plan,
    )

    result = execute_run(InMemoryPipeline(pipeline_def), pipeline_run, instance)

    assert result.success
    assert set(resources_initted.keys()) == {"b"}
def test_single_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, environment_dict, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result = execute_run(pipeline_def, reexecution_pipeline_run, instance)

    assert reexecution_result.success
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one').output_value() == 2
def test_two_step_reexecution():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    environment_dict = {'storage': {'filesystem': {}}}

    pipeline_result = execute_pipeline(
        two_step_reexec, environment_dict=environment_dict, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        two_step_reexec,
        environment_dict=environment_dict,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )
    reexecution_result = execute_run(
        InMemoryExecutablePipeline(two_step_reexec), reexecution_pipeline_run, instance=instance
    )

    assert reexecution_result.success
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
def in_mp_process(cls, pipeline_dict, pipeline_run, instance_ref, term_event):
    """
    Execute pipeline using message queue as a transport
    """
    pipeline_name = pipeline_run.pipeline_name

    instance = DagsterInstance.from_ref(instance_ref)
    pid = os.getpid()
    instance.report_engine_event(
        'Started process for pipeline (pid: {pid}).'.format(pid=pid),
        pipeline_run,
        EngineEventData.in_process(pid, marker_end='dagit_subprocess_init'),
        cls,
    )

    start_termination_thread(term_event)

    try:
        pipeline = InterProcessExecutablePipeline.from_dict(pipeline_dict)
    except Exception:  # pylint: disable=broad-except
        instance.report_engine_event(
            'Failed attempting to load pipeline "{}"'.format(pipeline_name),
            pipeline_run,
            EngineEventData.engine_error(serializable_error_info_from_exc_info(sys.exc_info())),
            cls,
        )
        return

    try:
        return execute_run(
            pipeline.subset_for_execution(pipeline_run.selector.solid_subset),
            pipeline_run,
            instance,
        )

    # Add a DagsterEvent for unexpected exceptions
    # Explicitly ignore KeyboardInterrupts since they are used for termination
    except DagsterSubprocessError as err:
        if not all(
            [
                err_info.cls_name == 'KeyboardInterrupt'
                for err_info in err.subprocess_error_infos
            ]
        ):
            instance.report_engine_event(
                'An exception was thrown during execution that is likely a framework error, '
                'rather than an error in user code.',
                pipeline_run,
                EngineEventData.engine_error(
                    serializable_error_info_from_exc_info(sys.exc_info())
                ),
                cls,
            )
    except Exception:  # pylint: disable=broad-except
        instance.report_engine_event(
            'An exception was thrown during execution that is likely a framework error, '
            'rather than an error in user code.',
            pipeline_run,
            EngineEventData.engine_error(serializable_error_info_from_exc_info(sys.exc_info())),
            cls,
        )
    finally:
        instance.report_engine_event(
            'Process for pipeline exited (pid: {pid}).'.format(pid=pid),
            pipeline_run,
            cls=cls,
        )
def launch_run(self, context: LaunchRunContext) -> None:
    recon_pipeline = recon_pipeline_from_origin(context.pipeline_code_origin)
    execute_run(recon_pipeline, context.pipeline_run, self._instance)
def test_get_event_records_sqlite(self, storage):
    # test for sqlite only because sqlite requires special logic to handle cross-run queries
    if not isinstance(storage, SqliteEventLogStorage):
        pytest.skip()

    asset_key = AssetKey(["path", "to", "asset_one"])

    events = []

    def _append_event(event):
        events.append(event)

    @solid
    def materialize_one(_):
        yield AssetMaterialization(
            asset_key=asset_key,
            metadata={
                "text": "hello",
                "json": {"hello": "world"},
                "one_float": 1.0,
                "one_int": 1,
            },
        )
        yield Output(1)

    @pipeline(mode_defs=[_mode_def(_append_event)])
    def a_pipe():
        materialize_one()

    with instance_for_test() as instance:
        if not storage._instance:  # pylint: disable=protected-access
            storage.register_instance(instance)

        # first run
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe, run_id="1", run_config={"loggers": {"callback": {}, "console": {}}}
            ),
            instance,
        )

        for event in events:
            storage.store_event(event)

        run_records = instance.get_run_records()
        assert len(run_records) == 1

        # all logs returned in descending order
        all_event_records = storage.get_event_records()
        assert _event_types([all_event_records[0].event_log_entry]) == [
            DagsterEventType.PIPELINE_SUCCESS
        ]
        assert _event_types([all_event_records[-1].event_log_entry]) == [
            DagsterEventType.PIPELINE_START
        ]

        # second run
        events = []
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe, run_id="2", run_config={"loggers": {"callback": {}, "console": {}}}
            ),
            instance,
        )
        run_records = instance.get_run_records()
        assert len(run_records) == 2
        for event in events:
            storage.store_event(event)

        # third run
        events = []
        execute_run(
            InMemoryPipeline(a_pipe),
            instance.create_run_for_pipeline(
                a_pipe, run_id="3", run_config={"loggers": {"callback": {}, "console": {}}}
            ),
            instance,
        )
        run_records = instance.get_run_records()
        assert len(run_records) == 3
        for event in events:
            storage.store_event(event)

        # of_type
        filtered_records = storage.get_event_records(
            EventRecordsFilter(
                event_type=DagsterEventType.PIPELINE_SUCCESS,
                after_cursor=RunShardedEventsCursor(
                    id=0, run_updated_after=run_records[-1].update_timestamp
                ),  # events after first run
            ),
            ascending=True,
        )
        assert len(filtered_records) == 2
        assert _event_types([r.event_log_entry for r in filtered_records]) == [
            DagsterEventType.PIPELINE_SUCCESS,
            DagsterEventType.PIPELINE_SUCCESS,
        ]
        assert [r.event_log_entry.run_id for r in filtered_records] == ["2", "3"]
def test_reexecution_fs_storage_with_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, environment_dict, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        step_keys_to_execute=['return_one.compute'],
        parent_run_id=pipeline_result.run_id,
        root_run_id=pipeline_result.run_id,
    )

    reexecution_result_no_subset = execute_run(pipeline_def, reexecution_pipeline_run, instance)
    assert reexecution_result_no_subset.success
    assert len(reexecution_result_no_subset.solid_result_list) == 2
    assert reexecution_result_no_subset.result_for_solid('add_one').skipped
    assert reexecution_result_no_subset.result_for_solid('return_one').output_value() == 1

    pipeline_result_subset = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        instance=instance,
        solid_subset=['return_one'],
    )
    assert pipeline_result_subset.success
    assert len(pipeline_result_subset.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert pipeline_result_subset.result_for_solid('return_one').output_value() == 1

    reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=pipeline_result_subset.run_id,
        root_run_id=pipeline_result_subset.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    reexecution_result = execute_run(pipeline_def, reexecution_pipeline_run, instance)
    assert reexecution_result.success
    assert len(reexecution_result.solid_result_list) == 1
    with pytest.raises(DagsterInvariantViolationError):
        pipeline_result_subset.result_for_solid('add_one')
    assert reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one.compute'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=pipeline_result_subset.run_id,
            root_run_id=pipeline_result_subset.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )

    re_reexecution_pipeline_run = instance.create_run_for_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        parent_run_id=reexecution_result.run_id,
        root_run_id=reexecution_result.run_id,
        solid_subset=['return_one'],
        step_keys_to_execute=['return_one.compute'],
    )

    re_reexecution_result = execute_run(pipeline_def, re_reexecution_pipeline_run, instance)
    assert re_reexecution_result.success
    assert len(re_reexecution_result.solid_result_list) == 1
    assert re_reexecution_result.result_for_solid('return_one').output_value() == 1

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match=re.escape('Execution plan does not contain step: add_one'),
    ):
        instance.create_run_for_pipeline(
            pipeline_def,
            environment_dict=environment_dict,
            parent_run_id=reexecution_result.run_id,
            root_run_id=reexecution_result.run_id,
            solid_subset=['return_one'],
            step_keys_to_execute=['add_one.compute'],
        )