def test_using_gcs_for_subplan(gcs_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'gcs': {'config': {'gcs_bucket': gcs_bucket}}}}

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict
    )

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run, instance) as context:
        store = GCSIntermediateStore(
            gcs_bucket, run_id, client=context.scoped_resources_builder.build().gcs.client
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run, instance) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
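# The tests in this file lean on shared helpers defined elsewhere in the test
# module. The sketches below are hypothetical reconstructions inferred from how
# the tests call them, not the actual definitions: `define_inty_pipeline`
# plausibly builds a two-solid chain whose steps the tests address as
# "return_one"/"add_one" (newer API) or "return_one.compute"/"add_one.compute"
# (older API), and `get_step_output` plausibly scans step events for a matching
# STEP_OUTPUT event.
from dagster import DagsterEventType, pipeline, solid


def define_inty_pipeline():
    @solid
    def return_one(_context):
        return 1

    @solid
    def add_one(_context, num):
        return num + 1

    @pipeline
    def inty_pipeline():
        add_one(return_one())

    return inty_pipeline


def get_step_output(step_events, step_key, output_name="result"):
    # Return the STEP_OUTPUT event emitted by `step_key`, or None if absent.
    for step_event in step_events:
        if (
            step_event.event_type == DagsterEventType.STEP_OUTPUT
            and step_event.step_key == step_key
            and step_event.step_output_data.output_name == output_name
        ):
            return step_event
    return None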
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)
        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            environment_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id
        )
        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
        assert (
            intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj
            == 1
        )

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), environment_config
                ),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
        assert (
            intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
        )
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    execution_plan = create_execution_plan(pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(execution_plan, step_keys_to_execute=['nope'])

    assert exc_info.value.step_key == 'nope'
    assert str(exc_info.value) == 'Execution plan does not contain step "nope"'
def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition(),
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
        )

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["return_one"], pipeline.get_definition(), resolved_run_config
                ),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(return_one_step_events, "return_one")
        with open(
            os.path.join(
                instance.storage_directory(), pipeline_run.run_id, "return_one", "result"
            ),
            "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(
                    ["add_one"], pipeline.get_definition(), resolved_run_config
                ),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            )
        )

        assert get_step_output(add_one_step_events, "add_one")
        with open(
            os.path.join(instance.storage_directory(), pipeline_run.run_id, "add_one", "result"),
            "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2
def test_using_s3_for_subplan(s3_bucket):
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())

    store = S3IntermediateStore(s3_bucket, run_id)

    try:
        return_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=step_keys,
            )
        )

        assert get_step_output(return_one_step_events, 'return_one.compute')
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert store.has_intermediate(context, 'return_one.compute')
            assert store.get_intermediate(context, 'return_one.compute', Int) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=['add_one.compute'],
            )
        )

        assert get_step_output(add_one_step_events, 'add_one.compute')
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert store.has_intermediate(context, 'add_one.compute')
            assert store.get_intermediate(context, 'add_one.compute', Int) == 2
    finally:
        with scoped_pipeline_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            store.rm_intermediate(context, 'return_one.compute')
            store.rm_intermediate(context, 'add_one.compute')
def _do_execute_plan(graphene_info, execution_params, dauphin_pipeline):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    run_id = execution_params.execution_metadata.run_id

    run_config = RunConfig(
        run_id=run_id, mode=execution_params.mode, tags=execution_params.execution_metadata.tags
    )

    execution_plan = create_execution_plan(
        pipeline=dauphin_pipeline.get_dagster_pipeline(),
        environment_dict=execution_params.environment_dict,
        run_config=run_config,
    )

    if execution_params.step_keys:
        for step_key in execution_params.step_keys:
            if not execution_plan.has_step(step_key):
                raise UserFacingGraphQLError(
                    graphene_info.schema.type_named('InvalidStepError')(invalid_step_key=step_key)
                )

    event_logs = []

    def _on_event_record(record):
        if record.is_dagster_event:
            event_logs.append(record)

    graphene_info.context.instance.add_event_listener(run_id, _on_event_record)

    execute_plan(
        execution_plan=execution_plan,
        environment_dict=execution_params.environment_dict,
        run_config=run_config,
        step_keys_to_execute=execution_params.step_keys,
        instance=graphene_info.context.instance,
    )

    def to_graphql_event(event_record):
        return from_dagster_event_record(
            graphene_info, event_record, dauphin_pipeline, execution_plan
        )

    return graphene_info.schema.type_named('ExecutePlanSuccess')(
        pipeline=dauphin_pipeline,
        has_failures=any(
            er
            for er in event_logs
            if er.is_dagster_event and er.dagster_event.event_type == DagsterEventType.STEP_FAILURE
        ),
        step_events=list(map(to_graphql_event, event_logs)),
        raw_event_records=list(map(serialize_dagster_namedtuple, event_logs)),
    )
def test_configured_input_should_skip_step():
    called = {}

    @solid(output_defs=[OutputDefinition(is_required=False)])
    def one(_):
        yield Output(1)

    @solid
    def solid_should_not_skip(_, input_one, input_two):  # pylint: disable=unused-argument
        called["yup"] = True

    @pipeline
    def my_pipeline():
        solid_should_not_skip(one())

    run_config = {"solids": {"solid_should_not_skip": {"inputs": {"input_two": {"value": "2"}}}}}
    execute_pipeline(my_pipeline, run_config=run_config)
    assert called.get("yup")

    # ensure should_skip_step behaves the same as execute_pipeline
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="my_pipeline", run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["one"],
            run_config=run_config,
        ),
        InMemoryPipeline(my_pipeline),
        instance,
        pipeline_run,
        run_config=run_config,
    )
    assert not should_skip_step(
        create_execution_plan(
            my_pipeline,
            step_keys_to_execute=["solid_should_not_skip"],
            run_config=run_config,
        ),
        instance,
        pipeline_run.run_id,
    )
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None, StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(None, Int, StepOutputHandle("add_one")).obj == 2
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'config': {'s3_bucket': s3_bucket}}}}

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(
        pipeline_def, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id)
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
            instance=instance,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, RunConfig(run_id=run_id), instance
    ) as context:
        store = S3IntermediateStore(
            s3_bucket, run_id, s3_session=context.scoped_resources_builder.build().s3.session
        )
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
            instance=instance,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(
        pipeline_def, environment_dict, RunConfig(run_id=run_id), instance
    ) as context:
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
def test_execution_plan_wrong_invalid_output_name():
    pipeline_def = define_addy_pipeline()

    old_run_id = str(uuid.uuid4())
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM, run_id=old_run_id),
    )

    new_run_id = str(uuid.uuid4())

    run_config = RunConfig(
        run_id=new_run_id,
        reexecution_config=ReexecutionConfig(
            previous_run_id=result.run_id,
            step_output_handles=[StepOutputHandle('add_one.compute', 'not_an_output')],
        ),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)

    with pytest.raises(DagsterStepOutputNotFoundError) as exc_info:
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=run_config,
            step_keys_to_execute=['add_two.compute'],
        )

    assert str(exc_info.value) == (
        'You specified a step_output_handle in the ReexecutionConfig that does not exist: '
        'Step add_one.compute does not have output not_an_output.'
    )

    assert exc_info.value.step_key == 'add_one.compute'
    assert exc_info.value.output_name == 'not_an_output'
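# `define_addy_pipeline`, used by the reexecution tests, is another helper
# defined elsewhere; this is a hypothetical sketch inferred from usage. The
# config above feeds an input `num` to "add_one", and "add_two.compute" is
# executed against "add_one.compute"'s stored output, suggesting a
# two-solid chain.
from dagster import pipeline, solid


def define_addy_pipeline():
    @solid
    def add_one(_context, num):
        return num + 1

    @solid
    def add_two(_context, num):
        return num + 2

    @pipeline
    def addy_pipeline():
        add_two(add_one())

    return addy_pipeline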
def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception("Uh oh")

    @solid(required_resource_keys={"failing_resource"})
    def failing_resource_solid(_context):
        pass

    the_pipeline = PipelineDefinition(
        name="test_resource_init_failure",
        solid_defs=[failing_resource_solid],
        mode_defs=[ModeDefinition(resource_defs={"failing_resource": failing_resource})],
    )

    res = execute_pipeline(the_pipeline, raise_on_error=False)

    event_types = [event.event_type_value for event in res.event_list]
    assert DagsterEventType.PIPELINE_FAILURE.value in event_types

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(the_pipeline)
    pipeline_run = instance.create_run_for_pipeline(the_pipeline, execution_plan=execution_plan)

    with pytest.raises(
        DagsterResourceFunctionError,
        match="Error executing resource_fn on ResourceDefinition failing_resource",
    ):
        execute_plan(
            execution_plan,
            InMemoryPipeline(the_pipeline),
            pipeline_run=pipeline_run,
            instance=instance,
        )

    # Test that the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(the_pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    event_types = [event.event_type_value for event in events]
    assert DagsterEventType.PIPELINE_FAILURE.value in event_types
def test_using_file_system_for_subplan_multiprocessing():
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    pipeline = reconstructable(define_inty_pipeline)

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition(), execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('return_one.compute'))
    assert (
        intermediates_manager.get_intermediate(
            None, Int, StepOutputHandle('return_one.compute')
        ).obj
        == 1
    )

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert intermediates_manager.has_intermediate(None, StepOutputHandle('add_one.compute'))
    assert (
        intermediates_manager.get_intermediate(None, Int, StepOutputHandle('add_one.compute')).obj
        == 2
    )
def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    store = FileSystemIntermediateStore(run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int) == 2
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    old_run_id = make_new_run_id()
    environment_dict = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}
    result = execute_pipeline(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=old_run_id),
        instance=instance,
    )
    assert result.success

    # re-execute add_two
    new_run_id = make_new_run_id()
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=new_run_id,
        environment_dict=environment_dict,
        mode='default',
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )
    execution_plan = create_execution_plan(
        pipeline_def,
        environment_dict=environment_dict,
        run_config=RunConfig(run_id=new_run_id, previous_run_id=result.run_id),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(['add_two.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_fan_out_should_skip_step():
    @solid(
        output_defs=[
            OutputDefinition(Int, "out_1", is_required=False),
            OutputDefinition(Int, "out_2", is_required=False),
            OutputDefinition(Int, "out_3", is_required=False),
        ]
    )
    def foo(_):
        yield Output(1, "out_1")

    @solid
    def bar(_, input_arg):
        return input_arg

    @pipeline
    def optional_outputs():
        foo_res = foo()  # pylint: disable=no-member
        bar.alias("bar_1")(input_arg=foo_res.out_1)
        bar.alias("bar_2")(input_arg=foo_res.out_2)
        bar.alias("bar_3")(input_arg=foo_res.out_3)

    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun(pipeline_name="optional_outputs", run_id=make_new_run_id())
    execute_plan(
        create_execution_plan(optional_outputs, step_keys_to_execute=["foo"]),
        InMemoryPipeline(optional_outputs),
        instance,
        pipeline_run,
    )

    assert not should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_1"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_2"]),
        instance,
        pipeline_run.run_id,
    )
    assert should_skip_step(
        create_execution_plan(optional_outputs, step_keys_to_execute=["bar_3"]),
        instance,
        pipeline_run.run_id,
    )
def test_using_file_system_for_subplan_multiprocessing():
    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(
        ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline).build_pipeline_definition(),
        environment_dict=environment_dict,
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())
    instance.create_empty_run(run_id, execution_plan.pipeline_def.name)

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {'storage': {'filesystem': {}}, 'intermediate_storage': {'in_memory': {}}}

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )
    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)
    intermediates_manager = IntermediateStoreIntermediatesManager(store)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert not intermediates_manager.has_intermediate(None, StepOutputHandle('return_one.compute'))
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {'storage': {'filesystem': {}}}

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, run_config=run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key('return_one.compute')
    assert execution_plan.get_step_by_key('add_one.compute')

    pipeline_run = PipelineRun.create_empty_run(pipeline_def.name, make_new_run_id())
    events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )

    # start, out, success, start, input, out, success
    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]

    output_events = [e for e in events if e.event_type_value == 'STEP_OUTPUT']

    assert output_events[0].step_key == 'return_one.compute'
    assert output_events[0].is_successful_output
    assert output_events[1].step_key == 'add_one.compute'
    assert output_events[1].is_successful_output
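# `define_two_int_pipeline` is likewise assumed rather than shown; a minimal
# sketch matching the assertions above: a plan of exactly two steps,
# "return_one.compute" feeding "add_one.compute", both producing Ints.
from dagster import Int, OutputDefinition, pipeline, solid


def define_two_int_pipeline():
    @solid(output_defs=[OutputDefinition(Int)])
    def return_one(_context):
        return 1

    @solid(output_defs=[OutputDefinition(Int)])
    def add_one(_context, num):
        return num + 1

    @pipeline
    def two_int_pipeline():
        add_one(return_one())

    return two_int_pipeline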
def query_on_dask_worker(
    dependencies,
    recon_pipeline,
    pipeline_run,
    run_config,
    step_keys,
    mode,
    instance_ref,
):  # pylint: disable=unused-argument
    """Note that we need to pass "dependencies" to ensure Dask sequences futures during task
    scheduling, even though we do not use this argument within the function.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        execution_plan = create_execution_plan(
            recon_pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            ),
            run_config=run_config,
            step_keys_to_execute=step_keys,
            mode=mode,
        )
        return execute_plan(execution_plan, instance, pipeline_run, run_config=run_config)
def test_reentrant_execute_plan():
    called = {}

    @solid
    def has_tag(context):
        assert context.has_tag("foo")
        assert context.get_tag("foo") == "bar"
        called["yup"] = True

    pipeline_def = PipelineDefinition(name="has_tag_pipeline", solid_defs=[has_tag])
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, tags={"foo": "bar"}, execution_plan=execution_plan
    )
    step_events = execute_plan(execution_plan, pipeline_run=pipeline_run, instance=instance)

    assert called["yup"]
    assert len(step_events) == 4

    assert step_events[1].logging_tags["foo"] == "bar"
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping, execution_plan=execution_plan
    )

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )
    pipeline_run = PipelineRun.create_empty_run(
        composite_pipeline_with_config_mapping.name, str(uuid.uuid4())
    )

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
def test_execution_plan_two_outputs():
    @solid(output_defs=[OutputDefinition(Int, "num_one"), OutputDefinition(Int, "num_two")])
    def return_one_two(_context):
        yield Output(1, "num_one")
        yield Output(2, "num_two")

    pipeline_def = PipelineDefinition(name="return_one_two_pipeline", solid_defs=[return_one_two])

    execution_plan = create_execution_plan(pipeline_def)

    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )
    step_events = execute_plan(execution_plan, pipeline_run=pipeline_run, instance=instance)

    assert step_events[1].step_key == "return_one_two.compute"
    assert step_events[1].step_output_data.output_name == "num_one"
    assert step_events[3].step_key == "return_one_two.compute"
    assert step_events[3].step_output_data.output_name == "num_two"
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    environment_config = EnvironmentConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        environment_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline, environment_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [event for event in events if event.event_type_value == "STEP_FAILURE"]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[0].event_specific_data.error.message
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan
    )

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2
    assert execution_plan.get_step_by_key("return_one.compute")
    assert execution_plan.get_step_by_key("add_one.compute")

    events = execute_plan(execution_plan, pipeline_run=pipeline_run, instance=instance)
    step_starts = find_events(events, event_type="STEP_START")
    assert len(step_starts) == 2
    step_successes = find_events(events, event_type="STEP_SUCCESS")
    assert len(step_successes) == 2

    output_events = find_events(events, event_type="STEP_OUTPUT")

    assert output_events[0].step_key == "return_one.compute"
    assert output_events[0].is_successful_output
    assert output_events[1].step_key == "add_one.compute"
    assert output_events[1].is_successful_output
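# `find_events`, used in the variant of test_execution_plan_simple_two_steps
# above, is also assumed rather than shown; a minimal sketch:
def find_events(events, event_type=None):
    # Filter Dagster events by event_type_value (e.g. "STEP_START");
    # return everything when no event_type is given.
    return [event for event in events if not event_type or event.event_type_value == event_type]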
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    run_id = str(uuid.uuid4())

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['nope'],
        )
def test_using_intermediates_to_override():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}, "intermediate_storage": {"in_memory": {}}}

    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(
        pipeline,
        run_config=run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan
    )
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"]),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
    )
    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id
    )
    assert get_step_output(return_one_step_events, "return_one")
    assert not intermediate_storage.has_intermediate(None, StepOutputHandle("return_one"))
def test_execution_plan_for_composite_solid_with_config_mapping():
    run_config = {
        "solids": {
            "composite_with_nested_config_solid_and_config_mapping": {
                "config": {"foo": "baz", "bar": 3}
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping, run_config=run_config
    )
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping, execution_plan=execution_plan
    )

    events = execute_plan(
        execution_plan,
        run_config=run_config,
        pipeline_run=pipeline_run,
        instance=instance,
    )

    assert [e.event_type_value for e in events] == [
        "STEP_START",
        "STEP_OUTPUT",
        "STEP_SUCCESS",
        "STEP_START",
        "STEP_INPUT",
        "STEP_OUTPUT",
        "STEP_SUCCESS",
    ]
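# `composite_pipeline_with_config_mapping` is defined elsewhere in the test
# module; the sketch below is a hypothetical reconstruction assuming the
# 0.9-era composite_solid API. It shows the shape the three config-mapping
# tests rely on: a composite whose {"foo", "bar"} config is mapped down onto
# two chained nested solids, yielding the two-step event sequence asserted
# above.
from dagster import Field, Int, String, composite_solid, pipeline, solid


@solid(config_schema=String)
def node_a(context):
    return context.solid_config


@solid(config_schema=Int)
def node_b(context, prefix):
    return prefix * context.solid_config


@composite_solid(
    config_schema={"foo": Field(String), "bar": Field(Int)},
    config_fn=lambda cfg: {
        # Fan the composite's config out to the nested solids.
        "node_a": {"config": cfg["foo"]},
        "node_b": {"config": cfg["bar"]},
    },
)
def composite_with_nested_config_solid_and_config_mapping():
    return node_b(node_a())


@pipeline
def composite_pipeline_with_config_mapping():
    composite_with_nested_config_solid_and_config_mapping()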