def test_pipeline_step_key_subset_execution():
    """Execute the addy pipeline, then re-execute only ``add_two.compute``.

    Verifies the intermediates recorded under the parent run id before and
    after the re-execution, that only the selected step emits an output event,
    and that an unknown step key raises ``DagsterExecutionStepNotFoundError``.
    """
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    add_one_config = {'inputs': {'num': {'value': 3}}}
    run_config = env_with_fs({'solids': {'add_one': add_one_config}})

    def read_int(storage, step_key):
        # Fetch the stored Int intermediate produced by ``step_key``.
        handle = StepOutputHandle(step_key)
        return storage.get_intermediate(None, Int, handle).obj

    result = execute_pipeline(
        pipeline_def, run_config=run_config, instance=instance
    )
    assert result.success

    parent_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert read_int(parent_storage, 'add_one.compute') == 4
    assert read_int(parent_storage, 'add_two.compute') == 6

    # Re-execute add_two only.
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_keys_to_execute=['add_two.compute'],
        instance=instance,
    )
    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    # NOTE(review): the storage is looked up with the *parent* run id again
    # here, not the re-execution's run id -- confirm this is intended.
    storage_after = build_fs_intermediate_storage(
        instance.intermediates_directory, result.run_id
    )
    assert read_int(storage_after, 'add_one.compute') == 4
    assert read_int(storage_after, 'add_two.compute') == 6

    assert not get_step_output_event(step_events, 'add_one.compute')
    assert get_step_output_event(step_events, 'add_two.compute')

    # A step key that is not in the plan must be rejected.
    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match='Execution plan does not contain step',
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_keys_to_execute=['nope.compute'],
            instance=instance,
        )
def test_pipeline_step_key_subset_execution():
    """Execute the file-system addy pipeline, then re-execute only ``add_two``.

    Outputs are read back as pickles from
    ``<storage_directory>/<run_id>/<step>/result``. The re-execution must write
    a result for ``add_two`` only, and an unknown step selection must raise
    ``DagsterExecutionStepNotFoundError``.
    """
    pipeline_def = define_addy_pipeline(using_file_system=True)
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}

    def result_path(run_id, step_key):
        # Location of the pickled output of ``step_key`` for ``run_id``.
        return os.path.join(instance.storage_directory(), run_id, step_key, "result")

    def load_result(run_id, step_key):
        with open(result_path(run_id, step_key), "rb") as read_obj:
            return pickle.load(read_obj)

    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success
    assert load_result(result.run_id, "add_one") == 4
    assert load_result(result.run_id, "add_two") == 6

    # Re-execute add_two only.
    pipeline_reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=result.run_id,
        run_config=run_config,
        step_selection=["add_two"],
        instance=instance,
    )
    assert pipeline_reexecution_result.success

    step_events = pipeline_reexecution_result.step_event_list
    assert step_events

    rerun_id = pipeline_reexecution_result.run_id
    # add_one was not selected, so no result file exists for it in the new run.
    assert not os.path.exists(result_path(rerun_id, "add_one"))
    assert load_result(rerun_id, "add_two") == 6

    assert not get_step_output_event(step_events, "add_one")
    assert get_step_output_event(step_events, "add_two")

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Step selection refers to unknown step: nope",
    ):
        reexecute_pipeline(
            pipeline_def,
            parent_run_id=result.run_id,
            run_config=run_config,
            step_selection=["nope"],
            instance=instance,
        )
Ejemplo n.º 3
0
def test_reexec_from_parent_dynamic_fails():
    """Selecting the unresolved step ``multiply_inputs[?]`` alone must fail.

    The re-execution is expected to raise ``DagsterInvariantViolationError``
    because the resolving step ``emit`` is not part of the selection.
    """
    fs_storage_config = {"storage": {"filesystem": {}}}

    with instance_for_test() as instance:
        parent_result = execute_pipeline(
            dynamic_pipeline,
            run_config=fs_storage_config,
            instance=instance,
        )
        parent_run_id = parent_result.run_id

        expected_message = (
            r'UnresolvedExecutionStep "multiply_inputs\[\?\]" is resolved by "emit" which is not part of the current step selection'
        )

        # not currently supported, this needs to know all fan outs of previous
        # step, should just run previous step
        with pytest.raises(DagsterInvariantViolationError, match=expected_message):
            reexecute_pipeline(
                pipeline=dynamic_pipeline,
                parent_run_id=parent_run_id,
                run_config=fs_storage_config,
                step_selection=["multiply_inputs[?]"],
                instance=instance,
            )
Ejemplo n.º 4
0
def test_reexecution_fs_storage():
    """Fully re-execute a two-solid pipeline twice using filesystem storage.

    Each re-execution must succeed with both solid results, record the run it
    was launched from as ``parent_run_id`` and the original run as
    ``root_run_id``.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    dependencies = {"add_one": {"num": DependencyDefinition("return_one")}}
    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies=dependencies,
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()

    def rerun_and_check(parent_id, root_id):
        # Re-execute everything from ``parent_id`` and verify results/lineage.
        outcome = reexecute_pipeline(
            pipeline_def,
            parent_id,
            run_config=run_config,
            instance=instance,
        )
        assert outcome.success
        assert len(outcome.solid_result_list) == 2
        assert outcome.result_for_solid("return_one").output_value() == 1
        assert outcome.result_for_solid("add_one").output_value() == 2
        stored_run = instance.get_run_by_id(outcome.run_id)
        assert stored_run.parent_run_id == parent_id
        assert stored_run.root_run_id == root_id
        return outcome

    pipeline_result = execute_pipeline(
        pipeline_def, run_config=run_config, instance=instance
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # Child of the original run.
    reexecution_result = rerun_and_check(
        pipeline_result.run_id, pipeline_result.run_id
    )

    # Grandchild: parent is the child run, root is still the original run.
    rerun_and_check(reexecution_result.run_id, pipeline_result.run_id)
Ejemplo n.º 5
0
def test_reexecute_subset_of_subset():
    """Re-execute ``plus_two*`` from a failed run, then again from that re-run.

    The first run fails in ``plus_two``; both subsequent subset re-executions
    use a non-failing pipeline and must produce ``3`` for ``plus_two``.
    """
    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        mode = ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})

        def my_pipeline_def(should_fail):
            # Build the pipeline; ``plus_two`` raises when ``should_fail`` is set.
            @solid
            def one(_):
                return 1

            @solid
            def plus_two(_, i):
                if should_fail:
                    raise Exception()
                return i + 2

            @solid
            def plus_three(_, i):
                return i + 3

            @pipeline(mode_defs=[mode])
            def my_pipeline():
                plus_three(plus_two(one()))

            return my_pipeline

        def rerun_plus_two(parent_id):
            return reexecute_pipeline(
                my_pipeline_def(should_fail=False),
                instance=instance,
                parent_run_id=parent_id,
                step_selection=["plus_two*"],
            )

        first_result = execute_pipeline(
            my_pipeline_def(should_fail=True),
            instance=instance,
            raise_on_error=False,
        )
        assert not first_result.success
        first_run_id = first_result.run_id

        second_result = rerun_plus_two(first_run_id)
        assert second_result.success
        assert second_result.result_for_solid("plus_two").output_value() == 3
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = rerun_plus_two(second_run_id)
        assert third_result.success
        assert third_result.result_for_solid("plus_two").output_value() == 3
Ejemplo n.º 6
0
def test_multiproc_reexecution_fs_storage_after_fail():
    """Re-execute ``retry_pipeline`` with the multiprocess executor after a failure.

    The initial run is configured so ``return_one`` fails. Two successive full
    re-executions (child and grandchild) must succeed and carry the expected
    ``parent_run_id`` / ``root_run_id``.
    """
    with instance_for_test() as instance:
        run_config = {"execution": {"multiprocess": {}}}
        failing_config = {
            "execution": {"multiprocess": {}},
            "solids": {"return_one": {"config": {"fail": True}}},
        }

        def rerun_and_check(parent_id, root_id):
            # Full re-execution from ``parent_id`` followed by result checks.
            outcome = reexecute_pipeline(
                reconstructable(retry_pipeline),
                parent_id,
                run_config=run_config,
                instance=instance,
            )
            assert outcome.success
            assert len(outcome.solid_result_list) == 2
            assert outcome.result_for_solid("return_one").output_value() == 1
            assert outcome.result_for_solid("add_one").output_value() == 2
            stored_run = instance.get_run_by_id(outcome.run_id)
            assert stored_run.parent_run_id == parent_id
            assert stored_run.root_run_id == root_id
            return outcome

        pipeline_result = execute_pipeline(
            reconstructable(retry_pipeline),
            run_config=failing_config,
            instance=instance,
            raise_on_error=False,
        )
        assert not pipeline_result.success

        reexecution_result = rerun_and_check(
            pipeline_result.run_id, pipeline_result.run_id
        )

        # The grandchild's root is still the original failed run.
        rerun_and_check(reexecution_result.run_id, pipeline_result.run_id)
Ejemplo n.º 7
0
def test_reexecute_subset_of_subset_with_composite():
    """Re-execute ``plus_three`` twice when its upstream is a composite solid.

    The second re-execution is launched from the first re-execution's run,
    whose own upstream outputs came from the original run.
    """

    @solid
    def one(_):
        return 1

    @solid
    def plus_two(_, i):
        return i + 2

    @composite_solid
    def one_plus_two():
        return plus_two(one())

    @solid
    def plus_three(_, i):
        return i + 3

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        mode = ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})

        @pipeline(mode_defs=[mode])
        def my_pipeline():
            plus_three(one_plus_two())

        def rerun_plus_three(parent_id):
            return reexecute_pipeline(
                my_pipeline,
                instance=instance,
                parent_run_id=parent_id,
                step_selection=["plus_three"],
            )

        first_result = execute_pipeline(my_pipeline, instance=instance)
        assert first_result.success
        first_run_id = first_result.run_id

        second_result = rerun_plus_three(first_run_id)
        assert second_result.success
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = rerun_plus_three(second_run_id)
        assert third_result.success
Ejemplo n.º 8
0
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    """Re-execute ``foo_pipeline`` with multi-clause ``.compute`` step selections.

    Covers a disjoint selection, an overlapping selection, and a selection
    that must raise ``DagsterInvalidSubsetError``.
    """
    instance = DagsterInstance.ephemeral()
    run_config = {"storage": {"filesystem": {}}}

    pipeline_result_full = execute_pipeline(
        foo_pipeline, run_config=run_config, instance=instance
    )
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    def rerun(step_selection):
        # Re-execute from the full run with the given selection clauses.
        return reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            run_config=run_config,
            instance=instance,
            step_selection=step_selection,
        )

    disjoint_clauses = ["return_one.compute", "return_two.compute", "add_nums.compute+"]
    result_multi_disjoint = rerun(disjoint_clauses)
    assert result_multi_disjoint.success
    assert result_multi_disjoint.result_for_solid("multiply_two").output_value() == 6

    overlap_clauses = ["return_one.compute++", "return_two.compute", "add_nums.compute+"]
    result_multi_overlap = rerun(overlap_clauses)
    assert result_multi_overlap.success
    assert result_multi_overlap.result_for_solid("multiply_two").output_value() == 6

    expected_error = re.escape("No qualified steps to execute found for step_selection")
    with pytest.raises(DagsterInvalidSubsetError, match=expected_error):
        rerun(["a", "*add_nums.compute"])
Ejemplo n.º 9
0
def test_single_step_reexecution():
    """Re-execute only ``add_one.compute`` of a two-solid pipeline.

    The re-execution must succeed, report no output value for the unselected
    ``return_one`` solid, and produce ``2`` for ``add_one``.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
    )
    run_config = {"storage": {"filesystem": {}}}
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["add_one.compute"],
    )

    assert reexecution_result.success
    # Compare against the None singleton by identity rather than equality.
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one").output_value() == 2
Ejemplo n.º 10
0
def test_two_step_reexecution():
    """Re-execute both ``add_one`` steps of a three-step pipeline.

    ``return_one`` is not selected, so it must report no output value in the
    re-execution, while ``add_one_2`` must still produce ``3``.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    run_config = {'storage': {'filesystem': {}}}
    pipeline_result = execute_pipeline(two_step_reexec, run_config=run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one_2').output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_keys_to_execute=['add_one.compute', 'add_one_2.compute'],
    )
    assert reexecution_result.success
    # Compare against the None singleton by identity rather than equality.
    assert reexecution_result.result_for_solid('return_one').output_value() is None
    assert reexecution_result.result_for_solid('add_one_2').output_value() == 3
Ejemplo n.º 11
0
def test_reexec_from_parent_1():
    """Re-execute the resolved dynamic step ``multiply_inputs[0]`` from a parent run.

    Only mapping key ``"0"`` is expected in the ``multiply_inputs`` output.
    """
    fs_storage_config = {"storage": {"filesystem": {}}}

    with instance_for_test() as instance:
        parent_result = execute_pipeline(
            dynamic_pipeline,
            run_config=fs_storage_config,
            instance=instance,
        )
        parent_run_id = parent_result.run_id

        reexec_result = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            run_config=fs_storage_config,
            step_selection=["multiply_inputs[0]"],
            instance=instance,
        )
        assert reexec_result.success

        multiplied = reexec_result.result_for_solid("multiply_inputs").output_value()
        assert multiplied == {"0": 0}
Ejemplo n.º 12
0
def test_retries(environment):
    """Fail ``can_fail`` on the first run, then re-execute with it passing.

    Args:
        environment: Base run config; copied and extended with a ``solids``
            section for each run.
    """
    instance = DagsterInstance.local_temp()
    pipe = reconstructable(define_run_retry_pipeline)

    # First run: force the failure.
    fails = dict(environment, solids={'can_fail': {'config': {'fail': True}}})
    result = execute_pipeline(
        pipe,
        run_config=fails,
        instance=instance,
        raise_on_error=False,
    )
    assert not result.success

    # Second run: same base config, failure switched off.
    passes = dict(environment, solids={'can_fail': {'config': {'fail': False}}})
    second_result = reexecute_pipeline(
        pipe,
        parent_run_id=result.run_id,
        run_config=passes,
        instance=instance,
    )
    assert second_result.success

    downstream_result = second_result.result_for_solid('downstream_of_failed')
    assert downstream_result.output_value() == 'okay perfect'

    will_be_skipped = []
    for event in second_result.event_list:
        if 'will_be_skipped' in str(event.solid_handle):
            will_be_skipped.append(event)

    # The first two matching events must both be skips.
    for position in (0, 1):
        assert str(will_be_skipped[position].event_type_value) == 'STEP_SKIPPED'
Ejemplo n.º 13
0
def test_two_step_reexecution():
    """Re-execute ``add_one`` and ``add_one_2`` using the default test mode.

    ``return_one`` is not selected, so it must report no output value in the
    re-execution, while ``add_one_2`` must still produce ``3``.
    """

    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    @pipeline(mode_defs=[default_mode_def_for_test])
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()

    pipeline_result = execute_pipeline(two_step_reexec, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one_2").output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        instance=instance,
        step_selection=["add_one", "add_one_2"],
    )
    assert reexecution_result.success
    # Compare against the None singleton by identity rather than equality.
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one_2").output_value() == 3
Ejemplo n.º 14
0
def test_single_step_reexecution():
    """Re-execute only ``add_one`` of a two-solid pipeline in the default test mode.

    The re-execution must succeed, report no output value for the unselected
    ``return_one`` solid, and produce ``2`` for ``add_one``.
    """

    @solid
    def return_one():
        return 1

    @solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        name="test",
        dependencies={"add_one": {"num": DependencyDefinition("return_one")}},
        mode_defs=[default_mode_def_for_test],
    )
    instance = DagsterInstance.ephemeral()
    pipeline_result = execute_pipeline(pipeline_def, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one").output_value() == 2

    # This is how this is actually done in dagster_graphql.implementation.pipeline_execution_manager
    reexecution_result = reexecute_pipeline(
        pipeline_def,
        parent_run_id=pipeline_result.run_id,
        instance=instance,
        step_selection=["add_one"],
    )

    assert reexecution_result.success
    # Compare against the None singleton by identity rather than equality.
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one").output_value() == 2
Ejemplo n.º 15
0
def test_two_step_reexecution():
    """Re-execute ``add_one.compute`` and ``add_one_2.compute`` with fs storage.

    ``return_one`` is not selected, so it must report no output value in the
    re-execution, while ``add_one_2`` must still produce ``3``.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    @pipeline
    def two_step_reexec():
        add_one(add_one(return_one()))

    instance = DagsterInstance.ephemeral()
    run_config = {"storage": {"filesystem": {}}}
    pipeline_result = execute_pipeline(two_step_reexec, run_config=run_config, instance=instance)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("add_one_2").output_value() == 3

    reexecution_result = reexecute_pipeline(
        two_step_reexec,
        parent_run_id=pipeline_result.run_id,
        run_config=run_config,
        instance=instance,
        step_selection=["add_one.compute", "add_one_2.compute"],
    )
    assert reexecution_result.success
    # Compare against the None singleton by identity rather than equality.
    assert reexecution_result.result_for_solid("return_one").output_value() is None
    assert reexecution_result.result_for_solid("add_one_2").output_value() == 3
Ejemplo n.º 16
0
def test_custom_path_asset_store():
    """Run ``custom_path_pipeline`` with an fs asset store rooted in a temp dir.

    Checks the pickled outputs written to ``call_api_output`` and
    ``parse_df_output``, then re-executes ``parse_df.compute*`` successfully.
    """
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()
        asset_store_config = {"config": {"base_dir": tmpdir_path}}
        run_config = {
            "resources": {"fs_asset_store": asset_store_config},
        }

        def assert_pickled(file_name, expected):
            # The output must exist as a file holding the expected pickle.
            target = os.path.join(tmpdir_path, file_name)
            assert os.path.isfile(target)
            with open(target, "rb") as read_obj:
                assert pickle.load(read_obj) == expected

        result = execute_pipeline(
            custom_path_pipeline,
            run_config=run_config,
            mode="test",
            instance=instance,
        )
        assert result.success

        assert_pickled("call_api_output", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        assert_pickled("parse_df_output", [1, 2, 3, 4, 5])

        rerun = reexecute_pipeline(
            custom_path_pipeline,
            result.run_id,
            run_config=run_config,
            mode="test",
            instance=instance,
            step_selection=["parse_df.compute*"],
        )
        assert rerun.success
Ejemplo n.º 17
0
def test_default_object_manager_reexecution():
    """Re-execute ``solid_b`` and check how its upstream value is fetched.

    Exactly one GET_ASSET asset-store operation event is expected, and its
    step key must be ``solid_a``.
    """
    with seven.TemporaryDirectory() as tmpdir_path:
        default_asset_store = fs_object_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_asset_store, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        get_asset_events = [
            evt
            for evt in re_result.event_list
            if evt.is_asset_store_operation
            and AssetStoreOperationType(evt.event_specific_data.op)
            == AssetStoreOperationType.GET_ASSET
        ]
        assert len(get_asset_events) == 1

        only_event = get_asset_events[0]
        assert only_event.event_specific_data.step_key == "solid_a"
Ejemplo n.º 18
0
def test_retries(environment):
    """Fail ``can_fail`` on the first run, then re-execute with it passing.

    Args:
        environment: Base run config; copied and extended with a ``solids``
            section for each run.
    """
    with instance_for_test() as instance:
        pipe = reconstructable(define_run_retry_pipeline)

        # First run: force the failure.
        fails = dict(environment, solids={"can_fail": {"config": {"fail": True}}})
        result = execute_pipeline(
            pipe,
            run_config=fails,
            instance=instance,
            raise_on_error=False,
        )
        assert not result.success

        # Second run: same base config, failure switched off.
        passes = dict(environment, solids={"can_fail": {"config": {"fail": False}}})
        second_result = reexecute_pipeline(
            pipe,
            parent_run_id=result.run_id,
            run_config=passes,
            instance=instance,
        )
        assert second_result.success

        downstream_result = second_result.result_for_solid("downstream_of_failed")
        assert downstream_result.output_value() == "okay perfect"

        will_be_skipped = []
        for event in second_result.event_list:
            if "will_be_skipped" in str(event.solid_handle):
                will_be_skipped.append(event)

        # The first two matching events must both be skips.
        for position in (0, 1):
            assert str(will_be_skipped[position].event_type_value) == "STEP_SKIPPED"
Ejemplo n.º 19
0
def test_reexec_dynamic_with_optional_output_job_3():
    """Selecting ``echo[0]`` must raise ``DagsterExecutionStepNotFoundError``."""
    unknown_step_message = r"Step selection refers to unknown step: echo\[0\]"

    with instance_for_test() as instance:
        job = dynamic_with_optional_output_job()
        result = job.execute_in_process(instance=instance)

        # re-execute the step where the source did not yield
        # -> error because the dynamic step wont exist in execution plan
        with pytest.raises(DagsterExecutionStepNotFoundError, match=unknown_step_message):
            reexecute_pipeline(
                reconstructable(dynamic_with_optional_output_job),
                parent_run_id=result.run_id,
                instance=instance,
                step_selection=["echo[0]"],
            )
Ejemplo n.º 20
0
def test_reexecute_pipeline_with_step_selection_single_clause():
    """Re-execute ``foo_pipeline`` fully and with single-clause selections.

    Covers a full re-execution (no selection), an upstream selection
    (``*add_nums``) and a downstream selection (``add_nums++``).
    """
    instance = DagsterInstance.ephemeral()
    run_config = {"intermediate_storage": {"filesystem": {}}}

    pipeline_result_full = execute_pipeline(
        foo_pipeline, run_config=run_config, instance=instance
    )
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    def rerun(**selection_kwargs):
        # Re-execute from the full run; extra kwargs carry the step selection.
        return reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            run_config=run_config,
            instance=instance,
            **selection_kwargs
        )

    # No selection: everything runs again.
    reexecution_result_full = rerun()
    assert reexecution_result_full.success
    assert len(reexecution_result_full.solid_result_list) == 5
    assert reexecution_result_full.result_for_solid("add_one").output_value() == 7

    # add_nums plus everything upstream of it.
    reexecution_result_up = rerun(step_selection=["*add_nums"])
    assert reexecution_result_up.success
    assert reexecution_result_up.result_for_solid("add_nums").output_value() == 3

    # add_nums plus two levels downstream.
    reexecution_result_down = rerun(step_selection=["add_nums++"])
    assert reexecution_result_down.success
    assert reexecution_result_down.result_for_solid("add_one").output_value() == 7
Ejemplo n.º 21
0
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    """Re-execute ``foo_pipeline`` with multi-clause step selections.

    Covers a disjoint selection, an overlapping selection, and a selection
    naming an unknown step, which must raise
    ``DagsterExecutionStepNotFoundError``.
    """
    instance = DagsterInstance.ephemeral()
    run_config = {"intermediate_storage": {"filesystem": {}}}

    pipeline_result_full = execute_pipeline(
        foo_pipeline, run_config=run_config, instance=instance
    )
    assert pipeline_result_full.success
    assert pipeline_result_full.result_for_solid("add_one").output_value() == 7
    assert len(pipeline_result_full.solid_result_list) == 5

    def rerun(step_selection):
        # Re-execute from the full run with the given selection clauses.
        return reexecute_pipeline(
            foo_pipeline,
            parent_run_id=pipeline_result_full.run_id,
            run_config=run_config,
            instance=instance,
            step_selection=step_selection,
        )

    result_multi_disjoint = rerun(["return_one", "return_two", "add_nums+"])
    assert result_multi_disjoint.success
    assert result_multi_disjoint.result_for_solid("multiply_two").output_value() == 6

    result_multi_overlap = rerun(["return_one++", "return_two", "add_nums+"])
    assert result_multi_overlap.success
    assert result_multi_overlap.result_for_solid("multiply_two").output_value() == 6

    with pytest.raises(
        DagsterExecutionStepNotFoundError,
        match="Can not build subset plan from unknown step: a",
    ):
        rerun(["a", "*add_nums"])
Ejemplo n.º 22
0
def reexecution():
    """Run ``unreliable_pipeline`` once and re-execute it in full if it failed."""
    instance = DagsterInstance.ephemeral()

    # Initial execution
    pipeline_result_full = execute_pipeline(unreliable_pipeline, instance=instance)

    if pipeline_result_full.success:
        # Nothing to retry.
        return

    # Re-execution: Entire pipeline
    reexecute_pipeline(
        unreliable_pipeline,
        parent_run_id=pipeline_result_full.run_id,
        instance=instance,
    )
Ejemplo n.º 23
0
def test_reexec_dynamic_with_optional_output_job_1():
    """Fully re-execute the optional-output dynamic job and check ``adder``.

    The expected ``adder`` output is the sum of the even numbers below 10.
    """
    with instance_for_test() as instance:
        job = dynamic_with_optional_output_job()
        result = job.execute_in_process(instance=instance)

        # re-execute all
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
        )
        assert re_result.success

        expected_total = sum(i for i in range(10) if i % 2 == 0)
        assert re_result.output_for_solid("adder") == expected_total
Ejemplo n.º 24
0
def test_reexec_dynamic_with_transitive_optional_output_job_2():
    """Re-execute ``echo[1]`` and expect the mapped output ``{"1": 2}``."""
    with instance_for_test() as instance:
        job = dynamic_with_transitive_optional_output_job()
        result = job.execute_in_process(instance=instance)

        # re-execute the step where the source yielded an output
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_transitive_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
            step_selection=["echo[1]"],
        )
        assert re_result.success

        echoed = re_result.result_for_solid("echo").output_value()
        assert echoed == {"1": 2}
Ejemplo n.º 25
0
def test_reexec_from_parent_2():
    """Re-execute the resolved dynamic step ``multiply_by_two[1]`` from a parent run.

    Only mapping key ``"1"`` is expected in the ``multiply_by_two`` output.
    """
    with instance_for_test() as instance:
        parent_result = execute_pipeline(dynamic_pipeline, instance=instance)
        parent_run_id = parent_result.run_id

        reexec_result = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            step_selection=["multiply_by_two[1]"],
            instance=instance,
        )
        assert reexec_result.success

        doubled = reexec_result.result_for_solid("multiply_by_two").output_value()
        assert doubled == {"1": 20}
Ejemplo n.º 26
0
def test_fs_io_manager_reexecution():
    """Re-execute ``solid_b`` with an fs IO manager and inspect loaded inputs.

    Exactly one loaded-input event is expected, and its upstream step key must
    be ``solid_a``.
    """
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # The re-executed step should report a single loaded-input event.
        loaded_input_events = [evt for evt in re_result.event_list if evt.is_loaded_input]
        assert len(loaded_input_events) == 1

        only_event = loaded_input_events[0]
        assert only_event.event_specific_data.upstream_step_key == "solid_a"
Ejemplo n.º 27
0
def test_reexec_from_parent_basic():
    """Re-execute only the ``emit`` step of ``dynamic_pipeline`` from a parent run.

    The pipeline is executed once to obtain a parent run id; ``emit`` is then
    re-executed against that parent and its full output mapping is checked.
    """
    with instance_for_test() as instance:
        parent_run_id = execute_pipeline(dynamic_pipeline, instance=instance).run_id

        rerun = reexecute_pipeline(
            pipeline=dynamic_pipeline,
            parent_run_id=parent_run_id,
            step_selection=["emit"],
            instance=instance,
        )

        assert rerun.success
        expected_outputs = {"0": 0, "1": 1, "2": 2}
        assert rerun.result_for_solid("emit").output_value() == expected_outputs
Ejemplo n.º 28
0
def test_reexec_dynamic_with_transitive_optional_output_job_3():
    """Re-executing ``echo[0]`` is currently expected to produce a failed run.

    The job is executed in process to create a parent run, then ``echo[0]`` is
    re-executed with ``raise_on_error=False`` so the failure surfaces on the
    result object instead of as an exception.
    """
    with instance_for_test() as instance:
        parent = dynamic_with_transitive_optional_output_job().execute_in_process(
            instance=instance
        )

        # Select the step whose source did not yield an output.
        rerun = reexecute_pipeline(
            reconstructable(dynamic_with_transitive_optional_output_job),
            parent_run_id=parent.run_id,
            step_selection=["echo[0]"],
            instance=instance,
            raise_on_error=False,
        )

        # FIXME: https://github.com/dagster-io/dagster/issues/3511
        # When every previous run skipped yielding the source, the run fails
        # because the run id lookup returns None. Ideally the step would be
        # skipped (all its previous runs skipped) and the run would finish
        # successfully; until then, failure is the expected outcome.
        assert not rerun.success
Ejemplo n.º 29
0
def test_reexecute_pipeline_with_step_selection_multi_clauses():
    """Exercise ``reexecute_pipeline`` with multi-clause ``step_selection`` lists.

    After a full run of ``foo_pipeline``, two valid multi-clause selections are
    re-executed and must both succeed with ``multiply_two`` producing 6. Two
    selections naming unknown steps must raise
    ``DagsterExecutionStepNotFoundError`` with the matching message.
    """
    instance = DagsterInstance.ephemeral()
    full_run = execute_pipeline(foo_pipeline, instance=instance)
    assert full_run.success
    assert full_run.result_for_solid("add_one").output_value() == 7
    assert len(full_run.solid_result_list) == 5

    def rerun(selection):
        # Every re-execution in this test shares the parent run and instance.
        return reexecute_pipeline(
            foo_pipeline,
            parent_run_id=full_run.run_id,
            instance=instance,
            step_selection=selection,
        )

    # First selection has disjoint clauses, the second has overlapping ones.
    valid_selections = (
        ["return_one", "return_two", "add_nums+"],
        ["return_one++", "return_two", "add_nums+"],
    )
    for selection in valid_selections:
        rerun_result = rerun(selection)
        assert rerun_result.success
        assert rerun_result.result_for_solid("multiply_two").output_value() == 6

    # Selections that reference steps missing from the execution plan.
    invalid_selections = (
        (["a", "*add_nums"], "Step selection refers to unknown step: a"),
        (["a+", "*b"], "Step selection refers to unknown steps: a, b"),
    )
    for selection, expected_message in invalid_selections:
        with pytest.raises(DagsterExecutionStepNotFoundError, match=expected_message):
            rerun(selection)
Ejemplo n.º 30
0
def test_reexecution(job_fn):
    """Run ``job_fn`` once, then re-execute only its ``combine`` step.

    Both launcher resources and the IO manager are configured to use the same
    temporary directory. The initial run and the re-execution must each succeed
    and report 3 as the output of ``combine``.

    Args:
        job_fn: The job definition passed to ``reconstructable``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Each launcher gets its own config dict pointing at the shared scratch dir.
        resources = {
            launcher: {"config": {"scratch_dir": tmpdir}}
            for launcher in ("initial_launcher", "final_launcher")
        }
        resources["io_manager"] = {"config": {"base_dir": tmpdir}}
        run_config = {"resources": resources}

        with instance_for_test() as instance:
            first_run = execute_pipeline(
                pipeline=reconstructable(job_fn),
                run_config=run_config,
                instance=instance,
            )
            assert first_run.success
            assert first_run.result_for_solid("combine").output_value() == 3

            second_run = reexecute_pipeline(
                pipeline=reconstructable(job_fn),
                parent_run_id=first_run.run_id,
                run_config=run_config,
                instance=instance,
                step_selection=["combine"],
            )
            assert second_run.success
            assert second_run.result_for_solid("combine").output_value() == 3