Example #1
def test_fs_io_manager():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(io_manager)

        result = execute_pipeline(pipeline_def)
        assert result.success

        handled_output_events = list(
            filter(lambda evt: evt.is_handled_output, result.event_list))
        assert len(handled_output_events) == 2

        filepath_a = os.path.join(tmpdir_path, result.run_id, "solid_a",
                                  "result")
        assert os.path.isfile(filepath_a)
        with open(filepath_a, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3]

        loaded_input_events = list(
            filter(lambda evt: evt.is_loaded_input, result.event_list))
        assert len(loaded_input_events) == 1
        assert "solid_a" == loaded_input_events[
            0].event_specific_data.upstream_step_key

        filepath_b = os.path.join(tmpdir_path, result.run_id, "solid_b",
                                  "result")
        assert os.path.isfile(filepath_b)
        with open(filepath_b, "rb") as read_obj:
            assert pickle.load(read_obj) == 1
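
Examples #1, #3, #5, and #8 call a define_pipeline helper that this listing omits. Below is a minimal sketch of what it plausibly contains, inferred from the assertions the tests make (solid_a returns [1, 2, 3]; solid_b consumes it as _df and returns 1); the second parameter, passed as {} in the later examples, is unused here and its name is an assumption:

from dagster import ModeDefinition, pipeline, solid


def define_pipeline(io_manager, _extra_config=None):
    # produces the list the tests assert was pickled to <run_id>/solid_a/result
    @solid
    def solid_a(_context):
        return [1, 2, 3]

    # consumes solid_a's output as "_df" and returns 1
    @solid
    def solid_b(_context, _df):
        return 1

    @pipeline(mode_defs=[
        ModeDefinition(resource_defs={"io_manager": io_manager})
    ])
    def asset_pipeline():
        solid_b(solid_a())

    return asset_pipeline

The tests also rely on standard-library imports (os, pickle, tempfile) plus pytest and Dagster's top-level API of this era (solid, op, graph, pipeline, composite_solid, ModeDefinition, OutputDefinition, execute_pipeline, reexecute_pipeline, fs_io_manager, DagsterInstance, DagsterInvariantViolationError); AssetStoreOperationType and instance_for_test lived in Dagster's internal storage/test-utility modules, and their exact import paths vary by version.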
Example #2
def test_fan_in_skip():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured(
            {"base_dir": tmpdir_path})

        @solid(output_defs=[OutputDefinition(name="skip", is_required=False)])
        def skip(_):
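            # returning before the yield makes this solid a generator that
            # yields nothing, so the optional "skip" output is never emitted
            # and receiver's fan-in input contains only one's output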
            return
            yield  # pylint: disable=unreachable

        @solid
        def one(_):
            return 1

        @solid
        def receiver(_, input1):
            assert input1 == [1]

        @pipeline(mode_defs=[
            ModeDefinition(resource_defs={"io_manager": default_io_manager})
        ])
        def my_pipeline():
            receiver(input1=[one(), skip()])

        assert execute_pipeline(my_pipeline).success
Example #3
def test_fs_io_manager_reexecution():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured(
            {"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def, instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        get_asset_events = list(
            filter(
                lambda evt: evt.is_asset_store_operation and
                AssetStoreOperationType(evt.event_specific_data.op) ==
                AssetStoreOperationType.GET_ASSET,
                re_result.event_list,
            ))
        assert len(get_asset_events) == 1
        assert get_asset_events[0].event_specific_data.step_key == "solid_a"
Example #4
def test_reexecute_subset_of_subset():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()

        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        def my_pipeline_def(should_fail):
            @solid
            def one(_):
                return 1

            @solid
            def plus_two(_, i):
                if should_fail:
                    raise Exception()
                return i + 2

            @solid
            def plus_three(_, i):
                return i + 3

            @pipeline(mode_defs=[
                ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})
            ])
            def my_pipeline():
                plus_three(plus_two(one()))

            return my_pipeline

        first_result = execute_pipeline(my_pipeline_def(should_fail=True),
                                        instance=instance,
                                        raise_on_error=False)
        assert not first_result.success

        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_two*"],
        )
        assert second_result.success
        assert second_result.result_for_solid("plus_two").output_value() == 3
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline_def(should_fail=False),
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_two*"],
        )
        assert third_result.success
        assert third_result.result_for_solid("plus_two").output_value() == 3
Example #5
def test_result_output():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(default_io_manager, {})

        result = execute_pipeline(pipeline_def)
        assert result.success

        # test output_value
        assert result.result_for_solid("solid_a").output_value() == [1, 2, 3]
        assert result.result_for_solid("solid_b").output_value() == 1
Example #6
def test_reexecute_subset_of_subset_with_composite():
    @solid
    def one(_):
        return 1

    @solid
    def plus_two(_, i):
        return i + 2

    @composite_solid
    def one_plus_two():
        return plus_two(one())

    @solid
    def plus_three(_, i):
        return i + 3

    with tempfile.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.ephemeral()

        my_fs_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @pipeline(mode_defs=[
            ModeDefinition(resource_defs={"io_manager": my_fs_io_manager})
        ])
        def my_pipeline():
            plus_three(one_plus_two())

        first_result = execute_pipeline(my_pipeline, instance=instance)
        assert first_result.success
        first_run_id = first_result.run_id

        second_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=first_run_id,
            step_selection=["plus_three"],
        )
        assert second_result.success
        second_run_id = second_result.run_id

        # step_context._get_source_run_id should return first_run_id
        third_result = reexecute_pipeline(
            my_pipeline,
            instance=instance,
            parent_run_id=second_run_id,
            step_selection=["plus_three"],
        )
        assert third_result.success
Example #7
def test_fan_in():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        default_io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})

        @solid
        def input_solid1(_):
            return 1

        @solid
        def input_solid2(_):
            return 2

        @solid
        def solid1(_, input1):
            assert input1 == [1, 2]

        @pipeline(mode_defs=[ModeDefinition(resource_defs={"io_manager": default_io_manager})])
        def my_pipeline():
            solid1(input1=[input_solid1(), input_solid2()])

        assert execute_pipeline(my_pipeline).success
Example #8
def test_fs_io_manager():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        io_manager = fs_io_manager.configured({"base_dir": tmpdir_path})
        pipeline_def = define_pipeline(io_manager)

        result = execute_pipeline(pipeline_def)
        assert result.success

        asset_store_operation_events = list(
            filter(lambda evt: evt.is_asset_store_operation,
                   result.event_list))

        assert len(asset_store_operation_events) == 3
        # SET ASSET for step "solid_a" output "result"
        assert (asset_store_operation_events[0].event_specific_data.op ==
                AssetStoreOperationType.SET_ASSET.value)
        filepath_a = os.path.join(tmpdir_path, result.run_id, "solid_a",
                                  "result")
        assert os.path.isfile(filepath_a)
        with open(filepath_a, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3]

        # GET ASSET for step "solid_b" input "_df"
        assert (asset_store_operation_events[1].event_specific_data.op ==
                AssetStoreOperationType.GET_ASSET.value)
        assert "solid_a" == asset_store_operation_events[
            1].event_specific_data.step_key

        # SET ASSET for step "solid_b" output "result"
        assert (asset_store_operation_events[2].event_specific_data.op ==
                AssetStoreOperationType.SET_ASSET.value)
        filepath_b = os.path.join(tmpdir_path, result.run_id, "solid_b",
                                  "result")
        assert os.path.isfile(filepath_b)
        with open(filepath_b, "rb") as read_obj:
            assert pickle.load(read_obj) == 1
Example #9
def test_fs_io_manager_unpicklable():
    @op
    def unpicklable_local_func_output():
        # locally defined functions can't be pickled (AttributeError)
        def local_func():
            return 1

        return local_func

    @op
    def unpicklable_lambda_output():
        # lambdas can't be pickled either
        return lambda x: x * x

    @op
    def recursion_limit_output():
        # a will exceed the recursion limit of 1000 and can't be pickled (RecursionError)
        a = []
        for _ in range(2000):
            a = [a]
        return a

    @op
    def op_b(_i):
        return 1

    @graph
    def local_func_graph():
        op_b(unpicklable_local_func_output())

    @graph
    def lambda_graph():
        op_b(unpicklable_lambda_output())

    @graph
    def recursion_limit_graph():
        op_b(recursion_limit_output())

    with tempfile.TemporaryDirectory() as tmp_dir:
        with instance_for_test(temp_dir=tmp_dir) as instance:
            io_manager = fs_io_manager.configured({"base_dir": tmp_dir})

            local_func_job = local_func_graph.to_job(
                resource_defs={"io_manager": io_manager})
            with pytest.raises(DagsterInvariantViolationError,
                               match=r"Object .* is not picklable. .*"):
                local_func_job.execute_in_process(instance=instance)

            lambda_job = lambda_graph.to_job(
                resource_defs={"io_manager": io_manager})
            with pytest.raises(DagsterInvariantViolationError,
                               match=r"Object .* is not picklable. .*"):
                lambda_job.execute_in_process(instance=instance)

            recursion_job = recursion_limit_graph.to_job(
                resource_defs={"io_manager": io_manager})
            with pytest.raises(
                    DagsterInvariantViolationError,
                    match=r"Object .* exceeds recursion limit and is not picklable. .*",
            ):
                recursion_job.execute_in_process(instance=instance)
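
In each case fs_io_manager pickles step outputs to files under base_dir (Examples #1 and #8 assert the <run_id>/<step_key>/<output_name> layout), so objects the pickle module can't serialize fail at output-handling time with the DagsterInvariantViolationError matched above rather than a raw pickling error.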