Exemple #1
0
def test_default_asset_store_reexecution():
    """Re-executing only ``solid_b`` should emit exactly one GET_ASSET event keyed to ``solid_a``."""
    with seven.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        asset_pipeline = define_asset_pipeline(store, {})
        instance = DagsterInstance.ephemeral()

        # Full run first, so the re-execution below has a parent run to refer to.
        first_run = execute_pipeline(asset_pipeline, instance=instance)
        assert first_run.success

        second_run = reexecute_pipeline(
            asset_pipeline,
            first_run.run_id,
            instance=instance,
            step_selection=["solid_b"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        loads = [
            event
            for event in second_run.event_list
            if event.is_asset_store_operation
            and AssetStoreOperationType(event.event_specific_data.op)
            == AssetStoreOperationType.GET_ASSET
        ]
        assert len(loads) == 1
        assert loads[0].event_specific_data.step_key == "solid_a"
Exemple #2
0
def test_result_output():
    """Check the output values reported for ``solid_a`` and ``solid_b`` after a successful run."""
    with seven.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        run = execute_pipeline(define_asset_pipeline(store, {}))
        assert run.success

        # test output_value
        expected_outputs = (("solid_a", [1, 2, 3]), ("solid_b", 1))
        for solid_name, expected in expected_outputs:
            assert run.result_for_solid(solid_name).output_value() == expected
Exemple #3
0
def test_fan_in():
    """Run a pipeline in which ``solid1`` receives the outputs of two upstream solids as one input."""
    with tempfile.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        fan_in_mode = ModeDefinition(resource_defs={"object_manager": store})

        @solid
        def input_solid1(_):
            return 1

        @solid
        def input_solid2(_):
            return 2

        @solid
        def solid1(_, input1):
            # The fanned-in input is expected to arrive as a list of the upstream outputs.
            assert input1 == [1, 2]

        @pipeline(mode_defs=[fan_in_mode])
        def my_pipeline():
            upstream = [input_solid1(), input_solid2()]
            solid1(input1=upstream)

        execute_pipeline(my_pipeline)
Exemple #4
0
def test_fs_asset_store():
    """Verify the three asset-store events of a run and the pickled files written for each solid."""
    with tempfile.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        run = execute_pipeline(define_asset_pipeline(store, {}))
        assert run.success

        store_events = [event for event in run.event_list if event.is_asset_store_operation]

        # Check the count before unpacking so a mismatch fails as an assertion.
        assert len(store_events) == 3
        set_a, get_b, set_b = store_events

        # SET ASSET for step "solid_a" output "result"
        assert set_a.event_specific_data.op == AssetStoreOperationType.SET_ASSET
        path_a = os.path.join(base_dir, run.run_id, "solid_a", "result")
        assert os.path.isfile(path_a)
        with open(path_a, "rb") as pickled:
            assert pickle.load(pickled) == [1, 2, 3]

        # GET ASSET for step "solid_b" input "_df"
        assert get_b.event_specific_data.op == AssetStoreOperationType.GET_ASSET
        assert get_b.event_specific_data.step_key == "solid_a"

        # SET ASSET for step "solid_b" output "result"
        assert set_b.event_specific_data.op == AssetStoreOperationType.SET_ASSET
        path_b = os.path.join(base_dir, run.run_id, "solid_b", "result")
        assert os.path.isfile(path_b)
        with open(path_b, "rb") as pickled:
            assert pickle.load(pickled) == 1
Exemple #5
0
def test_asset_store_optional_output():
    """A solid fed by an optional output that is never yielded should be reported as skipped."""
    with tempfile.TemporaryDirectory() as base_dir:
        store = fs_asset_store.configured({"base_dir": base_dir})
        local_mode = ModeDefinition("local", resource_defs={"asset_store": store})

        skip = True

        @solid(output_defs=[OutputDefinition(is_required=False)])
        def solid_a(_context):
            # Still a generator because of the ``yield`` below; with ``skip`` set it yields nothing.
            if skip:
                return
            yield Output([1, 2])

        @solid
        def solid_skipped(_context, array):
            return array

        @pipeline(mode_defs=[local_mode])
        def asset_pipeline_optional_output():
            solid_skipped(solid_a())

        run = execute_pipeline(asset_pipeline_optional_output)
        assert run.success
        assert run.result_for_solid("solid_skipped").skipped
Exemple #6
0
    ModeDefinition,
    OutputDefinition,
    execute_pipeline,
    pipeline,
    reexecute_pipeline,
    repository,
    solid,
)
from dagster.core.storage.asset_store import fs_asset_store


def train(df):
    """Return the number of items in ``df`` as reported by ``len``."""
    size = len(df)
    return size


# Asset store configured to use "uncommitted/intermediates/" as its base directory.
local_asset_store = fs_asset_store.configured({"base_dir": "uncommitted/intermediates/"})


@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])
def call_api(_):
    """Return the integers 1 through 10 as a list; no external call is made here."""
    return list(range(1, 11))


@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])
def parse_df(context, df):
    """Log ``df`` at info level and return its first five items."""
    message = "{}".format(df)
    context.log.info(message)
    return df[:5]


@solid(output_defs=[OutputDefinition(manager_key="fs_asset_store")])