Example #1
def test_default_asset_store_reexecution():
    with seven.TemporaryDirectory() as tmpdir_path:
        default_asset_store = default_filesystem_asset_store.configured(
            {"base_dir": tmpdir_path})
        pipeline_def = define_asset_pipeline(default_asset_store, {})
        instance = DagsterInstance.ephemeral()

        result = execute_pipeline(pipeline_def,
                                  run_config={"storage": {
                                      "filesystem": {}
                                  }},
                                  instance=instance)
        assert result.success

        re_result = reexecute_pipeline(
            pipeline_def,
            result.run_id,
            run_config={"storage": {
                "filesystem": {}
            }},
            instance=instance,
            step_selection=["solid_b.compute"],
        )

        # re-execution should yield asset_store_operation events instead of intermediate events
        get_asset_events = list(
            filter(
                lambda evt: evt.is_asset_store_operation
                and AssetStoreOperationType(evt.event_specific_data.op)
                == AssetStoreOperationType.GET_ASSET,
                re_result.event_list,
            )
        )
        assert len(get_asset_events) == 1
        assert get_asset_events[0].event_specific_data.step_key == "solid_a.compute"
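Both tests in these examples call a define_asset_pipeline helper that is not included in the excerpts. Below is a minimal sketch of what such a helper could look like, assuming a two-solid pipeline whose values match the assertions in Example #2 (solid_a produces [1, 2, 3], solid_b returns its first element) and assuming the configured asset store is registered as a mode resource under the key referenced by asset_store_key; the wiring shown here is an illustration, not the original test helper.

from dagster import ModeDefinition, OutputDefinition, pipeline, solid


def define_asset_pipeline(asset_store, asset_metadata_dict):
    # Hypothetical reconstruction of the helper used by the tests above.
    # asset_metadata_dict is accepted to match the call sites but unused in this sketch.
    @solid(output_defs=[OutputDefinition(asset_store_key="default_fs_asset_store")])
    def solid_a(_):
        return [1, 2, 3]

    @solid(output_defs=[OutputDefinition(asset_store_key="default_fs_asset_store")])
    def solid_b(_, _df):
        return _df[0]

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={"default_fs_asset_store": asset_store})
        ]
    )
    def asset_pipeline():
        solid_b(solid_a())

    return asset_pipeline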
Example #2
def test_default_asset_store():
    with seven.TemporaryDirectory() as tmpdir_path:
        default_asset_store = default_filesystem_asset_store.configured(
            {"base_dir": tmpdir_path})
        pipeline_def = define_asset_pipeline(default_asset_store, {})

        result = execute_pipeline(pipeline_def)
        assert result.success

        asset_store_operation_events = list(
            filter(lambda evt: evt.is_asset_store_operation,
                   result.event_list))

        assert len(asset_store_operation_events) == 3
        # SET ASSET for step "solid_a.compute" output "result"
        assert (asset_store_operation_events[0].event_specific_data.op ==
                AssetStoreOperationType.SET_ASSET)
        filepath_a = os.path.join(tmpdir_path, result.run_id,
                                  "solid_a.compute", "result")
        assert os.path.isfile(filepath_a)
        with open(filepath_a, "rb") as read_obj:
            assert pickle.load(read_obj) == [1, 2, 3]

        # GET ASSET for step "solid_b.compute" input "_df"
        assert (asset_store_operation_events[1].event_specific_data.op ==
                AssetStoreOperationType.GET_ASSET)
        assert (
            asset_store_operation_events[1].event_specific_data.step_output_handle
            == StepOutputHandle("solid_a.compute", "result")
        )

        # SET ASSET for step "solid_b.compute" output "result"
        assert (asset_store_operation_events[2].event_specific_data.op ==
                AssetStoreOperationType.SET_ASSET)
        filepath_b = os.path.join(tmpdir_path, result.run_id,
                                  "solid_b.compute", "result")
        assert os.path.isfile(filepath_b)
        with open(filepath_b, "rb") as read_obj:
            assert pickle.load(read_obj) == 1
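The default filesystem asset store pickles each output to <base_dir>/<run_id>/<step_key>/<output_name>, which is exactly the layout the file-existence assertions above rely on. As a small illustration (the helper name is made up, not part of Dagster's API), an asset written this way can be read back outside of a pipeline run like so:

import os
import pickle


def load_stored_asset(base_dir, run_id, step_key, output_name="result"):
    # Rebuild the path layout asserted in the test: <base_dir>/<run_id>/<step_key>/<output_name>
    path = os.path.join(base_dir, run_id, step_key, output_name)
    with open(path, "rb") as read_obj:
        return pickle.load(read_obj)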
Example #3
from dagster import (
    ModeDefinition,
    OutputDefinition,
    PresetDefinition,
    execute_pipeline,
    pipeline,
    solid,
)
from dagster.core.definitions.utils import struct_to_string
from dagster.core.storage.asset_store import default_filesystem_asset_store


def train(df):
    return len(df)


local_asset_store = default_filesystem_asset_store.configured(
    {"base_dir": "uncommitted/intermediates/"})


@solid(
    output_defs=[OutputDefinition(asset_store_key="default_fs_asset_store")])
def call_api(_):
    return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


@solid(
    output_defs=[OutputDefinition(asset_store_key="default_fs_asset_store")])
def parse_df(context, df):
    context.log.info(struct_to_string(df))
    result_df = df[:5]
    return result_df
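Example #3 is cut off before the pipeline that ties these solids together. Below is a minimal sketch of how they might be wired up, reusing the ModeDefinition and PresetDefinition imports shown above; the resource key, mode name, and pipeline name are assumptions rather than part of the original snippet.

@pipeline(
    mode_defs=[
        ModeDefinition(
            "local", resource_defs={"default_fs_asset_store": local_asset_store}
        )
    ],
    preset_defs=[PresetDefinition("local", mode="local")],
)
def asset_store_pipeline():
    # Illustrative composition only; the original example may wire train() in as well.
    parse_df(call_api())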