コード例 #1
0
def test_bad_version_str(graph_for_test, strategy):
    """Check that planning a job with an invalid version string is rejected.

    A job is built from ``graph_for_test`` using ``strategy`` as its version
    strategy; creating its execution plan must raise
    ``DagsterInvariantViolationError`` with a message naming ``bad_str``
    (a name defined outside this function).
    """

    @resource
    def my_resource():
        pass

    @root_input_manager
    def my_manager():
        pass

    with instance_for_test() as test_instance:
        job_resources = {
            "io_manager": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
            "my_key": my_manager,
            "foo": my_resource,
        }
        my_job = graph_for_test.to_job(
            version_strategy=strategy,
            resource_defs=job_resources,
        )

        expected_message = f"'{bad_str}' is not a valid version string."
        with pytest.raises(DagsterInvariantViolationError, match=expected_message):
            create_execution_plan(my_job, instance_ref=test_instance.get_ref())
コード例 #2
0
def test_memoized_plan_root_input_manager():
    """A memoized plan resolves a version for an output fed by a root input manager."""

    @root_input_manager(version="foo")
    def my_input_manager():
        return 5

    root_input = InputDefinition("x", root_manager_key="my_input_manager")

    @solid(input_defs=[root_input], version="foo")
    def my_solid_takes_input(x):
        return x

    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
            "my_input_manager": my_input_manager,
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def my_pipeline():
        my_solid_takes_input()

    with instance_for_test() as test_instance:
        execution_plan = create_execution_plan(
            my_pipeline, instance_ref=test_instance.get_ref()
        )
        output_handle = StepOutputHandle("my_solid_takes_input", "result")
        resolved_version = execution_plan.get_version_for_step_output_handle(output_handle)
        assert resolved_version is not None
コード例 #3
0
def test_version_strategy_no_resource_version():
    """Run a pipeline whose version strategy only defines solid versions.

    The required resource carries no version of its own. After executing the
    pipeline once, a newly created execution plan must have no steps left to
    execute.
    """

    @solid(required_resource_keys={"foo"})
    def my_solid(context):
        return context.resources.foo

    @resource
    def foo_resource():
        return "bar"

    class MyVersionStrategy(VersionStrategy):
        # Every solid is assigned the same fixed version string.
        def get_solid_version(self, _):
            return "foo"

    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
            "foo": foo_resource,
        }
    )

    @pipeline(version_strategy=MyVersionStrategy(), mode_defs=[mode])
    def my_pipeline():
        my_solid()

    with instance_for_test() as test_instance:
        execute_pipeline(my_pipeline, instance=test_instance)

        replanned = create_execution_plan(
            my_pipeline, instance_ref=test_instance.get_ref()
        )
        assert len(replanned.step_keys_to_execute) == 0
コード例 #4
0
def test_unmemoized_inner_solid():
    """Planning a memoized pipeline fails when a solid inside a composite has no version."""

    @solid
    def solid_no_version():
        pass

    @composite_solid
    def wrap():
        return solid_no_version()

    fake_mode = ModeDefinition(
        name="fakemode",
        resource_defs={
            "fake": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
        },
    )

    @pipeline(mode_defs=[fake_mode], tags={MEMOIZED_RUN_TAG: "true"})
    def wrap_pipeline():
        wrap()

    expected_error = (
        "While using memoization, version for solid 'solid_no_version' was None. Please "
        "either provide a versioning strategy for your job, or provide a version using the "
        "solid decorator."
    )

    with instance_for_test() as test_instance:
        with pytest.raises(DagsterInvariantViolationError, match=expected_error):
            create_execution_plan(wrap_pipeline, instance_ref=test_instance.get_ref())
コード例 #5
0
def test_memoized_plan_default_input_val():
    """A memoized plan can be built for a solid whose input only has a default value."""

    default_input = InputDefinition("_my_input", String, default_value="DEFAULTVAL")

    @solid(version="42", input_defs=[default_input])
    def solid_default_input(_my_input):
        pass

    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def pipeline_default_value():
        solid_default_input()

    # Building the plan must succeed, and the single solid must be scheduled to run.
    with instance_for_test() as test_instance:
        plan = create_execution_plan(
            pipeline_default_value, instance_ref=test_instance.get_ref()
        )
        assert plan.step_keys_to_execute == ["solid_default_input"]
コード例 #6
0
def test_memoized_plan_root_input_manager_input_config():
    """Changing the root input manager's input config changes the resolved output version."""

    @root_input_manager(version="foo", input_config_schema={"my_str": str})
    def my_input_manager():
        return 5

    root_input = InputDefinition("x", root_manager_key="my_input_manager")

    @solid(input_defs=[root_input], version="foo")
    def my_solid_takes_input(x):
        return x

    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(
                VersionedInMemoryIOManager()
            ),
            "my_input_manager": my_input_manager,
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def my_pipeline():
        my_solid_takes_input()

    # ``run_config`` holds a reference to ``input_config``, so mutating the latter
    # below changes what the second plan is built from.
    input_config = {"my_str": "foo"}
    run_config = {"solids": {"my_solid_takes_input": {"inputs": {"x": input_config}}}}

    with instance_for_test() as test_instance:
        first_plan = create_execution_plan(
            my_pipeline,
            instance_ref=test_instance.get_ref(),
            run_config=run_config,
        )
        output_version = first_plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result")
        )

        assert output_version is not None

        input_config["my_str"] = "bar"

        second_plan = create_execution_plan(
            my_pipeline,
            instance_ref=test_instance.get_ref(),
            run_config=run_config,
        )
        new_output_version = second_plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result")
        )

        # A different input config must produce a different version.
        assert new_output_version != output_version
コード例 #7
0
def test_single_partitioned_asset_job():
    """Execute a one-asset partitioned job for partition "b" and check its materialization."""

    static_partitions = StaticPartitionsDefinition(["a", "b", "c", "d"])

    class MyIOManager(IOManager):
        # The output context must report the partition the job was launched for.
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "b"

        # The only asset has no upstream inputs, so nothing should ever be loaded.
        def load_input(self, context):
            assert False, "shouldn't get here"

    @asset(partitions_def=static_partitions)
    def my_asset():
        pass

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    my_job = build_assets_job(
        "my_job",
        assets=[my_asset],
        resource_defs={"io_manager": io_manager_def},
    )

    run_result = my_job.execute_in_process(partition_key="b")
    materializations = run_result.asset_materializations_for_node("my_asset")
    assert materializations == [
        AssetMaterialization(asset_key=AssetKey(["my_asset"]), partition="b")
    ]
コード例 #8
0
def run_test_with_builtin_type(type_to_test, type_values):
    """Exercise memoization for a solid input of a builtin type supplied through run config.

    Args:
        type_to_test: Dagster type used for the solid's ``_builtin_type`` input.
        type_values: Pair of config values; the first is used for the initial plans,
            the second replaces it to check that the memoized result no longer applies.
    """
    initial_value, changed_value = type_values
    io_manager = VersionedInMemoryIOManager()

    builtin_input = InputDefinition("_builtin_type", type_to_test)

    @solid(version="42", input_defs=[builtin_input])
    def solid_ext_input(_builtin_type):
        pass

    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(io_manager),
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def my_pipeline():
        versioned_solid_takes_input(solid_ext_input())

    # ``run_config`` keeps a reference to ``input_config``; it is mutated further down.
    input_config = {"_builtin_type": initial_value}
    run_config = {"solids": {"solid_ext_input": {"inputs": input_config}}}

    with instance_for_test() as test_instance:
        # Nothing stored yet: both steps must run.
        first_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=test_instance.get_ref(),
        )
        assert len(first_plan.step_keys_to_execute) == 2

        # Store a value under the version resolved for the first solid's output.
        output_handle = StepOutputHandle("solid_ext_input", "result")
        resolved_version = first_plan.get_version_for_step_output_handle(output_handle)
        io_manager.values[
            output_handle.step_key, output_handle.output_name, resolved_version
        ] = 5

        # Only the downstream solid is left to execute.
        memoized_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=test_instance.get_ref(),
        )
        assert memoized_plan.step_keys_to_execute == ["versioned_solid_takes_input"]

        # With a different input value, both steps must run again.
        input_config["_builtin_type"] = changed_value
        replanned = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=test_instance.get_ref(),
        )
        assert len(replanned.step_keys_to_execute) == 2
コード例 #9
0
ファイル: test_resolve_versions.py プロジェクト: keyz/dagster
def test_memoized_plan_affected_by_resource_config():
    """Changing the config of a required resource makes a memoized step run again."""

    @solid(required_resource_keys={"my_resource"}, version="39")
    def solid_reqs_resource():
        pass

    @resource(version="42", config_schema={"foo": str})
    def basic():
        pass

    io_manager = VersionedInMemoryIOManager()
    mode = ModeDefinition(
        resource_defs={
            "my_resource": basic,
            "io_manager": IOManagerDefinition.hardcoded_io_manager(io_manager),
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def my_pipeline():
        solid_reqs_resource()

    with instance_for_test() as test_instance:
        # ``run_config`` references ``my_resource_config``, which is mutated below.
        my_resource_config = {"foo": "bar"}
        run_config = {"resources": {"my_resource": {"config": my_resource_config}}}

        first_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=test_instance.get_ref()
        )
        assert first_plan.step_keys_to_execute == ["solid_reqs_resource"]

        # Store a value under the version resolved for the solid's output.
        output_handle = StepOutputHandle("solid_reqs_resource", "result")
        resolved_version = first_plan.get_version_for_step_output_handle(output_handle)
        io_manager.values[
            output_handle.step_key, output_handle.output_name, resolved_version
        ] = 5

        memoized_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=test_instance.get_ref()
        )
        assert len(memoized_plan.step_keys_to_execute) == 0

        my_resource_config["foo"] = "baz"

        changed_config_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=test_instance.get_ref()
        )
        assert changed_config_plan.step_keys_to_execute == ["solid_reqs_resource"]
コード例 #10
0
ファイル: test_resolve_versions.py プロジェクト: keyz/dagster
def versioned_pipeline_factory(manager=None):
    """Build a memoized pipeline that feeds ``versioned_solid_no_input`` into
    ``versioned_solid_takes_input``.

    Args:
        manager: IO manager instance to hard-code as the pipeline's ``io_manager``
            resource. When omitted, a fresh ``VersionedInMemoryIOManager`` is created
            for this call.

    Returns:
        The pipeline definition produced by the ``@pipeline`` decorator.
    """
    # Instantiate the default here rather than in the signature: a default argument is
    # evaluated once at definition time, so every call would otherwise share one manager
    # (and whatever it has stored) across unrelated pipelines.
    if manager is None:
        manager = VersionedInMemoryIOManager()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="main",
                resource_defs={"io_manager": IOManagerDefinition.hardcoded_io_manager(manager)},
            )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def versioned_pipeline():
        versioned_solid_takes_input(versioned_solid_no_input())

    return versioned_pipeline
コード例 #11
0
def test_memoized_inner_solid():
    """A versioned solid nested inside a composite is skipped once its output is stored."""

    @solid(version="versioned")
    def solid_versioned():
        pass

    @composite_solid
    def wrap():
        return solid_versioned()

    io_manager = VersionedInMemoryIOManager()
    fake_mode = ModeDefinition(
        name="fakemode",
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(io_manager),
        },
    )

    @pipeline(mode_defs=[fake_mode], tags={MEMOIZED_RUN_TAG: "true"})
    def wrap_pipeline():
        wrap()

    with instance_for_test() as test_instance:
        first_plan = create_execution_plan(
            wrap_pipeline, instance_ref=test_instance.get_ref()
        )
        output_handle = StepOutputHandle("wrap.solid_versioned", "result")
        assert first_plan.step_keys_to_execute == [output_handle.step_key]

        # Store a value under the version the plan resolved for the step output.
        resolved_version = first_plan.get_version_for_step_output_handle(output_handle)
        storage_key = (output_handle.step_key, output_handle.output_name, resolved_version)
        io_manager.values[storage_key] = 4

        memoized_plan = first_plan.build_memoized_plan(
            wrap_pipeline,
            ResolvedRunConfig.build(wrap_pipeline),
            instance=None,
            selected_step_keys=None,
        )
        assert len(memoized_plan.step_keys_to_execute) == 0
コード例 #12
0
ファイル: test_io_manager.py プロジェクト: prezi/dagster
def test_hardcoded_io_manager():
    """A pipeline using a hard-coded ``InMemoryIOManager`` runs and exposes its solid output."""

    @solid
    def basic_solid(_):
        return 5

    in_memory_mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(InMemoryIOManager()),
        }
    )

    @pipeline(mode_defs=[in_memory_mode])
    def basic_pipeline():
        basic_solid()

    run_result = execute_pipeline(basic_pipeline)
    assert run_result.success
    assert run_result.output_for_solid("basic_solid") == 5
コード例 #13
0
def test_access_partition_keys_from_context_only_one_asset_partitioned():
    """Run a job in which only the upstream asset is partitioned.

    The IO manager checks that the upstream output sees partition "b" while the
    two downstream outputs, and every input load, report no asset partitions.
    """
    upstream_partitions_def = StaticPartitionsDefinition(["a", "b", "c"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            op_name = context.op_def.name
            if op_name == "upstream_asset":
                assert context.asset_partition_key == "b"
                return
            if op_name in ("downstream_asset", "double_downstream_asset"):
                assert not context.has_asset_partitions
                with pytest.raises(Exception):  # TODO: better error message
                    assert context.asset_partition_key_range
                return
            # No other op is expected to produce output in this job.
            assert False

        def load_input(self, context):
            assert not context.has_asset_partitions

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "b"

    @asset
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    @asset
    def double_downstream_asset(downstream_asset):
        assert downstream_asset is None

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset, double_downstream_asset],
        resource_defs={"io_manager": io_manager_def},
    )

    run_result = my_job.execute_in_process(partition_key="b")
    upstream_materializations = run_result.asset_materializations_for_node("upstream_asset")
    assert upstream_materializations == [
        AssetMaterialization(asset_key=AssetKey(["upstream_asset"]), partition="b")
    ]
コード例 #14
0
def test_memoized_plan_custom_io_manager_key():
    """Memoization works when the output's IO manager lives under the custom key "my_key"."""
    io_manager = VersionedInMemoryIOManager()
    mgr_def = IOManagerDefinition.hardcoded_io_manager(io_manager)

    custom_output = OutputDefinition(io_manager_key="my_key")

    @solid(version="39", output_defs=[custom_output])
    def solid_requires_io_manager():
        return Output(5)

    mode = ModeDefinition(resource_defs={"my_key": mgr_def})

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def io_mgr_pipeline():
        solid_requires_io_manager()

    with instance_for_test() as test_instance:
        first_plan = create_execution_plan(
            io_mgr_pipeline, instance_ref=test_instance.get_ref()
        )
        assert first_plan.step_keys_to_execute == ["solid_requires_io_manager"]

        # Store a value under the version resolved for the solid's output.
        output_handle = StepOutputHandle("solid_requires_io_manager", "result")
        resolved_version = first_plan.get_version_for_step_output_handle(output_handle)
        storage_key = (output_handle.step_key, output_handle.output_name, resolved_version)
        io_manager.values[storage_key] = 5

        memoized_plan = create_execution_plan(
            io_mgr_pipeline, instance_ref=test_instance.get_ref()
        )
        assert len(memoized_plan.step_keys_to_execute) == 0
コード例 #15
0
def test_output_context_asset_partitions_time_window():
    """The output context of a daily-partitioned asset exposes the partition's time window."""

    class MyIOManager(IOManager):
        def handle_output(self, context, _obj):
            actual_window = context.asset_partitions_time_window
            expected_window = TimeWindow(
                pendulum.parse("2021-06-06"), pendulum.parse("2021-06-07")
            )
            assert actual_window == expected_window

        def load_input(self, context):
            raise NotImplementedError()

    daily_partitions = DailyPartitionsDefinition(start_date="2021-05-05")

    @asset(partitions_def=daily_partitions)
    def my_asset():
        pass

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    my_job = build_assets_job(
        "my_job",
        assets=[my_asset],
        resource_defs={"io_manager": io_manager_def},
    )
    my_job.execute_in_process(partition_key="2021-06-06")
コード例 #16
0
def test_memoized_plan_disable_memoization():
    """Passing the memoization tag as "false" at plan time makes a memoized step run again."""

    @solid(version="hello")
    def my_solid():
        return 5

    io_manager = VersionedInMemoryIOManager()
    mode = ModeDefinition(
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(io_manager),
        },
    )

    @pipeline(mode_defs=[mode], tags={MEMOIZED_RUN_TAG: "true"})
    def my_pipeline():
        my_solid()

    with instance_for_test() as test_instance:
        first_plan = create_execution_plan(
            my_pipeline, instance_ref=test_instance.get_ref()
        )
        assert len(first_plan.step_keys_to_execute) == 1

        # Store a value under the version resolved for the solid's output.
        output_handle = StepOutputHandle("my_solid", "result")
        resolved_version = first_plan.get_version_for_step_output_handle(output_handle)
        storage_key = (output_handle.step_key, output_handle.output_name, resolved_version)
        io_manager.values[storage_key] = 5

        memoized_plan = create_execution_plan(
            my_pipeline, instance_ref=test_instance.get_ref()
        )
        assert len(memoized_plan.step_keys_to_execute) == 0

        # Overriding the tag for this plan turns memoization off.
        unmemoized_again = create_execution_plan(
            my_pipeline,
            instance_ref=test_instance.get_ref(),
            tags={MEMOIZED_RUN_TAG: "false"},
        )
        assert len(unmemoized_again.step_keys_to_execute) == 1
コード例 #17
0
ファイル: setup.py プロジェクト: helloworld/dagster
    while True:
        time.sleep(0.1)


# Asset with an empty body that takes ``hanging_asset`` as its only parameter.
# NOTE(review): presumably this places it downstream of the hanging asset so that it
# never gets to execute — confirm against how ``hanging_job`` is used.
@asset
def never_runs_asset(hanging_asset):  # pylint: disable=redefined-outer-name,unused-argument
    pass


# Assets job made of ``first_asset``, ``hanging_asset`` and ``never_runs_asset``, with
# ``dummy_source_asset`` as a source asset. Storage goes through a hard-coded
# ``DummyIOManager`` and the job also provides ``hanging_asset_resource``.
hanging_job = build_assets_job(
    name="hanging_job",
    source_assets=[dummy_source_asset],
    assets=[first_asset, hanging_asset, never_runs_asset],
    resource_defs={
        "io_manager":
        IOManagerDefinition.hardcoded_io_manager(DummyIOManager()),
        "hanging_asset_resource": hanging_asset_resource,
    },
)


# Asset with no inputs that produces the constant value 1.
@asset
def asset_one():
    return 1


# Asset that takes ``asset_one`` as its input and produces that value plus one.
@asset
def asset_two(asset_one):  # pylint: disable=redefined-outer-name
    # Derive the result from the input parameter, not from the module-level
    # ``first_asset`` definition, which is not a number and is unrelated to this asset.
    return asset_one + 1

コード例 #18
0
def test_asset_partitions_time_window_non_identity_partition_mapping():
    """Check the time windows seen by the IO manager under a custom partition mapping.

    Both assets are daily-partitioned from 2020-01-01. The job runs for
    partition "2020-01-02"; outputs must see that single day while the
    downstream input load must see the two-day window returned by the mapping.
    """
    upstream_partitions_def = DailyPartitionsDefinition(start_date="2020-01-01")
    downstream_partitions_def = DailyPartitionsDefinition(start_date="2020-01-01")

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps a downstream partition onto two upstream partitions: the same one and the
        one right before it.
        """

        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            # The partitions definitions are not needed by this mapping.
            del downstream_partitions_def, upstream_partitions_def

            range_start, range_end = downstream_partition_key_range
            assert range_start == "2020-01-02"
            assert range_end == "2020-01-02"
            return PartitionKeyRange("2020-01-01", "2020-01-02")

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            actual_window = context.asset_partitions_time_window
            expected_window = TimeWindow(
                pendulum.parse("2020-01-02"), pendulum.parse("2020-01-03")
            )
            assert actual_window == expected_window

        def load_input(self, context):
            actual_window = context.asset_partitions_time_window
            expected_window = TimeWindow(
                pendulum.parse("2020-01-01"), pendulum.parse("2020-01-03")
            )
            assert actual_window == expected_window

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset():
        pass

    trailing_mapping = {"upstream_asset": TrailingWindowPartitionMapping()}

    @asset(partitions_def=downstream_partitions_def, partition_mappings=trailing_mapping)
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={"io_manager": io_manager_def},
    )
    my_job.execute_in_process(partition_key="2020-01-02")
コード例 #19
0
def test_access_partition_keys_from_context_non_identity_partition_mapping():
    """Check the partition keys visible to assets and the IO manager under a custom mapping.

    Both assets use static partitions "1", "2", "3" and the job runs for partition
    "2". Outputs must see key "2"; the downstream input load must see the key range
    produced by ``TrailingWindowPartitionMapping``. Both assets must be materialized
    for partition "2".
    """
    upstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])
    downstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps each downstream partition to two partitions in the upstream asset: itself and the
        preceding partition.
        """

        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            assert downstream_partitions_def
            assert upstream_partitions_def

            start, end = downstream_partition_key_range
            # Step back one partition, but never below the first key "1".
            return PartitionKeyRange(str(max(1, int(start) - 1)), end)

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "2"

        def load_input(self, context):
            start, end = context.asset_partition_key_range
            # Compare the pair as a tuple. Written as ``assert start, end == (...)`` the
            # statement would only check that ``start`` is truthy and treat the comparison
            # as the failure message.
            assert (start, end) == ("1", "2")

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "2"

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={"upstream_asset": TrailingWindowPartitionMapping()},
    )
    def downstream_asset(context, upstream_asset):
        assert context.output_asset_partition_key() == "2"
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="2")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(AssetKey(["upstream_asset"]), partition="2")
    ]
    assert result.asset_materializations_for_node("downstream_asset") == [
        AssetMaterialization(AssetKey(["downstream_asset"]), partition="2")
    ]
コード例 #20
0
ファイル: spark_weather_job.py プロジェクト: keyz/dagster
        if context.dagster_type.typing_type == PandasDF:
            fs_path = os.path.abspath(self._get_fs_path(context.asset_key))
            paths = glob.glob(os.path.join(fs_path, "*.csv"))
            check.invariant(len(paths) > 0, f"No csv files found under {fs_path}")
            return pd.concat(map(pd.read_csv, paths))
        elif context.dagster_type.typing_type == SparkDF:
            return (
                SparkSession.builder.getOrCreate()
                .read.format("csv")
                .options(header="true")
                .load(self._get_fs_path(context.asset_key))
            )
        else:
            raise ValueError("Unexpected input type")


# io_manager_end

# build_assets_job_start

# Assets job named "spark_weather": three assets plus one source asset, all stored
# through a hard-coded ``LocalFileSystemIOManager`` registered as "io_manager".
spark_weather_job = build_assets_job(
    "spark_weather",
    assets=[daily_temperature_highs, hottest_dates, daily_temperature_high_diffs],
    source_assets=[sfo_q2_weather_sample],
    resource_defs={
        "io_manager": IOManagerDefinition.hardcoded_io_manager(LocalFileSystemIOManager())
    },
)

# build_assets_job_end