Example #1: the input and output contexts expose asset_partitions_time_window for a daily-partitioned asset job.
def test_input_context_asset_partitions_time_window():
    partitions_def = DailyPartitionsDefinition(start_date="2021-05-05")

    class MyIOManager(IOManager):
        def handle_output(self, context, _obj):
            assert context.asset_partitions_time_window == TimeWindow(
                pendulum.parse("2021-06-06"), pendulum.parse("2021-06-07"))

        def load_input(self, context):
            assert context.asset_partitions_time_window == TimeWindow(
                pendulum.parse("2021-06-06"), pendulum.parse("2021-06-07"))

    @asset(partitions_def=partitions_def)
    def upstream_asset():
        pass

    @asset(partitions_def=partitions_def)
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[downstream_asset, upstream_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    my_job.execute_in_process(partition_key="2021-06-06")
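The excerpts in this listing omit their module-level imports. As a rough orientation, the sketch below collects the top-level dagster names the examples collectively assume; these snippets appear to come from Dagster's test suite and docs around the 0.13 line, and helpers not listed here (VersionedInMemoryIOManager, MEMOIZED_RUN_TAG, StepOutputHandle, create_execution_plan, instance_for_test, ResolvedRunConfig, TimeWindow, PartitionKeyRange, PartitionMapping, PartitionsDefinition) live in Dagster-internal modules whose import paths vary by version.

import pendulum
import pytest

from dagster import (
    AssetKey,
    AssetMaterialization,
    DagsterInvariantViolationError,
    DailyPartitionsDefinition,
    HourlyPartitionsDefinition,
    In,
    InputDefinition,
    IOManager,
    IOManagerDefinition,
    ModeDefinition,
    Out,
    Output,
    OutputDefinition,
    SourceAsset,
    StaticPartitionsDefinition,
    String,
    VersionStrategy,
    asset,  # on early 0.13 releases the asset APIs may instead live in dagster.core.asset_defs
    build_assets_job,
    composite_solid,
    execute_pipeline,
    graph,
    job,
    op,
    pipeline,
    resource,
    root_input_manager,
    solid,
)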
Example #2: a job with a single statically partitioned asset; handle_output sees the run's asset_partition_key.
def test_single_partitioned_asset_job():
    partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "b"

        def load_input(self, context):
            assert False, "shouldn't get here"

    @asset(partitions_def=partitions_def)
    def my_asset():
        pass

    my_job = build_assets_job(
        "my_job",
        assets=[my_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="b")
    assert result.asset_materializations_for_node("my_asset") == [
        AssetMaterialization(asset_key=AssetKey(["my_asset"]), partition="b")
    ]
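Example #2's I/O manager only asserts on the context, but it points at what a real partition-aware I/O manager does with that information: use the partition key to decide where each partition is stored. Below is a minimal sketch along those lines; PartitionedPickleIOManager, its directory layout, and the assumption of the default identity partition mapping in load_input are illustrative choices rather than part of the example above.

import os
import pickle

from dagster import IOManager


class PartitionedPickleIOManager(IOManager):
    """Illustrative sketch: persist each asset partition to its own pickle file."""

    def __init__(self, base_dir):
        self.base_dir = base_dir

    def _path(self, asset_key, partition_key):
        # AssetKey.path is the list of key components, e.g. ["my_asset"].
        return os.path.join(self.base_dir, *asset_key.path, f"{partition_key}.pkl")

    def handle_output(self, context, obj):
        path = self._path(context.asset_key, context.asset_partition_key)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump(obj, f)

    def load_input(self, context):
        # Assumes the upstream asset shares the same partitioning (identity mapping),
        # so one upstream partition backs each downstream partition.
        path = self._path(context.asset_key, context.asset_partition_key)
        with open(path, "rb") as f:
            return pickle.load(f)

Such a manager would be wired in the same way as above, e.g. resource_defs={"io_manager": IOManagerDefinition.hardcoded_io_manager(PartitionedPickleIOManager("/tmp/assets"))}.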
Example #3: an invalid version string produced by a version strategy raises DagsterInvariantViolationError when the execution plan is created.
# bad_str is supplied via pytest parametrization in the original test module.
def test_bad_version_str(graph_for_test, strategy, bad_str):
    @resource
    def my_resource():
        pass

    @root_input_manager
    def my_manager():
        pass

    with instance_for_test() as instance:
        my_job = graph_for_test.to_job(
            version_strategy=strategy,
            resource_defs={
                "io_manager": IOManagerDefinition.hardcoded_io_manager(
                    VersionedInMemoryIOManager()
                ),
                "my_key": my_manager,
                "foo": my_resource,
            },
        )

        with pytest.raises(
            DagsterInvariantViolationError, match=f"'{bad_str}' is not a valid version string."
        ):
            create_execution_plan(my_job, instance_ref=instance.get_ref())
Example #4: a VersionStrategy that versions solids but not resources still produces a fully memoized plan after one run.
def test_version_strategy_no_resource_version():
    @solid(required_resource_keys={"foo"})
    def my_solid(context):
        return context.resources.foo

    @resource
    def foo_resource():
        return "bar"

    class MyVersionStrategy(VersionStrategy):
        def get_solid_version(self, _):
            return "foo"

    @pipeline(
        version_strategy=MyVersionStrategy(),
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(
                        VersionedInMemoryIOManager()
                    ),
                    "foo": foo_resource,
                }
            )
        ],
    )
    def my_pipeline():
        my_solid()

    with instance_for_test() as instance:
        execute_pipeline(my_pipeline, instance=instance)

        memoized_plan = create_execution_plan(my_pipeline, instance_ref=instance.get_ref())
        assert len(memoized_plan.step_keys_to_execute) == 0
Example #5: setting the memoized-run tag to "false" when building the plan disables memoization for that run.
def test_memoized_plan_disable_memoization():
    @solid(version="hello")
    def my_solid():
        return 5

    mgr = VersionedInMemoryIOManager()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(mgr),
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        my_solid()

    with instance_for_test() as instance:
        unmemoized_plan = create_execution_plan(my_pipeline, instance_ref=instance.get_ref())
        assert len(unmemoized_plan.step_keys_to_execute) == 1
        step_output_handle = StepOutputHandle("my_solid", "result")
        version = unmemoized_plan.get_version_for_step_output_handle(step_output_handle)
        mgr.values[(step_output_handle.step_key, step_output_handle.output_name, version)] = 5
        memoized_plan = create_execution_plan(my_pipeline, instance_ref=instance.get_ref())
        assert len(memoized_plan.step_keys_to_execute) == 0

        unmemoized_again = create_execution_plan(
            my_pipeline, instance_ref=instance.get_ref(), tags={MEMOIZED_RUN_TAG: "false"}
        )
        assert len(unmemoized_again.step_keys_to_execute) == 1
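Examples #4 through #8 seed memoization hits by writing directly into mgr.values[(step_key, output_name, version)]. The VersionedInMemoryIOManager they hard-code is a memoizable I/O manager roughly along the lines sketched below; the class name VersionedDictIOManager is made up, and the MemoizableIOManager import path shown is the 0.13-era internal location, which moves between Dagster versions.

from dagster.core.storage.memoizable_io_manager import MemoizableIOManager


class VersionedDictIOManager(MemoizableIOManager):
    """Illustrative sketch: outputs are keyed by (step_key, output_name, version), so a
    stored value with a matching version counts as a memoization hit."""

    def __init__(self):
        self.values = {}

    def _key(self, context):
        return (context.step_key, context.name, context.version)

    def handle_output(self, context, obj):
        self.values[self._key(context)] = obj

    def load_input(self, context):
        return self.values[self._key(context.upstream_output)]

    def has_output(self, context):
        # Consulted while building the plan: True means the output already exists
        # for this version and the step can be skipped.
        return self._key(context) in self.values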
Example #6: a versioned root_input_manager lets a version be computed for the consuming solid's output.
def test_memoized_plan_root_input_manager():
    @root_input_manager(version="foo")
    def my_input_manager():
        return 5

    @solid(input_defs=[InputDefinition("x", root_manager_key="my_input_manager")], version="foo")
    def my_solid_takes_input(x):
        return x

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(
                        VersionedInMemoryIOManager()
                    ),
                    "my_input_manager": my_input_manager,
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        my_solid_takes_input()

    with instance_for_test() as instance:
        plan = create_execution_plan(my_pipeline, instance_ref=instance.get_ref())
        assert (
            plan.get_version_for_step_output_handle(
                StepOutputHandle("my_solid_takes_input", "result")
            )
            is not None
        )
Example #7: memoization with a custom io_manager_key on a solid output.
def test_memoized_plan_custom_io_manager_key():
    manager = VersionedInMemoryIOManager()
    mgr_def = IOManagerDefinition.hardcoded_io_manager(manager)

    @solid(version="39", output_defs=[OutputDefinition(io_manager_key="my_key")])
    def solid_requires_io_manager():
        return Output(5)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "my_key": mgr_def,
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def io_mgr_pipeline():
        solid_requires_io_manager()

    with instance_for_test() as instance:

        unmemoized_plan = create_execution_plan(io_mgr_pipeline, instance_ref=instance.get_ref())

        assert unmemoized_plan.step_keys_to_execute == ["solid_requires_io_manager"]

        step_output_handle = StepOutputHandle("solid_requires_io_manager", "result")
        version = unmemoized_plan.get_version_for_step_output_handle(step_output_handle)

        manager.values[(step_output_handle.step_key, step_output_handle.output_name, version)] = 5

        memoized_plan = create_execution_plan(io_mgr_pipeline, instance_ref=instance.get_ref())

        assert len(memoized_plan.step_keys_to_execute) == 0
Example #8: memoization fails with DagsterInvariantViolationError when a solid inside a composite has no version.
def test_unmemoized_inner_solid():
    @solid
    def solid_no_version():
        pass

    @composite_solid
    def wrap():
        return solid_no_version()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="fakemode",
                resource_defs={
                    "fake": IOManagerDefinition.hardcoded_io_manager(VersionedInMemoryIOManager()),
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def wrap_pipeline():
        wrap()

    with instance_for_test() as instance:
        with pytest.raises(
            DagsterInvariantViolationError,
            match="While using memoization, version for solid 'solid_no_version' was None. Please "
            "either provide a versioning strategy for your job, or provide a version using the "
            "solid decorator.",
        ):
            create_execution_plan(wrap_pipeline, instance_ref=instance.get_ref())
Example #9: a daily-partitioned asset reading an hourly-partitioned SourceAsset sees the full hourly key range for the day.
def test_source_asset_partitions():
    hourly_asset = SourceAsset(
        AssetKey("hourly_asset"),
        partitions_def=HourlyPartitionsDefinition(
            start_date="2021-05-05-00:00"),
    )

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset(hourly_asset):
        assert hourly_asset is None

    class CustomIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            key_range = context.asset_partition_key_range
            assert key_range.start == "2021-06-06-00:00"
            assert key_range.end == "2021-06-06-23:00"

    daily_job = build_assets_job(
        name="daily_job",
        assets=[daily_asset],
        source_assets=[hourly_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(CustomIOManager())
        },
    )
    assert daily_job.execute_in_process(partition_key="2021-06-06").success
Example #10: an execution plan can be built for a memoized pipeline whose solid input has a default value.
def test_memoized_plan_default_input_val():
    @solid(
        version="42",
        input_defs=[InputDefinition("_my_input", String, default_value="DEFAULTVAL")],
    )
    def solid_default_input(_my_input):
        pass

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(
                        VersionedInMemoryIOManager()
                    ),
                },
            )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def pipeline_default_value():
        solid_default_input()

    # Ensure that we can build a valid plan with a default input value.
    with instance_for_test() as instance:
        unmemoized_plan = create_execution_plan(
            pipeline_default_value, instance_ref=instance.get_ref()
        )
        assert unmemoized_plan.step_keys_to_execute == ["solid_default_input"]
Example #11: changing a root_input_manager's input config changes the computed step output version.
def test_memoized_plan_root_input_manager_input_config():
    @root_input_manager(version="foo", input_config_schema={"my_str": str})
    def my_input_manager():
        return 5

    @solid(
        input_defs=[InputDefinition("x", root_manager_key="my_input_manager")],
        version="foo")
    def my_solid_takes_input(x):
        return x

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                "io_manager":
                IOManagerDefinition.hardcoded_io_manager(
                    VersionedInMemoryIOManager()),
                "my_input_manager":
                my_input_manager,
            }, ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        my_solid_takes_input()

    input_config = {"my_str": "foo"}
    run_config = {
        "solids": {
            "my_solid_takes_input": {
                "inputs": {
                    "x": input_config
                }
            }
        }
    }
    with instance_for_test() as instance:
        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )
        output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result"))

        assert output_version is not None

        input_config["my_str"] = "bar"

        plan = create_execution_plan(
            my_pipeline,
            instance_ref=instance.get_ref(),
            run_config=run_config,
        )

        new_output_version = plan.get_version_for_step_output_handle(
            StepOutputHandle("my_solid_takes_input", "result"))

        # Ensure that after changing input config, the version changes.
        assert new_output_version != output_version
Example #12: partition_key and has_partition_key are available on input and output contexts in a partitioned op job.
def test_partition_key():
    @op
    def my_op():
        pass

    @op
    def my_op2(_input1):
        pass

    class MyIOManager(IOManager):
        def load_input(self, context):
            assert context.has_partition_key
            assert context.partition_key == "2020-01-01"

        def handle_output(self, context, _obj):
            assert context.has_partition_key
            assert context.partition_key == "2020-01-01"

    @job(
        partitions_def=DailyPartitionsDefinition(start_date="2020-01-01"),
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    def my_job():
        my_op2(my_op())

    assert my_job.execute_in_process(partition_key="2020-01-01").success
Example #13: asset keys declared on In and Out are visible to the I/O manager's input and output contexts.
def test_asset_key():
    in_asset_key = AssetKey(["a", "b"])
    out_asset_key = AssetKey(["c", "d"])

    @op(out=Out(asset_key=out_asset_key))
    def before():
        pass

    @op(ins={"a": In(asset_key=in_asset_key)}, out={})
    def after(a):
        assert a

    class MyIOManager(IOManager):
        def load_input(self, context):
            assert context.asset_key == in_asset_key
            assert context.upstream_output.asset_key == out_asset_key
            return 1

        def handle_output(self, context, obj):
            assert context.asset_key == out_asset_key

    @graph
    def my_graph():
        after(before())

    result = my_graph.to_job(resource_defs={
        "io_manager":
        IOManagerDefinition.hardcoded_io_manager(MyIOManager())
    }).execute_in_process()
    assert result.success
Example #14: helper that checks memoization is sensitive to the values of builtin-typed inputs supplied via run config.
def run_test_with_builtin_type(type_to_test, type_values):

    first_type_val, second_type_val = type_values
    manager = VersionedInMemoryIOManager()

    @solid(version="42",
           input_defs=[InputDefinition("_builtin_type", type_to_test)])
    def solid_ext_input(_builtin_type):
        pass

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                "io_manager":
                IOManagerDefinition.hardcoded_io_manager(manager),
            }, )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        # versioned_solid_takes_input is a versioned @solid defined elsewhere in the original test module.
        versioned_solid_takes_input(solid_ext_input())

    input_config = {"_builtin_type": first_type_val}
    run_config = {"solids": {"solid_ext_input": {"inputs": input_config}}}

    with instance_for_test() as instance:
        unmemoized_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=instance.get_ref(),
        )

        assert len(unmemoized_plan.step_keys_to_execute) == 2

        step_output_handle = StepOutputHandle("solid_ext_input", "result")
        version = unmemoized_plan.get_version_for_step_output_handle(
            step_output_handle)

        manager.values[step_output_handle.step_key,
                       step_output_handle.output_name, version] = 5

        memoized_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=instance.get_ref(),
        )
        assert memoized_plan.step_keys_to_execute == [
            "versioned_solid_takes_input"
        ]

        input_config["_builtin_type"] = second_type_val

        unmemoized_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=instance.get_ref(),
        )

        assert len(unmemoized_plan.step_keys_to_execute) == 2
Example #15: memoization is sensitive to resource config; changing it forces the step to re-execute.
def test_memoized_plan_affected_by_resource_config():
    @solid(required_resource_keys={"my_resource"}, version="39")
    def solid_reqs_resource():
        pass

    @resource(version="42", config_schema={"foo": str})
    def basic():
        pass

    manager = VersionedInMemoryIOManager()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "my_resource": basic,
                    "io_manager": IOManagerDefinition.hardcoded_io_manager(manager),
                },
            )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        solid_reqs_resource()

    with instance_for_test() as instance:
        my_resource_config = {"foo": "bar"}
        run_config = {"resources": {"my_resource": {"config": my_resource_config}}}

        unmemoized_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=instance.get_ref()
        )

        assert unmemoized_plan.step_keys_to_execute == ["solid_reqs_resource"]

        step_output_handle = StepOutputHandle("solid_reqs_resource", "result")
        version = unmemoized_plan.get_version_for_step_output_handle(step_output_handle)

        manager.values[step_output_handle.step_key, step_output_handle.output_name, version] = 5

        memoized_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=instance.get_ref()
        )

        assert len(memoized_plan.step_keys_to_execute) == 0

        my_resource_config["foo"] = "baz"

        changed_config_plan = create_execution_plan(
            my_pipeline, run_config=run_config, instance_ref=instance.get_ref()
        )

        assert changed_config_plan.step_keys_to_execute == ["solid_reqs_resource"]
Example #16: a factory that builds a memoized pipeline wired to a versioned in-memory I/O manager.
def versioned_pipeline_factory(manager=VersionedInMemoryIOManager()):
    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="main",
                resource_defs={"io_manager": IOManagerDefinition.hardcoded_io_manager(manager)},
            )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def versioned_pipeline():
        # versioned_solid_no_input and versioned_solid_takes_input are versioned @solids defined elsewhere in the original test module.
        versioned_solid_takes_input(versioned_solid_no_input())

    return versioned_pipeline
Example #17: a versioned solid nested inside a composite solid can be memoized via build_memoized_plan.
def test_memoized_inner_solid():
    @solid(version="versioned")
    def solid_versioned():
        pass

    @composite_solid
    def wrap():
        return solid_versioned()

    mgr = VersionedInMemoryIOManager()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="fakemode",
                resource_defs={
                    "io_manager":
                    IOManagerDefinition.hardcoded_io_manager(mgr),
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def wrap_pipeline():
        wrap()

    with instance_for_test() as instance:
        unmemoized_plan = create_execution_plan(
            wrap_pipeline, instance_ref=instance.get_ref())
        step_output_handle = StepOutputHandle("wrap.solid_versioned", "result")
        assert unmemoized_plan.step_keys_to_execute == [
            step_output_handle.step_key
        ]

        # Affix value to expected version for step output.
        step_output_version = unmemoized_plan.get_version_for_step_output_handle(
            step_output_handle)
        mgr.values[(step_output_handle.step_key,
                    step_output_handle.output_name, step_output_version)] = 4
        memoized_plan = unmemoized_plan.build_memoized_plan(
            wrap_pipeline,
            ResolvedRunConfig.build(wrap_pipeline),
            instance=None,
            selected_step_keys=None,
        )
        assert len(memoized_plan.step_keys_to_execute) == 0
Example #18: the simplest hardcoded_io_manager usage, backing the default io_manager with an in-memory I/O manager.
def test_hardcoded_io_manager():
    @solid
    def basic_solid(_):
        return 5

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                "io_manager":
                IOManagerDefinition.hardcoded_io_manager(InMemoryIOManager())
            })
    ])
    def basic_pipeline():
        basic_solid()

    result = execute_pipeline(basic_pipeline)
    assert result.success
    assert result.output_for_solid("basic_solid") == 5
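Example #18 uses the legacy solid/pipeline API. For comparison, the same hardcoded_io_manager wiring with the op/job API used elsewhere in this listing looks roughly like the sketch below; InMemoryStore, emit_five, add_one, and demo_job are illustrative names, not part of the example above.

from dagster import IOManager, IOManagerDefinition, job, op


class InMemoryStore(IOManager):
    """Illustrative sketch: keep op outputs in a dict keyed by (step_key, output_name)."""

    def __init__(self):
        self.values = {}

    def handle_output(self, context, obj):
        self.values[(context.step_key, context.name)] = obj

    def load_input(self, context):
        src = context.upstream_output
        return self.values[(src.step_key, src.name)]


@op
def emit_five():
    return 5


@op
def add_one(x):
    return x + 1


@job(resource_defs={"io_manager": IOManagerDefinition.hardcoded_io_manager(InMemoryStore())})
def demo_job():
    add_one(emit_five())


result = demo_job.execute_in_process()
assert result.success
assert result.output_for_node("add_one") == 6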
Example #19: when only one asset in a job is partitioned, partition information appears only on that asset's contexts.
def test_access_partition_keys_from_context_only_one_asset_partitioned():
    upstream_partitions_def = StaticPartitionsDefinition(["a", "b", "c"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            if context.op_def.name == "upstream_asset":
                assert context.asset_partition_key == "b"
            elif context.op_def.name in [
                    "downstream_asset", "double_downstream_asset"
            ]:
                assert not context.has_asset_partitions
                with pytest.raises(Exception):  # TODO: better error message
                    assert context.asset_partition_key_range
            else:
                assert False

        def load_input(self, context):
            assert not context.has_asset_partitions

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "b"

    @asset
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    @asset
    def double_downstream_asset(downstream_asset):
        assert downstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset, double_downstream_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="b")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(asset_key=AssetKey(["upstream_asset"]),
                             partition="b")
    ]
Example #20: the output context exposes asset_partitions_time_window for a daily-partitioned asset.
def test_output_context_asset_partitions_time_window():
    class MyIOManager(IOManager):
        def handle_output(self, context, _obj):
            assert context.asset_partitions_time_window == TimeWindow(
                pendulum.parse("2021-06-06"), pendulum.parse("2021-06-07"))

        def load_input(self, context):
            raise NotImplementedError()

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def my_asset():
        pass

    my_job = build_assets_job(
        "my_job",
        assets=[my_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    my_job.execute_in_process(partition_key="2021-06-06")
Example #21: module-level fragment from a test repository defining a hanging asset job and two simple assets (excerpted mid-file).
    while True:
        time.sleep(0.1)


@asset
def never_runs_asset(hanging_asset):  # pylint: disable=redefined-outer-name,unused-argument
    pass


hanging_job = build_assets_job(
    name="hanging_job",
    source_assets=[dummy_source_asset],
    assets=[first_asset, hanging_asset, never_runs_asset],
    resource_defs={
        "io_manager":
        IOManagerDefinition.hardcoded_io_manager(DummyIOManager()),
        "hanging_asset_resource": hanging_asset_resource,
    },
)


@asset
def asset_one():
    return 1


@asset
def asset_two(asset_one):  # pylint: disable=redefined-outer-name
    return asset_one + 1

Example #22: asset_partitions_time_window with a trailing-window (non-identity) partition mapping between daily-partitioned assets.
def test_asset_partitions_time_window_non_identity_partition_mapping():
    upstream_partitions_def = DailyPartitionsDefinition(
        start_date="2020-01-01")
    downstream_partitions_def = DailyPartitionsDefinition(
        start_date="2020-01-01")

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps each downstream partition to two partitions in the upstream asset: itself and the
        preceding partition.
        """
        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            del downstream_partitions_def, upstream_partitions_def

            start, end = downstream_partition_key_range
            assert start == "2020-01-02"
            assert end == "2020-01-02"
            return PartitionKeyRange("2020-01-01", "2020-01-02")

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partitions_time_window == TimeWindow(
                pendulum.parse("2020-01-02"), pendulum.parse("2020-01-03"))

        def load_input(self, context):
            assert context.asset_partitions_time_window == TimeWindow(
                pendulum.parse("2020-01-01"), pendulum.parse("2020-01-03"))

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset():
        pass

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={
            "upstream_asset": TrailingWindowPartitionMapping()
        },
    )
    def downstream_asset(upstream_asset):
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    my_job.execute_in_process(partition_key="2020-01-02")
Example #23: asset partition key ranges with a trailing-window (non-identity) partition mapping between statically partitioned assets.
def test_access_partition_keys_from_context_non_identity_partition_mapping():
    upstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])
    downstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps each downstream partition to two partitions in the upstream asset: itself and the
        preceding partition.
        """
        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            assert downstream_partitions_def
            assert upstream_partitions_def

            start, end = downstream_partition_key_range
            return PartitionKeyRange(str(max(1, int(start) - 1)), end)

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "2"

        def load_input(self, context):
            start, end = context.asset_partition_key_range
            assert (start, end) == ("1", "2")

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "2"

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={
            "upstream_asset": TrailingWindowPartitionMapping()
        },
    )
    def downstream_asset(context, upstream_asset):
        assert context.output_asset_partition_key() == "2"
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="2")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(AssetKey(["upstream_asset"]), partition="2")
    ]
    assert result.asset_materializations_for_node("downstream_asset") == [
        AssetMaterialization(AssetKey(["downstream_asset"]), partition="2")
    ]
Example #24: fragment of a filesystem I/O manager's load_input that reads CSVs as pandas or Spark DataFrames, plus the build_assets_job wiring.
        if context.dagster_type.typing_type == PandasDF:
            fs_path = os.path.abspath(self._get_fs_path(context.asset_key))
            paths = glob.glob(os.path.join(fs_path, "*.csv"))
            check.invariant(len(paths) > 0, f"No csv files found under {fs_path}")
            return pd.concat(map(pd.read_csv, paths))
        elif context.dagster_type.typing_type == SparkDF:
            return (
                SparkSession.builder.getOrCreate()
                .read.format("csv")
                .options(header="true")
                .load(self._get_fs_path(context.asset_key))
            )
        else:
            raise ValueError("Unexpected input type")


# io_manager_end

# build_assets_job_start

spark_weather_job = build_assets_job(
    "spark_weather",
    assets=[daily_temperature_highs, hottest_dates, daily_temperature_high_diffs],
    source_assets=[sfo_q2_weather_sample],
    resource_defs={
        "io_manager": IOManagerDefinition.hardcoded_io_manager(LocalFileSystemIOManager())
    },
)

# build_assets_job_end