Esempio n. 1
0
def test_assets_with_same_partitioning():
    """Check range mapping between two assets that share a partitions definition.

    Both assets use the same static partitions definition, and the test
    expects the key range "a".."c" to come back unchanged in the upstream
    and in the downstream direction.
    """
    partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d"])

    @asset(partitions_def=partitions_def)
    def upstream_asset():
        pass

    @asset(partitions_def=partitions_def)
    def downstream_asset(upstream_asset):
        assert upstream_asset

    upstream_key = AssetKey("upstream_asset")

    # Downstream range -> upstream range.
    mapped_upstream = get_upstream_partitions_for_partition_range(
        downstream_asset,
        upstream_asset,
        upstream_key,
        PartitionKeyRange("a", "c"),
    )
    assert mapped_upstream == PartitionKeyRange("a", "c")

    # Upstream range -> downstream range.
    mapped_downstream = get_downstream_partitions_for_partition_range(
        downstream_asset,
        upstream_asset,
        upstream_key,
        PartitionKeyRange("a", "c"),
    )
    assert mapped_downstream == PartitionKeyRange("a", "c")
Esempio n. 2
0

@asset
def asset_one():
    # Root asset with no inputs; always produces the constant 1.
    return 1


@asset
def asset_two(asset_one):  # pylint: disable=redefined-outer-name
    # Returns the value received through the ``asset_one`` parameter plus one.
    # The parameter deliberately shares its name with the module-level
    # ``asset_one`` definition, hence the pylint suppression above.
    return asset_one + 1


# Job named "two_assets_job" built from the two unpartitioned assets above.
two_assets_job = build_assets_job(name="two_assets_job",
                                  assets=[asset_one, asset_two])

# Static partition keys shared by the partitioned assets defined below.
static_partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d"])


@asset(partitions_def=static_partitions_def)
def upstream_static_partitioned_asset():
    # Partitioned by ``static_partitions_def``; always produces the constant 1.
    return 1


@asset(partitions_def=static_partitions_def)
def downstream_static_partitioned_asset(upstream_static_partitioned_asset, ):  # pylint: disable=redefined-outer-name
    # Partitioned by ``static_partitions_def``; only checks that the value
    # received for the upstream parameter is truthy and returns nothing.
    assert upstream_static_partitioned_asset


static_partitioned_assets_job = build_assets_job(
    "static_partitioned_assets_job",
    assets=[
def test_filter_mapping_partitions_dep():
    """Check a custom partition mapping that pins the upstream side to one hemisphere.

    Downstream partitions are plain names; upstream partitions are
    "<hemisphere>|<name>" keys. The mapping under test prefixes downstream
    keys with its hemisphere and strips the prefix in the other direction.
    """
    beatles = ["john", "ringo", "paul", "george"]

    # Same ordering as a nested comprehension: names outer, hemispheres inner.
    hemisphere_keys = []
    for beatle in beatles:
        for hemisphere in ["southern", "northern"]:
            hemisphere_keys.append(f"{hemisphere}|{beatle}")

    downstream_partitions_def = StaticPartitionsDefinition(beatles)
    upstream_partitions_def = StaticPartitionsDefinition(hemisphere_keys)

    class HemisphereFilteringPartitionMapping(PartitionMapping):
        """Maps between bare keys and keys prefixed with a fixed hemisphere."""

        def __init__(self, hemisphere: str):
            self.hemisphere = hemisphere

        def get_upstream_partitions_for_partition_range(
                self,
                downstream_partition_key_range: PartitionKeyRange,
                downstream_partitions_def: PartitionsDefinition,  # pylint: disable=unused-argument
                upstream_partitions_def: PartitionsDefinition,  # pylint: disable=unused-argument
        ) -> PartitionKeyRange:
            # Prefix both ends of the range with this mapping's hemisphere.
            prefix = self.hemisphere
            first_key = f"{prefix}|{downstream_partition_key_range.start}"
            last_key = f"{prefix}|{downstream_partition_key_range.end}"
            return PartitionKeyRange(first_key, last_key)

        def get_downstream_partitions_for_partition_range(
                self,
                upstream_partition_key_range: PartitionKeyRange,
                downstream_partitions_def: PartitionsDefinition,  # pylint: disable=unused-argument
                upstream_partitions_def: PartitionsDefinition,  # pylint: disable=unused-argument
        ) -> PartitionKeyRange:
            # Keep only the text after the last "|" at each end of the range.
            first_key = upstream_partition_key_range.start.rpartition("|")[-1]
            last_key = upstream_partition_key_range.end.rpartition("|")[-1]
            return PartitionKeyRange(first_key, last_key)

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset():
        pass

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={
            "upstream_asset": HemisphereFilteringPartitionMapping("southern")
        },
    )
    def downstream_asset(upstream_asset):
        assert upstream_asset

    upstream_key = AssetKey("upstream_asset")

    mapped_upstream = get_upstream_partitions_for_partition_range(
        downstream_asset,
        upstream_asset,
        upstream_key,
        PartitionKeyRange("ringo", "paul"),
    )
    assert mapped_upstream == PartitionKeyRange("southern|ringo", "southern|paul")

    mapped_downstream = get_downstream_partitions_for_partition_range(
        downstream_asset,
        upstream_asset,
        upstream_key,
        PartitionKeyRange("southern|ringo", "southern|paul"),
    )
    assert mapped_downstream == PartitionKeyRange("ringo", "paul")
def test_access_partition_keys_from_context_non_identity_partition_mapping():
    """Check the partition keys seen by assets and the IO manager under a custom mapping.

    The job is executed for partition "2". The mapping used by the downstream
    asset widens its upstream dependency to start one partition earlier, so
    the IO manager expects the input's partition key range to be ("1", "2").
    """
    upstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])
    downstream_partitions_def = StaticPartitionsDefinition(["1", "2", "3"])

    class TrailingWindowPartitionMapping(PartitionMapping):
        """
        Maps each downstream partition to two partitions in the upstream asset: itself and the
        preceding partition.
        """
        def get_upstream_partitions_for_partition_range(
            self,
            downstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            assert downstream_partitions_def
            assert upstream_partitions_def

            start, end = downstream_partition_key_range
            # Step the start back by one partition, clamped at "1".
            return PartitionKeyRange(str(max(1, int(start) - 1)), end)

        def get_downstream_partitions_for_partition_range(
            self,
            upstream_partition_key_range: PartitionKeyRange,
            downstream_partitions_def: PartitionsDefinition,
            upstream_partitions_def: PartitionsDefinition,
        ) -> PartitionKeyRange:
            raise NotImplementedError()

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "2"

        def load_input(self, context):
            start, end = context.asset_partition_key_range
            # Compare the pair as a tuple. ``assert start, end == (...)`` would
            # only test ``start`` and use the comparison as the failure message.
            assert (start, end) == ("1", "2")

    @asset(partitions_def=upstream_partitions_def)
    def upstream_asset(context):
        assert context.output_asset_partition_key() == "2"

    @asset(
        partitions_def=downstream_partitions_def,
        partition_mappings={
            "upstream_asset": TrailingWindowPartitionMapping()
        },
    )
    def downstream_asset(context, upstream_asset):
        assert context.output_asset_partition_key() == "2"
        # MyIOManager.load_input returns nothing, so the loaded input is None.
        assert upstream_asset is None

    my_job = build_assets_job(
        "my_job",
        assets=[upstream_asset, downstream_asset],
        resource_defs={
            "io_manager":
            IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="2")
    assert result.asset_materializations_for_node("upstream_asset") == [
        AssetMaterialization(AssetKey(["upstream_asset"]), partition="2")
    ]
    assert result.asset_materializations_for_node("downstream_asset") == [
        AssetMaterialization(AssetKey(["downstream_asset"]), partition="2")
    ]
Esempio n. 5
0
def test_invalid_partition_key():
    """Check that a partition key containing "..." is rejected at definition time."""
    # ``match`` is a regular expression, so the dots are escaped to require
    # the literal text '...' rather than any three quoted characters.
    with pytest.raises(DagsterInvalidDefinitionError, match=r"'\.\.\.'"):
        StaticPartitionsDefinition(["foo", "foo...bar"])
Esempio n. 6
0
def test_static_partitions(partition_keys: List[str]):
    """Check that a static partitions definition reflects the keys it was built from.

    Every partition is expected to use its key as both name and value, and
    the reported key list is expected to equal the input list.
    """
    static_partitions = StaticPartitionsDefinition(partition_keys)

    name_value_pairs = [
        (partition.name, partition.value)
        for partition in static_partitions.get_partitions()
    ]
    expected_pairs = [(key, key) for key in partition_keys]

    assert name_value_pairs == expected_pairs
    assert static_partitions.get_partition_keys() == partition_keys
Esempio n. 7
0
 def get_partitions_definition(self):
     """Return a static partitions definition built from ``self.partition_keys``."""
     partitions_def = StaticPartitionsDefinition(self.partition_keys)
     return partitions_def