コード例 #1
0
def test_input_name_matches_output_name():
    """An input named "result" that maps to key "not_result" keeps both names in the graph."""
    upstream_source = SourceAsset(key=AssetKey("not_result"), description=None)

    @asset(ins={"result": AssetIn(asset_key=AssetKey("not_result"))})
    def something(result):  # pylint: disable=unused-argument
        pass

    job_def = build_assets_job("assets_job", [something], source_assets=[upstream_source])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # Expected node for the source asset: no op name, no jobs, one downstream edge.
    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("not_result"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("something"), input_name="result")
        ],
        job_names=[],
    )
    # Expected node for the derived asset: its input and its output are both named "result".
    expected_something = ExternalAssetNode(
        asset_key=AssetKey("something"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("not_result"), input_name="result")
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )

    assert actual_nodes == [expected_upstream, expected_something]
コード例 #2
0
def test_unused_foreign_asset():
    """Foreign assets that nothing consumes are still listed, with no edges and no jobs."""
    # (asset name, description) pairs, in the order the nodes are expected back.
    described = (("foo", "abc"), ("bar", "def"))

    foreign_assets = {
        AssetKey(name): ForeignAsset(key=AssetKey(name), description=text)
        for name, text in described
    }
    actual_nodes = external_asset_graph_from_defs([], foreign_assets_by_key=foreign_assets)

    expected_nodes = [
        ExternalAssetNode(
            asset_key=AssetKey(name),
            op_description=text,
            dependencies=[],
            depended_by=[],
            job_names=[],
        )
        for name, text in described
    ]
    assert actual_nodes == expected_nodes
コード例 #3
0
def test_source_asset_with_op():
    """A source asset read by an op-backed asset yields two nodes linked through input "foo"."""
    source_foo = SourceAsset(key=AssetKey("foo"), description=None)

    @asset
    def bar(foo):  # pylint: disable=unused-argument
        pass

    job_def = build_assets_job("assets_job", [bar], source_assets=[source_foo])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # The single edge of the graph, seen from each end.
    foo_used_by_bar = ExternalAssetDependedBy(AssetKey("bar"), input_name="foo")
    bar_reads_foo = ExternalAssetDependency(AssetKey("foo"), input_name="foo")

    expected_nodes = [
        ExternalAssetNode(
            asset_key=AssetKey("foo"),
            op_description=None,
            dependencies=[],
            depended_by=[foo_used_by_bar],
            job_names=[],
        ),
        ExternalAssetNode(
            asset_key=AssetKey("bar"),
            op_name="bar",
            op_description=None,
            dependencies=[bar_reads_foo],
            depended_by=[],
            job_names=["assets_job"],
            output_name="result",
        ),
    ]
    assert actual_nodes == expected_nodes
コード例 #4
0
def test_used_source_asset():
    """A described source asset that is also passed in source_assets_by_key keeps its description."""
    source_bar = SourceAsset(key=AssetKey("bar"), description="def")

    @asset
    def foo(bar):
        assert bar

    consuming_job = build_assets_job("job1", [foo], source_assets=[source_bar])
    actual_nodes = external_asset_graph_from_defs(
        [consuming_job], source_assets_by_key={AssetKey("bar"): source_bar}
    )

    # Source side: carries the description, belongs to no job.
    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    # Derived side: produced by op "foo" inside "job1".
    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_bar, expected_foo]
コード例 #5
0
def test_two_downstream_assets_job():
    """One upstream asset feeding two sibling assets in the same job fans out to both."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2_a(asset1):
        assert asset1 == 1

    @asset
    def asset2_b(asset1):
        assert asset1 == 1

    job_def = build_assets_job("assets_job", [asset1, asset2_a, asset2_b])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # Both siblings have the same shape, so their expectations share one template.
    sibling_names = ("asset2_a", "asset2_b")

    expected_root = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(name), input_name="asset1")
            for name in sibling_names
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    expected_siblings = [
        ExternalAssetNode(
            asset_key=AssetKey(name),
            dependencies=[
                ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
            ],
            depended_by=[],
            op_name=name,
            op_description=None,
            job_names=["assets_job"],
            output_name="result",
            output_description=None,
        )
        for name in sibling_names
    ]

    assert actual_nodes == [expected_root, *expected_siblings]
コード例 #6
0
def test_same_asset_in_multiple_pipelines():
    """An asset invoked from two pipelines is reported once, listing both pipeline names."""

    @asset
    def asset1():
        return 1

    @pipeline
    def graph1():
        asset1()

    @pipeline
    def graph2():
        asset1()

    actual_nodes = external_asset_graph_from_defs([graph1, graph2], foreign_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["graph1", "graph2"],
    )
    assert actual_nodes == [expected_node]
コード例 #7
0
def test_basic_multi_asset():
    """A multi-asset with ten described outputs yields one node per output, in index order."""
    out_defs = {}
    for idx in range(10):
        out_defs[f"out{idx}"] = Out(description=f"foo: {idx}", asset_key=AssetKey(f"asset{idx}"))

    @multi_asset(outs=out_defs)
    def assets():
        pass

    job_def = build_assets_job("assets_job", [assets])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    # Every output shares the op "assets" but keeps its own output name and description.
    expected_nodes = []
    for idx in range(10):
        expected_nodes.append(
            ExternalAssetNode(
                asset_key=AssetKey(f"asset{idx}"),
                dependencies=[],
                depended_by=[],
                op_name="assets",
                op_description=None,
                job_names=["assets_job"],
                output_name=f"out{idx}",
                output_description=f"foo: {idx}",
            )
        )

    assert actual_nodes == expected_nodes
コード例 #8
0
def test_cross_pipeline_asset_dependency():
    """An edge between assets that live in different pipelines is recorded on both nodes."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def asset1_graph():
        asset1()

    @pipeline
    def asset2_graph():
        asset2()  # pylint: disable=no-value-for-parameter

    actual_nodes = external_asset_graph_from_defs(
        [asset1_graph, asset2_graph], foreign_assets_by_key={}
    )

    # Upstream half of the edge; only in "asset1_graph".
    expected_upstream = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["asset1_graph"],
    )
    # Downstream half of the edge; only in "asset2_graph".
    expected_downstream = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["asset2_graph"],
    )

    assert actual_nodes == [expected_upstream, expected_downstream]
コード例 #9
0
def test_cross_job_asset_dependency():
    """An asset built in one job and passed as a source asset to another links the two jobs."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    producer_job = build_assets_job("assets_job1", [asset1])
    # asset1 is handed to the second job through source_assets rather than as a built asset.
    consumer_job = build_assets_job("assets_job2", [asset2], source_assets=[asset1])

    actual_nodes = external_asset_graph_from_defs(
        [producer_job, consumer_job], source_assets_by_key={}
    )

    expected_asset1 = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job1"],
        output_name="result",
        output_description=None,
    )
    expected_asset2 = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["assets_job2"],
        output_name="result",
        output_description=None,
    )

    assert actual_nodes == [expected_asset1, expected_asset2]
コード例 #10
0
def test_two_asset_pipeline():
    """Two chained assets in one pipeline produce two nodes joined by a single edge."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    @pipeline
    def my_graph():
        asset2(asset1())

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    expected_first = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey("asset2"), input_name="asset1")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["my_graph"],
    )
    expected_second = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["my_graph"],
    )

    assert actual_nodes == [expected_first, expected_second]
コード例 #11
0
def test_used_foreign_asset():
    """A foreign asset consumed by an asset inside a job is linked to that asset's node."""
    foreign_bar = ForeignAsset(key=AssetKey("bar"), description="def")

    @asset
    def foo(bar):
        assert bar

    @job
    def job1():
        foo()  # pylint: disable=no-value-for-parameter

    actual_nodes = external_asset_graph_from_defs(
        [job1], foreign_assets_by_key={AssetKey("bar"): foreign_bar}
    )

    # Foreign side: carries the description, belongs to no job.
    expected_bar = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    # Derived side: produced by op "foo" inside "job1".
    expected_foo = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
    )

    assert actual_nodes == [expected_bar, expected_foo]
コード例 #12
0
def test_single_asset_job():
    """A job holding a single asset produces exactly one node with no edges."""

    @asset
    def asset1():
        return 1

    job_def = build_assets_job("assets_job", [asset1])
    actual_nodes = external_asset_graph_from_defs([job_def], source_assets_by_key={})

    only_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )
    assert actual_nodes == [only_node]
コード例 #13
0
def test_single_asset_pipeline():
    """A pipeline invoking a single asset produces exactly one node with no edges."""

    @asset
    def asset1():
        return 1

    @pipeline
    def my_graph():
        asset1()

    actual_nodes = external_asset_graph_from_defs([my_graph], foreign_assets_by_key={})

    only_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["my_graph"],
    )
    assert actual_nodes == [only_node]
コード例 #14
0
def test_same_asset_in_multiple_pipelines():
    """An asset placed in two jobs is reported once, listing both job names.

    Despite the test name, the two containers here are jobs built with build_assets_job.
    """

    @asset
    def asset1():
        return 1

    asset_jobs = [build_assets_job(job_name, [asset1]) for job_name in ("job1", "job2")]
    actual_nodes = external_asset_graph_from_defs(asset_jobs, source_assets_by_key={})

    shared_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["job1", "job2"],
        output_name="result",
        output_description=None,
    )
    assert actual_nodes == [shared_node]
コード例 #15
0
def test_inter_op_dependency():
    """Dependencies declared between outputs of one multi-asset show up as output-named edges.

    The multi-asset "assets" takes in1/in2 and emits only_in, mixed and only_out, with
    internal_asset_deps declared for "mixed" and "only_out". Edges that cross an op boundary
    are expected to carry an input_name; edges between outputs of the same op are expected to
    carry an output_name instead.
    """

    @asset
    def in1():
        pass

    @asset
    def in2():
        pass

    @asset
    def downstream(only_in, mixed, only_out):  # pylint: disable=unused-argument
        pass

    @multi_asset(
        outs={"only_in": Out(), "mixed": Out(), "only_out": Out()},
        internal_asset_deps={
            "mixed": {AssetKey("in1"), AssetKey("only_in")},
            "only_out": {AssetKey("only_in"), AssetKey("mixed")},
        },
    )
    def assets(in1, in2):  # pylint: disable=unused-argument
        pass

    assets_job = build_assets_job("assets_job", [in1, in2, assets, downstream])
    external_asset_nodes = external_asset_graph_from_defs([assets_job], source_assets_by_key={})

    # Normalize ordering (nodes and the edge lists inside them) so the comparison is
    # deterministic.
    def by_upstream_key(dep):
        return dep.upstream_asset_key

    def by_downstream_key(dep):
        return dep.downstream_asset_key

    normalized_nodes = []
    for graph_node in external_asset_nodes:
        normalized_nodes.append(
            graph_node._replace(
                dependencies=sorted(graph_node.dependencies, key=by_upstream_key),
                depended_by=sorted(graph_node.depended_by, key=by_downstream_key),
            )
        )
    normalized_nodes.sort(key=lambda graph_node: graph_node.asset_key)

    # Small builders that keep the expectation table below readable.
    def dep_on(name, **port):
        return ExternalAssetDependency(upstream_asset_key=AssetKey([name]), **port)

    def used_by(name, **port):
        return ExternalAssetDependedBy(downstream_asset_key=AssetKey([name]), **port)

    def asset_deps_metadata():
        return [
            MetadataEntry(
                label=".dagster/asset_deps",
                description=None,
                entry_data=MetadataValue.text("[set] (unserializable)"),
            )
        ]

    def expect_node(name, op, output, dependencies, depended_by, metadata_entries):
        return ExternalAssetNode(
            asset_key=AssetKey([name]),
            dependencies=dependencies,
            depended_by=depended_by,
            op_name=op,
            op_description=None,
            job_names=["assets_job"],
            output_name=output,
            metadata_entries=metadata_entries,
        )

    expected_nodes = [
        expect_node(
            "downstream",
            op="downstream",
            output="result",
            dependencies=[
                dep_on("mixed", input_name="mixed"),
                dep_on("only_in", input_name="only_in"),
                dep_on("only_out", input_name="only_out"),
            ],
            depended_by=[],
            metadata_entries=[],
        ),
        expect_node(
            "in1",
            op="in1",
            output="result",
            dependencies=[],
            depended_by=[
                used_by("mixed", input_name="in1"),
                used_by("only_in", input_name="in1"),
            ],
            metadata_entries=[],
        ),
        expect_node(
            "in2",
            op="in2",
            output="result",
            dependencies=[],
            depended_by=[used_by("only_in", input_name="in2")],
            metadata_entries=[],
        ),
        expect_node(
            "mixed",
            op="assets",
            output="mixed",
            dependencies=[
                dep_on("in1", input_name="in1"),
                dep_on("only_in", output_name="only_in"),
            ],
            depended_by=[
                used_by("downstream", input_name="mixed"),
                used_by("only_out", output_name="mixed"),
            ],
            metadata_entries=asset_deps_metadata(),
        ),
        expect_node(
            "only_in",
            op="assets",
            output="only_in",
            dependencies=[
                dep_on("in1", input_name="in1"),
                dep_on("in2", input_name="in2"),
            ],
            depended_by=[
                used_by("downstream", input_name="only_in"),
                used_by("mixed", output_name="only_in"),
                used_by("only_out", output_name="only_in"),
            ],
            metadata_entries=[],
        ),
        expect_node(
            "only_out",
            op="assets",
            output="only_out",
            dependencies=[
                dep_on("mixed", output_name="mixed"),
                dep_on("only_in", output_name="only_in"),
            ],
            depended_by=[used_by("downstream", input_name="only_out")],
            metadata_entries=asset_deps_metadata(),
        ),
    ]

    assert normalized_nodes == expected_nodes
コード例 #16
0
    def _build_cross_repo_deps(
        self,
    ) -> Tuple[
        Dict[AssetKey, ExternalAssetNode],
        Dict[Tuple[str, str], Dict[AssetKey, List[ExternalAssetDependedBy]]],
    ]:
        """
        Collect asset dependencies that cross repository boundaries.

        Every repository of every location in ``self._context.repository_locations`` is
        scanned. Asset nodes without an ``op_name`` are treated as source assets; all other
        nodes are treated as derived assets, and the ``(location_name, repo_name)`` that
        defines them is recorded.

        For each source asset that is defined as a derived asset in some repository, a sink
        asset is built as an ExternalAssetNode for every asset immediately downstream of it.
        In Dagit, sink assets will display as ForeignAssets, which are external from the
        repository.

        Returns:
            A 2-tuple ``(sink_assets, external_asset_deps)``:

            * ``sink_assets`` maps a downstream asset key to its sink ExternalAssetNode.
            * ``external_asset_deps`` is a nested mapping, keyed first by the defining
              ``(location_name, repo_name)`` and then by the source asset key, to the
              ExternalAssetDependedBy entries that depend on that asset. This is the data
              returned when get_cross_repo_dependent_assets is called with a derived
              asset's key and location.
        """
        # Source asset key -> every depended-by edge reported for it, across all repositories.
        dependents_by_source_key: Dict[AssetKey, List[ExternalAssetDependedBy]] = {}
        # Derived asset key -> (location_name, repo_name) of the repository that defines it.
        defining_repo_by_key: Dict[AssetKey, Tuple[str, str]] = {}

        for location in self._context.repository_locations:
            for repo_name, external_repo in location.get_repositories().items():
                for node in external_repo.get_external_asset_nodes():
                    if node.op_name:
                        defining_repo_by_key[node.asset_key] = (location.name, repo_name)
                    else:  # no op name: this node is a source asset
                        dependents_by_source_key.setdefault(node.asset_key, []).extend(
                            node.depended_by
                        )

        sink_assets: Dict[AssetKey, ExternalAssetNode] = {}
        external_asset_deps: Dict[
            Tuple[str, str], Dict[AssetKey, List[ExternalAssetDependedBy]]
        ] = {}

        for source_key, dependents in dependents_by_source_key.items():
            defining_repo = defining_repo_by_key.get(source_key)
            if not defining_repo:
                # Not defined as a derived asset anywhere, so there is no cross-repo edge.
                continue

            external_asset_deps.setdefault(defining_repo, {}).setdefault(source_key, []).extend(
                dependents
            )

            for dependent in dependents:
                # SourceAssets defined as ExternalAssetNodes contain no definition data (e.g.
                # no output or partition definition data) and no job_names. Dagit displays
                # all ExternalAssetNodes with no job_names as foreign assets, so sink assets
                # are defined as ExternalAssetNodes with no definition data.
                # NOTE(review): the assignment below replaces any sink node already stored
                # for this downstream key, so a downstream asset that depends on several
                # cross-repo source assets keeps only the last dependency - confirm intended.
                sink_assets[dependent.downstream_asset_key] = ExternalAssetNode(
                    asset_key=dependent.downstream_asset_key,
                    dependencies=[
                        ExternalAssetDependency(
                            upstream_asset_key=source_key,
                            input_name=dependent.input_name,
                            output_name=dependent.output_name,
                        )
                    ],
                    depended_by=[],
                )

        return sink_assets, external_asset_deps