Example #1
0
def test_input_name_matches_output_name():
    """Check the external asset graph when an input shares its name with the output.

    The asset ``something`` takes an input called ``result`` (the same string the
    expected node lists as ``output_name``) that is mapped to the ``not_result``
    source asset. The graph built from the job must link the two asset keys in
    both directions using ``input_name="result"``.
    """
    upstream_key = AssetKey("not_result")
    downstream_key = AssetKey("something")
    not_result = SourceAsset(key=upstream_key, description=None)

    @asset(ins={"result": AssetIn(asset_key=upstream_key)})
    def something(result):  # pylint: disable=unused-argument
        pass

    assets_job = build_assets_job("assets_job", [something], source_assets=[not_result])
    external_asset_nodes = external_asset_graph_from_defs([assets_job], source_assets_by_key={})

    # Node for the source asset: no dependencies, consumed by `something`, in no job.
    expected_source_node = ExternalAssetNode(
        asset_key=upstream_key,
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=downstream_key, input_name="result")
        ],
        job_names=[],
    )
    # Node for the computed asset: depends on the source asset through input `result`.
    expected_asset_node = ExternalAssetNode(
        asset_key=downstream_key,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=upstream_key, input_name="result")
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )

    assert external_asset_nodes == [expected_source_node, expected_asset_node]
Example #2
0
def test_input_namespace():
    """An input declared with ``AssetIn(namespace="abc")`` gets asset key ``["abc", "arg1"]``."""

    @asset(ins={"arg1": AssetIn(namespace="abc")})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    expected_key = AssetKey(["abc", "arg1"])
    assert first_input.get_asset_key(None) == expected_key
Example #3
0
def test_asset_group_from_list():
    """An ``AssetGroup`` returned from a repository yields exactly one executable job.

    The single job must carry the group's ``all_assets_job_name`` and run
    successfully in process.
    """

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    group = AssetGroup(assets=[asset_foo, asset_bar, last_asset])

    @repository
    def the_repo():
        return [group]

    repo_jobs = the_repo.get_all_jobs()
    assert len(repo_jobs) == 1

    underlying_job = repo_jobs[0]
    assert underlying_job.name == group.all_assets_job_name

    execution = underlying_job.execute_in_process()
    assert execution.success
Example #4
0
def test_input_asset_key_and_namespace():
    """Supplying both ``asset_key`` and ``namespace`` for one input raises a ``CheckError``."""
    expected_failure = pytest.raises(
        check.CheckError, match="key and namespace cannot both be set"
    )

    with expected_failure:
        # Both the AssetIn construction and the decoration stay inside the
        # context manager, so the error is caught wherever it is raised.
        conflicting_input = AssetIn(asset_key=AssetKey("foo"), namespace="bar")

        @asset(ins={"arg1": conflicting_input})
        def my_asset(arg1):
            assert arg1
Example #5
0
def test_asset_key_output():
    """An input mapped by explicit asset key receives the upstream asset's output.

    ``asset2`` names its input ``hello`` but points it at ``asset1``; running the
    job must hand ``asset1``'s return value (``1``) through to ``asset2``.
    """

    @asset
    def asset1():
        return 1

    upstream_key = AssetKey("asset1")

    @asset(ins={"hello": AssetIn(asset_key=upstream_key)})
    def asset2(hello):
        return hello

    boo_job = build_assets_job("boo", [asset1, asset2])
    run = boo_job.execute_in_process()

    assert run.success
    assert run.output_for_node("asset2") == 1
Example #6
0
def test_asset_key_for_asset_with_namespace_str():
    """An asset with a string namespace is addressable by the key ``[namespace, name]``.

    ``asset_foo`` is declared with ``namespace="hello"``; ``success_asset`` depends
    on it through ``AssetKey(["hello", "asset_foo"])`` and must receive ``"foo"``.
    """

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    namespaced_key = AssetKey(["hello", "asset_foo"])

    @asset(ins={"foo": AssetIn(asset_key=namespaced_key)})
    def success_asset(foo):
        return foo

    lol_job = build_assets_job("lol", [asset_foo, success_asset])
    run = lol_job.execute_in_process()

    assert run.success
    assert run.output_for_node("success_asset") == "foo"
Example #7
0
def test_asset_key_for_asset_with_namespace():
    """A namespaced asset must be referenced by its full key, not its bare name.

    First builds a job whose downstream asset uses the un-namespaced key and
    expects a ``DagsterInvalidDefinitionError``; then repeats with the full key
    and expects the job to run and pass ``"foo"`` downstream.
    """

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    # Should fail because asset_foo is defined with namespace, so has asset key
    # ["hello", "asset_foo"]
    @asset(ins={"foo": AssetIn(asset_key=AssetKey("asset_foo"))})
    def failing_asset(foo):
        pass

    with pytest.raises(DagsterInvalidDefinitionError):
        build_assets_job("lol", [asset_foo, failing_asset])

    full_key = AssetKey(["hello", "asset_foo"])

    @asset(ins={"foo": AssetIn(asset_key=full_key)})
    def success_asset(foo):
        return foo

    working_job = build_assets_job("lol", [asset_foo, success_asset])
    run = working_job.execute_in_process()

    assert run.success
    assert run.output_for_node("success_asset") == "foo"
Example #8
0
def get_assets_job(io_manager_def):
    """Build a two-asset job whose assets live under multi-component namespaces.

    ``asset1`` is defined under ``["one", "two", "three"]`` and returns
    ``[1, 2, 3]``. ``asset2`` is defined under ``["four", "five"]``, takes
    ``asset1`` as its input, and returns a new list made of that input plus ``[4]``.

    Args:
        io_manager_def: Resource definition registered under the ``"io_manager"``
            resource key of the job.

    Returns:
        The job produced by ``build_assets_job`` with name ``"a"``.
    """
    # Single source of truth for asset1's namespace: used both where asset1 is
    # defined and where asset2 declares its input, so the two cannot drift apart.
    asset1_namespace = ["one", "two", "three"]

    @asset(namespace=asset1_namespace)
    def asset1():
        return [1, 2, 3]

    @asset(
        namespace=["four", "five"],
        ins={"asset1": AssetIn(namespace=asset1_namespace)},
    )
    def asset2(asset1):
        return asset1 + [4]

    return build_assets_job(
        name="a",
        assets=[asset1, asset2],
        resource_defs={"io_manager": io_manager_def},
    )
Example #9
0
def test_asset_key_and_inferred():
    """Explicitly keyed and name-inferred inputs can be mixed on one asset.

    ``asset_baz`` takes ``foo`` (mapped to ``asset_foo`` by key) and ``asset_bar``
    (no ``ins`` entry); the job must produce ``2 + 5 == 7``.
    """

    @asset
    def asset_foo():
        return 2

    @asset
    def asset_bar():
        return 5

    foo_key = AssetKey("asset_foo")

    @asset(ins={"foo": AssetIn(asset_key=foo_key)})
    def asset_baz(foo, asset_bar):
        return foo + asset_bar

    hello_job = build_assets_job("hello", [asset_foo, asset_bar, asset_baz])
    run = hello_job.execute_in_process()

    assert run.success
    assert run.output_for_node("asset_baz") == 7
Example #10
0
def test_asset_key_matches_input_name():
    """An explicit asset key wins even when the input name matches another asset.

    ``last_asset``'s input is named ``asset_bar`` but is mapped to ``asset_foo``,
    so the value it receives must be ``"foo"`` rather than ``"bar"``.
    """

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    # should still use output from asset_foo
    foo_key = AssetKey("asset_foo")

    @asset(ins={"asset_bar": AssetIn(asset_key=foo_key)})
    def last_asset(asset_bar):
        return asset_bar

    lol_job = build_assets_job("lol", [asset_foo, asset_bar, last_asset])
    run = lol_job.execute_in_process()

    assert run.success
    assert run.output_for_node("last_asset") == "foo"
Example #11
0
# pylint: disable=redefined-outer-name
from dagster.core.asset_defs import AssetIn, asset, build_assets_job

# Namespace path used twice below: as asset1's own namespace and as the
# namespace of asset2's `asset1` input.
namespace1 = ["s3", "superdomain_1", "subdomain_1", "subsubdomain_1"]


# Upstream asset defined under namespace1. Its body does nothing, so calling
# the function returns None.
# NOTE(review): documented with a comment rather than a docstring because a
# docstring may be picked up as the asset's description; confirm before adding one.
@asset(namespace=namespace1)
def asset1():
    pass


# Downstream asset defined under its own four-part namespace. Its `asset1`
# input is declared with namespace=namespace1, the same namespace asset1 is
# defined under.
@asset(
    namespace=["s3", "superdomain_2", "subdomain_2", "subsubdomain_2"],
    ins={"asset1": AssetIn(namespace=namespace1)},
)
def asset2(asset1):
    # asset1's body is `pass`, so the value expected to arrive here is None.
    assert asset1 is None


# Job containing both assets above, whose keys carry long namespaces.
long_asset_keys_job = build_assets_job(
    "long_asset_keys_job",
    assets=[asset1, asset2],
)
Example #12
0
def test_input_asset_key():
    """An input declared with ``AssetIn(asset_key=...)`` reports exactly that key."""
    foo_key = AssetKey("foo")

    @asset(ins={"arg1": AssetIn(asset_key=foo_key)})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    assert first_input.get_asset_key(None) == AssetKey("foo")
Example #13
0
def test_unknown_in():
    """An ``ins`` entry naming a parameter the function lacks is an invalid definition."""
    expected_failure = pytest.raises(DagsterInvalidDefinitionError)

    with expected_failure:
        # `arg1` is declared in `ins`, but the function takes no arguments.
        @asset(ins={"arg1": AssetIn()})
        def _my_asset():
            pass
Example #14
0
def test_input_metadata():
    """Metadata passed to ``AssetIn`` is exposed on the op's input definition."""
    input_metadata = {"abc": 123}

    @asset(ins={"arg1": AssetIn(metadata=input_metadata)})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    assert first_input.metadata == {"abc": 123}
Example #15
0
    svd = TruncatedSVD(n_components=n_components)
    svd.fit(user_story_matrix.matrix)

    total_explained_variance = svd.explained_variance_ratio_.sum()

    yield Output(
        svd,
        metadata={
            "Total explained variance ratio": total_explained_variance,
            "Number of components": n_components,
        },
    )


@asset(
    ins={"stories": AssetIn(metadata={"columns": ["id", "title"]})},
    io_manager_key="warehouse_io_manager",
)
def component_top_stories(
    recommender_model: TruncatedSVD, user_story_matrix: IndexedCooMatrix, stories: DataFrame
):
    """
    For each component in the collaborative filtering model, the titles of the top stories
    it's associated with.
    """
    n_stories = 10

    components_column = []
    titles_column = []

    story_titles = stories.set_index("id")