def test_input_name_matches_output_name():
    """Check the external asset graph for an asset whose input is named ``result``.

    The expected downstream node carries ``input_name="result"`` on its
    dependency as well as ``output_name="result"``.
    """
    upstream_key = AssetKey("not_result")
    downstream_key = AssetKey("something")

    not_result = SourceAsset(key=upstream_key, description=None)

    @asset(ins={"result": AssetIn(asset_key=upstream_key)})
    def something(result):  # pylint: disable=unused-argument
        pass

    assets_job = build_assets_job("assets_job", [something], source_assets=[not_result])
    external_asset_nodes = external_asset_graph_from_defs([assets_job], source_assets_by_key={})

    # Node for the source asset: no dependencies, not part of any job.
    expected_source_node = ExternalAssetNode(
        asset_key=upstream_key,
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=downstream_key, input_name="result")
        ],
        job_names=[],
    )
    # Node for the asset defined above, which reads the source asset.
    expected_derived_node = ExternalAssetNode(
        asset_key=downstream_key,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=upstream_key, input_name="result")
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )

    assert external_asset_nodes == [expected_source_node, expected_derived_node]
def test_input_namespace():
    """An ``AssetIn`` namespace plus the input name yields the input's asset key."""

    @asset(ins={"arg1": AssetIn(namespace="abc")})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    expected_key = AssetKey(["abc", "arg1"])
    assert first_input.get_asset_key(None) == expected_key
def test_asset_group_from_list():
    """A repository built from one ``AssetGroup`` exposes a single, runnable job."""

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    group = AssetGroup(assets=[asset_foo, asset_bar, last_asset])

    @repository
    def the_repo():
        return [group]

    # Exactly one job is expected, and it carries the group's job name.
    assert len(the_repo.get_all_jobs()) == 1
    asset_group_underlying_job = the_repo.get_all_jobs()[0]
    assert asset_group_underlying_job.name == group.all_assets_job_name

    run_result = asset_group_underlying_job.execute_in_process()
    assert run_result.success
def test_input_asset_key_and_namespace():
    """Giving an ``AssetIn`` both ``asset_key`` and ``namespace`` raises a ``CheckError``."""
    expected_message = "key and namespace cannot both be set"

    with pytest.raises(check.CheckError, match=expected_message):

        @asset(ins={"arg1": AssetIn(asset_key=AssetKey("foo"), namespace="bar")})
        def my_asset(arg1):
            assert arg1
def test_asset_key_output():
    """An input mapped by explicit asset key receives the upstream asset's output."""

    @asset
    def asset1():
        return 1

    @asset(ins={"hello": AssetIn(asset_key=AssetKey("asset1"))})
    def asset2(hello):
        return hello

    assets_job = build_assets_job("boo", [asset1, asset2])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("asset2") == 1
def test_asset_key_for_asset_with_namespace_str():
    """An asset with a string namespace is reachable via the key ``[namespace, name]``."""

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    namespaced_key = AssetKey(["hello", "asset_foo"])

    @asset(ins={"foo": AssetIn(asset_key=namespaced_key)})
    def success_asset(foo):
        return foo

    assets_job = build_assets_job("lol", [asset_foo, success_asset])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("success_asset") == "foo"
def test_asset_key_for_asset_with_namespace():
    """A namespaced asset must be referenced by its full key, not by its bare name."""

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    # Should fail because asset_foo is defined with namespace, so has asset key
    # ["hello", "asset_foo"]
    @asset(ins={"foo": AssetIn(asset_key=AssetKey("asset_foo"))})
    def failing_asset(foo):
        pass

    with pytest.raises(DagsterInvalidDefinitionError):
        build_assets_job("lol", [asset_foo, failing_asset])

    # Referencing the full, namespaced key resolves correctly.
    @asset(ins={"foo": AssetIn(asset_key=AssetKey(["hello", "asset_foo"]))})
    def success_asset(foo):
        return foo

    assets_job = build_assets_job("lol", [asset_foo, success_asset])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("success_asset") == "foo"
def get_assets_job(io_manager_def):
    """Build a two-asset job whose assets live in multi-part namespaces.

    ``asset1`` is defined under ``["one", "two", "three"]`` and returns
    ``[1, 2, 3]``; ``asset2`` is defined under ``["four", "five"]``, takes
    ``asset1`` as input and returns that value with ``4`` appended.

    Args:
        io_manager_def: Resource definition bound to the ``"io_manager"``
            resource key of the job.

    Returns:
        The job produced by ``build_assets_job`` (named ``"a"``).
    """
    # Single source of truth for the upstream namespace: it is used both to
    # define asset1 and to point asset2's input at it, so the two cannot drift.
    asset1_namespace = ["one", "two", "three"]

    @asset(namespace=asset1_namespace)
    def asset1():
        return [1, 2, 3]

    @asset(namespace=["four", "five"], ins={"asset1": AssetIn(namespace=asset1_namespace)})
    def asset2(asset1):
        return asset1 + [4]

    return build_assets_job(
        name="a",
        assets=[asset1, asset2],
        resource_defs={"io_manager": io_manager_def},
    )
def test_asset_key_and_inferred():
    """One asset can mix an explicitly keyed input with one matched by parameter name."""

    @asset
    def asset_foo():
        return 2

    @asset
    def asset_bar():
        return 5

    # "foo" is mapped explicitly; "asset_bar" has no AssetIn entry.
    @asset(ins={"foo": AssetIn(asset_key=AssetKey("asset_foo"))})
    def asset_baz(foo, asset_bar):
        return foo + asset_bar

    assets_job = build_assets_job("hello", [asset_foo, asset_bar, asset_baz])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("asset_baz") == 7
def test_asset_key_matches_input_name():
    """An explicit ``asset_key`` wins even when the input name matches another asset."""

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    # should still use output from asset_foo
    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    assets_job = build_assets_job("lol", [asset_foo, asset_bar, last_asset])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("last_asset") == "foo"
# pylint: disable=redefined-outer-name
from dagster.core.asset_defs import AssetIn, asset, build_assets_job

# Namespace of the upstream asset; reused below so that asset2's "asset1"
# input is declared with the same namespace that asset1 is defined under.
namespace1 = ["s3", "superdomain_1", "subdomain_1", "subsubdomain_1"]


@asset(namespace=namespace1)
def asset1():
    pass


@asset(
    ins={"asset1": AssetIn(namespace=namespace1)},
    namespace=["s3", "superdomain_2", "subdomain_2", "subsubdomain_2"],
)
def asset2(asset1):
    # asset1 has no return value, so the input is expected to arrive as None.
    assert asset1 is None


# Job containing both long-keyed assets.
long_asset_keys_job = build_assets_job(name="long_asset_keys_job", assets=[asset1, asset2])
def test_input_asset_key():
    """An ``AssetIn`` with an explicit ``asset_key`` sets that key on the op input."""
    explicit_key = AssetKey("foo")

    @asset(ins={"arg1": AssetIn(asset_key=explicit_key)})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    assert first_input.get_asset_key(None) == AssetKey("foo")
def test_unknown_in():
    """Declaring an ``ins`` entry with no matching function parameter is rejected."""
    with pytest.raises(DagsterInvalidDefinitionError):
        # "arg1" is declared as an input, but the function takes no arguments.
        unknown_ins = {"arg1": AssetIn()}

        @asset(ins=unknown_ins)
        def _my_asset():
            pass
def test_input_metadata():
    """Metadata given to ``AssetIn`` is carried onto the op's input definition."""
    input_metadata = {"abc": 123}

    @asset(ins={"arg1": AssetIn(metadata=input_metadata)})
    def my_asset(arg1):
        assert arg1

    first_input = my_asset.op.input_defs[0]
    assert first_input.metadata == {"abc": 123}
svd = TruncatedSVD(n_components=n_components) svd.fit(user_story_matrix.matrix) total_explained_variance = svd.explained_variance_ratio_.sum() yield Output( svd, metadata={ "Total explained variance ratio": total_explained_variance, "Number of components": n_components, }, ) @asset( ins={"stories": AssetIn(metadata={"columns": ["id", "title"]})}, io_manager_key="warehouse_io_manager", ) def component_top_stories( recommender_model: TruncatedSVD, user_story_matrix: IndexedCooMatrix, stories: DataFrame ): """ For each component in the collaborative filtering model, the titles of the top stories it's associated with. """ n_stories = 10 components_column = [] titles_column = [] story_titles = stories.set_index("id")