コード例 #1
0
def test_assets_job_with_different_partitions_defs():
    """Building a job from assets with differing partitions definitions must fail.

    ``upstream`` is partitioned over ``a``-``c`` while ``downstream`` is
    partitioned over ``a``-``d``; ``build_assets_job`` is expected to raise
    ``DagsterInvalidDefinitionError`` for that combination.
    """

    @asset(partitions_def=StaticPartitionsDefinition(["a", "b", "c"]))
    def upstream():
        pass

    @asset(partitions_def=StaticPartitionsDefinition(["a", "b", "c", "d"]))
    def downstream(upstream):
        assert upstream is None

    # Only the job construction sits inside ``pytest.raises`` so that an
    # unexpected error raised while *defining* the assets cannot make the
    # test pass for the wrong reason.
    with pytest.raises(DagsterInvalidDefinitionError):
        build_assets_job("my_job", assets=[upstream, downstream])
コード例 #2
0
def test_io_manager():
    """Write a pandas asset through the partitioned parquet IO manager and read it back.

    The job materializes ``pandas_df_asset`` for the ``2022-01-01-16:00``
    partition, loads it into ``spark_input_asset`` as a ``SparkDF``, and then
    checks that the parquet file written for the upstream asset contains the
    original values.
    """
    df_value = pandas.DataFrame({"foo": ["bar", "baz"], "quux": [1, 2]})

    @asset(partitions_def=hourly_partitions)
    def pandas_df_asset():
        return df_value

    @asset(partitions_def=hourly_partitions)
    def spark_input_asset(pandas_df_asset: SparkDF):
        assert isinstance(pandas_df_asset, SparkDF)
        assert pandas_df_asset.count() == 2
        assert set(pandas_df_asset.columns) == {"foo", "quux"}
        return pandas_df_asset

    with tempfile.TemporaryDirectory() as temp_dir:
        io_manager_test_job = build_assets_job(
            "io_manager_test_job",
            assets=[pandas_df_asset, spark_input_asset],
            resource_defs={
                "pyspark": pyspark_resource,
                "io_manager": local_partitioned_parquet_io_manager.configured(
                    {"base_path": temp_dir}
                ),
            },
        )

        expected_path = os.path.join(
            temp_dir, "pandas_df_asset-20220101160000_20220101170000.pq"
        )
        res = io_manager_test_job.execute_in_process(partition_key="2022-01-01-16:00")
        assert res.success
        assert os.path.exists(expected_path)
        intermediate_df = pandas.read_parquet(expected_path)
        # ``all(df == other)`` iterates the column labels of the comparison
        # frame (non-empty strings, always truthy) and therefore can never
        # fail. Reduce the element-wise comparison over both axes instead.
        assert (intermediate_df == df_value).all().all()
コード例 #3
0
def test_source_asset_partitions():
    """Run a daily-partitioned asset that depends on an hourly-partitioned source asset.

    The IO manager asserts that, for the ``2021-06-06`` daily partition, the
    input's partition key range runs from ``2021-06-06-00:00`` to
    ``2021-06-06-23:00``.
    """
    hourly_source = SourceAsset(
        AssetKey("hourly_asset"),
        partitions_def=HourlyPartitionsDefinition(start_date="2021-05-05-00:00"),
    )

    class RangeCheckingIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            partition_range = context.asset_partition_key_range
            assert partition_range.start == "2021-06-06-00:00"
            assert partition_range.end == "2021-06-06-23:00"

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset(hourly_asset):
        # load_input above returns nothing, so the loaded input is None.
        assert hourly_asset is None

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(RangeCheckingIOManager())
    job = build_assets_job(
        name="daily_job",
        assets=[daily_asset],
        source_assets=[hourly_source],
        resource_defs={"io_manager": io_manager_def},
    )
    run_result = job.execute_in_process(partition_key="2021-06-06")
    assert run_result.success
コード例 #4
0
ファイル: test_asset_defs.py プロジェクト: trevenrawr/dagster
def test_select_from_manifest(dbt_seed, conn_string, test_project_dir,
                              dbt_config_dir):  # pylint: disable=unused-argument
    """Load two selected models from the sample manifest and materialize them.

    Only ``sort_by_calories`` and ``least_caloric`` are selected by unique id,
    so the op backing the loaded assets is expected to emit exactly two
    ``ASSET_MATERIALIZATION`` events.
    """
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    # JSON is read as UTF-8 explicitly so decoding does not depend on the
    # platform's default locale encoding.
    with open(manifest_path, "r", encoding="utf-8") as f:
        manifest_json = json.load(f)

    dbt_assets = load_assets_from_dbt_manifest(
        manifest_json,
        selected_unique_ids={
            "model.dagster_dbt_test_project.sort_by_calories",
            "model.dagster_dbt_test_project.least_caloric",
        },
    )

    dbt_resource = dbt_cli_resource.configured(
        {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
    )
    result = build_assets_job(
        "test_job",
        dbt_assets,
        resource_defs={"dbt": dbt_resource},
    ).execute_in_process()

    assert result.success
    materializations = [
        event.event_specific_data.materialization
        for event in result.events_for_node(dbt_assets[0].op.name)
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 2
コード例 #5
0
ファイル: test_asset_defs.py プロジェクト: trevenrawr/dagster
def test_node_info_to_asset_key(dbt_seed, conn_string, test_project_dir,
                                dbt_config_dir):  # pylint: disable=unused-argument
    """Check that a custom ``node_info_to_asset_key`` controls the emitted asset keys.

    Every dbt node is mapped to ``AssetKey(["foo", <node name>])``; the run is
    expected to produce four materializations, the first of them for
    ``["foo", "sort_by_calories"]``.
    """

    def prefixed_asset_key(node_info):
        return AssetKey(["foo", node_info["name"]])

    dbt_assets = load_assets_from_dbt_project(
        test_project_dir,
        dbt_config_dir,
        node_info_to_asset_key=prefixed_asset_key,
    )

    resources = {
        "dbt": dbt_cli_resource.configured(
            {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
        )
    }
    job = build_assets_job("test_job", dbt_assets, resource_defs=resources)
    result = job.execute_in_process()
    assert result.success

    materializations = []
    for event in result.events_for_node(dbt_assets[0].op.name):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)

    assert len(materializations) == 4
    first_key = materializations[0].asset_key
    assert first_key == AssetKey(["foo", "sort_by_calories"])
コード例 #6
0
ファイル: test_asset_defs.py プロジェクト: trevenrawr/dagster
def test_select_from_project(dbt_seed, conn_string, test_project_dir,
                             dbt_config_dir):  # pylint: disable=unused-argument
    """Load a dbt project with a ``select`` string and materialize the selection.

    The generated op name is pinned, and the run is expected to emit two
    ``ASSET_MATERIALIZATION`` events for that op.
    """
    selection = "sort_by_calories subdir.least_caloric"
    dbt_assets = load_assets_from_dbt_project(
        test_project_dir, dbt_config_dir, select=selection
    )

    assert dbt_assets[0].op.name == "run_dbt_dagster_dbt_test_project_e4753"

    resources = {
        "dbt": dbt_cli_resource.configured(
            {"project_dir": test_project_dir, "profiles_dir": dbt_config_dir}
        )
    }
    job = build_assets_job("test_job", dbt_assets, resource_defs=resources)
    result = job.execute_in_process()
    assert result.success

    materializations = []
    for event in result.events_for_node(dbt_assets[0].op.name):
        if event.event_type_value == "ASSET_MATERIALIZATION":
            materializations.append(event.event_specific_data.materialization)
    assert len(materializations) == 2
コード例 #7
0
def test_runtime_metadata_fn():
    """Check that ``runtime_metadata_fn`` output is attached to each materialization.

    Assets are loaded from the sample manifest with a metadata function that
    reports the op name and the dbt model name. The job runs against a mocked
    ``dbt`` resource, and every asset's op is expected to emit exactly one
    materialization carrying those two text entries.
    """
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    # JSON is read as UTF-8 explicitly so decoding does not depend on the
    # platform's default locale encoding.
    with open(manifest_path, "r", encoding="utf-8") as f:
        manifest_json = json.load(f)

    def runtime_metadata_fn(context, node_info):
        return {
            "op_name": context.solid_def.name,
            "dbt_model": node_info["name"],
        }

    assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)},
    )
    result = assets_job.execute_in_process()
    assert result.success

    # The loop variable is deliberately not called ``asset`` so it does not
    # shadow the ``asset`` decorator name inside this function.
    for assets_def in assets:
        materializations = [
            event.event_specific_data.materialization
            for event in result.events_for_node(assets_def.op.name)
            if event.event_type_value == "ASSET_MATERIALIZATION"
        ]
        assert len(materializations) == 1
        assert materializations[0].metadata_entries == [
            EventMetadataEntry.text(assets_def.op.name, label="op_name"),
            EventMetadataEntry.text(assets_def.op.name, label="dbt_model"),
        ]
コード例 #8
0
ファイル: test_assets_job.py プロジェクト: trevenrawr/dagster
def test_source_asset():
    """Load a source asset through a dedicated, configured IO manager.

    ``source1`` is bound to ``special_io_manager``; its ``load_input`` checks
    the resource config and the required ``subresource`` before returning 5,
    which ``asset1`` then asserts on. The job graph should contain only
    ``asset1``'s op.
    """

    class SourceLoadingIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            assert context.resource_config["a"] == 7
            assert context.resources.subresource == 9
            assert context.upstream_output.resources.subresource == 9
            return 5

    @io_manager(config_schema={"a": int}, required_resource_keys={"subresource"})
    def my_io_manager(_):
        return SourceLoadingIOManager()

    @asset
    def asset1(source1):
        assert source1 == 5
        return 1

    source = SourceAsset(AssetKey("source1"), io_manager_key="special_io_manager")
    resources = {
        "special_io_manager": my_io_manager.configured({"a": 7}),
        "subresource": ResourceDefinition.hardcoded_resource(9),
    }
    assets_job = build_assets_job(
        "a", [asset1], source_assets=[source], resource_defs=resources
    )

    assert assets_job.graph.node_defs == [asset1.op]
    run_result = assets_job.execute_in_process()
    assert run_result.success
コード例 #9
0
def test_used_source_asset():
    """A source asset consumed by a job appears in the external asset graph.

    The expected graph has a ``bar`` node (carrying the source asset's
    description, with no jobs) followed by the ``foo`` node that depends on it.
    """
    bar_source = SourceAsset(key=AssetKey("bar"), description="def")

    @asset
    def foo(bar):
        assert bar

    job1 = build_assets_job("job1", [foo], source_assets=[bar_source])

    external_asset_nodes = external_asset_graph_from_defs(
        [job1], source_assets_by_key={AssetKey("bar"): bar_source}
    )

    expected_source_node = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_description="def",
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(["foo"]), input_name="bar")
        ],
        job_names=[],
    )
    expected_asset_node = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_name="foo",
        op_description=None,
        dependencies=[
            ExternalAssetDependency(upstream_asset_key=AssetKey(["bar"]), input_name="bar")
        ],
        depended_by=[],
        job_names=["job1"],
        output_name="result",
        output_description=None,
    )
    assert external_asset_nodes == [expected_source_node, expected_asset_node]
コード例 #10
0
def test_source_op_asset():
    """Use an ``@asset``-defined asset as a source asset of another job.

    ``source1`` is passed via ``source_assets`` rather than ``assets``, so only
    ``asset1``'s op should be in the job graph; its input is loaded through
    ``special_io_manager``, whose ``load_input`` returns 5.
    """

    class ConstantIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            return 5

    @io_manager
    def my_io_manager(_):
        return ConstantIOManager()

    @asset(io_manager_key="special_io_manager")
    def source1():
        pass

    @asset
    def asset1(source1):
        assert source1 == 5
        return 1

    resources = {"special_io_manager": my_io_manager}
    assets_job = build_assets_job(
        "a", [asset1], source_assets=[source1], resource_defs=resources
    )

    assert assets_job.graph.node_defs == [asset1.op]
    run_result = assets_job.execute_in_process()
    assert run_result.success
コード例 #11
0
def test_download():
    """Execute the ``ASSETS`` job for one partition with local/test resources.

    Parquet output goes to a temporary directory, and the run for partition
    ``2020-12-30-00:00`` must succeed.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        resources = {
            "io_manager": fs_io_manager,
            "partition_start": ResourceDefinition.string_resource(),
            "partition_end": ResourceDefinition.string_resource(),
            "parquet_io_manager": local_partitioned_parquet_io_manager.configured(
                {"base_path": temp_dir}
            ),
            "warehouse_io_manager": mem_io_manager,
            "pyspark": pyspark_resource,
            "hn_client": hn_snapshot_client,
        }
        job = build_assets_job("test_job", assets=ASSETS, resource_defs=resources)

        run_result = job.execute_in_process(partition_key="2020-12-30-00:00")
        assert run_result.success
コード例 #12
0
def test_single_partitioned_asset_job():
    """Run a single statically partitioned asset for partition ``b``.

    The IO manager asserts that the output context reports partition key
    ``b`` and that no input is ever loaded. The run must yield exactly one
    materialization of ``my_asset`` tagged with partition ``b``.
    """
    partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d"])

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            assert context.asset_partition_key == "b"

        def load_input(self, context):
            # Raise explicitly instead of ``assert False`` so this guard still
            # fires when Python runs with assertions disabled (``-O``).
            raise AssertionError("shouldn't get here")

    @asset(partitions_def=partitions_def)
    def my_asset():
        pass

    my_job = build_assets_job(
        "my_job",
        assets=[my_asset],
        resource_defs={
            "io_manager": IOManagerDefinition.hardcoded_io_manager(MyIOManager())
        },
    )
    result = my_job.execute_in_process(partition_key="b")
    assert result.asset_materializations_for_node("my_asset") == [
        AssetMaterialization(asset_key=AssetKey(["my_asset"]), partition="b")
    ]
コード例 #13
0
def test_input_context_asset_partitions_time_window():
    """IO manager contexts expose the time window of the daily partition being run.

    For partition ``2021-06-06`` both the output and the input context are
    expected to report the window from ``2021-06-06`` to ``2021-06-07``.
    """
    daily_partitions = DailyPartitionsDefinition(start_date="2021-05-05")

    def expected_window():
        return TimeWindow(pendulum.parse("2021-06-06"), pendulum.parse("2021-06-07"))

    class WindowCheckingIOManager(IOManager):
        def handle_output(self, context, _obj):
            assert context.asset_partitions_time_window == expected_window()

        def load_input(self, context):
            assert context.asset_partitions_time_window == expected_window()

    @asset(partitions_def=daily_partitions)
    def upstream_asset():
        pass

    @asset(partitions_def=daily_partitions)
    def downstream_asset(upstream_asset):
        # load_input above returns nothing, so the loaded input is None.
        assert upstream_asset is None

    io_manager_def = IOManagerDefinition.hardcoded_io_manager(WindowCheckingIOManager())
    job = build_assets_job(
        "my_job",
        assets=[downstream_asset, upstream_asset],
        resource_defs={"io_manager": io_manager_def},
    )
    job.execute_in_process(partition_key="2021-06-06")
コード例 #14
0
def test_input_name_matches_output_name():
    """An input named ``result`` must not be confused with the default output name.

    ``something`` takes an input called ``result`` that maps to the
    ``not_result`` source asset; the external graph should list ``not_result``
    as the upstream node and ``something`` as depending on it.
    """
    source = SourceAsset(key=AssetKey("not_result"), description=None)

    @asset(ins={"result": AssetIn(asset_key=AssetKey("not_result"))})
    def something(result):  # pylint: disable=unused-argument
        pass

    job = build_assets_job("assets_job", [something], source_assets=[source])
    external_asset_nodes = external_asset_graph_from_defs([job], source_assets_by_key={})

    upstream_node = ExternalAssetNode(
        asset_key=AssetKey("not_result"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey("something"), input_name="result"
            )
        ],
        job_names=[],
    )
    downstream_node = ExternalAssetNode(
        asset_key=AssetKey("something"),
        dependencies=[
            ExternalAssetDependency(
                upstream_asset_key=AssetKey("not_result"), input_name="result"
            )
        ],
        depended_by=[],
        op_name="something",
        output_name="result",
        job_names=["assets_job"],
    )
    assert external_asset_nodes == [upstream_node, downstream_node]
コード例 #15
0
def test_source_asset_with_op():
    """A source asset used by an op-backed asset shows up in the external graph.

    The expected graph lists the ``foo`` source node first, then ``bar`` which
    depends on it through its ``foo`` input.
    """
    foo_source = SourceAsset(key=AssetKey("foo"), description=None)

    @asset
    def bar(foo):  # pylint: disable=unused-argument
        pass

    job = build_assets_job("assets_job", [bar], source_assets=[foo_source])
    external_asset_nodes = external_asset_graph_from_defs([job], source_assets_by_key={})

    source_node = ExternalAssetNode(
        asset_key=AssetKey("foo"),
        op_description=None,
        dependencies=[],
        depended_by=[ExternalAssetDependedBy(AssetKey("bar"), input_name="foo")],
        job_names=[],
    )
    asset_node = ExternalAssetNode(
        asset_key=AssetKey("bar"),
        op_name="bar",
        op_description=None,
        dependencies=[ExternalAssetDependency(AssetKey("foo"), input_name="foo")],
        depended_by=[],
        job_names=["assets_job"],
        output_name="result",
    )
    assert external_asset_nodes == [source_node, asset_node]
コード例 #16
0
def test_basic_multi_asset():
    """A multi-asset with ten outputs yields ten external asset nodes.

    Output ``out{i}`` maps to asset key ``asset{i}`` with description
    ``foo: {i}``; every node is expected to reference the single ``assets`` op.
    """
    out_defs = {}
    for i in range(10):
        out_defs[f"out{i}"] = Out(description=f"foo: {i}", asset_key=AssetKey(f"asset{i}"))

    @multi_asset(outs=out_defs)
    def assets():
        pass

    job = build_assets_job("assets_job", [assets])
    external_asset_nodes = external_asset_graph_from_defs([job], source_assets_by_key={})

    expected_nodes = []
    for i in range(10):
        expected_nodes.append(
            ExternalAssetNode(
                asset_key=AssetKey(f"asset{i}"),
                dependencies=[],
                depended_by=[],
                op_name="assets",
                op_description=None,
                job_names=["assets_job"],
                output_name=f"out{i}",
                output_description=f"foo: {i}",
            )
        )
    assert external_asset_nodes == expected_nodes
コード例 #17
0
def test_join():
    """Two independent assets feed a third; check graph nodes, wiring and execution."""

    @asset
    def asset1():
        return 1

    @asset
    def asset2():
        return 2

    @asset
    def asset3(asset1, asset2):
        assert asset1 == 1
        assert asset2 == 2

    assets_job = build_assets_job("a", [asset1, asset2, asset3])

    expected_node_defs = [asset1.op, asset2.op, asset3.op]
    assert assets_job.graph.node_defs == expected_node_defs

    # asset3 receives the "result" output of each upstream asset.
    expected_dependencies = {
        "asset1": {},
        "asset2": {},
        "asset3": {
            "asset1": DependencyDefinition("asset1", "result"),
            "asset2": DependencyDefinition("asset2", "result"),
        },
    }
    assert assets_job.dependencies == expected_dependencies

    run_result = assets_job.execute_in_process()
    assert run_result.success
コード例 #18
0
def test_two_downstream_assets_job():
    """One upstream asset with two downstream consumers in the same job.

    The external graph should list ``asset1`` as depended on by both
    ``asset2_a`` and ``asset2_b``, followed by one node per downstream asset.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2_a(asset1):
        assert asset1 == 1

    @asset
    def asset2_b(asset1):
        assert asset1 == 1

    job = build_assets_job("assets_job", [asset1, asset2_a, asset2_b])
    external_asset_nodes = external_asset_graph_from_defs([job], source_assets_by_key={})

    def downstream_node(name):
        # Both downstream assets differ only in their key / op name.
        return ExternalAssetNode(
            asset_key=AssetKey(name),
            dependencies=[
                ExternalAssetDependency(upstream_asset_key=AssetKey("asset1"), input_name="asset1")
            ],
            depended_by=[],
            op_name=name,
            op_description=None,
            job_names=["assets_job"],
            output_name="result",
            output_description=None,
        )

    upstream_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(downstream_asset_key=AssetKey(name), input_name="asset1")
            for name in ("asset2_a", "asset2_b")
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job"],
        output_name="result",
        output_description=None,
    )

    assert external_asset_nodes == [
        upstream_node,
        downstream_node("asset2_a"),
        downstream_node("asset2_b"),
    ]
コード例 #19
0
def test_single_asset_pipeline():
    """A job built from one asset contains exactly that asset's op and runs successfully."""

    @asset
    def asset1():
        return 1

    assets_job = build_assets_job("a", [asset1])

    assert assets_job.graph.node_defs == [asset1.op]
    run_result = assets_job.execute_in_process()
    assert run_result.success
コード例 #20
0
def test_cross_job_asset_dependency():
    """An asset in one job depends on an asset materialized by another job.

    ``asset1`` lives in ``assets_job1`` and is passed as a source asset to
    ``assets_job2``; the external graph should link the two nodes while
    keeping each node's ``job_names`` limited to its own job.
    """

    @asset
    def asset1():
        return 1

    @asset
    def asset2(asset1):
        assert asset1 == 1

    upstream_job = build_assets_job("assets_job1", [asset1])
    downstream_job = build_assets_job("assets_job2", [asset2], source_assets=[asset1])
    external_asset_nodes = external_asset_graph_from_defs(
        [upstream_job, downstream_job], source_assets_by_key={}
    )

    upstream_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[
            ExternalAssetDependedBy(
                downstream_asset_key=AssetKey("asset2"), input_name="asset1"
            )
        ],
        op_name="asset1",
        op_description=None,
        job_names=["assets_job1"],
        output_name="result",
        output_description=None,
    )
    downstream_node = ExternalAssetNode(
        asset_key=AssetKey("asset2"),
        dependencies=[
            ExternalAssetDependency(
                upstream_asset_key=AssetKey("asset1"), input_name="asset1"
            )
        ],
        depended_by=[],
        op_name="asset2",
        op_description=None,
        job_names=["assets_job2"],
        output_name="result",
        output_description=None,
    )
    assert external_asset_nodes == [upstream_node, downstream_node]
コード例 #21
0
def test_source_asset_conflicts_with_asset():
    """A source asset sharing a key with a job's asset must be rejected.

    Both the ``bar`` asset and the source asset use key ``bar``; building the
    external asset graph is expected to raise
    ``DagsterInvariantViolationError``.
    """
    conflicting_source = SourceAsset(key=AssetKey("bar"), description="def")

    @asset
    def bar():
        pass

    job1 = build_assets_job("job1", [bar])
    sources_by_key = {AssetKey("bar"): conflicting_source}

    with pytest.raises(DagsterInvariantViolationError):
        external_asset_graph_from_defs([job1], source_assets_by_key=sources_by_key)
コード例 #22
0
def test_single_partitioned_asset_job():
    """Running partition ``b`` of a statically partitioned asset tags its materialization."""
    static_partitions = StaticPartitionsDefinition(["a", "b", "c", "d"])

    @asset(partitions_def=static_partitions)
    def my_asset():
        pass

    job = build_assets_job("my_job", assets=[my_asset])
    run_result = job.execute_in_process(partition_key="b")

    expected_materializations = [
        AssetMaterialization(asset_key=AssetKey(["my_asset"]), partition="b")
    ]
    materializations = run_result.asset_materializations_for_node("my_asset")
    assert materializations == expected_materializations
コード例 #23
0
def test_same_asset_in_multiple_pipelines():
    """An asset included in two jobs yields one external node listing both job names."""

    @asset
    def asset1():
        return 1

    jobs = [build_assets_job(job_name, [asset1]) for job_name in ("job1", "job2")]
    external_asset_nodes = external_asset_graph_from_defs(jobs, source_assets_by_key={})

    expected_node = ExternalAssetNode(
        asset_key=AssetKey("asset1"),
        dependencies=[],
        depended_by=[],
        op_name="asset1",
        op_description=None,
        job_names=["job1", "job2"],
        output_name="result",
        output_description=None,
    )
    assert external_asset_nodes == [expected_node]
コード例 #24
0
def test_asset_key_output():
    """An input mapped to an upstream asset by explicit ``AssetKey`` receives its value.

    ``asset2`` names its input ``hello`` but points it at ``asset1``; the value
    returned by ``asset1`` should flow through to ``asset2``'s output.
    """

    @asset
    def asset1():
        return 1

    @asset(ins={"hello": AssetIn(asset_key=AssetKey("asset1"))})
    def asset2(hello):
        return hello

    assets_job = build_assets_job("boo", [asset1, asset2])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("asset2") == 1
コード例 #25
0
def test_load_from_manifest_json():
    """Load assets from the sample dbt manifest and run them against a mocked dbt resource.

    The loaded assets are validated with ``assert_assets_match_project`` and
    the resulting job must execute successfully.
    """
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    # JSON is read as UTF-8 explicitly so decoding does not depend on the
    # platform's default locale encoding.
    with open(manifest_path, "r", encoding="utf-8") as f:
        manifest_json = json.load(f)

    assets = load_assets_from_dbt_manifest(manifest_json=manifest_json)
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)},
    )
    assert assets_job.execute_in_process().success
コード例 #26
0
ファイル: test_assets_job.py プロジェクト: amarrella/dagster
def test_asset_key_for_asset_with_namespace():
    """Inputs must reference a namespaced asset by its full key.

    Referring to ``asset_foo`` without its ``hello`` namespace is expected to
    fail at job construction; using ``["hello", "asset_foo"]`` succeeds and
    passes the upstream value through.
    """

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    # Should fail because asset_foo is defined with namespace, so has asset
    # key ["hello", "asset_foo"].
    @asset(ins={"foo": AssetIn(asset_key=AssetKey("asset_foo"))})
    def failing_asset(foo):
        pass

    with pytest.raises(DagsterInvalidDefinitionError):
        build_assets_job("lol", [asset_foo, failing_asset])

    @asset(ins={"foo": AssetIn(asset_key=AssetKey(["hello", "asset_foo"]))})
    def success_asset(foo):
        return foo

    assets_job = build_assets_job("lol", [asset_foo, success_asset])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("success_asset") == "foo"
コード例 #27
0
def test_asset_key_for_asset_with_namespace_str():
    """An asset with a string namespace is addressable as ``[namespace, name]``."""

    @asset(namespace="hello")
    def asset_foo():
        return "foo"

    upstream_key = AssetKey(["hello", "asset_foo"])

    @asset(ins={"foo": AssetIn(asset_key=upstream_key)})
    def success_asset(foo):
        return foo

    assets_job = build_assets_job("lol", [asset_foo, success_asset])
    run_result = assets_job.execute_in_process()

    assert run_result.success
    assert run_result.output_for_node("success_asset") == "foo"
コード例 #28
0
def get_assets_job(io_manager_def):
    """Build job ``a`` from two namespaced assets using the given IO manager.

    Args:
        io_manager_def: Definition registered under the ``io_manager``
            resource key of the job.

    Returns:
        The job produced by ``build_assets_job`` containing ``asset1``
        (namespace ``one/two/three``) and ``asset2`` (namespace ``four/five``),
        where ``asset2`` appends 4 to the list returned by ``asset1``.
    """
    upstream_namespace = ["one", "two", "three"]

    @asset(namespace=upstream_namespace)
    def asset1():
        return [1, 2, 3]

    @asset(
        namespace=["four", "five"],
        ins={"asset1": AssetIn(namespace=upstream_namespace)},
    )
    def asset2(asset1):
        return asset1 + [4]

    resources = {"io_manager": io_manager_def}
    return build_assets_job(name="a", assets=[asset1, asset2], resource_defs=resources)
コード例 #29
0
ファイル: test_decorators.py プロジェクト: trevenrawr/dagster
def test_invoking_asset_with_deps():
    """An asset with an upstream dependency can still be invoked directly as a function."""

    @asset
    def upstream():
        return [1]

    @asset
    def downstream(upstream):
        return upstream + [2, 3]

    # check that the asset dependencies are in place
    assets_job = build_assets_job("foo", [upstream, downstream])
    run_result = assets_job.execute_in_process()
    assert run_result.success

    # Direct invocation uses the argument passed in, not the upstream asset.
    direct_output = downstream([3])
    assert direct_output == [3, 2, 3]
コード例 #30
0
def test_non_argument_deps():
    """``non_argument_deps`` orders execution without passing a value.

    ``foo`` writes a file; ``bar`` declares ``foo`` as a non-argument
    dependency and asserts that the file already exists when it runs.
    """
    with safe_tempfile_path() as path:

        @asset
        def foo():
            with open(path, "w") as marker_file:
                marker_file.write("yup")

        @asset(non_argument_deps={AssetKey("foo")})
        def bar():
            # assert that the foo asset already executed
            assert os.path.exists(path)

        assets_job = build_assets_job("a", [foo, bar])
        run_result = assets_job.execute_in_process()
        assert run_result.success