Beispiel #1
0
def test_asset_group_source_asset():
    """Run an asset whose input comes from a SourceAsset with a custom io manager.

    The source asset ``foo`` is bound to the ``the_manager`` resource key, and the
    group's underlying job is fetched from a repository and executed in process.
    """

    class MyIOManager(IOManager):
        # Outputs are dropped; every input load yields the constant 5.
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            return 5

    @io_manager
    def the_manager():
        return MyIOManager()

    @asset
    def asset_depends_on_source(foo):
        return foo

    foo_source = SourceAsset(key=AssetKey("foo"), io_manager_key="the_manager")

    group = AssetGroup(
        assets=[asset_depends_on_source],
        source_assets=[foo_source],
        resource_defs={"the_manager": the_manager},
    )

    @repository
    def the_repo():
        return [group]

    # The first job exposed by the repository must be the group's own job.
    underlying_job = the_repo.get_all_jobs()[0]
    assert underlying_job.name == group.all_assets_job_name

    execution = underlying_job.execute_in_process()
    assert execution.success
Beispiel #2
0
def test_materialize_with_selection():
    """Materialize the group with the ``"*follows_o2"`` selection and check outputs."""

    @asset
    def start_asset():
        return "foo"

    @multi_asset(
        outs={
            "o1": Out(asset_key=AssetKey("o1")),
            "o2": Out(asset_key=AssetKey("o2")),
        }
    )
    def middle_asset(start_asset):
        return start_asset, start_asset

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    # Only the io manager definition is needed; the first element is unused here.
    _unused, manager_def = asset_aware_io_manager()
    all_assets = [start_asset, middle_asset, follows_o1, follows_o2]
    group = AssetGroup(all_assets, resource_defs={"io_manager": manager_def})

    materialization = group.materialize(selection="*follows_o2")
    assert materialization.success
    assert materialization.output_for_node("middle_asset", "o1") == "foo"
    assert materialization.output_for_node("follows_o2") == "foo"
    assert materialization.output_for_node("start_asset") == "foo"
Beispiel #3
0
def test_asset_group_from_list():
    """Build an AssetGroup from a plain asset list and run its single repository job."""

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    # The parameter is named ``asset_bar`` but is mapped to the ``asset_foo`` key.
    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    group = AssetGroup(assets=[asset_foo, asset_bar, last_asset])

    @repository
    def the_repo():
        return [group]

    repo_jobs = the_repo.get_all_jobs()
    assert len(repo_jobs) == 1

    underlying_job = repo_jobs[0]
    assert underlying_job.name == group.all_assets_job_name

    execution = underlying_job.execute_in_process()
    assert execution.success
Beispiel #4
0
def test_materialize():
    """Materializing a group that holds one trivial asset reports success."""

    @asset
    def asset_foo():
        return "foo"

    single_asset_group = AssetGroup(assets=[asset_foo])

    assert single_asset_group.materialize().success
Beispiel #5
0
def test_asset_group_missing_resources():
    """AssetGroup construction fails when a required resource or io manager is absent."""

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    # Case 1: an asset requires resource key "foo" that the group does not define.
    missing_resource_pattern = r"AssetGroup is missing required resource keys for asset 'asset_foo'. Missing resource keys: \['foo'\]"
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_resource_pattern):
        AssetGroup([asset_foo])

    # Case 2: a source asset names an io manager key that the group does not define.
    source_needing_manager = SourceAsset(key=AssetKey("foo"), io_manager_key="foo")
    missing_manager_pattern = r"SourceAsset with key AssetKey\(\['foo'\]\) requires io manager with key 'foo', which was not provided on AssetGroup. Provided keys: \['io_manager', 'root_manager'\]"
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_manager_pattern):
        AssetGroup([], source_assets=[source_needing_manager])
Beispiel #6
0
def test_default_io_manager():
    """With no resources given, the group's "io_manager" is ``fs_asset_io_manager``."""

    @asset
    def asset_foo():
        return "foo"

    default_manager = AssetGroup(assets=[asset_foo]).resource_defs["io_manager"]
    assert default_manager == fs_asset_io_manager  # pylint: disable=comparison-with-callable
Beispiel #7
0
def test_asset_group_requires_root_manager():
    """An asset naming io manager key "blah" is rejected when the group lacks it."""

    @asset(io_manager_key="blah")
    def asset_foo():
        pass

    # The pattern is assembled from the same three raw pieces as the expected message.
    expected_error = (
        r"Output 'result' with AssetKey 'AssetKey\(\['asset_foo'\]\)' "
        r"requires io manager 'blah' but was not provided on asset group. "
        r"Provided resources: \['io_manager', 'root_manager'\]"
    )

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_error):
        AssetGroup([asset_foo])
Beispiel #8
0
def test_materialize_with_out_of_process_executor():
    """``materialize`` raises when the group is configured with the multiprocess executor."""

    @asset
    def asset_foo():
        return "foo"

    multiprocess_group = AssetGroup(assets=[asset_foo], executor_def=multiprocess_executor)

    expected_error = (
        "'materialize' can only be invoked on AssetGroups which have no executor or have "
        "the in_process_executor, but the AssetGroup had executor 'multiprocess'"
    )

    with pytest.raises(DagsterUnmetExecutorRequirementsError, match=expected_error):
        multiprocess_group.materialize()
Beispiel #9
0
def test_multiple_partitions_defs():
    """Check the base jobs produced for assets with differing partitions definitions.

    Three base jobs are expected, and the unpartitioned asset is expected to
    appear in each of them.
    """

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset2():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2020-05-05"))
    def daily_asset_different_start_date():
        ...

    @asset(partitions_def=HourlyPartitionsDefinition(start_date="2021-05-05-00:00"))
    def hourly_asset():
        ...

    @asset
    def unpartitioned_asset():
        ...

    members = [
        daily_asset,
        daily_asset2,
        daily_asset_different_start_date,
        hourly_asset,
        unpartitioned_asset,
    ]
    group = AssetGroup(members)

    base_jobs = group.get_base_jobs()
    assert len(base_jobs) == 3

    job_names = {job_def.name for job_def in base_jobs}
    assert job_names == {
        "__ASSET_GROUP_0",
        "__ASSET_GROUP_1",
        "__ASSET_GROUP_2",
    }

    # One frozenset of node names per job, compared without regard to job order.
    node_names_per_job = {
        frozenset(node_def.name for node_def in job_def.all_node_defs) for job_def in base_jobs
    }
    expected_node_names = {
        frozenset(["daily_asset", "daily_asset2", "unpartitioned_asset"]),
        frozenset(["hourly_asset", "unpartitioned_asset"]),
        frozenset(["daily_asset_different_start_date", "unpartitioned_asset"]),
    }
    assert node_names_per_job == expected_node_names
Beispiel #10
0
def test_asset_group_with_resources():
    """An asset requiring resource "foo" runs and returns the resource's value."""

    @resource
    def the_resource():
        return "foo"

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    group = AssetGroup([asset_foo], resource_defs={"foo": the_resource})

    @repository
    def the_repo():
        return [group]

    underlying_job = the_repo.get_all_jobs()[0]
    assert underlying_job.name == group.all_assets_job_name

    execution = underlying_job.execute_in_process()
    assert execution.success
    # The asset simply returns the resource, so its output is the resource value.
    assert execution.output_for_node("asset_foo") == "foo"
Beispiel #11
0
def test_resource_override():
    """"root_manager" cannot be supplied as a resource key, while "io_manager" can."""

    @resource
    def the_resource():
        pass

    reserved_key_error = (
        "Resource dictionary included resource with key 'root_manager', "
        "which is a reserved resource keyword in Dagster. Please change this "
        "key, and then change all places that require this key to a new value."
    )

    with pytest.raises(DagsterInvalidDefinitionError, match=reserved_key_error):
        AssetGroup([], resource_defs={"root_manager": the_resource})

    @repository
    def the_repo():
        return [AssetGroup([], resource_defs={"io_manager": mem_io_manager})]

    # The io manager passed to the group must be the one found on the job.
    underlying_job = the_repo.get_all_jobs()[0]
    job_io_manager = underlying_job.resource_defs["io_manager"]
    assert job_io_manager == mem_io_manager  # pylint: disable=comparison-with-callable
Beispiel #12
0
def test_asset_group_build_subset_job():
    """Exercise ``AssetGroup.build_job`` with a range of asset selections.

    The group is a small graph: ``start_asset`` feeds the multi-asset
    ``middle_asset`` (asset keys ``o1`` and ``o2``), which feeds ``follows_o1``
    and ``follows_o2``. For each selection the test checks which node
    definitions end up in the job and that the job executes in process. It
    finishes with three selections that must raise
    ``DagsterInvalidDefinitionError``.
    """

    @asset
    def start_asset():
        return "foo"

    @multi_asset(
        outs={
            "o1": Out(asset_key=AssetKey("o1")),
            "o2": Out(asset_key=AssetKey("o2")),
        }
    )
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    def node_names(job_def):
        # Set of node definition names in the job; built with a set
        # comprehension rather than set([...]) to avoid a throwaway list.
        return {node_def.name for node_def in job_def.all_node_defs}

    _, io_manager_def = asset_aware_io_manager()
    group = AssetGroup(
        [start_asset, middle_asset, follows_o1, follows_o2],
        resource_defs={"io_manager": io_manager_def},
    )

    # "*" selects every asset in the group.
    full_job = group.build_job("full", selection="*")

    result = full_job.execute_in_process()

    assert result.success
    assert result.output_for_node("follows_o1") == "foo"
    assert result.output_for_node("follows_o2") == "foo"

    # A bare asset name selects only that asset.
    test_single = group.build_job(name="test_single", selection="follows_o2")
    assert len(test_single.all_node_defs) == 1
    assert test_single.all_node_defs[0].name == "follows_o2"

    result = test_single.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # A leading "*" is expected to pull in follows_o2 and everything upstream of it.
    test_up_star = group.build_job(name="test_up_star", selection="*follows_o2")
    assert len(test_up_star.all_node_defs) == 3
    assert node_names(test_up_star) == {
        "follows_o2",
        "middle_asset",
        "start_asset",
    }

    result = test_up_star.execute_in_process()
    assert result.success
    assert result.output_for_node("middle_asset", "o1") == "foo"
    assert result.output_for_node("follows_o2") == "foo"
    assert result.output_for_node("start_asset") == "foo"

    # A trailing "*" is expected to pull in start_asset and everything downstream of it.
    test_down_star = group.build_job(name="test_down_star", selection="start_asset*")

    assert len(test_down_star.all_node_defs) == 4
    assert node_names(test_down_star) == {
        "follows_o2",
        "middle_asset",
        "start_asset",
        "follows_o1",
    }

    result = test_down_star.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # A list of clauses: "+o1+" combined with "o2" is expected to cover all four nodes.
    test_both_plus = group.build_job(name="test_both_plus", selection=["+o1+", "o2"])

    assert len(test_both_plus.all_node_defs) == 4
    assert node_names(test_both_plus) == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
        "start_asset",
    }

    result = test_both_plus.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # Both clauses select keys of middle_asset, so their results overlap.
    test_selection_with_overlap = group.build_job(
        name="test_multi_asset_multi_selection", selection=["o1", "o2+"]
    )

    assert len(test_selection_with_overlap.all_node_defs) == 3
    assert node_names(test_selection_with_overlap) == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
    }

    result = test_selection_with_overlap.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # A clause that matches no asset key must be rejected.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match=r"When attempting to create job 'bad_subset', the clause "
        r"'doesnt_exist' within the asset key selection did not match any asset "
        r"keys. Present asset keys: \['start_asset', 'o1', 'o2', 'follows_o1', 'follows_o2'\]",
    ):
        group.build_job(name="bad_subset", selection="doesnt_exist")

    # A syntactically invalid clause must be rejected.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match=r"When attempting to create job 'bad_query_arguments', the clause "
        r"follows_o1= within the asset key selection was invalid. Please review "
        r"the selection syntax here: "
        r"https://docs.dagster.io/concepts/ops-jobs-graphs/job-execution#op-selection-syntax.",
    ):
        group.build_job(name="bad_query_arguments", selection="follows_o1=")

    # Selecting only one of middle_asset's two asset keys must be rejected.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match=r"When building job 'test_subselect_only_one_key', the asset "
        r"'middle_asset' contains asset keys \['o1', 'o2'\], but attempted to "
        r"select only \['o1'\]. Selecting only some of the asset keys for a "
        r"particular asset is not yet supported behavior. Please select all "
        r"asset keys produced by a given asset when subsetting.",
    ):
        group.build_job(name="test_subselect_only_one_key", selection="o1")
Beispiel #13
0
 def the_repo():
     # A single empty asset group whose "io_manager" resource is mem_io_manager.
     empty_group = AssetGroup([], resource_defs={"io_manager": mem_io_manager})
     return [empty_group]
Beispiel #14
0
 def the_repo():
     # A single asset group holding the_asset, run with the in-process executor.
     in_process_group = AssetGroup([the_asset], executor_def=in_process_executor)
     return [in_process_group]
Beispiel #15
0
 def the_repo():  # pylint: disable=unused-variable
     # Two separate, empty asset groups returned together.
     first_group = AssetGroup(assets=[])
     second_group = AssetGroup(assets=[])
     return [first_group, second_group]