def test_asset_group_source_asset():
    """Run the base job of a group whose asset depends on a SourceAsset.

    The source asset ``foo`` is bound to the ``the_manager`` IO manager key,
    which the group supplies through ``resource_defs``.
    """

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            return 5

    @io_manager
    def the_manager():
        return MyIOManager()

    @asset
    def asset_depends_on_source(foo):
        return foo

    source = SourceAsset(key=AssetKey("foo"), io_manager_key="the_manager")
    group = AssetGroup(
        assets=[asset_depends_on_source],
        source_assets=[source],
        resource_defs={"the_manager": the_manager},
    )

    @repository
    def the_repo():
        return [group]

    # The repository exposes the group through its generated base job.
    base_job = the_repo.get_all_jobs()[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    run = base_job.execute_in_process()
    assert run.success
def test_materialize_with_selection():
    """Materialize the ``*follows_o2`` selection and check the resulting outputs."""

    @asset
    def start_asset():
        return "foo"

    @multi_asset(
        outs={
            "o1": Out(asset_key=AssetKey("o1")),
            "o2": Out(asset_key=AssetKey("o2")),
        }
    )
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    _, io_manager_def = asset_aware_io_manager()
    all_assets = [start_asset, middle_asset, follows_o1, follows_o2]
    group = AssetGroup(all_assets, resource_defs={"io_manager": io_manager_def})

    run = group.materialize(selection="*follows_o2")

    assert run.success
    assert run.output_for_node("middle_asset", "o1") == "foo"
    assert run.output_for_node("follows_o2") == "foo"
    assert run.output_for_node("start_asset") == "foo"
def test_asset_group_from_modules(monkeypatch):
    """Load a group from modules twice, then check the duplicate-key error.

    The first half asserts that two loads of the same modules list the same
    asset op names and source asset keys. The second half patches an extra
    attribute holding a ``little_richard`` asset onto ``asset_package`` and
    expects ``from_modules`` to raise.
    """
    from . import asset_package
    from .asset_package import module_with_assets

    def load_group():
        return AssetGroup.from_modules([asset_package, module_with_assets])

    def summarize(loaded):
        op_names = [asset_def.op.name for asset_def in loaded.assets]
        source_keys = [source.key for source in loaded.source_assets]
        return op_names, source_keys

    first_group = load_group()
    first_op_names, first_source_keys = summarize(first_group)

    second_group = load_group()
    second_op_names, second_source_keys = summarize(second_group)

    assert first_op_names == second_op_names
    assert first_source_keys == second_source_keys

    duplicate_key_message = re.escape(
        "Asset key AssetKey(['little_richard']) is defined multiple times. "
        "Definitions found in modules: dagster_tests.core_tests.asset_defs_tests.asset_package."
    )

    with monkeypatch.context() as m:

        @asset
        def little_richard():
            pass

        # raising=False: the attribute is being added, not replaced.
        m.setattr(asset_package, "little_richard_dup", little_richard, raising=False)
        with pytest.raises(DagsterInvalidDefinitionError, match=duplicate_key_message):
            load_group()
def test_asset_group_from_list():
    """Build a group from a list of assets and run its single base job."""

    @asset
    def asset_foo():
        return "foo"

    @asset
    def asset_bar():
        return "bar"

    # The input named ``asset_bar`` is remapped to the ``asset_foo`` asset key.
    @asset(ins={"asset_bar": AssetIn(asset_key=AssetKey("asset_foo"))})
    def last_asset(asset_bar):
        return asset_bar

    group = AssetGroup(assets=[asset_foo, asset_bar, last_asset])

    @repository
    def the_repo():
        return [group]

    jobs = the_repo.get_all_jobs()
    assert len(jobs) == 1

    base_job = jobs[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    run = base_job.execute_in_process()
    assert run.success
def test_materialize():
    """Materializing a one-asset group with no selection succeeds."""

    @asset
    def asset_foo():
        return "foo"

    single_asset_group = AssetGroup(assets=[asset_foo])
    assert single_asset_group.materialize().success
def test_materialize_with_out_of_process_executor():
    """``materialize`` is expected to raise for a group using ``multiprocess_executor``."""

    @asset
    def asset_foo():
        return "foo"

    expected_message = (
        "'materialize' can only be invoked on AssetGroups which have no executor or have "
        "the in_process_executor, but the AssetGroup had executor 'multiprocess'"
    )
    group = AssetGroup(assets=[asset_foo], executor_def=multiprocess_executor)

    with pytest.raises(DagsterUnmetExecutorRequirementsError, match=expected_message):
        group.materialize()
def test_asset_group_from_current_module():
    """``from_current_module`` finds exactly one asset and one source asset here."""
    group = AssetGroup.from_current_module()

    assert len(group.assets) == 1
    assert {asset_def.op.name for asset_def in group.assets} == {"asset_in_current_module"}

    expected_source_keys = {AssetKey("source_asset_in_current_module")}
    assert len(group.source_assets) == 1
    assert {source.key for source in group.source_assets} == expected_source_keys
def test_asset_group_from_package_module():
    """Load ``asset_package`` twice; each load has six assets, listed identically."""
    from . import asset_package

    snapshots = []
    for _ in range(2):
        loaded = AssetGroup.from_package_module(asset_package)
        assert len(loaded.assets) == 6
        snapshots.append(
            (
                [asset_def.op.name for asset_def in loaded.assets],
                [source.key for source in loaded.source_assets],
            )
        )

    (first_op_names, first_source_keys), (second_op_names, second_source_keys) = snapshots
    assert first_op_names == second_op_names
    assert first_source_keys == second_source_keys
def test_multiple_partitions_defs():
    """Check the base jobs built for assets with differing partitions definitions.

    The expectations below are three base jobs: the two daily assets sharing
    a start date, the hourly asset, and the daily asset with a different
    start date, with ``unpartitioned_asset`` present in each of them.
    """

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2021-05-05"))
    def daily_asset2():
        ...

    @asset(partitions_def=DailyPartitionsDefinition(start_date="2020-05-05"))
    def daily_asset_different_start_date():
        ...

    @asset(partitions_def=HourlyPartitionsDefinition(start_date="2021-05-05-00:00"))
    def hourly_asset():
        ...

    @asset
    def unpartitioned_asset():
        ...

    all_assets = [
        daily_asset,
        daily_asset2,
        daily_asset_different_start_date,
        hourly_asset,
        unpartitioned_asset,
    ]
    base_jobs = AssetGroup(all_assets).get_base_jobs()

    assert len(base_jobs) == 3

    job_names = {base_job.name for base_job in base_jobs}
    assert job_names == {"__ASSET_GROUP_0", "__ASSET_GROUP_1", "__ASSET_GROUP_2"}

    # Compare as a set of frozensets so the order of the jobs does not matter.
    nodes_per_job = {
        frozenset(node_def.name for node_def in base_job.all_node_defs)
        for base_job in base_jobs
    }
    assert nodes_per_job == {
        frozenset({"daily_asset", "daily_asset2", "unpartitioned_asset"}),
        frozenset({"hourly_asset", "unpartitioned_asset"}),
        frozenset({"daily_asset_different_start_date", "unpartitioned_asset"}),
    }
def test_asset_group_missing_resources():
    """Constructing a group without a required resource or IO manager key raises."""

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    # Case 1: an asset requires resource key "foo" that the group does not provide.
    missing_resource_pattern = (
        r"AssetGroup is missing required resource keys for asset 'asset_foo'. "
        r"Missing resource keys: \['foo'\]"
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_resource_pattern):
        AssetGroup([asset_foo])

    # Case 2: a source asset names IO manager key "foo" that the group does not provide.
    missing_io_manager_pattern = (
        r"SourceAsset with key AssetKey\(\['foo'\]\) requires io manager with key 'foo', "
        r"which was not provided on AssetGroup. "
        r"Provided keys: \['io_manager', 'root_manager'\]"
    )
    source_asset_io_req = SourceAsset(key=AssetKey("foo"), io_manager_key="foo")
    with pytest.raises(DagsterInvalidDefinitionError, match=missing_io_manager_pattern):
        AssetGroup([], source_assets=[source_asset_io_req])
def test_default_io_manager():
    """A group built without ``resource_defs`` has ``fs_asset_io_manager`` as ``io_manager``."""

    @asset
    def asset_foo():
        return "foo"

    default_io_manager = AssetGroup(assets=[asset_foo]).resource_defs["io_manager"]
    assert default_io_manager == fs_asset_io_manager  # pylint: disable=comparison-with-callable
def test_asset_group_with_resources():
    """An asset can read a resource supplied through the group's ``resource_defs``."""

    @resource
    def the_resource():
        return "foo"

    @asset(required_resource_keys={"foo"})
    def asset_foo(context):
        return context.resources.foo

    group = AssetGroup([asset_foo], resource_defs={"foo": the_resource})

    @repository
    def the_repo():
        return [group]

    base_job = the_repo.get_all_jobs()[0]
    assert AssetGroup.is_base_job_name(base_job.name)

    run = base_job.execute_in_process()
    assert run.success
    # The asset returns the resource value unchanged.
    assert run.output_for_node("asset_foo") == "foo"
def test_asset_group_requires_root_manager():
    """An asset naming an IO manager key the group lacks makes construction raise."""

    @asset(io_manager_key="blah")
    def asset_foo():
        pass

    expected_pattern = (
        r"Output 'result' with AssetKey 'AssetKey\(\['asset_foo'\]\)' "
        r"requires io manager 'blah' but was not provided on asset group. "
        r"Provided resources: \['io_manager', 'root_manager'\]"
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=expected_pattern):
        AssetGroup([asset_foo])
Exemple #14
0
def _load_target_from_module(module: ModuleType, fn_name: str,
                             error_suffix: str) -> object:
    """Resolve ``fn_name`` against ``module``.

    Args:
        module: The module to load the target from.
        fn_name: Name of the attribute to fetch, or the ``LOAD_ALL_ASSETS``
            sentinel.
        error_suffix: Text appended to the error message when the attribute
            is missing.

    Returns:
        ``AssetGroup.from_modules([module])`` when ``fn_name`` equals
        ``LOAD_ALL_ASSETS``; otherwise the attribute of ``module`` named
        ``fn_name``.

    Raises:
        DagsterInvariantViolationError: If ``module`` has no attribute named
            ``fn_name``.
    """
    from dagster.core.asset_defs import AssetGroup
    from dagster.core.workspace.autodiscovery import LOAD_ALL_ASSETS

    # LOAD_ALL_ASSETS is a special symbol that's returned when, instead of loading a particular
    # attribute, we should load all the assets in the module.
    if fn_name == LOAD_ALL_ASSETS:
        return AssetGroup.from_modules([module])

    if hasattr(module, fn_name):
        return getattr(module, fn_name)

    raise DagsterInvariantViolationError(f"{fn_name} not found {error_suffix}")
def test_asset_group_from_package_module():
    """``asset_package`` yields four named assets and one source asset."""
    from . import asset_package

    group = AssetGroup.from_package_module(asset_package)

    expected_op_names = {"little_richard", "miles_davis", "chuck_berry", "bb_king"}
    assert len(group.assets) == 4
    assert {asset_def.op.name for asset_def in group.assets} == expected_op_names

    source_keys = {source.key for source in group.source_assets}
    assert source_keys == {AssetKey("elvis_presley")}
def test_resource_override():
    """``root_manager`` is rejected as a resource key; ``io_manager`` can be replaced."""

    @resource
    def the_resource():
        pass

    reserved_key_message = (
        "Resource dictionary included resource with key 'root_manager', "
        "which is a reserved resource keyword in Dagster. Please change this "
        "key, and then change all places that require this key to a new value."
    )
    with pytest.raises(DagsterInvalidDefinitionError, match=reserved_key_message):
        AssetGroup([], resource_defs={"root_manager": the_resource})

    @repository
    def the_repo():
        return [AssetGroup([], resource_defs={"io_manager": mem_io_manager})]

    # The io_manager passed to the group should be the one on its base job.
    base_job = the_repo.get_all_jobs()[0]
    job_io_manager = base_job.resource_defs["io_manager"]
    assert job_io_manager == mem_io_manager  # pylint: disable=comparison-with-callable
Exemple #17
0
def loadable_targets_from_loaded_module(
        module: ModuleType) -> Sequence[LoadableTarget]:
    """Find the loadable targets that ``module`` exposes.

    Definition kinds are tried in a fixed order and the first kind that is
    present wins:

    1. Repositories: all of them are returned.
    2. Pipelines: exactly one is returned; more than one is an error.
    3. Graphs: exactly one is returned; more than one is an error.
    4. Asset groups: exactly one is returned; more than one is an error.
    5. Assets or source assets collected by ``AssetGroup.from_modules``,
       wrapped in a single ``LOAD_ALL_ASSETS`` target.

    Args:
        module: The already-imported module to inspect.

    Returns:
        The loadable targets found for the first matching kind.

    Raises:
        DagsterInvariantViolationError: If a kind that allows only one
            definition has several, or if nothing loadable is found.
    """
    repos = _loadable_targets_of_type(module, RepositoryDefinition)
    if repos:
        return repos

    pipelines = _loadable_targets_of_type(module, PipelineDefinition)
    # Jobs are looked up only to choose the wording of the error below.
    jobs = _loadable_targets_of_type(module, JobDefinition)

    if len(pipelines) == 1:
        return pipelines

    if len(pipelines) > 1:
        target_type = "job" if len(jobs) > 1 else "pipeline"
        pipeline_symbols = repr([p.attribute for p in pipelines])
        too_many_pipelines = (
            'No repository and more than one {target_type} found in "{module_name}". If you load '
            "a file or module directly it must have only one {target_type} "
            "in scope. Found {target_type}s defined in variables or decorated "
            "functions: {pipeline_symbols}.").format(
                module_name=module.__name__,
                pipeline_symbols=pipeline_symbols,
                target_type=target_type,
            )
        raise DagsterInvariantViolationError(too_many_pipelines)

    graphs = _loadable_targets_of_type(module, GraphDefinition)

    if len(graphs) == 1:
        return graphs

    if len(graphs) > 1:
        too_many_graphs = (
            'More than one graph found in "{module_name}". '
            "If you load a file or module directly and it has no repositories, jobs, or "
            "pipelines in scope, it must have no more than one graph in scope. "
            "Found graphs defined in variables or decorated functions: {graph_symbols}."
        ).format(
            module_name=module.__name__,
            graph_symbols=repr([g.attribute for g in graphs]),
        )
        raise DagsterInvariantViolationError(too_many_graphs)

    asset_groups = _loadable_targets_of_type(module, AssetGroup)

    if len(asset_groups) == 1:
        return asset_groups

    if len(asset_groups) > 1:
        var_names = repr([a.attribute for a in asset_groups])
        raise DagsterInvariantViolationError((
            f'More than one asset group found in "{module.__name__}". '
            "If you load a file or module directly and it has no repositories, jobs, "
            "pipeline, or graphs in scope, it must have no more than one asset group in scope. "
            f"Found asset groups defined in variables: {var_names}."))

    # Last resort: gather assets and source assets defined in the module.
    module_assets_group = AssetGroup.from_modules([module])
    has_assets = len(module_assets_group.assets) > 0
    if has_assets or len(module_assets_group.source_assets) > 0:
        return [LoadableTarget(LOAD_ALL_ASSETS, module_assets_group)]

    raise DagsterInvariantViolationError(
        "No repositories, jobs, pipelines, graphs, asset groups, or asset definitions found in "
        f'"{module.__name__}".')
def test_asset_group_build_subset_job():
    """Exercise ``AssetGroup.build_job`` with a range of selection strings.

    The asset graph is ``start_asset -> middle_asset (o1, o2)``, with
    ``follows_o1`` consuming ``o1`` and ``follows_o2`` consuming ``o2``. For
    each selection the test checks which nodes end up in the job and that the
    job runs; the last three cases check the error raised for an unknown
    key, a malformed clause, and a partial selection of a multi-asset.
    """

    @asset
    def start_asset():
        return "foo"

    @multi_asset(outs={
        "o1": Out(asset_key=AssetKey("o1")),
        "o2": Out(asset_key=AssetKey("o2"))
    })
    def middle_asset(start_asset):
        return (start_asset, start_asset)

    @asset
    def follows_o1(o1):
        return o1

    @asset
    def follows_o2(o2):
        return o2

    _, io_manager_def = asset_aware_io_manager()
    group = AssetGroup(
        [start_asset, middle_asset, follows_o1, follows_o2],
        resource_defs={"io_manager": io_manager_def},
    )

    # "*" selects every asset in the group.
    full_job = group.build_job("full", selection="*")

    result = full_job.execute_in_process()

    assert result.success
    assert result.output_for_node("follows_o1") == "foo"
    assert result.output_for_node("follows_o2") == "foo"

    # A bare key selects only that asset.
    test_single = group.build_job(name="test_single", selection="follows_o2")
    assert len(test_single.all_node_defs) == 1
    assert test_single.all_node_defs[0].name == "follows_o2"

    result = test_single.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # A leading "*" pulls in everything upstream of the key.
    test_up_star = group.build_job(name="test_up_star",
                                   selection="*follows_o2")
    assert len(test_up_star.all_node_defs) == 3
    assert {node.name for node in test_up_star.all_node_defs} == {
        "follows_o2",
        "middle_asset",
        "start_asset",
    }

    result = test_up_star.execute_in_process()
    assert result.success
    assert result.output_for_node("middle_asset", "o1") == "foo"
    assert result.output_for_node("follows_o2") == "foo"
    assert result.output_for_node("start_asset") == "foo"

    # A trailing "*" pulls in everything downstream of the key.
    test_down_star = group.build_job(name="test_down_star",
                                     selection="start_asset*")

    assert len(test_down_star.all_node_defs) == 4
    assert {node.name for node in test_down_star.all_node_defs} == {
        "follows_o2",
        "middle_asset",
        "start_asset",
        "follows_o1",
    }

    result = test_down_star.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # Several clauses can be combined in a list.
    test_both_plus = group.build_job(name="test_both_plus",
                                     selection=["+o1+", "o2"])

    assert len(test_both_plus.all_node_defs) == 4
    assert {node.name for node in test_both_plus.all_node_defs} == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
        "start_asset",
    }

    result = test_both_plus.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    test_selection_with_overlap = group.build_job(
        name="test_multi_asset_multi_selection", selection=["o1", "o2+"])

    assert len(test_selection_with_overlap.all_node_defs) == 3
    assert {node.name for node in test_selection_with_overlap.all_node_defs} == {
        "follows_o1",
        "follows_o2",
        "middle_asset",
    }

    result = test_selection_with_overlap.execute_in_process()
    assert result.success
    assert result.output_for_node("follows_o2") == "foo"

    # Error case: the clause matches no asset key.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match=r"When attempting to create job 'bad_subset', the clause "
            r"'doesnt_exist' within the asset key selection did not match any asset "
            r"keys. Present asset keys: \['start_asset', 'o1', 'o2', 'follows_o1', 'follows_o2'\]",
    ):
        group.build_job(name="bad_subset", selection="doesnt_exist")

    # Error case: the clause is not valid selection syntax.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match=
            r"When attempting to create job 'bad_query_arguments', the clause "
            r"follows_o1= within the asset key selection was invalid. Please review "
            r"the selection syntax here: "
            r"https://docs.dagster.io/concepts/ops-jobs-graphs/job-execution#op-selection-syntax.",
    ):
        group.build_job(name="bad_query_arguments", selection="follows_o1=")

    # Error case: only one of the two keys produced by ``middle_asset`` is selected.
    with pytest.raises(
            DagsterInvalidDefinitionError,
            match=r"When building job 'test_subselect_only_one_key', the asset "
            r"'middle_asset' contains asset keys \['o1', 'o2'\], but attempted to "
            r"select only \['o1'\]. Selecting only some of the asset keys for a "
            r"particular asset is not yet supported behavior. Please select all "
            r"asset keys produced by a given asset when subsetting.",
    ):
        group.build_job(name="test_subselect_only_one_key", selection="o1")
 def the_repo():
     # One asset-less group whose "io_manager" resource is mem_io_manager.
     group_with_mem_io = AssetGroup([], resource_defs={"io_manager": mem_io_manager})
     return [group_with_mem_io]
 def the_repo():
     # One group holding ``the_asset``, configured with in_process_executor.
     in_process_group = AssetGroup([the_asset], executor_def=in_process_executor)
     return [in_process_group]
 def the_repo():  # pylint: disable=unused-variable
     # Two separate groups, each constructed with an empty asset list.
     first_group = AssetGroup(assets=[])
     second_group = AssetGroup(assets=[])
     return [first_group, second_group]