Exemple #1
0
def test_multi_output():
    """Exercise a solid with two dynamic outputs and one regular output in a pipeline.

    Only the "numbers" output is mapped (over ``double``); the assertions cover
    the output events and values of ``multiout`` and the mapped results of
    ``double``.
    """

    @solid(
        output_defs=[
            DynamicOutputDefinition(int, "numbers"),
            DynamicOutputDefinition(str, "letters"),
            OutputDefinition(str, "wildcard"),
        ]
    )
    def multiout(_):
        # Two "numbers" and three "letters"; every mapping key is the string
        # form of the value being emitted.
        for number in (1, 2):
            yield DynamicOutput(number, output_name="numbers", mapping_key=str(number))
        for letter in ("a", "b", "c"):
            yield DynamicOutput(letter, output_name="letters", mapping_key=letter)
        yield Output("*", "wildcard")

    @solid
    def double(n):
        return n * 2

    @pipeline
    def multi_dyn():
        numbers, _letters, _wildcard = multiout()
        numbers.map(double)

    pipe_result = execute_pipeline(multi_dyn)
    assert pipe_result.success

    expected_numbers = {"1": 1, "2": 2}
    expected_letters = {"a": "a", "b": "b", "c": "c"}
    result = pipe_result.result_for_solid("multiout")

    # Event counts, looked up per output name.
    assert len(result.get_output_events_for_compute("numbers")) == 2
    assert len(result.get_output_events_for_compute("letters")) == 3
    assert result.get_output_event_for_compute("wildcard")

    # The same counts through the dict-shaped accessor.
    events_by_output = result.compute_output_events_dict
    assert len(events_by_output["numbers"]) == 2
    assert len(events_by_output["letters"]) == 3
    assert len(events_by_output["wildcard"]) == 1

    # Dynamic outputs come back keyed by mapping key.
    assert result.output_values == {
        "numbers": expected_numbers,
        "letters": expected_letters,
        "wildcard": "*",
    }
    assert result.output_value("numbers") == expected_numbers
    assert result.output_value("letters") == expected_letters
    assert result.output_value("wildcard") == "*"

    assert pipe_result.output_for_solid("double") == {"1": 2, "2": 4}
Exemple #2
0
def test_context_mapping_key():
    """Check ``context.get_mapping_key()`` for plain, mapped and standalone invocations.

    The expected sequence is ``None`` for the unmapped invocation followed by
    the two mapping keys; a direct call with a built context yields ``None``.
    """
    seen_keys = []

    @solid
    def observe_key(context, _dep=None):
        seen_keys.append(context.get_mapping_key())

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit():
        for number in (1, 2):
            yield DynamicOutput(number, mapping_key=f"key_{number}")

    @pipeline
    def test():
        observe_key()
        emit().map(observe_key)

    run_result = execute_pipeline(test)
    assert run_result.success
    assert seen_keys == [None, "key_1", "key_2"]

    # test standalone doesn't throw as well
    seen_keys.clear()
    observe_key(build_solid_context())
    assert seen_keys == [None]
Exemple #3
0
def dynamic_pipeline():
    # Composition body: fan out over emit(), multiply each item by ten and
    # then by two, sum the collected results, and double that sum under the
    # alias "double_total".

    @solid
    def multiply_by_two(context, y):
        doubled = y * 2
        context.log.info("multiply_by_two is returning " + str(doubled))
        return doubled

    @solid
    def multiply_inputs(context, y, ten, should_fail):
        current_run = context.instance.get_run_by_id(context.run_id)
        # Fails only for the item equal to 2, and only when the run has no
        # parent run id.
        if should_fail and y == 2 and current_run.parent_run_id is None:
            raise Exception()
        product = y * ten
        context.log.info("multiply_inputs is returning " + str(product))
        return product

    @solid
    def emit_ten(_):
        return 10

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit(_):
        yield from (
            DynamicOutput(value=index, mapping_key=str(index)) for index in range(3)
        )

    @solid
    def sum_numbers(_, nums):
        return sum(nums)

    def _scale_then_double(n):
        return multiply_by_two(multiply_inputs(n, emit_ten()))

    # pylint: disable=no-member
    double_total = multiply_by_two.alias("double_total")
    mapped = emit().map(_scale_then_double)
    double_total(sum_numbers(mapped.collect()))
Exemple #4
0
def test_fails_with_wrong_output():
    """A solid declaring a dynamic output must not yield ``Output`` or return a bare value."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_fail(_):
        yield Output(1)

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_also_fail(_):
        return 1

    # Both misuses are expected to raise the same invariant violation.
    for bad_solid in (should_fail, should_also_fail):
        with pytest.raises(
            DagsterInvariantViolationError,
            match="must yield DynamicOutput",
        ):
            execute_solid(bad_solid)
Exemple #5
0
def test_dynamic(gcs_bucket):
    """Map over a dynamic output with the GCS pickle IO manager configured for ``gcs_bucket``."""
    gcs_mode = ModeDefinition(
        resource_defs={
            "io_manager": gcs_pickle_io_manager,
            "gcs": gcs_resource,
        }
    )
    run_config = {
        "resources": {
            "io_manager": {"config": {"gcs_bucket": gcs_bucket}},
        },
    }

    @solid(output_defs=[DynamicOutputDefinition()])
    def numbers(_):
        yield from (DynamicOutput(index, mapping_key=str(index)) for index in range(3))

    @solid
    def echo(_, x):
        return x

    @pipeline(mode_defs=[gcs_mode])
    def dynamic():
        numbers().map(echo)

    result = execute_pipeline(dynamic, run_config=run_config)
    assert result.success
Exemple #6
0
def test_dynamic_output_async_gen():
    """Directly invoke an async-generator solid and inspect the outputs it yields."""

    @solid(
        output_defs=[
            DynamicOutputDefinition(name="a", is_required=False),
            OutputDefinition(name="b", is_required=False),
        ]
    )
    async def aio_gen():
        yield DynamicOutput(value=1, mapping_key="1", output_name="a")
        yield DynamicOutput(value=2, mapping_key="2", output_name="a")
        await asyncio.sleep(0.01)
        yield Output(value="foo", output_name="b")

    async def get_results():
        # Drain the async generator into a list so it can be unpacked below.
        return [output async for output in aio_gen()]

    loop = asyncio.get_event_loop()
    first_dynamic, second_dynamic, regular = loop.run_until_complete(get_results())

    assert first_dynamic.value == 1
    assert first_dynamic.mapping_key == "1"
    assert second_dynamic.value == 2
    assert second_dynamic.mapping_key == "2"

    assert regular.value == "foo"
Exemple #7
0
def test_multi_out_map():
    """Map a multi-output solid over a dynamic output and collect each output separately."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def emit():
        for number in (1, 2, 3):
            yield DynamicOutput(number, mapping_key=str(number))

    @solid(
        output_defs=[
            OutputDefinition(name="a", is_required=False),
            OutputDefinition(name="b", is_required=False),
            OutputDefinition(name="c", is_required=False),
        ]
    )
    def multiout(inp: int):
        # 1 goes to "a", everything else to "b"; "c" is never emitted.
        target = "a" if inp == 1 else "b"
        yield Output(inp, output_name=target)

    @solid
    def echo(a):
        return a

    @pipeline
    def destructure():
        a, b, c = emit().map(multiout)
        echo.alias("echo_a")(a.collect())
        echo.alias("echo_b")(b.collect())
        echo.alias("echo_c")(c.collect())

    result = execute_pipeline(destructure)

    echo_a = result.result_for_solid("echo_a")
    echo_b = result.result_for_solid("echo_b")
    echo_c = result.result_for_solid("echo_c")

    assert echo_a.output_value() == [1]
    assert echo_b.output_value() == [2, 3]
    # all fanned in inputs skipped -> solid skips
    assert echo_c.skipped
Exemple #8
0
def test_solid_outputs_access():
    """Success and failure hooks record ``context.solid_output_values`` per step key."""
    called = {}

    @success_hook
    def my_success_hook(context):
        called[context.step_key] = context.solid_output_values

    @failure_hook
    def my_failure_hook(context):
        called[context.step_key] = context.solid_output_values

    @solid(
        output_defs=[
            OutputDefinition(name="one"),
            OutputDefinition(name="two"),
            OutputDefinition(name="three"),
        ]
    )
    def a_solid(_):
        for value, name in ((1, "one"), (2, "two"), (3, "three")):
            yield Output(value, name)

    @solid(
        output_defs=[
            OutputDefinition(name="one"),
            OutputDefinition(name="two"),
        ]
    )
    def failed_solid(_):
        # Emits "one" and then raises, so "two" is never produced.
        yield Output(1, "one")
        raise SomeUserException()
        yield Output(3, "two")  # pylint: disable=unreachable

    @solid(output_defs=[DynamicOutputDefinition()])
    def dynamic_solid(_):
        for number in (1, 2):
            yield DynamicOutput(number, mapping_key=f"mapping_{number}")

    @solid
    def echo(_, x):
        return x

    @my_success_hook
    @my_failure_hook
    @pipeline
    def a_pipeline():
        a_solid()
        failed_solid()
        dynamic_solid().map(echo)

    result = execute_pipeline(a_pipeline, raise_on_error=False)
    assert not result.success

    expected_by_step = {
        "a_solid": {"one": 1, "two": 2, "three": 3},
        "failed_solid": {"one": 1},
        "dynamic_solid": {"result": {"mapping_1": 1, "mapping_2": 2}},
        "echo[mapping_1]": {"result": 1},
        "echo[mapping_2]": {"result": 2},
    }
    for step_key, recorded_outputs in expected_by_step.items():
        assert called.get(step_key) == recorded_outputs
Exemple #9
0
def test_fails_dupe_keys():
    """Yielding the same mapping key twice from one dynamic output must raise."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_fail(_):
        # Same mapping key on both yields.
        for _attempt in range(2):
            yield DynamicOutput(True, mapping_key="dunk")

    expected_error = pytest.raises(
        DagsterInvariantViolationError,
        match='mapping_key "dunk" multiple times',
    )
    with expected_error:
        execute_solid(should_fail)
Exemple #10
0
def test_dynamic_output_solid():
    """Run a dynamic-output solid through ``execute_in_process`` and read values by mapping key."""

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_work(_):
        for number in (1, 2):
            yield DynamicOutput(number, mapping_key=str(number))

    result = execute_in_process(should_work)
    assert result.success

    by_mapping_key = result.output_values["result"]
    assert by_mapping_key["1"] == 1
    assert by_mapping_key["2"] == 2
Exemple #11
0
def test_dynamic_output_definition_single_partition_materialization():
    """Asset materializations for a static output feeding a dynamic output with per-key asset keys.

    One materialization is expected for ``solid1`` and four for ``solid2``
    (one per mapping key), each of the latter listing ``table1`` as parent.
    """
    entry1 = EventMetadataEntry.int(123, "nrows")
    entry2 = EventMetadataEntry.float(3.21, "some value")

    @solid(output_defs=[OutputDefinition(name="output1", asset_key=AssetKey("table1"))])
    def solid1(_):
        return Output(None, "output1", metadata_entries=[entry1])

    @solid(
        output_defs=[
            DynamicOutputDefinition(
                name="output2",
                # The asset key is derived from the mapping key of each output.
                asset_key=lambda context: AssetKey(context.mapping_key),
            )
        ]
    )
    def solid2(_, _input1):
        for index in range(4):
            yield DynamicOutput(
                7,
                mapping_key=str(index),
                output_name="output2",
                metadata_entries=[entry2],
            )

    @solid
    def do_nothing(_, _input1):
        pass

    @pipeline
    def my_pipeline():
        solid2(solid1()).map(do_nothing)

    result = execute_pipeline(my_pipeline)
    materializations = [
        event
        for event in result.step_event_list
        if event.event_type_value == "ASSET_MATERIALIZATION"
    ]
    assert len(materializations) == 5

    table1_key = AssetKey(["table1"])
    static_materialization, *dynamic_materializations = materializations

    check_materialization(static_materialization, table1_key, metadata_entries=[entry1])

    for materialization in dynamic_materializations:
        check_materialization(
            materialization,
            AssetKey(materialization.asset_key.path),
            metadata_entries=[entry2],
            parent_assets=[AssetLineageInfo(table1_key)],
        )

    # Every dynamic materialization must have a distinct asset path.
    seen_paths = {tuple(m.asset_key.path) for m in dynamic_materializations}
    assert len(seen_paths) == 4
Exemple #12
0
def test_dynamic():
    # The inner docstring is part of the test: the first assertion expects the
    # output definition's description to equal its "Returns" text.
    @solid(output_defs=[DynamicOutputDefinition(dagster_type=int)])
    def dyn_desc(_) -> Iterator[DynamicOutput]:
        """
        Returns:
            numbers
        """
        yield DynamicOutput(4, "4")

    only_output_def = dyn_desc.output_defs[0]
    assert only_output_def.description == "numbers"
    assert only_output_def.is_dynamic
def test_dynamic_output_non_gen():
    """Directly invoking a solid that returns (rather than yields) a DynamicOutput must raise."""
    expected_message = (
        "Attempted to return a DynamicOutput from solid. DynamicOuts are only supported "
        "using yield syntax."
    )

    @solid(output_defs=[DynamicOutputDefinition(name="a", is_required=False)])
    def should_not_work():
        return DynamicOutput(value=1, mapping_key="1", output_name="a")

    with pytest.raises(DagsterInvariantViolationError, match=expected_message):
        should_not_work()
Exemple #14
0
def test_must_unpack_composite():
    """Feeding a composite's dynamic output to a solid without map/collect is a definition error."""
    expected_message = "Dynamic output must be unpacked by invoking map or collect"

    # Both definitions live inside the ``raises`` block, so the error may come
    # from either decorator being applied.
    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):

        @composite_solid(output_defs=[DynamicOutputDefinition()])
        def composed():
            return dynamic_numbers()

        @pipeline
        def _should_fail():
            echo(composed())
def test_dynamic_output_async_non_gen():
    """Awaiting an async solid that returns (rather than yields) a DynamicOutput must raise.

    The solid is a coroutine function so that the object handed to
    ``loop.run_until_complete`` is an awaitable and the async invocation path
    is the one exercised.
    """

    @solid(output_defs=[DynamicOutputDefinition(name="a", is_required=False)])
    async def should_not_work():
        # ``asyncio.sleep`` returns a coroutine; it has to be awaited inside an
        # ``async def`` or it never runs and triggers a "never awaited" warning.
        await asyncio.sleep(0.01)
        return DynamicOutput(value=1, mapping_key="1", output_name="a")

    loop = asyncio.get_event_loop()
    with pytest.raises(
        DagsterInvariantViolationError,
        match="Attempted to return a DynamicOutput from solid. DynamicOuts are only supported "
        "using yield syntax.",
    ):
        loop.run_until_complete(should_not_work())
Exemple #16
0
def test_basic():
    """Execute a single dynamic-output solid and check its events and values."""
    expected_values = {"1": 1, "2": 2}

    @solid(output_defs=[DynamicOutputDefinition()])
    def should_work(_):
        for number in (1, 2):
            yield DynamicOutput(number, mapping_key=str(number))

    solid_result = execute_solid(should_work)
    assert solid_result.success

    # One output event per yielded DynamicOutput, via both accessors.
    assert len(solid_result.get_output_events_for_compute()) == 2
    assert len(solid_result.compute_output_events_dict["result"]) == 2

    assert solid_result.output_values == {"result": expected_values}
    assert solid_result.output_value() == expected_values
Exemple #17
0
def test_multi_output():
    """Execute a solid with two dynamic outputs and one regular output on its own."""
    expected_numbers = {"1": 1, "2": 2}
    expected_letters = {"a": "a", "b": "b", "c": "c"}

    @solid(
        output_defs=[
            DynamicOutputDefinition(int, "numbers"),
            DynamicOutputDefinition(str, "letters"),
            OutputDefinition(str, "wildcard"),
        ]
    )
    def should_work(_):
        # Every mapping key is the string form of the value being emitted.
        for number in (1, 2):
            yield DynamicOutput(number, output_name="numbers", mapping_key=str(number))
        for letter in ("a", "b", "c"):
            yield DynamicOutput(letter, output_name="letters", mapping_key=letter)
        yield Output("*", "wildcard")

    result = execute_solid(should_work)
    assert result.success

    # Event counts, looked up per output name.
    assert len(result.get_output_events_for_compute("numbers")) == 2
    assert len(result.get_output_events_for_compute("letters")) == 3
    assert result.get_output_event_for_compute("wildcard")

    # The same counts through the dict-shaped accessor.
    events_by_output = result.compute_output_events_dict
    assert len(events_by_output["numbers"]) == 2
    assert len(events_by_output["letters"]) == 3
    assert len(events_by_output["wildcard"]) == 1

    assert result.output_values == {
        "numbers": expected_numbers,
        "letters": expected_letters,
        "wildcard": "*",
    }
    assert result.output_value("numbers") == expected_numbers
    assert result.output_value("letters") == expected_letters
    assert result.output_value("wildcard") == "*"
Exemple #18
0
def test_dynamic_with_op():
    """Map an op over a dynamic output inside a graph and execute it in process."""

    @op
    def passthrough(_ctx, _dep=None):
        pass

    @op(output_defs=[DynamicOutputDefinition()])
    def emit():
        for number in (1, 2):
            yield DynamicOutput(number, mapping_key=f"key_{number}")

    @graph
    def test_graph():
        emit().map(passthrough)

    run_result = test_graph.execute_in_process()
    assert run_result.success
Exemple #19
0
def test_multi_composite_out():
    """Mapping a composite's dynamic output inside another map must be rejected."""
    expected_message = "cannot be downstream of more than one dynamic output"

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):

        @composite_solid(output_defs=[DynamicOutputDefinition()])
        def composed_echo():
            return dynamic_solid().map(echo)

        @pipeline
        def _should_fail():
            def _complex(item):
                # ``add`` would depend on both the outer item and the inner
                # mapped value, i.e. on two dynamic outputs.
                def _add_item(y):
                    return add(y, item)

                composed_echo().map(_add_item)

            dynamic_solid().map(_complex)
Exemple #20
0
def test_dynamic_output_gen():
    """Directly invoke a generator solid and inspect the outputs it yields."""

    @solid(
        output_defs=[
            DynamicOutputDefinition(name="a", is_required=False),
            OutputDefinition(name="b", is_required=False),
        ]
    )
    def my_dynamic():
        for number in (1, 2):
            yield DynamicOutput(value=number, mapping_key=str(number), output_name="a")
        yield Output(value="foo", output_name="b")

    first_dynamic, second_dynamic, regular = my_dynamic()

    assert first_dynamic.value == 1
    assert first_dynamic.mapping_key == "1"
    assert second_dynamic.value == 2
    assert second_dynamic.mapping_key == "2"

    assert regular.value == "foo"
Exemple #21
0
def test_composite_multi_out():
    """A composite solid may expose a regular and a dynamic output side by side."""
    composite_outputs = [
        OutputDefinition(Any, "one"),
        DynamicOutputDefinition(Any, "numbers"),
    ]

    @composite_solid(output_defs=composite_outputs)
    def multi_out():
        return {"one": emit_one(), "numbers": dynamic_numbers()}

    @pipeline
    def composite_multi():
        one, numbers = multi_out()
        echo(one)
        numbers.map(echo)

    run_result = execute_pipeline(composite_multi)
    assert run_result.success
Exemple #22
0
def define_inty_job():
    """Build a job whose second op emits two dynamic Int outputs, using the ADLS2 pickle IO manager."""
    adls2_resources = {
        "io_manager": adls2_pickle_io_manager,
        "adls2": adls2_resource,
    }

    @op(output_defs=[OutputDefinition(Int)])
    def return_one():
        return 1

    @op(
        input_defs=[InputDefinition("num", Int)],
        output_defs=[DynamicOutputDefinition(Int)],
    )
    def add_one(num):
        # Same value under two mapping keys.
        for mapping_key in ("foo", "bar"):
            yield DynamicOutput(num + 1, mapping_key)

    @graph
    def basic_external_plan_execution():
        add_one(return_one())

    return basic_external_plan_execution.to_job(resource_defs=adls2_resources)
def test_fan_in_skips():
    """Fan in three collected dynamic branches: populated, empty, and fed by an unemitted output."""

    @solid(
        output_defs=[
            OutputDefinition(name="nums"),
            OutputDefinition(name="empty"),
            OutputDefinition(name="skip", is_required=False),
        ]
    )
    def fork_logic():
        # "skip" is declared optional and never yielded.
        yield Output([1, 2, 3], output_name="nums")
        yield Output([], output_name="empty")

    @solid(output_defs=[DynamicOutputDefinition(int)])
    def emit_dyn(vector):
        yield from (
            DynamicOutput(value=item, mapping_key=f"input_{item}") for item in vector
        )

    @solid
    def total(items):
        return sum(items)

    @pipeline
    def dyn_fork():
        nums, empty, skip = fork_logic()
        branches = (
            ("nums_total", nums),
            ("empty_total", empty),
            ("skip_total", skip),
        )
        partial_totals = [
            total.alias(alias)(emit_dyn(source).map(echo).collect())
            for alias, source in branches
        ]
        total.alias("grand_total")(partial_totals)

    result = execute_pipeline(dyn_fork)
    assert result.success

    # "skip_total" succeeding is arguably wrong: it should be skip.
    for solid_name in ("nums_total", "empty_total", "skip_total", "grand_total"):
        assert result.result_for_solid(solid_name).success

    assert result.result_for_solid("grand_total").output_value() == 6
Exemple #24
0
def test_direct_dep():
    """A dynamic solid may consume a mapped item, but its own output cannot be mapped or collected there."""
    nested_dynamic_error = "cannot be downstream of more than one dynamic output"

    @solid(output_defs=[DynamicOutputDefinition()])
    def dynamic_add(_, x):
        for offset in (1, 2):
            yield DynamicOutput(x + offset, mapping_key=str(offset))

    # Invoking the dynamic solid inside the mapped function is allowed.
    @pipeline
    def _is_fine():
        def _add(item):
            dynamic_add(item)

        dynamic_solid().map(_add)

    # Mapping over its output from within the outer map is not.
    with pytest.raises(DagsterInvalidDefinitionError, match=nested_dynamic_error):

        @pipeline
        def _should_fail():
            def _add_echo(item):
                dynamic_add(item).map(echo)

            dynamic_solid().map(_add_echo)

    # Passing the dynamic solid itself to ``map`` is allowed.
    @pipeline
    def _is_fine():  # pylint: disable=function-redefined
        dynamic_solid().map(dynamic_add)

    # Collecting the result of that map is not.
    with pytest.raises(DagsterInvalidDefinitionError, match=nested_dynamic_error):

        @pipeline
        def _should_fail():
            echo(dynamic_solid().map(dynamic_add).collect())

# Pass-through solid: returns its integer input ``x`` unchanged. The first
# parameter (``_``) is not used by the body.
@solid
def echo(_, x: int) -> int:
    return x


# Returns the "range" value from the solid config; the schema makes it
# optional with a default of 3.
@solid(config_schema={"range": Field(int, is_required=False, default_value=3)})
def num_range(context) -> int:
    solid_config = context.solid_config
    return solid_config["range"]


# Yields one DynamicOutput per integer in range(num), keyed by the string
# form of the integer. Raises before yielding anything when the "fail"
# config flag is set.
@solid(
    tags={"first": "1"},
    config_schema={
        "fail": Field(bool, is_required=False, default_value=False),
    },
    output_defs=[DynamicOutputDefinition()],
)
def emit(context, num: int = 3):
    should_fail = context.solid_config["fail"]
    if should_fail:
        raise Exception("FAILURE")

    yield from (
        DynamicOutput(value=index, mapping_key=str(index)) for index in range(num)
    )


@solid
def sum_numbers(_, nums):
Exemple #26
0
# pylint: disable=unused-argument, no-value-for-parameter

# start_marker
import os
from typing import List

from dagster import DynamicOutput, DynamicOutputDefinition, Field, pipeline, solid
from dagster.utils import file_relative_path


@solid(
    output_defs=[DynamicOutputDefinition(str)],
    config_schema={
        "path": Field(str, default_value=file_relative_path(__file__, "sample")),
    },
)
def files_in_directory(context):
    # Only the first entry produced by os.walk is consumed, so only the files
    # listed for the configured path itself are emitted.
    walker = os.walk(context.solid_config["path"])
    top_dir, _subdirs, file_names = next(walker)
    for file_name in file_names:
        # create a mapping key from the file name
        safe_key = file_name.replace(".", "_").replace("-", "_")
        yield DynamicOutput(
            value=os.path.join(top_dir, file_name),
            mapping_key=safe_key,
        )


@solid
def process_file(path: str) -> int:
    # simple example of calculating size
Exemple #27
0
    """
    id_range, metadata_entries = _id_range_for_time(
        context.resources.partition_start,
        context.resources.partition_end,
        context.resources.hn_client,
    )
    yield Output(id_range, metadata_entries=metadata_entries)


@solid(
    config_schema={"batch_size": Field(int, is_required=False)},
    required_resource_keys={"hn_client", "partition_start", "partition_end"},
    output_defs=[
        DynamicOutputDefinition(
            Tuple[int, int],
            description=
            "A dynamic set of id ranges that cover the range for the partition, divided by batch_size config if provided.",
        )
    ],
)
def dynamic_id_ranges_for_time(context):
    """
    For the configured partition start/end, searches for the range of ids that were created in that time
    """
    id_range, metadata_entries = _id_range_for_time(
        context.resources.partition_start,
        context.resources.partition_end,
        context.resources.hn_client,
    )

    start_id, end_id = id_range
Exemple #28
0
def test_tags_to_dynamic_plan():
    """User-defined k8s config tags must reach both the dynamic step and each mapped step of the plan."""
    multiply_resources = {
        "requests": {"cpu": "500m", "memory": "128Mi"},
        "limits": {"cpu": "1000m", "memory": "1Gi"},
    }
    emit_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }

    def _k8s_tags(resources):
        # Wrap a resources dict in the tag structure used by both solids.
        return {
            USER_DEFINED_K8S_CONFIG_KEY: {
                "container_config": {"resources": resources},
            }
        }

    def _assert_step_resources(step, expected):
        # Read the k8s config back from the step's tags and compare each of
        # the four request/limit fields.
        k8s_config = get_user_defined_k8s_config(step.tags)
        assert k8s_config.container_config
        assert k8s_config.container_config["resources"]

        resources = k8s_config.container_config["resources"]
        assert resources["requests"]["cpu"] == expected["requests"]["cpu"]
        assert resources["requests"]["memory"] == expected["requests"]["memory"]
        assert resources["limits"]["cpu"] == expected["limits"]["cpu"]
        assert resources["limits"]["memory"] == expected["limits"]["memory"]

    @solid(tags=_k8s_tags(multiply_resources))
    def multiply_inputs(_, x):
        return 2 * x

    @solid(
        tags=_k8s_tags(emit_resources),
        output_defs=[DynamicOutputDefinition()],
    )
    def emit(_):
        yield from (
            DynamicOutput(value=index, mapping_key=str(index)) for index in range(3)
        )

    @pipeline
    def k8s_ready():
        return emit().map(multiply_inputs)

    # Supply the mapping keys of emit's "result" output up front so the plan
    # contains one multiply_inputs step per key.
    known_state = KnownExecutionState(
        {},
        {
            emit.name: {"result": ["0", "1", "2"]},
        },
    )
    plan = create_execution_plan(k8s_ready, known_state=known_state)

    _assert_step_resources(plan.get_step_by_key(emit.name), emit_resources)

    for mapping_key in range(3):
        mapped_step = plan.get_step_by_key(f"{multiply_inputs.name}[{mapping_key}]")
        _assert_step_resources(mapped_step, multiply_resources)