Ejemplo n.º 1
0
def _define_dynamic_job(launch_initial, launch_final):
    """Build a job that fans out dynamically and then sums the results.

    Args:
        launch_initial: When truthy, the ``initial_launcher`` resource is
            ``local_external_step_launcher``; otherwise a mock resource.
        launch_final: Same choice, applied to the ``final_launcher`` resource.

    Returns:
        The ``my_job`` job definition.
    """
    from typing import List

    def _pick_launcher(enabled):
        # The mock resource is only constructed when the real launcher is
        # not requested.
        if enabled:
            return local_external_step_launcher
        return ResourceDefinition.mock_resource()

    resources = {
        "initial_launcher": _pick_launcher(launch_initial),
        "final_launcher": _pick_launcher(launch_final),
        "io_manager": fs_io_manager,
    }

    @op(out=DynamicOut(int), required_resource_keys={"initial_launcher"})
    def dynamic_outs():
        # Emits 0, 1 and 2 under the mapping keys num_0, num_1 and num_2.
        for idx in range(3):
            yield DynamicOutput(value=idx, mapping_key=f"num_{idx}")

    @op
    def increment(i):
        return i + 1

    @op(required_resource_keys={"final_launcher"})
    def total(ins: List[int]):
        return sum(ins)

    @job(resource_defs=resources)
    def my_job():
        # Map every dynamic output through increment, then sum the collection.
        incremented = dynamic_outs().map(increment)
        total(incremented.collect())

    return my_job
Ejemplo n.º 2
0
def define_dynamic_job():
    """Build a dynamic fan-out job whose ops require step-launcher resources.

    ``dynamic_outs`` requires ``first_step_launcher`` and ``increment``
    requires ``second_step_launcher``; both keys are bound to
    ``local_external_step_launcher``.

    Returns:
        The ``my_job`` job definition.
    """
    from typing import List

    resources = {
        "first_step_launcher": local_external_step_launcher,
        "second_step_launcher": local_external_step_launcher,
        "io_manager": fs_io_manager,
    }

    @op(out=DynamicOut(int), required_resource_keys={"first_step_launcher"})
    def dynamic_outs():
        # Emits 0, 1 and 2 under the mapping keys num_0, num_1 and num_2.
        for idx in range(3):
            yield DynamicOutput(value=idx, mapping_key=f"num_{idx}")

    @op(required_resource_keys={"second_step_launcher"})
    def increment(i):
        return i + 1

    @op
    def total(ins: List[int]):
        return sum(ins)

    @job(resource_defs=resources)
    def my_job():
        # Map every dynamic output through increment, then sum the collection.
        incremented = dynamic_outs().map(increment)
        total(incremented.collect())

    return my_job
Ejemplo n.º 3
0
def test_dynamic(gcs_bucket):
    """Run a mapped dynamic job with the GCS pickle IO manager configured.

    Args:
        gcs_bucket: Value passed as the IO manager's ``gcs_bucket`` config.
    """
    @op(out=DynamicOut())
    def numbers():
        # Three dynamic outputs, each keyed by the string form of its value.
        for n in range(3):
            yield DynamicOutput(n, mapping_key=str(n))

    @op
    def echo(_, x):
        return x

    resource_defs = {
        "io_manager": gcs_pickle_io_manager,
        "gcs": mock_gcs_resource,
    }

    @job(resource_defs=resource_defs)
    def dynamic():
        numbers().map(echo)  # pylint: disable=no-member

    io_manager_config = {"config": {"gcs_bucket": gcs_bucket}}
    run_config = {"resources": {"io_manager": io_manager_config}}

    result = dynamic.execute_in_process(run_config=run_config)
    assert result.success
Ejemplo n.º 4
0
def test_log_metadata_after_dynamic_output():
    """Logging metadata for an already-yielded mapping key must raise."""
    @op(out=DynamicOut())
    def the_op(context):
        yield DynamicOutput(1, mapping_key="one")
        # Too late: the output for mapping key "one" was yielded just above.
        context.add_output_metadata({"foo": "bar"}, mapping_key="one")

    expected_message = (
        "In op 'the_op', attempted to log output metadata for output 'result' "
        "with mapping_key 'one' which has already been yielded. "
        "Metadata must be logged before the output is yielded."
    )

    with pytest.raises(DagsterInvariantViolationError,
                       match=expected_message):
        list(the_op(build_op_context()))
Ejemplo n.º 5
0
def test_basic_op():
    """Execute an op with two dynamic outputs and check events and values."""
    @op(out=DynamicOut())
    def should_work(_):
        # Yields 1 and 2, each keyed by its own string representation.
        for value in (1, 2):
            yield DynamicOutput(value, mapping_key=str(value))

    expected_values = {"1": 1, "2": 2}

    result = execute_solid(should_work)

    assert result.success
    assert len(result.get_output_events_for_compute()) == 2
    assert len(result.compute_output_events_dict["result"]) == 2
    assert result.output_values == {"result": expected_values}
    assert result.output_value() == expected_values
Ejemplo n.º 6
0
def test_log_metadata_multiple_dynamic_outputs():
    """Check metadata logged per mapping key shows up on the matching events."""
    @op(out={"out1": DynamicOut(), "out2": DynamicOut()})
    def the_op(context):
        # Metadata is always logged before the corresponding output is
        # yielded; "two" and "three" are both logged ahead of either yield.
        context.add_output_metadata(
            {"one": "one"}, output_name="out1", mapping_key="one")
        yield DynamicOutput(value=1, output_name="out1", mapping_key="one")

        context.add_output_metadata(
            {"two": "two"}, output_name="out1", mapping_key="two")
        context.add_output_metadata(
            {"three": "three"}, output_name="out2", mapping_key="three")
        yield DynamicOutput(value=2, output_name="out1", mapping_key="two")
        yield DynamicOutput(value=3, output_name="out2", mapping_key="three")

        context.add_output_metadata(
            {"four": "four"}, output_name="out2", mapping_key="four")
        yield DynamicOutput(value=4, output_name="out2", mapping_key="four")

    result = execute_op_in_graph(the_op)
    assert result.success

    events = result.all_node_events
    # The events at positions 1, 3, 5 and 7 are the ones inspected, in the
    # order the outputs were yielded.
    expected_keys = ("one", "two", "three", "four")
    for position, key in zip((1, 3, 5, 7), expected_keys):
        output_data = events[position].event_specific_data
        assert output_data.mapping_key == key
        assert output_data.metadata_entries[0].label == key
Ejemplo n.º 7
0
def test_op_selection_on_dynamic_orchestration():
    """Run a dynamic graph in full, then again with an op selection.

    The full run is expected to produce 60 from ``echo``; the run with
    ``op_selection=["emit*", "emit_ten"]`` is expected to produce 20.
    """
    @op
    def num_range():
        return 3

    @op(out=DynamicOut())
    def emit(num: int = 2):
        # One dynamic output per integer below ``num``, keyed by its string.
        for value in range(num):
            yield DynamicOutput(value=value, mapping_key=str(value))

    @op
    def emit_ten(_):
        return 10

    @op
    def multiply_by_two(context, y):
        doubled = y * 2
        context.log.info("multiply_by_two is returning " + str(doubled))
        return doubled

    @op
    def multiply_inputs(context, y, ten):
        product = y * ten
        context.log.info("multiply_inputs is returning " + str(product))
        return product

    @op
    def sum_numbers(_, nums):
        return sum(nums)

    @op
    def echo(_, x: int) -> int:
        return x

    @graph
    def dynamic_graph():
        def _per_item(num):
            return multiply_by_two(multiply_inputs(num, emit_ten()))

        dynamic = emit(num_range()).map(_per_item)
        # test transitive downstream of collect
        echo(sum_numbers(dynamic.collect()))

    full_job = dynamic_graph.to_job()

    full_result = full_job.execute_in_process()
    assert full_result.success
    assert full_result.output_for_node("echo") == 60

    selected_result = full_job.execute_in_process(
        op_selection=["emit*", "emit_ten"],
    )
    assert selected_result.success
    assert selected_result.output_for_node("echo") == 20
Ejemplo n.º 8
0
def test_log_metadata_multiple_dynamic_outputs():
    """Invoke the op directly and read back the metadata from its context."""
    @op(out={"out1": DynamicOut(), "out2": DynamicOut()})
    def the_op(context):
        # Metadata is always logged before the corresponding output is
        # yielded; "two" and "three" are both logged ahead of either yield.
        context.add_output_metadata(
            {"one": "one"}, output_name="out1", mapping_key="one")
        yield DynamicOutput(value=1, output_name="out1", mapping_key="one")

        context.add_output_metadata(
            {"two": "two"}, output_name="out1", mapping_key="two")
        context.add_output_metadata(
            {"three": "three"}, output_name="out2", mapping_key="three")
        yield DynamicOutput(value=2, output_name="out1", mapping_key="two")
        yield DynamicOutput(value=3, output_name="out2", mapping_key="three")

        context.add_output_metadata(
            {"four": "four"}, output_name="out2", mapping_key="four")
        yield DynamicOutput(value=4, output_name="out2", mapping_key="four")

    context = build_op_context()

    events = list(the_op(context))
    assert len(events) == 4

    # (output name, mapping key) pairs; each key doubles as the metadata
    # label and value that was logged for it.
    expectations = (
        ("out1", "one"),
        ("out1", "two"),
        ("out2", "three"),
        ("out2", "four"),
    )
    for output_name, key in expectations:
        logged = context.get_output_metadata(output_name, mapping_key=key)
        assert logged == {key: key}
Ejemplo n.º 9
0
def test_dynamic_output_values():
    """Outputs of a dynamic op and its mapped op are dicts keyed by mapping key."""
    @op(out=DynamicOut())
    def two_outs():
        yield DynamicOutput(1, "a")
        yield DynamicOutput(2, "b")

    @op
    def add_one(x):
        return x + 1

    @graph
    def a():
        two_outs().map(add_one)

    expected_by_node = {
        "two_outs": {"a": 1, "b": 2},
        "add_one": {"a": 2, "b": 3},
    }

    result = a.execute_in_process()

    assert result.success
    for node_name, expected in expected_by_node.items():
        assert result.output_for_node(node_name) == expected
Ejemplo n.º 10
0
def test_dynamic_memoization_error():
    """Combining a version strategy with dynamic orchestration must raise."""
    class MyVersionStrategy(VersionStrategy):
        # Every solid and resource gets the same constant version.
        def get_solid_version(self, _):
            return "foo"

        def get_resource_version(self, _):
            return "foo"

    @op(out=DynamicOut())
    def emit():
        yield DynamicOutput(1, mapping_key="one")
        yield DynamicOutput(2, mapping_key="two")

    @op
    def return_input(x):
        return x

    @graph
    def dynamic_graph():
        # Map, then feed the collected results to a downstream op.
        mapped = emit().map(return_input)  # pylint: disable=no-member
        return_input(mapped.collect())

    @graph
    def just_mapping_graph():
        emit().map(return_input)  # pylint: disable=no-member

    expected_error = (
        "Attempted to use memoization with dynamic orchestration, "
        "which is not yet supported."
    )

    with instance_for_test() as instance:
        for cur_graph in (dynamic_graph, just_mapping_graph):
            # Both building the job and executing it happen inside the
            # raises block, so the error may come from either step.
            with pytest.raises(DagsterInvariantViolationError,
                               match=expected_error):
                my_job = cur_graph.to_job(
                    version_strategy=MyVersionStrategy(),
                    resource_defs={
                        "io_manager": versioned_filesystem_io_manager,
                    },
                )
                my_job.execute_in_process(instance=instance)
Ejemplo n.º 11
0
def test_collect_and_map():
    """Use the collected form of a dynamic output inside a map over itself."""
    @op(out=DynamicOut())
    def dyn_vals():
        # Emits 0, 1 and 2 under the mapping keys num_0, num_1 and num_2.
        for idx in range(3):
            yield DynamicOutput(idx, mapping_key=f"num_{idx}")

    @op
    def echo(x):
        return x

    @op
    def add_each(vals, x):
        return [item + x for item in vals]

    @graph
    def both_w_echo():
        d1 = dyn_vals()

        def _add_to_all(x):
            # Each mapped value is added to every element of the collection.
            return add_each(echo(d1.collect()), x)

        mapped = d1.map(_add_to_all)
        echo.alias("final")(mapped.collect())

    result = both_w_echo.execute_in_process()
    expected = [[0, 1, 2], [1, 2, 3], [2, 3, 4]]
    assert result.output_for_node("final") == expected
Ejemplo n.º 12
0
def test_metadata_dynamic_outputs():
    """Check materialization metadata from both DynamicOutput and the IO manager."""
    class DummyIOManager(IOManager):
        """In-memory IO manager keyed by the output identifier tuple."""

        def __init__(self):
            self.values = {}

        def handle_output(self, context, obj):
            self.values[tuple(context.get_output_identifier())] = obj

            yield MetadataEntry("handle_output",
                                value="I come from handle_output")

        def load_input(self, context):
            identifier = context.upstream_output.get_output_identifier()
            return self.values[tuple(identifier)]

    @op(out=DynamicOut(asset_key=AssetKey(["foo"])))
    def the_op():
        yield DynamicOutput(1, mapping_key="one", metadata={"one": "blah"})
        yield DynamicOutput(2, mapping_key="two", metadata={"two": "blah"})

    @graph
    def the_graph():
        the_op()

    resources = {"io_manager": DummyIOManager()}
    result = the_graph.execute_in_process(resources=resources)

    materializations = result.asset_materializations_for_node("the_op")
    assert len(materializations) == 2

    # The second metadata entry of every materialization is the one yielded
    # by DummyIOManager.handle_output.
    for materialization in materializations:
        io_manager_entry = materialization.metadata_entries[1]
        assert io_manager_entry.label == "handle_output"
        assert io_manager_entry.entry_data.text == "I come from handle_output"

    # The first entry carries the metadata attached to the DynamicOutput.
    for materialization, label in zip(materializations, ("one", "two")):
        assert materialization.metadata_entries[0].label == label
Ejemplo n.º 13
0
# pylint: disable=unused-argument, no-value-for-parameter, no-member

# start_marker
import os
from typing import List

from dagster import DynamicOut, DynamicOutput, Field, job, op
from dagster.utils import file_relative_path


@op(
    config_schema={
        "path": Field(str,
                      default_value=file_relative_path(__file__, "sample"))
    },
    out=DynamicOut(str),
)
def files_in_directory(context):
    """Yield one dynamic output per file directly inside the configured path.

    The directory is read from ``context.op_config["path"]``. Only the top
    level of the directory is listed; subdirectories are not descended into.

    Yields:
        DynamicOutput: the joined path of each file, with a mapping key
        derived from the file name.

    Raises:
        FileNotFoundError: if the configured path cannot be listed as a
            directory (``os.walk`` yields nothing for it).
    """
    path = context.op_config["path"]
    # os.walk yields nothing for a missing or non-directory path. A bare
    # next() would then raise StopIteration inside this generator, which
    # Python turns into an opaque RuntimeError (PEP 479).
    top_level = next(os.walk(path), None)
    if top_level is None:
        raise FileNotFoundError(
            f"Cannot list files: '{path}' is not a readable directory")
    dirname, _, filenames = top_level
    for file in filenames:
        yield DynamicOutput(
            value=os.path.join(dirname, file),
            # create a mapping key from the file name
            mapping_key=file.replace(".", "_").replace("-", "_"),
        )


@op
def process_file(path: str) -> int:
    # simple example of calculating size
Ejemplo n.º 14
0
    def emit():
        yield DynamicOutput(1, mapping_key="key_1")
        yield DynamicOutput(2, mapping_key="key_2")

    @graph
    def test_graph():
        emit().map(passthrough)

    assert test_graph.execute_in_process().success


class DangerNoodle(NamedTuple):
    # Single-field record; ``spawn`` creates instances of it and later counts
    # live objects via objgraph.by_type("DangerNoodle"), so the class name
    # must stay in sync with that string.
    x: int


@op(out={"items": DynamicOut(), "refs": Out()})
def spawn():
    # Emit ten DangerNoodle instances as dynamic "items" outputs. Each one is
    # constructed inline in the DynamicOutput call, so this function binds no
    # local name to it.
    for i in range(10):
        yield DynamicOutput(DangerNoodle(i),
                            output_name="items",
                            mapping_key=f"num_{i}")

    # Force a garbage collection, then report how many DangerNoodle objects
    # objgraph still finds as the "refs" output.
    gc.collect()
    yield Output(len(objgraph.by_type("DangerNoodle")), output_name="refs")


# Job consisting solely of the ``spawn`` op.
@job()
def no_leaks_plz():
    spawn()

Ejemplo n.º 15
0
# Op returning ``a + b``.
@op
def add(a, b):
    return a + b


# Identity op: returns its input unchanged.
@op
def echo(x):
    return x


# Op returning the sum of the iterable ``results``.
@op
def process(results):
    return sum(results)


# Op emitting 0 and 1 as dynamic outputs under mapping keys num_0 and num_1.
@op(out=DynamicOut())
def dynamic_values():
    for i in range(2):
        yield DynamicOutput(i, mapping_key=f"num_{i}")


@op(
    out={
        "values": DynamicOut(),
        "negatives": DynamicOut(),
    }, )
def multiple_dynamic_values():
    for i in range(2):
        yield DynamicOutput(i, output_name="values", mapping_key=f"num_{i}")
        yield DynamicOutput(-i,
                            output_name="negatives",
Ejemplo n.º 16
0
import time

from dagster import DynamicOut, Out, job, op
from dagster.core.definitions.events import DynamicOutput, Output

from .test_step_delegating_executor import test_step_delegating_executor


# Op emitting "a" and "b" as dynamic string outputs; each string is passed as
# both positional arguments of DynamicOutput.
@op(out=DynamicOut(str))
def dynamic():
    for x in ["a", "b"]:
        yield DynamicOutput(x, x)


# Op whose output is declared with is_required=False: it yields its input
# only when the input equals "a" and yields nothing otherwise.
@op(out=Out(is_required=False))
def optional(x):
    if x == "a":
        yield Output(x)


# Op that logs its input at info level and returns nothing.
@op
def final(context, xs):
    context.log.info(xs)


def define_dynamic_skipping_job():
    # Defines a job, run with test_step_delegating_executor, that maps the
    # dynamic outputs through ``optional`` and then maps those results
    # through ``final``.
    # NOTE(review): no return statement is visible in this excerpt, so as
    # shown the function returns None - confirm whether the definition
    # continues beyond this snippet.
    @job(executor_def=test_step_delegating_executor)
    def dynamic_skipping_job():
        xs = dynamic().map(optional)
        xs.map(final)
Ejemplo n.º 17
0
        if y == 2 and current_run.parent_run_id is None:
            raise Exception()
    context.log.info("echo is returning " + str(y * ten))
    return y * ten


# Op returning the constant 10.
@op
def emit_ten():
    return 10


# Op returning ``base`` plus the sum of the iterable ``nums``.
@op
def sum_numbers(base, nums):
    return base + sum(nums)


# Op emitting 0, 1 and 2 as dynamic outputs, each keyed by its string form.
@op(out=DynamicOut())
def emit():
    for i in range(3):
        yield DynamicOutput(value=i, mapping_key=str(i))


@graph
def dynamic():
    # pylint: disable=no-member
    # Each emitted value goes through multiply_inputs (with emit_ten's output)
    # and then multiply_by_two; both ops are defined outside this excerpt.
    result = emit().map(lambda num: multiply_by_two(multiply_inputs(num, emit_ten())))
    # The collected results are summed on top of emit_ten's output, and that
    # total is doubled by an aliased copy of multiply_by_two.
    multiply_by_two.alias("double_total")(sum_numbers(emit_ten(), result.collect()))


# Job built from the ``dynamic`` graph.
dynamic_job = dynamic.to_job()