def _define_dynamic_job(launch_initial, launch_final):
    """Build a dynamic-output job whose first and last steps optionally run
    via the local external step launcher.

    Args:
        launch_initial: if truthy, the dynamic-output op uses
            ``local_external_step_launcher``; otherwise a mocked resource.
        launch_final: the same switch for the terminal ``total`` op.

    Returns:
        The constructed ``my_job`` job definition.
    """
    from typing import List

    # Choose a real launcher or a mock, per flag.
    if launch_initial:
        initial_launcher = local_external_step_launcher
    else:
        initial_launcher = ResourceDefinition.mock_resource()

    if launch_final:
        final_launcher = local_external_step_launcher
    else:
        final_launcher = ResourceDefinition.mock_resource()

    @op(required_resource_keys={"initial_launcher"}, out=DynamicOut(int))
    def dynamic_outs():
        # Fan out three branches keyed num_0 .. num_2.
        for idx in range(3):
            yield DynamicOutput(value=idx, mapping_key=f"num_{idx}")

    @op
    def increment(i):
        return i + 1

    @op(required_resource_keys={"final_launcher"})
    def total(ins: List[int]):
        return sum(ins)

    @job(
        resource_defs={
            "initial_launcher": initial_launcher,
            "final_launcher": final_launcher,
            "io_manager": fs_io_manager,
        }
    )
    def my_job():
        all_incs = dynamic_outs().map(increment)
        total(all_incs.collect())

    return my_job
def define_dynamic_job():
    """Build a dynamic-output job whose first and second steps both run via
    the local external step launcher.

    Returns:
        The constructed ``my_job`` job definition.
    """
    from typing import List

    @op(required_resource_keys={"first_step_launcher"}, out=DynamicOut(int))
    def dynamic_outs():
        # Fan out three branches keyed num_0 .. num_2.
        for idx in range(3):
            yield DynamicOutput(value=idx, mapping_key=f"num_{idx}")

    @op(required_resource_keys={"second_step_launcher"})
    def increment(i):
        return i + 1

    @op
    def total(ins: List[int]):
        return sum(ins)

    @job(
        resource_defs={
            "first_step_launcher": local_external_step_launcher,
            "second_step_launcher": local_external_step_launcher,
            "io_manager": fs_io_manager,
        }
    )
    def my_job():
        all_incs = dynamic_outs().map(increment)
        total(all_incs.collect())

    return my_job
def test_dynamic(gcs_bucket):
    """Dynamic outputs should round-trip through the GCS pickle IO manager."""

    @op(out=DynamicOut())
    def numbers():
        for n in range(3):
            yield DynamicOutput(n, mapping_key=str(n))

    @op
    def echo(_, x):
        return x

    @job(
        resource_defs={
            "io_manager": gcs_pickle_io_manager,
            "gcs": mock_gcs_resource,
        }
    )
    def dynamic():
        numbers().map(echo)  # pylint: disable=no-member

    run_config = {
        "resources": {"io_manager": {"config": {"gcs_bucket": gcs_bucket}}}
    }
    result = dynamic.execute_in_process(run_config=run_config)
    assert result.success
def test_log_metadata_after_dynamic_output():
    """Logging output metadata for a mapping key that was already yielded
    must raise DagsterInvariantViolationError."""

    @op(out=DynamicOut())
    def the_op(context):
        yield DynamicOutput(1, mapping_key="one")
        # Too late: the "one" output has already been yielded.
        context.add_output_metadata({"foo": "bar"}, mapping_key="one")

    with pytest.raises(
        DagsterInvariantViolationError,
        match="In op 'the_op', attempted to log output metadata for output 'result' with mapping_key 'one' which has already been yielded. Metadata must be logged before the output is yielded.",
    ):
        list(the_op(build_op_context()))
def test_basic_op():
    """A solid with a single DynamicOut yields one output event per
    DynamicOutput, and output values are keyed by mapping key."""

    @op(out=DynamicOut())
    def should_work(_):
        yield DynamicOutput(1, mapping_key="1")
        yield DynamicOutput(2, mapping_key="2")

    result = execute_solid(should_work)
    assert result.success
    # One output event for each yielded DynamicOutput.
    assert len(result.get_output_events_for_compute()) == 2
    assert len(result.compute_output_events_dict["result"]) == 2
    expected = {"1": 1, "2": 2}
    assert result.output_values == {"result": expected}
    assert result.output_value() == expected
def test_log_metadata_multiple_dynamic_outputs():
    """Metadata logged before each yield is attached to the matching
    (output_name, mapping_key) output event, in yield order."""

    @op(out={"out1": DynamicOut(), "out2": DynamicOut()})
    def the_op(context):
        context.add_output_metadata({"one": "one"}, output_name="out1", mapping_key="one")
        yield DynamicOutput(value=1, output_name="out1", mapping_key="one")
        context.add_output_metadata({"two": "two"}, output_name="out1", mapping_key="two")
        context.add_output_metadata({"three": "three"}, output_name="out2", mapping_key="three")
        yield DynamicOutput(value=2, output_name="out1", mapping_key="two")
        yield DynamicOutput(value=3, output_name="out2", mapping_key="three")
        context.add_output_metadata({"four": "four"}, output_name="out2", mapping_key="four")
        yield DynamicOutput(value=4, output_name="out2", mapping_key="four")

    result = execute_op_in_graph(the_op)
    assert result.success
    events = result.all_node_events
    # Output events land at odd indices of the event stream, in yield order;
    # each carries its mapping key as the single metadata label.
    for event_idx, key in ((1, "one"), (3, "two"), (5, "three"), (7, "four")):
        data = events[event_idx].event_specific_data
        assert data.mapping_key == key
        assert data.metadata_entries[0].label == key
def test_op_selection_on_dynamic_orchestration():
    """op_selection on a dynamic graph prunes upstream ops, so pruned inputs
    fall back to their declared defaults."""

    @op
    def num_range():
        return 3

    @op(out=DynamicOut())
    def emit(num: int = 2):
        # The default of 2 kicks in when num_range is pruned by op_selection.
        for i in range(num):
            yield DynamicOutput(value=i, mapping_key=str(i))

    @op
    def emit_ten(_):
        return 10

    @op
    def multiply_by_two(context, y):
        context.log.info("multiply_by_two is returning " + str(y * 2))
        return y * 2

    @op
    def multiply_inputs(context, y, ten):
        context.log.info("multiply_inputs is returning " + str(y * ten))
        return y * ten

    @op
    def sum_numbers(_, nums):
        return sum(nums)

    @op
    def echo(_, x: int) -> int:
        return x

    @graph
    def dynamic_graph():
        mapped = emit(num_range()).map(
            lambda num: multiply_by_two(multiply_inputs(num, emit_ten()))
        )
        total = sum_numbers(mapped.collect())
        echo(total)  # test transitive downstream of collect

    full_job = dynamic_graph.to_job()

    # Full run: (0 + 1 + 2) * 10 * 2 == 60.
    result = full_job.execute_in_process()
    assert result.success
    assert result.output_for_node("echo") == 60

    # num_range pruned -> emit defaults to 2 -> (0 + 1) * 10 * 2 == 20.
    result = full_job.execute_in_process(op_selection=["emit*", "emit_ten"])
    assert result.success
    assert result.output_for_node("echo") == 20
def test_log_metadata_multiple_dynamic_outputs():
    """Metadata logged before each yield is retrievable per
    (output_name, mapping_key) from the invocation context."""

    @op(out={"out1": DynamicOut(), "out2": DynamicOut()})
    def the_op(context):
        context.add_output_metadata({"one": "one"}, output_name="out1", mapping_key="one")
        yield DynamicOutput(value=1, output_name="out1", mapping_key="one")
        context.add_output_metadata({"two": "two"}, output_name="out1", mapping_key="two")
        context.add_output_metadata({"three": "three"}, output_name="out2", mapping_key="three")
        yield DynamicOutput(value=2, output_name="out1", mapping_key="two")
        yield DynamicOutput(value=3, output_name="out2", mapping_key="three")
        context.add_output_metadata({"four": "four"}, output_name="out2", mapping_key="four")
        yield DynamicOutput(value=4, output_name="out2", mapping_key="four")

    context = build_op_context()
    events = list(the_op(context))
    # One event per yielded DynamicOutput.
    assert len(events) == 4
    expected = {
        ("out1", "one"): {"one": "one"},
        ("out1", "two"): {"two": "two"},
        ("out2", "three"): {"three": "three"},
        ("out2", "four"): {"four": "four"},
    }
    for (out_name, key), meta in expected.items():
        assert context.get_output_metadata(out_name, mapping_key=key) == meta
def test_dynamic_output_values():
    """output_for_node returns a dict keyed by mapping key for dynamic steps."""

    @op(out=DynamicOut())
    def two_outs():
        # Two branches, keyed "a" and "b".
        for key, val in (("a", 1), ("b", 2)):
            yield DynamicOutput(val, key)

    @op
    def add_one(x):
        return x + 1

    @graph
    def a():
        two_outs().map(add_one)

    result = a.execute_in_process()
    assert result.success
    assert result.output_for_node("two_outs") == {"a": 1, "b": 2}
    assert result.output_for_node("add_one") == {"a": 2, "b": 3}
def test_dynamic_memoization_error():
    """Combining memoization (a version strategy) with dynamic orchestration
    is unsupported and must raise, whether or not collect() is used."""

    class MyVersionStrategy(VersionStrategy):
        def get_solid_version(self, _):
            return "foo"

        def get_resource_version(self, _):
            return "foo"

    @op(out=DynamicOut())
    def emit():
        yield DynamicOutput(1, mapping_key="one")
        yield DynamicOutput(2, mapping_key="two")

    @op
    def return_input(x):
        return x

    @graph
    def dynamic_graph():
        x = emit().map(return_input)  # pylint: disable=no-member
        return_input(x.collect())

    @graph
    def just_mapping_graph():
        emit().map(return_input)  # pylint: disable=no-member

    with instance_for_test() as instance:
        for cur_graph in (dynamic_graph, just_mapping_graph):
            with pytest.raises(
                DagsterInvariantViolationError,
                match="Attempted to use memoization with dynamic orchestration, which is not yet supported.",
            ):
                memoized_job = cur_graph.to_job(
                    version_strategy=MyVersionStrategy(),
                    resource_defs={"io_manager": versioned_filesystem_io_manager},
                )
                memoized_job.execute_in_process(instance=instance)
def test_collect_and_map():
    """A collect() of a dynamic output can feed back into a map over the
    same dynamic output."""

    @op(out=DynamicOut())
    def dyn_vals():
        for i in range(3):
            yield DynamicOutput(i, mapping_key=f"num_{i}")

    @op
    def echo(x):
        return x

    @op
    def add_each(vals, x):
        return [v + x for v in vals]

    @graph
    def both_w_echo():
        dyn = dyn_vals()
        # Each branch adds its own value to the full collected list.
        mapped = dyn.map(lambda x: add_each(echo(dyn.collect()), x))
        echo.alias("final")(mapped.collect())

    result = both_w_echo.execute_in_process()
    assert result.output_for_node("final") == [[0, 1, 2], [1, 2, 3], [2, 3, 4]]
def test_metadata_dynamic_outputs():
    """Dynamic outputs with an asset_key yield one materialization per
    mapping key, carrying both the output's metadata and any metadata
    yielded by the IO manager's handle_output."""

    class DummyIOManager(IOManager):
        def __init__(self):
            self.values = {}

        def handle_output(self, context, obj):
            keys = tuple(context.get_output_identifier())
            self.values[keys] = obj
            # Extra metadata contributed by the IO manager itself.
            yield MetadataEntry("handle_output", value="I come from handle_output")

        def load_input(self, context):
            keys = tuple(context.upstream_output.get_output_identifier())
            return self.values[keys]

    @op(out=DynamicOut(asset_key=AssetKey(["foo"])))
    def the_op():
        yield DynamicOutput(1, mapping_key="one", metadata={"one": "blah"})
        yield DynamicOutput(2, mapping_key="two", metadata={"two": "blah"})

    @graph
    def the_graph():
        the_op()

    result = the_graph.execute_in_process(resources={"io_manager": DummyIOManager()})
    materializations = result.asset_materializations_for_node("the_op")
    assert len(materializations) == 2
    # Entry 1 on every materialization comes from handle_output.
    for mat in materializations:
        handle_entry = mat.metadata_entries[1]
        assert handle_entry.label == "handle_output"
        assert handle_entry.entry_data.text == "I come from handle_output"
    # Entry 0 is the per-output metadata, in yield order.
    assert materializations[0].metadata_entries[0].label == "one"
    assert materializations[1].metadata_entries[0].label == "two"
# pylint: disable=unused-argument, no-value-for-parameter, no-member # start_marker import os from typing import List from dagster import DynamicOut, DynamicOutput, Field, job, op from dagster.utils import file_relative_path @op( config_schema={ "path": Field(str, default_value=file_relative_path(__file__, "sample")) }, out=DynamicOut(str), ) def files_in_directory(context): path = context.op_config["path"] dirname, _, filenames = next(os.walk(path)) for file in filenames: yield DynamicOutput( value=os.path.join(dirname, file), # create a mapping key from the file name mapping_key=file.replace(".", "_").replace("-", "_"), ) @op def process_file(path: str) -> int: # simple example of calculating size
def emit(): yield DynamicOutput(1, mapping_key="key_1") yield DynamicOutput(2, mapping_key="key_2") @graph def test_graph(): emit().map(passthrough) assert test_graph.execute_in_process().success class DangerNoodle(NamedTuple): x: int @op(out={"items": DynamicOut(), "refs": Out()}) def spawn(): for i in range(10): yield DynamicOutput(DangerNoodle(i), output_name="items", mapping_key=f"num_{i}") gc.collect() yield Output(len(objgraph.by_type("DangerNoodle")), output_name="refs") @job() def no_leaks_plz(): spawn()
@op def add(a, b): return a + b @op def echo(x): return x @op def process(results): return sum(results) @op(out=DynamicOut()) def dynamic_values(): for i in range(2): yield DynamicOutput(i, mapping_key=f"num_{i}") @op( out={ "values": DynamicOut(), "negatives": DynamicOut(), }, ) def multiple_dynamic_values(): for i in range(2): yield DynamicOutput(i, output_name="values", mapping_key=f"num_{i}") yield DynamicOutput(-i, output_name="negatives",
import time

from dagster import DynamicOut, Out, job, op
from dagster.core.definitions.events import DynamicOutput, Output

from .test_step_delegating_executor import test_step_delegating_executor


@op(out=DynamicOut(str))
def dynamic():
    # Fan out one dynamic branch per letter; the letter doubles as the
    # mapping key.
    for x in ["a", "b"]:
        yield DynamicOutput(x, x)


@op(out=Out(is_required=False))
def optional(x):
    # Optional output: only the "a" branch yields, so the "b" branch is
    # skipped and its downstream mapped steps should be pruned.
    if x == "a":
        yield Output(x)


@op
def final(context, xs):
    # Terminal step: just log whatever value reached it.
    context.log.info(xs)


def define_dynamic_skipping_job():
    # Builds a job exercising dynamic-output skipping under the
    # step-delegating executor.
    @job(executor_def=test_step_delegating_executor)
    def dynamic_skipping_job():
        xs = dynamic().map(optional)
        xs.map(final)
    # NOTE(review): the visible source ends here without returning
    # dynamic_skipping_job -- possibly truncated; confirm against the full file.
if y == 2 and current_run.parent_run_id is None: raise Exception() context.log.info("echo is returning " + str(y * ten)) return y * ten @op def emit_ten(): return 10 @op def sum_numbers(base, nums): return base + sum(nums) @op(out=DynamicOut()) def emit(): for i in range(3): yield DynamicOutput(value=i, mapping_key=str(i)) @graph def dynamic(): # pylint: disable=no-member result = emit().map(lambda num: multiply_by_two(multiply_inputs(num, emit_ten()))) multiply_by_two.alias("double_total")(sum_numbers(emit_ten(), result.collect())) dynamic_job = dynamic.to_job()