def test_no_outputs_no_inputs_config_schema():
    assert ResolvedRunConfig.build(no_input_no_output_pipeline())

    with pytest.raises(DagsterInvalidConfigError) as exc_context:
        ResolvedRunConfig.build(no_input_no_output_pipeline(), {"solids": {"return_one": {}}})

    assert len(exc_context.value.errors) == 1
    assert (
        'Error 1: Received unexpected config entry "return_one" at path root:solids'
        in exc_context.value.message
    )
def test_basic_json_misnamed_output_config_schema():
    with pytest.raises(DagsterInvalidConfigError) as exc_context:
        ResolvedRunConfig.build(
            single_int_named_output_pipeline(),
            {
                "solids": {
                    "return_named_one": {"outputs": [{"wrong_name": {"json": {"path": "foo"}}}]}
                }
            },
        )

    assert len(exc_context.value.errors) == 1
    assert 'Error 1: Received unexpected config entry "wrong_name"' in exc_context.value.message
    assert "at path root:solids:return_named_one:outputs[0]" in exc_context.value.message
def test_basic_json_default_output_config_schema():
    env = ResolvedRunConfig.build(
        single_int_output_pipeline(),
        {"solids": {"return_one": {"outputs": [{"result": {"json": {"path": "foo"}}}]}}},
    )

    assert env.solids["return_one"]
    assert env.solids["return_one"].outputs.type_materializer_specs == [
        {"result": {"json": {"path": "foo"}}}
    ]
def test_execution_plan_reexecution_with_in_memory():
    pipeline_def = define_addy_pipeline()
    instance = DagsterInstance.ephemeral()
    run_config = {"solids": {"add_one": {"inputs": {"num": {"value": 3}}}}}
    result = execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    assert result.success

    # re-execute add_two

    resolved_run_config = ResolvedRunConfig.build(pipeline_def, run_config=run_config)
    execution_plan = ExecutionPlan.build(InMemoryPipeline(pipeline_def), resolved_run_config)

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        execution_plan=execution_plan,
        run_config=run_config,
        parent_run_id=result.run_id,
        root_run_id=result.run_id,
    )

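    # The parent run kept its intermediates in memory, so a fresh subset
    # execution of add_two cannot resolve its upstream input and should raise.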
    with pytest.raises(DagsterInvariantViolationError):
        execute_plan(
            execution_plan.build_subset_plan(["add_two"], pipeline_def, resolved_run_config),
            InMemoryPipeline(pipeline_def),
            run_config=run_config,
            pipeline_run=pipeline_run,
            instance=instance,
        )
def test_compile():
    resolved_run_config = ResolvedRunConfig.build(
        composition,
        {"solids": {"add_four": {"inputs": {"num": {"value": 1}}}}},
    )

    plan = ExecutionPlan.build(InMemoryPipeline(composition), resolved_run_config)

    res = coalesce_execution_steps(plan)
    assert set(res.keys()) == {
        "add_four.add",
        "div_four.div_two",
        "div_four.div_two_2",
        "add_four.emit_two.emit_one_2",
        "add_four.emit_two_2.add",
        "int_to_float",
        "add_four.emit_two_2.emit_one_2",
        "add_four.emit_two.add",
        "add_four.emit_two_2.emit_one",
        "add_four.emit_two.emit_one",
    }
def create_execution_plan(
    pipeline: Union[IPipeline, PipelineDefinition],
    run_config: Optional[dict] = None,
    mode: Optional[str] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
) -> ExecutionPlan:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode,
                               "mode",
                               default=pipeline_def.get_default_mode_name())
    check.opt_nullable_list_param(step_keys_to_execute,
                                  "step_keys_to_execute",
                                  of_type=str)

    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  run_config,
                                                  mode=mode)

    return ExecutionPlan.build(
        pipeline,
        resolved_run_config,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
    )
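

# A minimal, hedged usage sketch for create_execution_plan; the solid and
# pipeline names below are illustrative, and the snippet assumes the same
# dagster imports used throughout these examples.
@solid(config_schema=int)
def sketch_solid(context):
    return context.solid_config


@pipeline
def sketch_pipeline():
    sketch_solid()


def test_create_execution_plan_sketch():
    plan = create_execution_plan(
        sketch_pipeline,
        run_config={"solids": {"sketch_solid": {"config": 1}}},
    )
    # One solid means one step in the resolved plan.
    assert len(plan.step_keys_to_execute) == 1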
def test_memoized_plan_memoized_results():
    with instance_for_test() as instance:
        manager = VersionedInMemoryIOManager()

        versioned_pipeline = versioned_pipeline_factory(manager)
        plan = create_execution_plan(versioned_pipeline,
                                     instance_ref=instance.get_ref())
        resolved_run_config = ResolvedRunConfig.build(versioned_pipeline)

        # Affix a memoized value to the output
        step_output_handle = StepOutputHandle("versioned_solid_no_input",
                                              "result")
        step_output_version = plan.get_version_for_step_output_handle(
            step_output_handle)
        manager.values[(step_output_handle.step_key,
                        step_output_handle.output_name,
                        step_output_version)] = 4

        memoized_plan = plan.build_memoized_plan(versioned_pipeline,
                                                 resolved_run_config,
                                                 instance=None,
                                                 selected_step_keys=None)

        assert memoized_plan.step_keys_to_execute == [
            "versioned_solid_takes_input"
        ]
def test_resolve_memoized_execution_plan_partial_versioning():
    manager = VersionedInMemoryIOManager()

    partially_versioned_pipeline = partially_versioned_pipeline_factory(
        manager)
    speculative_execution_plan = create_execution_plan(
        partially_versioned_pipeline)

    resolved_run_config = ResolvedRunConfig.build(partially_versioned_pipeline)

    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")

    step_output_version = resolve_step_output_versions(
        partially_versioned_pipeline, speculative_execution_plan,
        resolved_run_config)[step_output_handle]
    manager.values[(step_output_handle.step_key,
                    step_output_handle.output_name, step_output_version)] = 4

    with DagsterInstance.ephemeral() as instance:
        assert (resolve_memoized_execution_plan(
            speculative_execution_plan,
            partially_versioned_pipeline,
            {},
            instance,
            resolved_run_config,
        ).step_keys_to_execute == ["solid_takes_input"])
def validate_run_config(
    pipeline_def: PipelineDefinition,
    run_config: Optional[Dict[str, Any]] = None,
    mode: Optional[str] = None,
) -> Dict[str, Any]:
    """Function to validate a provided run config blob against a given pipeline and mode.

    If validation is successful, this function will return a dictionary representation of the
    validated config actually used during execution.

    Args:
        pipeline_def (PipelineDefinition): The pipeline definition to validate run config against.
        run_config (Optional[Dict[str, Any]]): The run config to validate.
        mode (Optional[str]): The mode of the pipeline to validate against; different modes may
            require different config. Defaults to the pipeline's default mode.

    Returns:
        Dict[str, Any]: A dictionary representation of the validated config.
    """

    pipeline_def = check.inst_param(pipeline_def, "pipeline_def",
                                    PipelineDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode,
                               "mode",
                               default=pipeline_def.get_default_mode_name())

    return ResolvedRunConfig.build(pipeline_def, run_config,
                                   mode=mode).to_dict()
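

# A hedged usage sketch for validate_run_config: valid config comes back as a
# plain dict, while invalid config raises DagsterInvalidConfigError. Names are
# illustrative; assumes the dagster/pytest imports used throughout.
@solid(config_schema=str)
def greeting_solid(context):
    return context.solid_config


@pipeline
def greeting_pipeline():
    greeting_solid()


def test_validate_run_config_sketch():
    validated = validate_run_config(
        greeting_pipeline,
        run_config={"solids": {"greeting_solid": {"config": "hi"}}},
    )
    assert isinstance(validated, dict)

    with pytest.raises(DagsterInvalidConfigError):
        # an int is not valid against a str config schema
        validate_run_config(
            greeting_pipeline,
            run_config={"solids": {"greeting_solid": {"config": 1}}},
        )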
def test_optional_solid_with_optional_scalar_config():
    def _assert_config_none(context, value):
        assert context.solid_config is value

    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema=Field(Int, is_required=False),
                input_defs=[],
                output_defs=[],
                compute_fn=lambda context, _inputs: _assert_config_none(
                    context, 234),
            )
        ],
    )

    env_type = create_run_config_schema_type(pipeline_def)

    assert env_type.fields["solids"].is_required is False

    solids_type = env_type.fields["solids"].config_type

    assert solids_type.fields["int_config_solid"].is_required is False

    env_obj = ResolvedRunConfig.build(pipeline_def, {})

    assert env_obj.solids["int_config_solid"].config is None
def test_solid_dictionary_type():
    pipeline_def = define_test_solids_config_pipeline()

    env_obj = ResolvedRunConfig.build(
        pipeline_def,
        {
            "solids": {
                "int_config_solid": {
                    "config": 1
                },
                "string_config_solid": {
                    "config": "bar"
                }
            },
        },
    )

    value = env_obj.solids

    assert set(["int_config_solid",
                "string_config_solid"]) == set(value.keys())
    assert value == {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "string_config_solid": SolidConfig.from_dict({"config": "bar"}),
    }
def test_solid_dictionary_some_no_config():
    @solid(name="int_config_solid",
           config_schema=Int,
           input_defs=[],
           output_defs=[])
    def int_config_solid(_):
        return None

    @solid(name="no_config_solid", input_defs=[], output_defs=[])
    def no_config_solid(_):
        return None

    @pipeline
    def pipeline_def():
        int_config_solid()
        no_config_solid()

    env = ResolvedRunConfig.build(
        pipeline_def, {"solids": {"int_config_solid": {"config": 1}}})

    assert {"int_config_solid", "no_config_solid"} == set(env.solids.keys())
    assert env.solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 1}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
def test_using_file_system_for_subplan_invalid_step():
    pipeline = define_inty_pipeline()

    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()

    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(["nope.compute"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        )
# Excerpted nested helper (the enclosing factory, not shown, supplies
# `intermediate_storage_def`); dedented here for readability.
def _io_manager(init_context):
    if not init_context.pipeline_run:
        raise DagsterInvariantViolationError(
            "Attempted to construct intermediate storage outside of execution context. "
            "Intermediate storage can only be constructed within the context of an execution."
        )
    pipeline_run = init_context.pipeline_run
    instance = init_context.instance
    pipeline_def = init_context.pipeline_def_for_backwards_compat
    # depends on InitResourceContext.instance and pipeline_def_for_backwards_compat
    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  pipeline_run.run_config,
                                                  mode=pipeline_run.mode)
    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)

    intermediate_storage_context = InitIntermediateStorageContext(
        pipeline_def=pipeline_def,
        mode_def=mode_def,
        intermediate_storage_def=intermediate_storage_def,
        pipeline_run=pipeline_run,
        instance=instance,
        resolved_run_config=resolved_run_config,
        type_storage_plugin_registry=construct_type_storage_plugin_registry(
            pipeline_def, intermediate_storage_def),
        resources=init_context.resources,
        intermediate_storage_config=(
            resolved_run_config.intermediate_storage.intermediate_storage_config
        ),
    )

    intermediate_storage = intermediate_storage_def.intermediate_storage_creation_fn(
        intermediate_storage_context)

    return IntermediateStorageAdapter(intermediate_storage)
def basic_resource_versions():
    run_config = {
        "resources": {
            "basic_resource": {
                "config": {
                    "input_str": "apple"
                },
            },
            "resource_no_version": {
                "config": {
                    "input_str": "banana"
                }
            },
        }
    }

    resolved_run_config = ResolvedRunConfig.build(modes_pipeline,
                                                  run_config,
                                                  mode="fakemode")

    resource_versions_by_key = resolve_resource_versions(
        resolved_run_config, modes_pipeline)

    assert resource_versions_by_key["basic_resource"] == join_and_hash(
        resolve_config_version({"input_str": "apple"}), basic_resource.version)

    assert resource_versions_by_key["resource_no_version"] is None

    assert resource_versions_by_key["resource_no_config"] == join_and_hash(
        join_and_hash(), "42")
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    run_config = {"storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )
    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)

    events = execute_plan(
        execution_plan.build_subset_plan(["add_one"], pipeline,
                                         resolved_run_config),
        InMemoryPipeline(pipeline),
        instance,
        run_config=run_config,
        pipeline_run=pipeline_run,
    )
    failures = [
        event for event in events if event.event_type_value == "STEP_FAILURE"
    ]
    assert len(failures) == 1
    assert failures[0].step_key == "add_one"
    assert "DagsterStepOutputNotFoundError" in failures[
        0].event_specific_data.error.message
def test_get_output_context_with_resources():
    @solid
    def basic_solid():
        pass

    @pipeline
    def basic_pipeline():
        basic_solid()

    with pytest.raises(
            CheckError,
            match="Expected either resources or step context to be set, but "
            "received both. If step context is provided, resources for IO manager will be "
            "retrieved off of that.",
    ):
        get_output_context(
            execution_plan=create_execution_plan(basic_pipeline),
            pipeline_def=basic_pipeline,
            resolved_run_config=ResolvedRunConfig.build(basic_pipeline),
            step_output_handle=StepOutputHandle("basic_solid", "result"),
            run_id=None,
            log_manager=None,
            step_context=mock.MagicMock(),
            resources=mock.MagicMock(),
        )
def create_context_creation_data(
    pipeline: IPipeline,
    execution_plan: ExecutionPlan,
    run_config: Dict[str, Any],
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
) -> "ContextCreationData":
    pipeline_def = pipeline.get_definition()
    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  run_config,
                                                  mode=pipeline_run.mode)

    mode_def = pipeline_def.get_mode_definition(pipeline_run.mode)
    intermediate_storage_def = resolved_run_config.intermediate_storage_def_for_mode(
        mode_def)
    executor_def = executor_def_from_config(mode_def, resolved_run_config)

    return ContextCreationData(
        pipeline=pipeline,
        resolved_run_config=resolved_run_config,
        pipeline_run=pipeline_run,
        mode_def=mode_def,
        intermediate_storage_def=intermediate_storage_def,
        executor_def=executor_def,
        instance=instance,
        resource_keys_to_init=get_required_resource_keys_to_init(
            execution_plan, pipeline_def, resolved_run_config,
            intermediate_storage_def),
        execution_plan=execution_plan,
    )
def test_resolve_memoized_execution_plan_yes_stored_results():
    manager = VersionedInMemoryIOManager()
    versioned_pipeline = versioned_pipeline_factory(manager)

    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    resolved_run_config = ResolvedRunConfig.build(versioned_pipeline)

    step_output_handle = StepOutputHandle("versioned_solid_no_input", "result")
    step_output_version = resolve_step_output_versions(
        versioned_pipeline, speculative_execution_plan,
        resolved_run_config)[step_output_handle]
    manager.values[(step_output_handle.step_key,
                    step_output_handle.output_name, step_output_version)] = 4
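    # With this versioned output already stored, only the downstream step
    # should remain in the memoized plan (asserted below).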

    with DagsterInstance.ephemeral() as dagster_instance:

        memoized_execution_plan = resolve_memoized_execution_plan(
            speculative_execution_plan,
            versioned_pipeline,
            {},
            dagster_instance,
            resolved_run_config,
        )

        assert memoized_execution_plan.step_keys_to_execute == [
            "versioned_solid_takes_input"
        ]

        expected_handle = StepOutputHandle(step_key="versioned_solid_no_input",
                                           output_name="result")

        memoized_step = memoized_execution_plan.get_step_by_key(
            "versioned_solid_takes_input")
        assert (memoized_step.step_input_dict["intput"].source.step_output_handle
                == expected_handle)
def test_clean_event_generator_exit():
    """Testing for generator cleanup
    (see https://amir.rachum.com/blog/2017/03/03/generator-cleanup/)
    """
    from dagster.core.execution.context.init import InitResourceContext
    from dagster.core.definitions.resource import ScopedResourcesBuilder

    pipeline_def = gen_basic_resource_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan)
    log_manager = DagsterLogManager(run_id=pipeline_run.run_id,
                                    logging_tags={},
                                    loggers=[])
    resolved_run_config = ResolvedRunConfig.build(pipeline_def)
    execution_plan = create_execution_plan(pipeline_def)

    resource_name, resource_def = next(
        iter(pipeline_def.get_default_mode().resource_defs.items()))
    resource_context = InitResourceContext(
        resource_def=resource_def,
        resources=ScopedResourcesBuilder().build(None),
        resource_config=None,
        pipeline_run=pipeline_run,
        instance=instance,
    )
    generator = single_resource_event_generator(resource_context,
                                                resource_name, resource_def)
    next(generator)
    generator.close()

    resource_defs = pipeline_def.get_mode_definition(
        resolved_run_config.mode).resource_defs

    generator = resource_initialization_event_generator(
        resource_defs=resource_defs,
        resource_configs=resolved_run_config.resources,
        log_manager=log_manager,
        execution_plan=execution_plan,
        pipeline_run=pipeline_run,
        resource_keys_to_init={"a"},
        instance=instance,
        emit_persistent_events=True,
        pipeline_def_for_backwards_compat=pipeline_def,
    )
    next(generator)
    generator.close()

    generator = PlanExecutionContextManager(  # pylint: disable=protected-access
        pipeline=InMemoryPipeline(pipeline_def),
        execution_plan=execution_plan,
        run_config={},
        pipeline_run=pipeline_run,
        instance=instance,
        retry_mode=RetryMode.DISABLED,
        scoped_resources_builder_cm=resource_initialization_manager,
    ).get_generator()
    next(generator)
    generator.close()
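

# A minimal sketch of the generator-cleanup behavior exercised above: close()
# raises GeneratorExit inside the generator, so finally blocks (i.e. resource
# teardown) run even when the generator is never exhausted.
def _resource_like_generator():
    try:
        yield "resource"
    finally:
        print("torn down")  # runs on close() as well as on normal exhaustion


def test_generator_close_runs_teardown_sketch():
    gen = _resource_like_generator()
    assert next(gen) == "resource"  # acquire
    gen.close()  # GeneratorExit -> finally -> teardown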
def get_step_keys_to_execute(pipeline, run_config, mode, instance):
    memoized_execution_plan = resolve_memoized_execution_plan(
        create_execution_plan(pipeline, run_config=run_config, mode=mode),
        pipeline,
        run_config,
        instance,
        ResolvedRunConfig.build(pipeline, run_config=run_config, mode=mode),
    )
    return memoized_execution_plan.step_keys_to_execute
def test_using_intermediate_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:

        run_config = {"intermediate_storage": {"filesystem": {}}}

        pipeline = reconstructable(define_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(
            pipeline.get_definition(),
            run_config=run_config,
        )
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        intermediate_storage = build_fs_intermediate_storage(
            instance.intermediates_directory, pipeline_run.run_id)

        assert get_step_output(return_one_step_events, "return_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("return_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("return_one")).obj == 1)

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(run_config, execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        assert intermediate_storage.has_intermediate(
            None, StepOutputHandle("add_one"))
        assert (intermediate_storage.get_intermediate(
            None, Int, StepOutputHandle("add_one")).obj == 2)
def test_default_environment():
    @solid(name="some_solid", input_defs=[], output_defs=[])
    def some_solid(_):
        return None

    @pipeline
    def pipeline_def():
        some_solid()

    assert ResolvedRunConfig.build(pipeline_def, {})
def run_test_with_builtin_type(type_to_test, loader_version, type_value):
    @solid(version="42",
           input_defs=[InputDefinition("_builtin_type", type_to_test)])
    def versioned_solid_ext_input_builtin_type(_, _builtin_type):
        pass

    @pipeline
    def versioned_pipeline_ext_input_builtin_type():
        versioned_solid_takes_input(versioned_solid_ext_input_builtin_type())

    run_config = {
        "solids": {
            "versioned_solid_ext_input_builtin_type": {
                "inputs": {
                    "_builtin_type": type_value
                }
            }
        }
    }
    speculative_execution_plan = create_execution_plan(
        versioned_pipeline_ext_input_builtin_type,
        run_config=run_config,
    )

    resolved_run_config = ResolvedRunConfig.build(
        versioned_pipeline_ext_input_builtin_type, run_config=run_config)

    versions = resolve_step_versions(versioned_pipeline_ext_input_builtin_type,
                                     speculative_execution_plan,
                                     resolved_run_config)

    ext_input_version = join_and_hash(str(type_value))
    input_version = join_and_hash(loader_version + ext_input_version)

    solid1_def_version = versioned_solid_ext_input_builtin_type.version
    solid1_config_version = resolve_config_version(None)
    solid1_resources_version = join_and_hash()
    solid1_version = join_and_hash(solid1_def_version, solid1_config_version,
                                   solid1_resources_version)

    step1_version = join_and_hash(input_version, solid1_version)
    assert versions["versioned_solid_ext_input_builtin_type"] == step1_version

    output_version = join_and_hash(step1_version, "result")
    hashed_input2 = output_version

    solid2_def_version = versioned_solid_takes_input.version
    solid2_config_version = resolve_config_version(None)
    solid2_resources_version = join_and_hash()
    solid2_version = join_and_hash(solid2_def_version, solid2_config_version,
                                   solid2_resources_version)

    step2_version = join_and_hash(hashed_input2, solid2_version)
    assert versions["versioned_solid_takes_input"] == step2_version
def test_whole_environment():
    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        mode_defs=[
            ModeDefinition(
                name="test_mode",
                resource_defs={
                    "test_resource":
                    ResourceDefinition(resource_fn=lambda _: None,
                                       config_schema=Any)
                },
            )
        ],
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema=Int,
                input_defs=[],
                output_defs=[OutputDefinition()],
                required_resource_keys={"test_resource"},
                compute_fn=lambda *args: None,
            ),
            SolidDefinition(name="no_config_solid",
                            input_defs=[],
                            output_defs=[],
                            compute_fn=lambda *args: None),
        ],
    )

    env = ResolvedRunConfig.build(
        pipeline_def,
        {
            "resources": {
                "test_resource": {
                    "config": 1
                }
            },
            "solids": {
                "int_config_solid": {
                    "config": 123
                }
            },
        },
    )

    assert isinstance(env, ResolvedRunConfig)
    assert env.solids == {
        "int_config_solid": SolidConfig.from_dict({"config": 123}),
        "no_config_solid": SolidConfig.from_dict({}),
    }
    assert env.resources == {
        "test_resource": ResourceConfig(1),
        "io_manager": ResourceConfig(None)
    }
def test_step_versions_with_resources():
    run_config = {
        "resources": {
            "basic_resource": {
                "config": {
                    "input_str": "apple"
                }
            }
        }
    }
    speculative_execution_plan = create_execution_plan(
        versioned_modes_pipeline, run_config=run_config, mode="fakemode")
    resolved_run_config = ResolvedRunConfig.build(versioned_modes_pipeline,
                                                  run_config=run_config,
                                                  mode="fakemode")

    versions = resolve_step_versions(versioned_modes_pipeline,
                                     speculative_execution_plan,
                                     resolved_run_config)

    solid_def_version = fake_solid_resources_versioned.version
    solid_config_version = resolve_config_version(None)

    resolved_run_config = ResolvedRunConfig.build(versioned_modes_pipeline,
                                                  run_config,
                                                  mode="fakemode")

    resource_versions_by_key = resolve_resource_versions(
        resolved_run_config,
        versioned_modes_pipeline,
    )
    solid_resources_version = join_and_hash(*[
        resource_versions_by_key[resource_key] for resource_key in
        fake_solid_resources_versioned.required_resource_keys
    ])
    solid_version = join_and_hash(solid_def_version, solid_config_version,
                                  solid_resources_version)

    step_version = join_and_hash(solid_version)

    assert versions["fake_solid_resources_versioned"] == step_version
def test_using_file_system_for_subplan_multiprocessing():
    with instance_for_test() as instance:
        pipeline = reconstructable(define_reconstructable_inty_pipeline)

        resolved_run_config = ResolvedRunConfig.build(pipeline.get_definition())
        execution_plan = ExecutionPlan.build(
            pipeline,
            resolved_run_config,
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline.get_definition(),
            execution_plan=execution_plan)

        assert execution_plan.get_step_by_key("return_one")

        return_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["return_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(return_one_step_events, "return_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "return_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan.build_subset_plan(["add_one"],
                                                 pipeline.get_definition(),
                                                 resolved_run_config),
                pipeline,
                instance,
                run_config=dict(execution={"multiprocess": {}}),
                pipeline_run=pipeline_run,
            ))

        assert get_step_output(add_one_step_events, "add_one")
        with open(
                os.path.join(instance.storage_directory(), pipeline_run.run_id,
                             "add_one", "result"),
                "rb",
        ) as read_obj:
            assert pickle.load(read_obj) == 2
def test_using_intermediates_file_system_for_subplan():
    pipeline = define_inty_pipeline()

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    resolved_run_config = ResolvedRunConfig.build(
        pipeline,
        run_config=run_config,
    )

    execution_plan = ExecutionPlan.build(
        InMemoryPipeline(pipeline),
        resolved_run_config,
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline, execution_plan=execution_plan)
    assert execution_plan.get_step_by_key("return_one")

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["return_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    intermediate_storage = build_fs_intermediate_storage(
        instance.intermediates_directory, pipeline_run.run_id)
    assert get_step_output(return_one_step_events, "return_one")
    assert intermediate_storage.has_intermediate(
        None, StepOutputHandle("return_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("return_one")).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(["add_one"], pipeline,
                                             resolved_run_config),
            InMemoryPipeline(pipeline),
            instance,
            run_config=run_config,
            pipeline_run=pipeline_run,
        ))

    assert get_step_output(add_one_step_events, "add_one")
    assert intermediate_storage.has_intermediate(None,
                                                 StepOutputHandle("add_one"))
    assert intermediate_storage.get_intermediate(
        None, Int, StepOutputHandle("add_one")).obj == 2
def test_no_outputs_one_input_config_schema():
    assert ResolvedRunConfig.build(
        one_input_no_output_pipeline(),
        {"solids": {"take_input_return_nothing": {"inputs": {"dummy": {"value": "value"}}}}},
    )

    with pytest.raises(DagsterInvalidConfigError) as exc_context:
        ResolvedRunConfig.build(
            one_input_no_output_pipeline(),
            {
                "solids": {
                    "take_input_return_nothing": {
                        "inputs": {"dummy": {"value": "value"}},
                        "outputs": {},
                    }
                }
            },
        )

    assert len(exc_context.value.errors) == 1
    exp_msg = 'Error 1: Received unexpected config entry "outputs" at path root:solids:take_input_return_nothing'
    assert exp_msg in exc_context.value.message
def test_intermediate_storage_definition_run_config_required():
    """Run config required for intermediate storage definition, none provided to pipeline def."""

    intermediate_storage_requires_config = IntermediateStorageDefinition(
        name="test_intermediate_requires_config",
        is_persistent=False,
        required_resource_keys=set(),
        config_schema={"field": Field(StringSource)},
    )
    run_config = {
        "intermediate_storage": {
            "test_intermediate_requires_config": {
                "config": {
                    "field": "value"
                }
            }
        }
    }

    fake_mode = ModeDefinition(
        name="fakemode",
        intermediate_storage_defs=[intermediate_storage_requires_config])
    pipeline_def = PipelineDefinition([fake_solid],
                                      name="fakename",
                                      mode_defs=[fake_mode])

    resolved_run_config = ResolvedRunConfig.build(pipeline_def,
                                                  run_config,
                                                  mode="fakemode")

    assert (resolved_run_config.intermediate_storage.intermediate_storage_name
            == "test_intermediate_requires_config")

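    # With an empty run config, the storage's required "field" entry is
    # missing, so validation should fail.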
    with pytest.raises(DagsterInvalidConfigError):
        ResolvedRunConfig.build(pipeline_def, {}, mode="fakemode")