Ejemplo n.º 1
0
def define_dagstermill_solid(
    name,
    notebook_path,
    input_defs=None,
    output_defs=None,
    config_field=None,
    required_resource_keys=None,
):
    '''Build a solid definition whose compute function is backed by a Jupyter notebook.

    Arguments:
        name (str): The name of the solid.
        notebook_path (str): Path to the backing notebook.
        input_defs (Optional[list[:class:`dagster.InputDefinition`]]): The solid's inputs.
        output_defs (Optional[list[:class:`dagster.OutputDefinition`]]): The solid's outputs.
        config_field: Optional config field for the solid; it is passed through
            ``check_user_facing_opt_field_param`` before being handed to the definition.
        required_resource_keys (Optional[set[str]]): The string names of any required resources.

    Returns:
        :class:`dagster.SolidDefinition`
    '''
    # Validate the two mandatory string arguments first.
    check.str_param(name, 'name')
    check.str_param(notebook_path, 'notebook_path')

    # Normalize the optional collections.
    checked_inputs = check.opt_list_param(input_defs, 'input_defs', of_type=InputDefinition)
    checked_outputs = check.opt_list_param(output_defs, 'output_defs', of_type=OutputDefinition)
    checked_resource_keys = check.opt_set_param(
        required_resource_keys, 'required_resource_keys', of_type=str
    )

    # The third argument names the solid so a config-field error can point at it.
    field_owner = 'of a dagstermill solid named "{name}"'.format(name=name)
    checked_config_field = check_user_facing_opt_field_param(
        config_field, 'config_field', field_owner
    )

    compute_fn = _dm_solid_compute(name, notebook_path)
    description = 'This solid is backed by the notebook at {path}'.format(path=notebook_path)
    metadata = {'notebook_path': notebook_path, 'kind': 'ipynb'}

    return SolidDefinition(
        name=name,
        input_defs=checked_inputs,
        compute_fn=compute_fn,
        output_defs=checked_outputs,
        config_field=checked_config_field,
        required_resource_keys=checked_resource_keys,
        description=description,
        metadata=metadata,
    )
Ejemplo n.º 2
0
    def get_computed_asset_solid_def(self,
                                     computed_asset,
                                     assets_in_pipeline,
                                     include_nothing_input=False):
        """Build the SolidDefinition that computes ``computed_asset``.

        One input is declared for every dependency whose asset is contained in
        ``assets_in_pipeline``; its name is the asset path joined with ``"__"``.
        When ``include_nothing_input`` is truthy and no such input exists, a single
        ``Nothing``-typed input named ``"nothing"`` is declared instead.

        The required resource keys are the union of: the keys registered in
        ``self._in_memory_type_resource_keys`` for each dependency's in-memory type
        and for the computation's output in-memory type, the asset's own storage
        key, and the storage key of every dependency's asset.
        """
        computation = computed_asset.computation
        dependencies = list(computation.deps.values())
        output_def = OutputDefinition(computed_asset.dagster_type)

        input_defs = [
            InputDefinition(name="__".join(dep.asset.path),
                            dagster_type=dep.asset.dagster_type)
            for dep in dependencies
            if dep.asset in assets_in_pipeline
        ]

        # Add a `Nothing` input if requested and if this asset has no input definitions.
        if include_nothing_input and not input_defs:
            input_defs.append(
                InputDefinition(name="nothing", dagster_type=Nothing))

        in_memory_types = [dep.in_memory_type for dep in dependencies]
        in_memory_types.append(computation.output_in_memory_type)

        required_resource_keys = set()
        for in_memory_type in in_memory_types:
            required_resource_keys.update(
                self._in_memory_type_resource_keys.get(in_memory_type, []))
        required_resource_keys.add(computed_asset.storage_key)
        required_resource_keys.update(
            dep.asset.storage_key for dep in dependencies)

        compute_fn = self._create_asset_solid_compute_wrapper(
            computed_asset, input_defs, output_def)

        return SolidDefinition(
            name="__".join(computed_asset.path),
            input_defs=input_defs,
            compute_fn=compute_fn,
            output_defs=[output_def],
            config_schema=None,
            required_resource_keys=required_resource_keys,
            positional_inputs=None,
            version=computation.version,
        )
Ejemplo n.º 3
0
def test_solid_not_found():
    '''Config addressed to a solid name absent from the pipeline must raise
    DagsterInvalidConfigError.'''

    def _t_fn(*_args):
        raise Exception('should not reach')

    find_me = SolidDefinition(
        name='find_me_solid', input_defs=[], output_defs=[], compute_fn=_t_fn
    )
    pipeline = PipelineDefinition(solid_defs=[find_me])

    # 'not_found' deliberately does not match the only solid, 'find_me_solid'.
    run_config = {'solids': {'not_found': {'config': {'some_config': 1}}}}

    with pytest.raises(DagsterInvalidConfigError):
        execute_pipeline(pipeline, run_config)
Ejemplo n.º 4
0
def test_config_for_no_config():
    '''Passing config to a solid defined without any config must raise
    PipelineConfigEvaluationError.'''

    def _t_fn(*_args):
        raise Exception('should not reach')

    no_config_solid = SolidDefinition(
        name='no_config_solid', inputs=[], outputs=[], compute_fn=_t_fn
    )
    pipeline = PipelineDefinition(solids=[no_config_solid])

    # The solid exists, but it was declared without a config definition.
    unexpected_config = {'solids': {'no_config_solid': {'config': {'some_config': 1}}}}

    with pytest.raises(PipelineConfigEvaluationError):
        execute_pipeline(pipeline, unexpected_config)
Ejemplo n.º 5
0
def test_optional_solid_with_optional_subfield():
    '''With an optional solid config holding only an optional subfield, the
    top-level environment sections checked here are all optional.'''
    optional_config = Field(
        Dict({'optional_field': Field(String, is_optional=True)}), is_optional=True
    )
    int_config_solid = SolidDefinition(
        name='int_config_solid',
        config_field=optional_config,
        inputs=[],
        outputs=[],
        transform_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(name='some_pipeline', solids=[int_config_solid])

    env_fields = pipeline_def.environment_type.fields
    for section in ('solids', 'context', 'execution', 'expectations'):
        assert env_fields[section].is_optional
Ejemplo n.º 6
0
def test_default_context_config():
    '''A pipeline created without explicit context definitions exposes an optional
    'default' context that is filled in when an empty config is evaluated.'''
    noop_solid = SolidDefinition(
        name='some_solid', inputs=[], outputs=[], transform_fn=lambda *args: None
    )
    pipeline_def = PipelineDefinition(solids=[noop_solid])

    context_cls = define_context_context_cls(
        pipeline_def.name, pipeline_def.context_definitions
    )
    context_config_type = context_cls.inst()

    assert 'default' in context_config_type.fields
    default_field = context_config_type.fields['default']
    assert default_field.is_optional

    assert 'config' in default_field.config_type.fields

    # Evaluating an empty dict should still yield the 'default' entry.
    evaluated = throwing_evaluate_config_value(context_config_type, {})

    assert 'default' in evaluated
Ejemplo n.º 7
0
def test_config_arg_mismatch():
    '''Supplying an int where the solid config declares a String field must raise
    DagsterTypeError.'''

    def _t_fn(*_args):
        raise Exception('should not reach')

    string_config = ConfigDefinition.config_dict(
        'SomeConfig', {'some_config': Field(types.String)}
    )
    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=string_config,
        transform_fn=_t_fn,
    )
    pipeline = PipelineDefinition(solids=[solid])

    with pytest.raises(DagsterTypeError):
        # The environment is built inside the `raises` block so that an error raised
        # while constructing it is still captured.
        mismatched_env = config.Environment(
            solids={'solid_with_context': config.Solid({'some_config': 1})}
        )
        execute_pipeline(pipeline, mismatched_env)
Ejemplo n.º 8
0
def test_optional_solid_with_required_scalar_config():
    '''Check that a solid with a scalar ``Int`` config schema yields required config fields.

    The test asserts that the 'solids' field, the solid's own entry and its 'config'
    field are all required, then executes the pipeline with the value 234 and has the
    compute function verify that this value reaches ``context.solid_config``.
    '''

    def _assert_solid_config(context, expected):
        # Compare by value: an identity check (`is`) on ints only passes thanks to
        # the interpreter's small-integer caching, which is an implementation detail.
        assert context.solid_config == expected

    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solid_defs=[
            SolidDefinition(
                name='int_config_solid',
                config_schema=Int,
                input_defs=[],
                output_defs=[],
                compute_fn=lambda context, _inputs: _assert_solid_config(
                    context, 234),
            )
        ],
    )

    env_type = create_environment_type(pipeline_def)

    assert env_type.fields['solids'].is_required is True

    solids_type = env_type.fields['solids'].config_type

    assert solids_type.fields['int_config_solid'].is_required is True

    int_config_solid_type = solids_type.fields['int_config_solid'].config_type

    assert_has_fields(int_config_solid_type, 'config')

    int_config_solid_config_field = int_config_solid_type.fields['config']

    assert int_config_solid_config_field.is_required is True

    # Run the pipeline so the compute function's assertion is actually exercised.
    execute_pipeline(pipeline_def,
                     {'solids': {
                         'int_config_solid': {
                             'config': 234
                         }
                     }})
Ejemplo n.º 9
0
def test_optional_solid_with_required_scalar_config():
    """Check that a solid with a scalar ``Int`` config schema yields required config fields.

    The test asserts that the "solids" field, the solid's own entry and its "config"
    field are all required in the run config schema type, then executes the pipeline
    with the value 234 and has the compute function verify that this value reaches
    ``context.solid_config``.
    """

    def _assert_solid_config(context, expected):
        # Compare by value: an identity check (`is`) on ints only passes thanks to
        # the interpreter's small-integer caching, which is an implementation detail.
        assert context.solid_config == expected

    pipeline_def = PipelineDefinition(
        name="some_pipeline",
        solid_defs=[
            SolidDefinition(
                name="int_config_solid",
                config_schema=Int,
                input_defs=[],
                output_defs=[],
                compute_fn=lambda context, _inputs: _assert_solid_config(
                    context, 234),
            )
        ],
    )

    env_type = create_run_config_schema_type(pipeline_def)

    assert env_type.fields["solids"].is_required is True

    solids_type = env_type.fields["solids"].config_type

    assert solids_type.fields["int_config_solid"].is_required is True

    int_config_solid_type = solids_type.fields["int_config_solid"].config_type

    assert_has_fields(int_config_solid_type, "config")

    int_config_solid_config_field = int_config_solid_type.fields["config"]

    assert int_config_solid_config_field.is_required is True

    # Run the pipeline so the compute function's assertion is actually exercised.
    execute_pipeline(pipeline_def,
                     {"solids": {
                         "int_config_solid": {
                             "config": 234
                         }
                     }})
Ejemplo n.º 10
0
def test_required_solid_with_required_subfield():
    '''A required subfield in a solid's config makes the enclosing 'solids' entries
    non-optional, and configs omitting it fail evaluation.'''
    int_config_solid = SolidDefinition(
        name='int_config_solid',
        config_field=Field(Dict({'required_field': Field(String)})),
        input_defs=[],
        output_defs=[],
        compute_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[int_config_solid])

    env_type = create_environment_type(pipeline_def)

    solids_field = env_type.fields['solids']
    assert solids_field.is_optional is False
    assert solids_field.config_type

    solid_field = solids_field.config_type.fields['int_config_solid']
    assert solid_field.is_optional is False
    assert solid_field.config_type.fields['config'].is_optional is False

    for section in ('execution', 'expectations'):
        assert env_type.fields[section].is_optional

    complete_config = {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}}
    env_obj = EnvironmentConfig.from_dict(
        throwing_evaluate_config_value(env_type, complete_config)
    )

    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    # Both an empty 'solids' section and an empty document lack the required field.
    for incomplete_config in ({'solids': {}}, {}):
        with pytest.raises(DagsterEvaluateConfigValueError):
            throwing_evaluate_config_value(env_type, incomplete_config)
Ejemplo n.º 11
0
def test_optional_solid_with_required_scalar_config():
    '''Check that a solid with a scalar ``Int`` config field yields non-optional config fields.

    The test asserts that the 'solids' field, the solid's own entry and its 'config'
    field are all non-optional, then executes the pipeline with the value 234 and has
    the transform function verify that this value reaches ``context.solid_config``.
    '''

    def _assert_solid_config(context, expected):
        # Compare by value: an identity check (`is`) on ints only passes thanks to
        # the interpreter's small-integer caching, which is an implementation detail.
        assert context.solid_config == expected

    pipeline_def = PipelineDefinition(
        name='some_pipeline',
        solids=[
            SolidDefinition(
                name='int_config_solid',
                config_field=Field(Int),
                inputs=[],
                outputs=[],
                transform_fn=lambda context, _inputs: _assert_solid_config(
                    context, 234),
            )
        ],
    )

    env_type = pipeline_def.environment_type

    assert env_type.fields['solids'].is_optional is False

    solids_type = env_type.fields['solids'].config_type

    assert solids_type.fields['int_config_solid'].is_optional is False

    int_config_solid_type = solids_type.fields['int_config_solid'].config_type

    assert_has_fields(int_config_solid_type, 'config')

    int_config_solid_config_field = int_config_solid_type.fields['config']

    assert int_config_solid_config_field.is_optional is False

    # Run the pipeline so the transform function's assertion is actually exercised.
    execute_pipeline(pipeline_def,
                     {'solids': {
                         'int_config_solid': {
                             'config': 234
                         }
                     }})
Ejemplo n.º 12
0
def test_wrong_solid_name():
    '''Config keyed by an unknown solid name fails with an "Undefined field" message
    that names the offending key and its path.'''
    some_solid = SolidDefinition(
        name='some_solid',
        inputs=[],
        outputs=[],
        config_field=Field(Int),
        compute_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(
        name='pipeline_wrong_solid_name', solid_defs=[some_solid]
    )

    # 'another_name' does not match the pipeline's only solid, 'some_solid'.
    misnamed_config = {'solids': {'another_name': {'config': {}}}}

    with pytest.raises(PipelineConfigEvaluationError) as pe_info:
        execute_pipeline(pipeline_def, misnamed_config)

    expected_fragment = 'Undefined field "another_name" at path root:solids'
    assert expected_fragment in str(pe_info.value)
Ejemplo n.º 13
0
def generate_solid(solid_id, num_inputs, num_outputs, num_cfg):
    """Create a synthetic solid with the requested number of inputs, outputs and config fields.

    Inputs are named ``in_<i>`` and default to ``"default"``; outputs are named
    ``out_<i>``; config fields are named ``field_<i>`` and are optional strings.
    The compute function yields the index ``i`` on output ``out_<i>`` for each output.
    """

    def compute_fn(_context, **_kwargs):
        yield from (
            Output(idx, "out_{}".format(idx)) for idx in range(num_outputs)
        )

    config = {
        f"field_{idx}": Field(str, is_required=False)
        for idx in range(num_cfg)
    }
    input_defs = [
        InputDefinition(name="in_{}".format(idx), default_value="default")
        for idx in range(num_inputs)
    ]
    output_defs = [
        OutputDefinition(name="out_{}".format(idx))
        for idx in range(num_outputs)
    ]

    return SolidDefinition(
        name=solid_id,
        input_defs=input_defs,
        output_defs=output_defs,
        compute_fn=compute_fn,
        config_schema=config,
    )
Ejemplo n.º 14
0
def test_solid_not_found():
    '''An environment that configures a solid missing from the pipeline must raise
    DagsterInvariantViolationError.'''

    def _t_fn(*_args):
        raise Exception('should not reach')

    find_me = SolidDefinition(name='find_me_solid', inputs=[], outputs=[], transform_fn=_t_fn)
    pipeline = PipelineDefinition(solids=[find_me])

    with pytest.raises(DagsterInvariantViolationError):
        # Built inside the `raises` block so an error raised during construction of
        # the environment is captured as well.
        unknown_solid_env = config.Environment(
            solids={'not_found': config.Solid({'some_config': 1})}
        )
        execute_pipeline(pipeline, unknown_solid_env)
Ejemplo n.º 15
0
def test_basic_solid_with_config():
    '''The config supplied for a solid is visible to its transform function via
    ``info.config``.'''
    captured = {}

    def _t_fn(info, _inputs):
        # Stash the config so it can be inspected after execution.
        captured['yep'] = info.config

    string_config = Field(Dict({'some_config': Field(String)}))
    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_field=string_config,
        transform_fn=_t_fn,
    )
    pipeline = PipelineDefinition(solids=[solid])

    environment = {'solids': {'solid_with_context': {'config': {'some_config': 'foo'}}}}
    execute_pipeline(pipeline, environment)

    assert 'yep' in captured
    assert 'some_config' in captured['yep']
Ejemplo n.º 16
0
def test_provided_default_on_resources_config():
    '''A resource whose only config field has a default is optional in the
    environment type, and its default is applied when evaluating an empty config.'''
    resource_config = Field(
        Dict({'with_default_int': Field(Int, is_optional=True, default_value=23434)})
    )
    some_mode = ModeDefinition(
        name='some_mode',
        resources={
            'some_resource': ResourceDefinition(
                resource_fn=lambda: None, config_field=resource_config
            )
        },
    )
    noop_solid = SolidDefinition(
        name='some_solid', inputs=[], outputs=[], compute_fn=lambda *args: None
    )
    pipeline_def = PipelineDefinition(mode_definitions=[some_mode], solids=[noop_solid])

    env_type = create_environment_type(pipeline_def)
    assert env_type.type_attributes.is_system_config

    resource_fields = env_type.fields['resources'].config_type.fields
    some_resource_field = resource_fields['some_resource']
    assert some_resource_field.is_optional

    some_resource_config_field = some_resource_field.config_type.fields['config']
    assert some_resource_config_field.is_optional
    assert some_resource_config_field.default_value == {'with_default_int': 23434}

    assert some_resource_field.default_value == {'config': {'with_default_int': 23434}}

    # Evaluating an empty environment should fill in the resource defaults.
    evaluated = throwing_evaluate_config_value(env_type, {})
    value = EnvironmentConfig.from_dict(evaluated)
    assert value.resources == {'some_resource': {'config': {'with_default_int': 23434}}}
Ejemplo n.º 17
0
def test_required_solid_with_required_subfield():
    """A required subfield in a solid's config schema makes the enclosing "solids"
    entries required, and configs omitting it do not process successfully."""
    int_config_solid = SolidDefinition(
        name="int_config_solid",
        config_schema={"required_field": String},
        input_defs=[],
        output_defs=[],
        compute_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[int_config_solid])

    env_type = create_environment_type(pipeline_def)

    solids_field = env_type.fields["solids"]
    assert solids_field.is_required is True
    assert solids_field.config_type

    solid_field = solids_field.config_type.fields["int_config_solid"]
    assert solid_field.is_required is True
    assert solid_field.config_type.fields["config"].is_required is True

    assert env_type.fields["execution"].is_required is False

    complete_config = {"solids": {"int_config_solid": {"config": {"required_field": "foobar"}}}}
    env_obj = EnvironmentConfig.build(pipeline_def, complete_config)

    assert env_obj.solids["int_config_solid"].config["required_field"] == "foobar"

    # Both an empty "solids" section and an empty document lack the required field.
    for incomplete_config in ({"solids": {}}, {}):
        res = process_config(env_type, incomplete_config)
        assert not res.success
Ejemplo n.º 18
0
def test_execution_plan_create_metadata():
    '''Metadata produced by a solid's ``step_metadata_fn`` from the environment
    config ends up on the solid's compute step in the execution plan.'''

    def _step_metadata(env_config):
        # Derive the metadata value from the solid's own config.
        str_value = env_config.solids['solid_metadata_creation'].config['str_value']
        return {'computed': str_value + '1'}

    solid_def = SolidDefinition(
        name='solid_metadata_creation',
        input_defs=[],
        output_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        config_field=Field(Dict({'str_value': Field(String)})),
        step_metadata_fn=_step_metadata,
    )
    p_def = PipelineDefinition(name='test_metadata', solid_defs=[solid_def])

    environment_dict = {
        'solids': {'solid_metadata_creation': {'config': {'str_value': 'foobar'}}}
    }
    execution_plan = create_execution_plan(p_def, environment_dict=environment_dict)

    compute_step = execution_plan.get_step_by_key('solid_metadata_creation.compute')
    assert compute_step
    assert compute_step.metadata == {'computed': 'foobar1'}
Ejemplo n.º 19
0
def create_templated_sql_transform_solid(name,
                                         sql,
                                         table_arguments,
                                         dependant_solids=None):
    '''Create a solid whose transform is built from a SQL template.

    Each entry of ``table_arguments`` becomes a String config field, and each solid
    in ``dependant_solids`` becomes an input named after that solid. The solid has a
    single default output.
    '''
    check.str_param(name, 'name')
    check.str_param(sql, 'sql')
    check.list_param(table_arguments, 'table_arguments', of_type=str)

    upstream_solids = check.opt_list_param(dependant_solids,
                                           'dependant_solids',
                                           of_type=SolidDefinition)

    # One String config field per templated table argument.
    field_dict = {table: Field(types.String) for table in table_arguments}
    inputs = [InputDefinition(upstream.name) for upstream in upstream_solids]

    return SolidDefinition(
        name=name,
        inputs=inputs,
        config_def=ConfigDefinition.config_dict(field_dict),
        transform_fn=_create_templated_sql_transform_with_output(sql),
        outputs=[OutputDefinition()],
    )
Ejemplo n.º 20
0
def define_more_complicated_config():
    '''Return a pipeline with one solid whose config has three String fields:
    a required one, an optional one, and an optional one with a default value.'''
    three_field_config = Field(
        Dict(
            {
                'field_one': Field(String),
                'field_two': Field(String, is_optional=True),
                'field_three': Field(String, is_optional=True, default_value='some_value'),
            }
        )
    )
    three_field_solid = SolidDefinition(
        name='a_solid_with_three_field_config',
        inputs=[],
        outputs=[],
        transform_fn=lambda *_args: None,
        config_field=three_field_config,
    )
    return PipelineDefinition(name='more_complicated_config', solids=[three_field_solid])
Ejemplo n.º 21
0
def create_templated_sql_transform_solid(name, sql, table_arguments, dependant_solids=None):
    '''Create a solid whose transform is built from a SQL template.

    Each entry of ``table_arguments`` becomes a String config field, and each solid
    in ``dependant_solids`` becomes an input named after that solid. The solid
    declares two outputs: 'result' (``Any``) and 'sql_text' (``SqlTextType``).
    '''
    check.str_param(name, 'name')
    check.str_param(sql, 'sql')
    check.list_param(table_arguments, 'table_arguments', of_type=str)

    upstream_solids = check.opt_list_param(
        dependant_solids, 'dependant_solids', of_type=SolidDefinition
    )

    # One String config field per templated table argument.
    field_dict = {table: Field(String) for table in table_arguments}
    inputs = [InputDefinition(upstream.name) for upstream in upstream_solids]

    result_output = OutputDefinition(name='result', dagster_type=Any)
    sql_text_output = OutputDefinition(name='sql_text', dagster_type=SqlTextType)

    return SolidDefinition(
        name=name,
        inputs=inputs,
        config_field=Field(Dict(field_dict)),
        transform_fn=_create_templated_sql_transform_with_output(sql),
        outputs=[result_output, sql_text_output],
    )
Ejemplo n.º 22
0
def test_required_solid_with_required_subfield():
    '''A required subfield in a solid's config makes the enclosing 'solids' entries
    required, and configs omitting it fail validation.'''
    int_config_solid = SolidDefinition(
        name='int_config_solid',
        config={'required_field': String},
        input_defs=[],
        output_defs=[],
        compute_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[int_config_solid])

    env_type = create_environment_type(pipeline_def)

    solids_field = env_type.fields['solids']
    assert solids_field.is_required is True
    assert solids_field.config_type

    solid_field = solids_field.config_type.fields['int_config_solid']
    assert solid_field.is_required is True
    assert solid_field.config_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    complete_config = {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}}
    env_obj = EnvironmentConfig.build(pipeline_def, complete_config)

    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    # Both an empty 'solids' section and an empty document lack the required field.
    for incomplete_config in ({'solids': {}}, {}):
        with pytest.raises(DagsterEvaluateConfigValueError):
            throwing_validate_config_value(env_type, incomplete_config)
Ejemplo n.º 23
0
def test_required_solid_with_required_subfield():
    '''A required subfield in a solid's config schema makes the enclosing 'solids'
    entries required, and configs omitting it do not process successfully.'''
    int_config_solid = SolidDefinition(
        name='int_config_solid',
        config_schema={'required_field': String},
        input_defs=[],
        output_defs=[],
        compute_fn=lambda *_args: None,
    )
    pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[int_config_solid])

    env_type = create_environment_type(pipeline_def)

    solids_field = env_type.fields['solids']
    assert solids_field.is_required is True
    assert solids_field.config_type

    solid_field = solids_field.config_type.fields['int_config_solid']
    assert solid_field.is_required is True
    assert solid_field.config_type.fields['config'].is_required is True

    assert env_type.fields['execution'].is_required is False

    complete_config = {'solids': {'int_config_solid': {'config': {'required_field': 'foobar'}}}}
    env_obj = EnvironmentConfig.build(pipeline_def, complete_config)

    assert env_obj.solids['int_config_solid'].config['required_field'] == 'foobar'

    # Both an empty 'solids' section and an empty document lack the required field.
    for incomplete_config in ({'solids': {}}, {}):
        res = process_config(env_type, incomplete_config)
        assert not res.success
Ejemplo n.º 24
0
def test_provided_default_config():
    '''A context whose only config field has a default is optional in the
    environment type, and it is the context selected for an empty config.'''
    context_config = Field(
        Dict({'with_default_int': Field(Int, is_optional=True, default_value=23434)})
    )
    some_context = PipelineContextDefinition(
        config_field=context_config, context_fn=lambda *args: None
    )
    noop_solid = SolidDefinition(
        name='some_solid', inputs=[], outputs=[], transform_fn=lambda *args: None
    )
    pipeline_def = PipelineDefinition(
        context_definitions={'some_context': some_context}, solids=[noop_solid]
    )

    env_type = pipeline_def.environment_type
    context_fields = env_type.fields['context'].config_type.fields
    some_context_field = context_fields['some_context']
    assert some_context_field.is_optional

    some_context_config_field = some_context_field.config_type.fields['config']
    assert some_context_config_field.is_optional
    assert some_context_config_field.default_value == {'with_default_int': 23434}

    expected_context_default = {
        'config': {'with_default_int': 23434},
        'resources': {},
        'persistence': {'file': {}},
    }
    assert some_context_field.default_value == expected_context_default

    # Evaluate an empty environment against a freshly read environment type.
    evaluated = throwing_evaluate_config_value(pipeline_def.environment_type, {})
    value = construct_environment_config(evaluated)
    assert value.context.name == 'some_context'
    assert env_type.type_attributes.is_system_config
Ejemplo n.º 25
0
def test_config_arg_mismatch():
    '''Supplying an int where the solid config declares a String field must raise
    PipelineConfigEvaluationError.'''

    def _t_fn(*_args):
        raise Exception('should not reach')

    string_config = Field(Dict({'some_config': Field(String)}))
    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_field=string_config,
        compute_fn=_t_fn,
    )
    pipeline = PipelineDefinition(solids=[solid])

    # 'some_config' is declared as String but given the int 1.
    mismatched_config = {'solids': {'solid_with_context': {'config': {'some_config': 1}}}}

    with pytest.raises(PipelineConfigEvaluationError):
        execute_pipeline(pipeline, mismatched_config)
Ejemplo n.º 26
0
    def get_context(self, solid_config=None, mode_def=None, run_config=None):
        """Get a dagstermill execution context for interactive exploration and development.

        Args:
            solid_config (Optional[Any]): If specified, this value will be made available on the
                context as its ``solid_config`` property.
            mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
                use to construct the context. Specify this if you would like a context constructed
                with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
                with a console logger will be constructed.
            run_config(Optional[dict]): The environment config dict with which to construct
                the context. The dict passed in is never modified; when the default mode is
                used, the logger config is added to a copy.

        Returns:
            :py:class:`~dagstermill.DagstermillExecutionContext`
        """
        check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
        run_config = check.opt_dict_param(run_config,
                                          "run_config",
                                          key_type=str)

        # If we are running non-interactively, and there is already a context reconstituted, return
        # that context rather than overwriting it.
        if self.context is not None and isinstance(
                self.context, DagstermillRuntimeExecutionContext):
            return self.context

        if not mode_def:
            mode_def = ModeDefinition(
                logger_defs={"dagstermill": colored_console_logger})
            # Work on a shallow copy so that the caller's dict is not mutated when the
            # default logger configuration is injected.
            run_config = dict(run_config)
            run_config["loggers"] = {"dagstermill": {}}

        # Placeholder solid with no inputs or outputs; it requires every resource key of
        # the mode.
        solid_def = SolidDefinition(
            name="this_solid",
            input_defs=[],
            compute_fn=lambda *args, **kwargs: None,
            output_defs=[],
            description=
            "Ephemeral solid constructed by dagstermill.get_context()",
            required_resource_keys=mode_def.resource_key_set,
        )

        pipeline_def = PipelineDefinition(
            [solid_def],
            mode_defs=[mode_def],
            name="ephemeral_dagstermill_pipeline")

        run_id = make_new_run_id()

        # construct stubbed PipelineRun for notebook exploration...
        # The actual pipeline run during pipeline execution will be serialized and reconstituted
        # in the `reconstitute_pipeline_context` call
        pipeline_run = PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=mode_def.name,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
            tags=None,
        )

        # Record the ephemeral definitions on the manager.
        self.in_pipeline = False
        self.solid_def = solid_def
        self.pipeline = pipeline_def

        environment_config = EnvironmentConfig.build(pipeline_def,
                                                     run_config,
                                                     mode=mode_def.name)

        pipeline = InMemoryPipeline(pipeline_def)
        execution_plan = ExecutionPlan.build(pipeline, environment_config)

        with scoped_pipeline_context(
                execution_plan,
                pipeline,
                run_config,
                pipeline_run,
                DagsterInstance.ephemeral(),
                scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:

            self.context = DagstermillExecutionContext(
                pipeline_context=pipeline_context,
                pipeline_def=pipeline_def,
                solid_config=solid_config,
                resource_keys_to_init=get_required_resource_keys_to_init(
                    execution_plan,
                    pipeline_def,
                    environment_config,
                    pipeline_context.intermediate_storage_def,
                ),
                solid_name=solid_def.name,
            )

        # NOTE(review): the context is returned after the `scoped_pipeline_context` block
        # has exited -- confirm that it is meant to stay usable beyond that scope.
        return self.context
Ejemplo n.º 27
0
# pylint: disable=unused-argument

from dagster import Int, Output, OutputDefinition, SolidDefinition, solid


# start_solid_definition_marker_0
@solid
def my_solid(context):
    # Decorator form: the function wrapped by `@solid` simply returns the constant 1.
    return 1


# end_solid_definition_marker_0

# start_solid_definition_marker_1
def _return_one(_context, inputs):
    # Generator-style compute function: yields a single Output wrapping the value 1.
    # Neither argument is read.
    yield Output(1)


# Explicit form: construct the SolidDefinition directly, with no inputs, one Int
# output and `_return_one` as the compute function.
# NOTE(review): this assignment rebinds the module-level name `solid`, shadowing the
# `solid` decorator imported at the top of this snippet for any code that follows.
solid = SolidDefinition(
    name="my_solid",
    input_defs=[],
    output_defs=[OutputDefinition(Int)],
    compute_fn=_return_one,
)
# end_solid_definition_marker_1
Ejemplo n.º 28
0
def get_duplicate_solids():
    """Return a 2-tuple of separately constructed solid definitions that share the
    name "a_solid"."""
    return tuple(
        SolidDefinition("a_solid", [], lambda: None, []) for _ in range(2)
    )
Ejemplo n.º 29
0
def define_dagstermill_solid(
    name,
    notebook_path,
    input_defs=None,
    output_defs=None,
    config_schema=None,
    required_resource_keys=None,
    output_notebook=None,
    asset_key_prefix=None,
):
    """Wrap a Jupyter notebook in a solid.

    Arguments:
        name (str): The name of the solid.
        notebook_path (str): Path to the backing notebook.
        input_defs (Optional[List[InputDefinition]]): The solid's inputs.
        output_defs (Optional[List[OutputDefinition]]): The solid's outputs. Your notebook should
            call :py:func:`~dagstermill.yield_result` to yield each of these outputs.
        config_schema (Optional[Any]): Passed through unchanged as the ``config_schema`` of the
            returned solid definition.
        required_resource_keys (Optional[Set[str]]): The string names of any required resources.
            The set passed in is copied and never modified.
        output_notebook (Optional[str]): If set, will be used as the name of an injected output of
            type :py:class:`~dagster.FileHandle` that will point to the executed notebook (in
            addition to the :py:class:`~dagster.AssetMaterialization` that is always created). This
            respects the :py:class:`~dagster.core.storage.file_manager.FileManager` configured on
            the pipeline resources via the "file_manager" resource key, so, e.g.,
            if :py:class:`~dagster_aws.s3.s3_file_manager` is configured, the output will be a
            :py:class:`~dagster_aws.s3.S3FileHandle`. Setting this also adds "file_manager" to
            the solid's required resource keys.
        asset_key_prefix (Optional[Union[List[str], str]]): If set, will be used to prefix the
            asset keys for materialized notebooks. A single string is treated as a one-element
            list.

    Returns:
        :py:class:`~dagster.SolidDefinition`
    """
    check.str_param(name, "name")
    check.str_param(notebook_path, "notebook_path")
    input_defs = check.opt_list_param(input_defs,
                                      "input_defs",
                                      of_type=InputDefinition)
    output_defs = check.opt_list_param(output_defs,
                                       "output_defs",
                                       of_type=OutputDefinition)
    # Validate, then copy: "file_manager" may be added below, and that must not
    # leak into the caller's own set (nor fail if the caller passed an
    # immutable set).
    required_resource_keys = set(
        check.opt_set_param(required_resource_keys,
                            "required_resource_keys",
                            of_type=str))
    if output_notebook is not None:
        required_resource_keys.add("file_manager")

    # Normalize a bare string prefix to a one-element list before validation.
    if isinstance(asset_key_prefix, str):
        asset_key_prefix = [asset_key_prefix]

    asset_key_prefix = check.opt_list_param(asset_key_prefix,
                                            "asset_key_prefix",
                                            of_type=str)

    return SolidDefinition(
        name=name,
        input_defs=input_defs,
        compute_fn=_dm_solid_compute(name,
                                     notebook_path,
                                     output_notebook,
                                     asset_key_prefix=asset_key_prefix),
        # The executed-notebook output is appended after the user's outputs.
        output_defs=output_defs +
        ([OutputDefinition(dagster_type=FileHandle, name=output_notebook)]
         if output_notebook else []),
        config_schema=config_schema,
        required_resource_keys=required_resource_keys,
        description="This solid is backed by the notebook at {path}".format(
            path=notebook_path),
        tags={
            "notebook_path": notebook_path,
            "kind": "ipynb"
        },
    )
Ejemplo n.º 30
0
    def get_context(self,
                    solid_config=None,
                    mode_def=None,
                    environment_dict=None):
        '''Get a dagstermill execution context for interactive exploration and development.

        Besides returning the context, this records ``in_pipeline`` (set to ``False``),
        ``solid_def``, ``pipeline_def`` and ``context`` on ``self``.

        Args:
            solid_config (Optional[Any]): If specified, this value will be made available on the
                context as its ``solid_config`` property.
            mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
                use to construct the context. Specify this if you would like a context constructed
                with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
                with a console logger will be constructed.
            environment_dict(Optional[dict]): The environment config dict with which to construct
                the context. The dict passed in is not modified; when no ``mode_def`` is given,
                a copy with its ``'loggers'`` entry replaced is used instead.

        Returns:
            :class:`dagstermill.DagstermillExecutionContext`
        '''
        check.opt_inst_param(mode_def, 'mode_def', ModeDefinition)
        environment_dict = check.opt_dict_param(environment_dict,
                                                'environment_dict',
                                                key_type=str)

        # Placeholder solid: no inputs, no outputs, and a compute function that does nothing.
        solid_def = SolidDefinition(
            name='this_solid',
            input_defs=[],
            compute_fn=lambda *args, **kwargs: None,
            output_defs=[],
            description=
            'Ephemeral solid constructed by dagstermill.get_context()',
        )

        if not mode_def:
            mode_def = ModeDefinition(
                logger_defs={'dagstermill': colored_console_logger})
            # Build a new dict instead of assigning into the caller's one, so the
            # default logger config does not leak back into the argument.
            environment_dict = dict(environment_dict,
                                    loggers={'dagstermill': {}})

        pipeline_def = PipelineDefinition(
            [solid_def],
            mode_defs=[mode_def],
            name='ephemeral_dagstermill_pipeline')

        run_id = str(uuid.uuid4())

        # construct stubbed PipelineRun for notebook exploration...
        # The actual pipeline run during pipeline execution will be serialized and reconstituted
        # in the `reconstitute_pipeline_context` call
        pipeline_run = PipelineRun(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode_def.name,
            reexecution_config=None,
            selector=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
            tags=None,
        )

        self.in_pipeline = False
        self.solid_def = solid_def
        self.pipeline_def = pipeline_def

        # The context is created inside the scoped block but returned only
        # after that block has exited.
        with scoped_pipeline_context(
                self.pipeline_def,
                environment_dict,
                pipeline_run,
                instance=DagsterInstance.ephemeral(),
                scoped_resources_builder_cm=self._setup_resources,
        ) as pipeline_context:
            self.context = DagstermillExecutionContext(pipeline_context,
                                                       solid_config)

        return self.context