예제 #1
0
def test_pipeline_types():
    """Check which type names a pipeline reports via ``has_type``/``type_named``.

    The pipeline is built from one solid with a config dictionary, one
    lambda solid, and one context definition with its own config dictionary.
    Types referenced by those definitions must be present; a few unrelated
    types must be absent.
    """
    context_config_type = types.ConfigDictionary(
        'ContextOneConfigDict', {'field_one': Field(types.String)}
    )
    solid_config_type = types.ConfigDictionary(
        'SolidOneConfigDict', {'another_field': Field(types.Int)}
    )

    @lambda_solid
    def produce_string():
        return 'foo'

    # Only the definition matters here; the body must never run.
    @solid(
        inputs=[InputDefinition('input_one', types.String)],
        outputs=[OutputDefinition(types.Any)],
        config_def=ConfigDefinition(solid_config_type),
    )
    def solid_one(_info, input_one):
        raise Exception('should not execute')

    dependencies = {
        'solid_one': {'input_one': DependencyDefinition('produce_string')},
    }
    context_definitions = {
        'context_one': PipelineContextDefinition(
            context_fn=lambda: None,
            config_def=ConfigDefinition(context_config_type),
        ),
    }
    pipeline_def = PipelineDefinition(
        solids=[produce_string, solid_one],
        dependencies=dependencies,
        context_definitions=context_definitions,
    )

    expected_present = (
        solid_config_type,
        context_config_type,
        types.String,
        types.Any,
        types.Int,
    )
    for expected in expected_present:
        type_name = expected.name
        assert pipeline_def.has_type(type_name)
        assert pipeline_def.type_named(type_name).name == type_name

    expected_absent = (
        types.Bool,
        types.Dict,
        types.PythonObjectType('Duisjdfke', dict),
    )
    for unexpected in expected_absent:
        assert not pipeline_def.has_type(unexpected.name)
예제 #2
0
def test_aliased_configs():
    """Run one solid definition under two aliases, each with its own config.

    ``load_constant`` is instantiated as ``load_a`` and ``load_b``; each alias
    must return the integer configured for it.
    """
    @solid(inputs=[], config_def=ConfigDefinition(types.Int))
    def load_constant(info):
        return info.config

    definition_name = load_constant.name
    aliased_instances = {
        SolidInstance(definition_name, 'load_a'): {},
        SolidInstance(definition_name, 'load_b'): {},
    }
    pipeline = PipelineDefinition(
        solids=[load_constant],
        dependencies=aliased_instances,
    )

    environment = config.Environment(
        solids={
            'load_a': config.Solid(2),
            'load_b': config.Solid(3),
        }
    )
    result = execute_pipeline(pipeline, environment)

    assert result.success
    load_a_result = result.result_for_solid('load_a')
    assert load_a_result.transformed_value() == 2
    load_b_result = result.result_for_solid('load_b')
    assert load_b_result.transformed_value() == 3
예제 #3
0
def define_pandas_source_test_solid():
    """Define the ``pandas_source_test`` dagstermill solid.

    The solid takes no inputs, has a single ``DataFrame`` output and a
    String config, and is backed by the notebook of the same name.
    """
    solid_name = 'pandas_source_test'
    notebook_path = nb_test_path(solid_name)
    return dm.define_dagstermill_solid(
        name=solid_name,
        notebook_path=notebook_path,
        inputs=[],
        outputs=[OutputDefinition(DataFrame)],
        config_def=ConfigDefinition(types.String),
    )
예제 #4
0
def _nested_optional_config_with_no_default():
    """Build a config whose ``nested`` dictionary holds an optional ``int_field``.

    The inner integer field is optional and carries no default value.
    """
    optional_int = Field(types.Int, is_optional=True)
    nested_type = types.ConfigDictionary({'int_field': optional_int})
    parent_type = types.ConfigDictionary({
        'nested': Field(dagster_type=nested_type),
    })
    return ConfigDefinition(config_type=parent_type)
예제 #5
0
def define_solid_with_stuff():
    """Define the ``stuff`` solid: Int input ``foo``, Int output ``bar``, Int config.

    The transform function fails via ``check.failed`` if it is ever invoked.
    """
    foo_input = InputDefinition('foo', types.Int)
    bar_output = OutputDefinition(name='bar', dagster_type=types.Int)
    return SolidDefinition(
        name='stuff',
        inputs=[foo_input],
        outputs=[bar_output],
        config_def=ConfigDefinition(types.Int),
        # Executing this solid is treated as an error.
        transform_fn=lambda *args, **kwargs: check.failed('do not execute'),
        metadata={'notebook_path': 'unused.ipynb'},
    )
def define_contextful_solids():
    """Return the ``injest_a``, ``injest_b``, ``add_ints`` and ``mult_ints`` solids.

    Every solid records the value it produces through
    ``info.context.resources.store.record_value`` before returning it.
    """

    def _int_outputs():
        # A fresh list per solid so no definition objects are shared.
        return [OutputDefinition(types.Int)]

    def _two_int_inputs():
        return [
            InputDefinition('num_one', types.Int),
            InputDefinition('num_two', types.Int),
        ]

    @solid(config_def=ConfigDefinition(types.Int), outputs=_int_outputs())
    def injest_a(info):
        # The configured integer is recorded under 'a' and then returned.
        value = info.config
        info.context.resources.store.record_value(info.context, 'a', value)
        return value

    @solid(config_def=ConfigDefinition(types.Int), outputs=_int_outputs())
    def injest_b(info):
        # The configured integer is recorded under 'b' and then returned.
        value = info.config
        info.context.resources.store.record_value(info.context, 'b', value)
        return value

    @solid(inputs=_two_int_inputs(), outputs=_int_outputs())
    def add_ints(info, num_one, num_two):
        total = num_one + num_two
        store = info.context.resources.store
        store.record_value(info.context, 'add', total)
        return total

    @solid(inputs=_two_int_inputs(), outputs=_int_outputs())
    def mult_ints(info, num_one, num_two):
        product = num_one * num_two
        store = info.context.resources.store
        store.record_value(info.context, 'mult', product)
        return product

    return [injest_a, injest_b, add_ints, mult_ints]
def test_double_type():
    """Two solids declaring distinct config types with one name must be rejected.

    Building the pipeline is expected to raise
    ``DagsterInvalidDefinitionError`` with the uniqueness message.
    """

    def _same_named_config():
        # Each call builds a new, separate type object named 'SomeTypeName'.
        return ConfigDefinition(
            types.ConfigDictionary(
                'SomeTypeName',
                {'some_field': Field(types.String)},
            )
        )

    @solid(config_def=_same_named_config())
    def solid_one(_info):
        raise Exception('should not execute')

    @solid(config_def=_same_named_config())
    def solid_two(_info):
        raise Exception('should not execute')

    expected_error = pytest.raises(
        DagsterInvalidDefinitionError,
        match='Type names must be unique.',
    )
    with expected_error:
        PipelineDefinition(solids=[solid_one, solid_two])
예제 #8
0
def to_parquet_solid(name):
    """Define a solid that writes its ``df`` input to a parquet file.

    Args:
        name: Name given to the solid definition.

    Returns:
        A ``SolidDefinition`` with one ``DataFrame`` input named ``df``, no
        outputs, and ``WriteDataFrameConfigDict`` config. The destination is
        read from the ``path`` config key.
    """

    def _write_parquet(info, inputs):
        frame = inputs['df']
        frame.to_parquet(info.config['path'])

    df_input = InputDefinition('df', DataFrame)
    return SolidDefinition(
        name=name,
        inputs=[df_input],
        outputs=[],
        config_def=ConfigDefinition(WriteDataFrameConfigDict),
        transform_fn=_write_parquet,
    )
예제 #9
0
def load_csv_solid(name):
    """Define a solid that loads a CSV file into a ``DataFrame`` output.

    Args:
        name: Name given to the solid definition; validated as a string.

    Returns:
        A ``SolidDefinition`` with no inputs, one ``DataFrame`` output and
        ``LoadDataFrameConfigDict`` config. The file location is read from
        the ``path`` config key.
    """
    check.str_param(name, 'name')

    def _read_csv(info, _inputs):
        csv_path = info.config['path']
        yield Result(pd.read_csv(csv_path))

    frame_output = OutputDefinition(DataFrame)
    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[frame_output],
        transform_fn=_read_csv,
        config_def=ConfigDefinition(LoadDataFrameConfigDict),
    )
예제 #10
0
def _single_nested_config():
    """Build a ``ParentType`` config containing one ``NestedType`` dictionary.

    The nested dictionary holds a single Int field named ``int_field``.
    """
    nested_type = types.ConfigDictionary(
        'NestedType',
        {'int_field': Field(types.Int)},
    )
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def define_contextless_solids():
    """Return the ``injest_a``, ``injest_b``, ``add_ints`` and ``mult_ints`` solids.

    The two ingest solids return their Int config unchanged; the other two
    add and multiply their two Int inputs.
    """

    def _two_int_inputs():
        # A fresh list per solid so no definition objects are shared.
        return [
            InputDefinition('num_one', types.Int),
            InputDefinition('num_two', types.Int),
        ]

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_a(info):
        configured_value = info.config
        return configured_value

    @solid(
        config_def=ConfigDefinition(types.Int),
        outputs=[OutputDefinition(types.Int)],
    )
    def injest_b(info):
        configured_value = info.config
        return configured_value

    @lambda_solid(inputs=_two_int_inputs(), output=OutputDefinition(types.Int))
    def add_ints(num_one, num_two):
        total = num_one + num_two
        return total

    @lambda_solid(inputs=_two_int_inputs(), output=OutputDefinition(types.Int))
    def mult_ints(num_one, num_two):
        product = num_one * num_two
        return product

    return [injest_a, injest_b, add_ints, mult_ints]
예제 #12
0
def define_pass_value_solid(name, description=None):
    """Define a solid that emits the ``value`` entry of its config.

    Args:
        name: Name given to the solid definition; validated as a string.
        description: Optional description; validated as a string when given.

    Returns:
        A ``SolidDefinition`` with no inputs, one String output and
        ``SingleValueDict`` config.
    """
    check.str_param(name, 'name')
    check.opt_str_param(description, 'description')

    def _emit_configured_value(info, _inputs):
        configured_value = info.config['value']
        yield Result(configured_value)

    string_output = OutputDefinition(types.String)
    return SolidDefinition(
        name=name,
        description=description,
        inputs=[],
        outputs=[string_output],
        config_def=ConfigDefinition(SingleValueDict),
        transform_fn=_emit_configured_value,
    )
예제 #13
0
def _nested_optional_config_with_no_default():
    """Build a ``ParentType`` config whose ``NestedType`` has an optional Int.

    The nested ``int_field`` is optional and carries no default value.
    """
    optional_int = Field(types.Int, is_optional=True)
    nested_field = Field(
        dagster_type=types.ConfigDictionary(
            'NestedType',
            {'int_field': optional_int},
        )
    )
    parent_type = types.ConfigDictionary('ParentType', {'nested': nested_field})
    return ConfigDefinition(config_type=parent_type)
예제 #14
0
def test_any_config_definition():
    """A solid with a default ``ConfigDefinition()`` receives its raw config value.

    The solid asserts on the value it sees and flags that it ran; the test
    then verifies the flag.
    """
    expected_config = 234
    invoked = {}

    @solid(config_def=ConfigDefinition())
    def hello_world(info):
        assert info.config == expected_config
        invoked['yup'] = True

    test_context = create_test_context()
    environment = config.Environment(
        solids={'hello_world': config.Solid(expected_config)},
    )
    execute_single_solid(test_context, hello_world, environment=environment)

    # A KeyError here means the solid body never executed.
    assert invoked['yup']
예제 #15
0
def define_create_table_solid(name):
    """Define a solid that materializes a SQL expression as a new table.

    Args:
        name: Name given to the solid definition.

    Returns:
        A ``SolidDefinition`` with one input named ``expr``, no outputs and
        ``CreateTableConfigDict`` config. When executed it runs
        ``CREATE TABLE <table_name> AS <query_text>``, where ``table_name``
        comes from the solid config and ``query_text`` from the input.
    """

    def _materialization_fn(info, inputs):
        sql_expr = inputs['expr']
        check.inst(sql_expr, DagsterSqlExpression)
        output_table_name = check.str_elem(info.config, 'table_name')
        # NOTE(review): the table name is interpolated directly into the SQL
        # text (identifiers cannot be bound as parameters), so 'table_name'
        # must only ever come from trusted configuration.
        total_sql = '''CREATE TABLE {output_table_name} AS {query_text}'''.format(
            output_table_name=output_table_name, query_text=sql_expr.query_text
        )
        # Release the connection as soon as the statement has run instead of
        # leaving it open until it happens to be garbage collected.
        # Presumably `sa.engine` is a SQLAlchemy engine, whose connections
        # support the context-manager protocol -- confirm against the resource.
        with info.context.resources.sa.engine.connect() as connection:
            connection.execute(total_sql)

    return SolidDefinition(
        name=name,
        inputs=[InputDefinition('expr')],
        outputs=[],
        transform_fn=_materialization_fn,
        config_def=ConfigDefinition(CreateTableConfigDict),
    )
예제 #16
0
def _nested_optional_config_with_default():
    """Build a ``ParentType`` config whose ``NestedType`` has a defaulted Int.

    The nested ``int_field`` is optional and defaults to 3.
    """
    defaulted_int = Field(
        types.Int,
        is_optional=True,
        default_value=3,
    )
    nested_type = types.ConfigDictionary('NestedType', {'int_field': defaulted_int})
    parent_type = types.ConfigDictionary(
        'ParentType',
        {'nested': Field(dagster_type=nested_type)},
    )
    return ConfigDefinition(config_type=parent_type)
def test_hello_world_config():
    """Execute the ``hello_world_with_config`` notebook solid with a path config.

    The solid is configured with the script-relative path of ``num.csv`` and
    is expected to produce the value 100.
    """
    solid_name = 'with_config'
    with_config_solid = dm.define_dagstermill_solid(
        solid_name,
        nb_test_path('hello_world_with_config'),
        [],
        [OutputDefinition()],
        config_def=ConfigDefinition(types.String),
    )

    pipeline = PipelineDefinition(solids=[with_config_solid])
    csv_path = script_relative_path('num.csv')
    environment = config.Environment(solids={solid_name: config.Solid(csv_path)})
    pipeline_result = execute_pipeline(pipeline, environment)

    assert pipeline_result.success
    solid_result = pipeline_result.result_for_solid(solid_name)
    assert solid_result.transformed_value() == 100
def define_part_nine_final():
    """Define the ``part_nine_final`` pipeline with ``local`` and ``cloud`` contexts.

    ``add_ints`` and ``mult_ints`` both take ``injest_a`` as ``num_one`` and
    ``injest_b`` as ``num_two``. The ``local`` context logs at DEBUG and uses
    an ``InMemoryStore``; the ``cloud`` context builds a ``PublicCloudStore``
    from the ``credentials`` entry of its config.
    """
    solids = define_contextful_solids()

    def _both_ingest_dependencies():
        # Built anew for each consumer so no dependency objects are shared.
        return {
            'num_one': DependencyDefinition('injest_a'),
            'num_two': DependencyDefinition('injest_b'),
        }

    dependencies = {
        'add_ints': _both_ingest_dependencies(),
        'mult_ints': _both_ingest_dependencies(),
    }

    def _local_context(*_args):
        return ExecutionContext.console_logging(
            log_level=DEBUG,
            resources=PartNineResources(InMemoryStore()),
        )

    def _cloud_context(info):
        cloud_store = PublicCloudStore(info.config['credentials'])
        return ExecutionContext.console_logging(
            resources=PartNineResources(cloud_store),
        )

    credentials_type = types.ConfigDictionary(
        name='CredentialsConfigDict',
        fields={
            'user': Field(types.String),
            'pass': Field(types.String),
        },
    )
    cloud_config_type = types.ConfigDictionary(
        name='CloudConfigDict',
        fields={'credentials': Field(credentials_type)},
    )

    return PipelineDefinition(
        name='part_nine_final',
        solids=solids,
        dependencies=dependencies,
        context_definitions={
            'local': PipelineContextDefinition(context_fn=_local_context),
            'cloud': PipelineContextDefinition(
                context_fn=_cloud_context,
                config_def=ConfigDefinition(config_type=cloud_config_type),
            ),
        },
    )
from dagster import (
    ConfigDefinition,
    DependencyDefinition,
    InputDefinition,
    OutputDefinition,
    PipelineDefinition,
    SolidInstance,
    config,
    execute_pipeline,
    lambda_solid,
    solid,
    types,
)


# Solid with Int config and a single Int output; it emits its config value.
@solid(
    config_def=ConfigDefinition(types.Int),
    outputs=[OutputDefinition(types.Int)],
)
def load_a(info):
    configured_value = info.config
    return configured_value


# Solid with Int config and a single Int output; it emits its config value.
@solid(
    config_def=ConfigDefinition(types.Int),
    outputs=[OutputDefinition(types.Int)],
)
def load_b(info):
    configured_value = info.config
    return configured_value


@lambda_solid(
    inputs=[
        InputDefinition('a', types.Int),
        InputDefinition('b', types.Int),
    ],
    output=OutputDefinition(types.Int),
예제 #20
0
def test_noop_config():
    """A ``ConfigDefinition`` built on ``types.Any`` constructs and is truthy."""
    any_config = ConfigDefinition(types.Any)
    assert any_config
    ConfigDefinition,
    DependencyDefinition,
    InputDefinition,
    OutputDefinition,
    PipelineDefinition,
    SolidInstance,
    define_stub_solid,
    execute_pipeline,
    lambda_solid,
    solid,
    types,
)


# Solid with Int config and a single Int output; it emits its config value.
@solid(config_def=ConfigDefinition(types.Int), outputs=[OutputDefinition(types.Int)])
def load_number(info):
    configured_number = info.config
    return configured_number


# Lambda solid that sums its two Int inputs into one Int output.
@lambda_solid(
    inputs=[InputDefinition('num1', types.Int), InputDefinition('num2', types.Int)],
    output=OutputDefinition(types.Int),
)
def adder(num1, num2):
    total = num1 + num2
    return total