Ejemplo n.º 1
0
def test_execute_dep_solid_different_input_name():
    '''Run a three-solid chain whose input names differ from the upstream solid names.

    'pass_to_first' feeds input 'a_thing' of 'first_solid', whose output
    feeds input 'an_input' of 'second_solid'. Each downstream transform adds
    its input to itself, so the configured value 'bar' is expected to become
    'barbar' and then 'barbarbarbar'.
    '''

    def double_a_thing(context, inputs):
        return inputs['a_thing'] + inputs['a_thing']

    def double_an_input(context, inputs):
        return inputs['an_input'] + inputs['an_input']

    root_solid = define_pass_value_solid('pass_to_first')

    doubling_solid = single_output_transform(
        'first_solid',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=double_a_thing,
        output=dagster.OutputDefinition(),
    )

    quadrupling_solid = single_output_transform(
        'second_solid',
        inputs=[InputDefinition(name='an_input')],
        transform_fn=double_an_input,
        output=dagster.OutputDefinition(),
    )

    # Keys are the downstream solid names; inner keys are that solid's input names.
    dependency_map = {
        'first_solid': {'a_thing': DependencyDefinition('pass_to_first')},
        'second_solid': {'an_input': DependencyDefinition('first_solid')},
    }
    pipeline_def = dagster.PipelineDefinition(
        solids=[root_solid, doubling_solid, quadrupling_solid],
        dependencies=dependency_map,
    )

    # Only the root solid is configured here.
    environment = {'solids': {'pass_to_first': {'config': {'value': 'bar'}}}}
    run_result = dagster.execute_pipeline(pipeline_def, environment=environment)

    assert run_result.success
    assert len(run_result.result_list) == 3

    expected_by_solid = (
        ('pass_to_first', 'bar'),
        ('first_solid', 'barbar'),
        ('second_solid', 'barbarbarbar'),
    )
    for solid_name, expected_value in expected_by_solid:
        assert run_result.result_for_solid(solid_name).transformed_value() == expected_value
def test_execute_solid_with_dep_only_inputs_with_api():
    '''Run two solids linked only by a dependency and check that both executed.

    Each transform records a flag in a shared dict through ``_set_key_value``.
    The input and dependency names are read from ``step_one_solid.name``
    instead of being written out as string literals.
    '''
    run_flags = {}

    def mark_step_one(context, args):
        return _set_key_value(run_flags, 'step_one', True)

    def mark_step_two(context, args):
        return _set_key_value(run_flags, 'step_two', True)

    step_one_solid = single_output_transform(
        name='step_one_solid',
        inputs=[],
        transform_fn=mark_step_one,
        output=OutputDefinition(),
    )

    upstream_name = step_one_solid.name

    step_two_solid = single_output_transform(
        name='step_two_solid',
        inputs=[InputDefinition(upstream_name)],
        transform_fn=mark_step_two,
        output=OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solids=[step_one_solid, step_two_solid],
        dependencies={
            'step_two_solid': {upstream_name: DependencyDefinition(upstream_name)},
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)

    for solid_result in pipeline_result.solid_result_list:
        assert solid_result.success

    assert run_flags['step_one'] is True
    assert run_flags['step_two'] is True
Ejemplo n.º 3
0
def test_execute_two_solids_with_same_input_name():
    '''Run two solids that share one input definition named 'a_thing'.

    Each solid is fed by its own pass-value solid ('foo' and 'bar') and adds
    its input to itself, so the expected results are 'foofoo' and 'barbar'.
    '''

    def double_a_thing(context, inputs):
        return inputs['a_thing'] + inputs['a_thing']

    # The same InputDefinition instance is handed to both solids.
    shared_input = InputDefinition(name='a_thing')

    solid_one = single_output_transform(
        'solid_one',
        input_defs=[shared_input],
        compute_fn=double_a_thing,
        output_def=dagster.OutputDefinition(),
    )

    solid_two = single_output_transform(
        'solid_two',
        input_defs=[shared_input],
        compute_fn=double_a_thing,
        output_def=dagster.OutputDefinition(),
    )

    all_solids = [
        define_pass_value_solid('pass_to_one'),
        define_pass_value_solid('pass_to_two'),
        solid_one,
        solid_two,
    ]
    dependency_map = {
        'solid_one': {'a_thing': DependencyDefinition('pass_to_one')},
        'solid_two': {'a_thing': DependencyDefinition('pass_to_two')},
    }
    pipeline_def = dagster.PipelineDefinition(
        solid_defs=all_solids,
        dependencies=dependency_map,
    )

    solids_config = {
        'pass_to_one': {'config': {'value': 'foo'}},
        'pass_to_two': {'config': {'value': 'bar'}},
    }
    run_result = execute_pipeline(
        pipeline_def,
        environment_dict={'solids': solids_config},
    )

    assert run_result.success

    assert run_result.result_for_solid('solid_one').result_value() == 'foofoo'
    assert run_result.result_for_solid('solid_two').result_value() == 'barbar'
Ejemplo n.º 4
0
def test_failure_midstream():
    '''Check that a failure in the middle of a pipeline skips the solid after it.

    A
     \\
       C (fails) = D (skipped)
     //
    B

    The pipeline is executed with ``RunConfig.nonthrowing_in_process()`` and
    the per-solid results are asserted afterwards.
    '''

    def failing_compute(_context, inputs):
        check.failed('user error')
        return [inputs['A'], inputs['B'], {'C': 'transform_called'}]

    def passing_compute(_context, inputs):
        return [inputs['C'], {'D': 'transform_called'}]

    solid_a = create_root_success_solid('A')
    solid_b = create_root_success_solid('B')

    solid_c = single_output_transform(
        name='C',
        inputs=[InputDefinition(name='A'), InputDefinition(name='B')],
        compute_fn=failing_compute,
        output=OutputDefinition(),
    )

    solid_d = single_output_transform(
        name='D',
        inputs=[InputDefinition(name='C')],
        compute_fn=passing_compute,
        output=OutputDefinition(),
    )

    dependency_map = {
        'C': {
            'A': DependencyDefinition(solid_a.name),
            'B': DependencyDefinition(solid_b.name),
        },
        'D': {'C': DependencyDefinition(solid_c.name)},
    }
    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d],
        dependencies=dependency_map,
    )

    run_config = RunConfig.nonthrowing_in_process()
    pipeline_result = execute_pipeline(pipeline_def, run_config=run_config)

    # The two roots succeed.
    for root_name in ('A', 'B'):
        assert pipeline_result.result_for_solid(root_name).success

    # C fails with the check error raised in its compute function.
    assert not pipeline_result.result_for_solid('C').success
    c_error = pipeline_result.result_for_solid('C').failure_data.error
    assert c_error.cls_name == 'CheckError'

    # D is reported as both unsuccessful and skipped.
    assert not pipeline_result.result_for_solid('D').success
    assert pipeline_result.result_for_solid('D').skipped
Ejemplo n.º 5
0
def test_execute_solid_with_input_same_name():
    '''Run a solid whose only input has the same name as the solid itself.

    The solid 'a_thing' takes input 'a_thing' from 'pass_value' (configured
    with 'foo') and adds it to itself, so the expected result is 'foofoo'.
    '''

    def double_a_thing(context, inputs):
        return inputs['a_thing'] + inputs['a_thing']

    a_thing_solid = single_output_transform(
        'a_thing',
        input_defs=[InputDefinition(name='a_thing')],
        compute_fn=double_a_thing,
        output_def=dagster.OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solid_defs=[define_pass_value_solid('pass_value'), a_thing_solid],
        dependencies={'a_thing': {'a_thing': DependencyDefinition('pass_value')}},
    )

    environment_dict = {'solids': {'pass_value': {'config': {'value': 'foo'}}}}
    run_result = execute_pipeline(pipeline_def, environment_dict=environment_dict)

    assert run_result.result_for_solid('a_thing').result_value() == 'foofoo'
Ejemplo n.º 6
0
def _dataframe_solid(name, input_defs, compute_fn):
    '''Build a single-output solid whose output definition is typed as ``DataFrame``.

    ``name``, ``input_defs`` and ``compute_fn`` are forwarded unchanged to
    ``single_output_transform``.
    '''
    dataframe_output = OutputDefinition(DataFrame)
    return single_output_transform(
        name=name, input_defs=input_defs, compute_fn=compute_fn, output_def=dataframe_output
    )
Ejemplo n.º 7
0
def _dataframe_solid(name, inputs, transform_fn):
    '''Build a single-output solid whose output is typed as ``dagster_pd.DataFrame``.

    ``name``, ``inputs`` and ``transform_fn`` are forwarded unchanged to
    ``single_output_transform``.
    '''
    dataframe_output = OutputDefinition(dagster_pd.DataFrame)
    return single_output_transform(
        name=name, inputs=inputs, transform_fn=transform_fn, output=dataframe_output
    )
Ejemplo n.º 8
0
def test_failure_midstream():
    '''Check that a failing solid fed by two successful roots reports a step error.

    Solids A and B are root success solids; C depends on both and its
    transform calls ``check.failed``. The pipeline is executed with
    ``throw_on_user_error=False`` and the results are asserted afterwards.
    '''

    def failing_transform(_context, inputs):
        check.failed('user error')
        return [inputs['A'], inputs['B'], {'C': 'transform_called'}]

    solid_a = create_root_success_solid('A')
    solid_b = create_root_success_solid('B')

    solid_c = single_output_transform(
        name='C',
        inputs=[InputDefinition(name='A'), InputDefinition(name='B')],
        transform_fn=failing_transform,
        output=OutputDefinition(),
    )

    c_dependencies = {
        'A': DependencyDefinition(solid_a.name),
        'B': DependencyDefinition(solid_b.name),
    }
    pipeline_def = silencing_pipeline(
        solids=[solid_a, solid_b, solid_c],
        dependencies={'C': c_dependencies},
    )
    pipeline_result = execute_pipeline(pipeline_def, throw_on_user_error=False)

    for root_name in ('A', 'B'):
        assert pipeline_result.result_for_solid(root_name).success

    assert not pipeline_result.result_for_solid('C').success
    c_error = pipeline_result.result_for_solid('C').dagster_error
    assert isinstance(c_error, DagsterExecutionStepExecutionError)
Ejemplo n.º 9
0
def create_root_transform_failure_solid(name):
    '''Build an input-less solid whose transform always raises.

    Args:
        name: Name given to the created solid.

    Returns:
        The solid produced by ``single_output_transform``; running its
        transform raises ``Exception('Transform failed')``.
    '''

    # Accept positional as well as keyword arguments so that the intended
    # 'Transform failed' exception is raised however the framework invokes
    # the transform. With only **_kwargs, a positional (context, inputs)
    # call would raise a TypeError about the signature instead.
    def failed_transform(*_args, **_kwargs):
        raise Exception('Transform failed')

    return single_output_transform(name=name,
                                   inputs=[],
                                   transform_fn=failed_transform,
                                   output=OutputDefinition())
def create_sql_solid(name, inputs, sql_text):
    '''Build a single-output solid whose transform is created from ``sql_text``.

    The arguments are validated first: ``name`` and ``sql_text`` as strings,
    ``inputs`` as a list of ``InputDefinition``. The transform comes from
    ``create_sql_transform(sql_text)``.
    '''
    check.str_param(name, 'name')
    check.list_param(inputs, 'inputs', of_type=InputDefinition)
    check.str_param(sql_text, 'sql_text')

    sql_transform = create_sql_transform(sql_text)
    return single_output_transform(
        name,
        inputs=inputs,
        transform_fn=sql_transform,
        output=OutputDefinition(),
    )
Ejemplo n.º 11
0
def create_solid_with_deps(name, *solid_deps):
    '''Build a solid with one input per upstream solid, each named after that solid.

    The transform is obtained from ``make_transform(name)``.
    '''
    upstream_inputs = []
    for upstream_solid in solid_deps:
        upstream_inputs.append(InputDefinition(upstream_solid.name))

    return single_output_transform(
        name=name,
        inputs=upstream_inputs,
        transform_fn=make_transform(name),
        output=OutputDefinition(),
    )
Ejemplo n.º 12
0
def test_single_transform_returning_result():
    '''Expect an invariant violation when a single-output compute returns ``Output``.

    The compute function returns ``Output(None)``; executing the solid in
    isolation must raise ``DagsterInvariantViolationError``.
    '''

    def return_output_object(*_args, **_kwargs):
        return Output(None)

    offending_solid = single_output_transform(
        'test_return_result',
        input_defs=[],
        compute_fn=return_output_object,
        output_def=OutputDefinition(),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_isolated_solid(offending_solid)
Ejemplo n.º 13
0
def create_root_solid(name):
    '''Build a solid with a single input named ``name + '_input'``.

    The transform is obtained from ``make_transform(name)``.
    '''
    root_input = InputDefinition(name + '_input')
    transform = make_transform(name)

    return single_output_transform(
        name=name, inputs=[root_input], transform_fn=transform, output=OutputDefinition()
    )
Ejemplo n.º 14
0
def test_single_transform_returning_result():
    '''Expect an invariant violation when a single-output transform returns ``Result``.

    The transform returns ``Result(None)``; executing the solid in isolation
    must raise ``DagsterInvariantViolationError``.
    '''

    def return_result_object(*_args, **_kwargs):
        return Result(None)

    offending_solid = single_output_transform(
        'test_return_result',
        inputs=[],
        transform_fn=return_result_object,
        output=OutputDefinition(),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_single_solid_in_isolation(ExecutionContext(), offending_solid)
Ejemplo n.º 15
0
def create_root_success_solid(name):
    '''Build an input-less solid whose transform returns one marker row.

    The transform returns ``[{name: 'transform_called'}]``.
    '''

    def root_transform(_context, _args):
        return [{name: 'transform_called'}]

    return single_output_transform(
        name=name,
        inputs=[],
        transform_fn=root_transform,
        output=OutputDefinition(),
    )
Ejemplo n.º 16
0
def test_execute_solid_with_dep_only_inputs_no_api():
    '''Run two solids linked only by a dependency, using literal solid names.

    Each compute function records a flag in a shared dict through
    ``_set_key_value``. The test asserts that the pipeline and every solid
    succeeded and that both flags were set.
    '''
    run_flags = {}

    def mark_step_one(context, args):
        return _set_key_value(run_flags, 'step_one', True)

    def mark_step_two(context, args):
        return _set_key_value(run_flags, 'step_two', True)

    step_one_solid = single_output_transform(
        name='step_one_solid',
        inputs=[],
        compute_fn=mark_step_one,
        output=OutputDefinition(),
    )

    step_two_solid = single_output_transform(
        name='step_two_solid',
        inputs=[InputDefinition('step_one_solid')],
        compute_fn=mark_step_two,
        output=OutputDefinition(),
    )

    dependency_map = {
        'step_two_solid': {'step_one_solid': DependencyDefinition('step_one_solid')},
    }
    pipeline_def = PipelineDefinition(
        solid_defs=[step_one_solid, step_two_solid],
        dependencies=dependency_map,
    )

    pipeline_result = execute_pipeline(pipeline_def)

    assert pipeline_result.success

    for solid_result in pipeline_result.solid_result_list:
        assert solid_result.success

    assert run_flags['step_one'] is True
    assert run_flags['step_two'] is True
def create_sql_statement_solid(name, sql_text, inputs=None):
    '''Build a single-output solid whose transform is created from ``sql_text``.

    ``name`` and ``sql_text`` are validated as strings and ``inputs`` as an
    optional list of ``InputDefinition``. A missing ``inputs`` is replaced
    with an empty list. The transform comes from
    ``_create_sql_alchemy_transform_fn(sql_text)``.
    '''
    check.str_param(name, 'name')
    check.str_param(sql_text, 'sql_text')
    check.opt_list_param(inputs, 'inputs', of_type=InputDefinition)

    solid_inputs = [] if inputs is None else inputs
    sql_transform = _create_sql_alchemy_transform_fn(sql_text)

    return single_output_transform(
        name=name,
        transform_fn=sql_transform,
        inputs=solid_inputs,
        output=OutputDefinition(),
    )
Ejemplo n.º 18
0
def create_definition_based_solid():
    '''Build the 'hello_world' solid that adds a 'sum' column to its input frame.

    The solid takes one ``dagster_pd.DataFrame`` input named 'num_csv' and
    declares a ``dagster_pd.DataFrame`` output.
    '''

    def add_sum_column(_context, inputs):
        frame = inputs['num_csv']
        # The column is added to the incoming frame itself, which is then returned.
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    num_csv_input = InputDefinition('num_csv', dagster_pd.DataFrame)
    dataframe_output = OutputDefinition(dagster_pd.DataFrame)

    # supports CSV and PARQUET by default
    return single_output_transform(
        name='hello_world',
        inputs=[num_csv_input],
        transform_fn=add_sum_column,
        output=dataframe_output,
    )
def test_hello_world_pipeline_no_api():
    '''Read num.csv, add a 'sum' column in 'hello_world', and check the result.

    'read_csv_solid' is configured with the path of num.csv next to this
    script. The transformed frame is compared as a dict of column lists.
    '''

    def add_sum_column(_context, inputs):
        frame = inputs['num_df']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    csv_reader = define_read_csv_solid('read_csv_solid')

    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=add_sum_column,
        output=OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solids=[csv_reader, hello_world],
        dependencies={
            'hello_world': {'num_df': DependencyDefinition('read_csv_solid')},
        },
    )

    reader_config = config.Solid({'path': script_relative_path('num.csv')})
    environment = config.Environment(solids={'read_csv_solid': reader_config})

    # The environment is passed positionally, as the second argument.
    pipeline_result = execute_pipeline(pipeline_def, environment)

    assert pipeline_result.success

    hello_world_result = pipeline_result.result_for_solid('hello_world')

    expected_columns = {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
    assert hello_world_result.transformed_value().to_dict('list') == expected_columns
def create_hello_world_solid_composed_pipeline():
    '''Build a two-solid pipeline: a CSV reader feeding the 'hello_world' solid.

    'hello_world' adds a 'sum' column (num1 + num2) to its 'num_df' input,
    which is wired to the 'read_hello_world' solid.
    '''

    def add_sum_column(_context, inputs):
        frame = inputs['num_df']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=add_sum_column,
        output=OutputDefinition(),
    )

    csv_reader = define_read_csv_solid('read_hello_world')
    dependency_map = {
        'hello_world': {'num_df': DependencyDefinition('read_hello_world')},
    }

    return PipelineDefinition(solids=[csv_reader, hello_world], dependencies=dependency_map)
Ejemplo n.º 21
0
def test_basic_pandas_solid():
    '''Load a CSV through 'load_csv', add a 'sum' column in 'sum_table', check it.

    The environment comes from ``get_num_csv_environment`` wrapped around
    ``get_load_only_solids_config('load_csv')``.
    '''

    def add_sum_column(_context, inputs):
        frame = inputs['num_csv']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    num_csv_input = InputDefinition('num_csv', dagster_pd.DataFrame)

    sum_solid = single_output_transform(
        name='sum_table',
        inputs=[num_csv_input],
        transform_fn=add_sum_column,
        output=OutputDefinition(),
    )

    loader_solid = dagster_pd.load_csv_solid('load_csv')
    pipeline_def = PipelineDefinition(
        solids=[loader_solid, sum_solid],
        dependencies={
            sum_solid.name: {'num_csv': DependencyDefinition('load_csv')},
        },
    )

    environment = get_num_csv_environment(get_load_only_solids_config('load_csv'))
    pipeline_result = execute_pipeline(pipeline_def, environment=environment)

    assert pipeline_result.success

    expected_columns = {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
    sum_table_result = pipeline_result.result_for_solid('sum_table')
    assert sum_table_result.transformed_value().to_dict('list') == expected_columns
Ejemplo n.º 22
0
def test_pandas_csv_to_csv():
    '''Run the 'sum_table' solid through temp CSV files and check the output frame.

    The transform also verifies that the context it receives is an
    ``ExecutionContext``.
    '''

    # The context argument is type-checked here to exercise that it is passed in.
    def add_sum_column(context, inputs):
        check.inst_param(context, 'context', ExecutionContext)
        frame = inputs['num_csv']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    num_csv_input = InputDefinition('num_csv', dagster_pd.DataFrame)

    sum_solid = single_output_transform(
        name='sum_table',
        inputs=[num_csv_input],
        transform_fn=add_sum_column,
        output=OutputDefinition(dagster_pd.DataFrame),
    )

    result_frame = execute_transform_in_temp_csv_files(sum_solid)

    expected_columns = {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
    assert result_frame.to_dict('list') == expected_columns
def test_pandas_hello_no_library():
    '''Chain read -> sum -> sum-of-squares, then repeat with a CSV-writing solid.

    The first pipeline is checked through the in-memory result of
    'solid_two'. The second pipeline adds 'write_two', configured to write
    to /tmp/sum_sq.csv; the file is removed beforehand if present and read
    back with pandas afterwards.
    '''
    import os

    def add_sum_column(_context, inputs):
        frame = inputs['num_df']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    def add_sum_sq_column(_context, inputs):
        frame = inputs['sum_df']
        frame['sum_sq'] = frame['sum'] * frame['sum']
        return frame

    solid_one = single_output_transform(
        name='solid_one',
        inputs=[InputDefinition(name='num_df')],
        transform_fn=add_sum_column,
        output=OutputDefinition(),
    )

    solid_two = single_output_transform(
        name='solid_two',
        inputs=[InputDefinition(name='sum_df')],
        transform_fn=add_sum_sq_column,
        output=OutputDefinition(),
    )

    # --- First pipeline: check the in-memory result of solid_two. ---
    pipeline_def = PipelineDefinition(
        solids=[define_read_csv_solid('read_one'), solid_one, solid_two],
        dependencies={
            'solid_one': {'num_df': DependencyDefinition('read_one')},
            'solid_two': {'sum_df': DependencyDefinition('solid_one')},
        },
    )

    environment = config.Environment(
        solids={
            'read_one': config.Solid({'path': script_relative_path('num.csv')}),
        }
    )

    first_run = execute_pipeline(pipeline_def, environment=environment)

    solid_two_frame = first_run.result_for_solid('solid_two').transformed_value()
    assert solid_two_frame.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }

    # --- Second pipeline: same chain plus a solid that writes the CSV. ---
    sum_sq_out_path = '/tmp/sum_sq.csv'

    # Remove any file left at the output path before the pipeline runs.
    if os.path.exists(sum_sq_out_path):
        os.remove(sum_sq_out_path)

    environment_two = config.Environment(
        solids={
            'read_one': config.Solid({'path': script_relative_path('num.csv')}),
            'write_two': config.Solid({'path': sum_sq_out_path}),
        },
    )

    pipeline_two = PipelineDefinition(
        solids=[
            define_read_csv_solid('read_one'),
            solid_one,
            solid_two,
            define_to_csv_solid('write_two'),
        ],
        dependencies={
            'solid_one': {'num_df': DependencyDefinition('read_one')},
            'solid_two': {'sum_df': DependencyDefinition('solid_one')},
            'write_two': {'df': DependencyDefinition('solid_two')},
        },
    )

    execute_pipeline(pipeline_two, environment=environment_two)

    written_frame = pd.read_csv(sum_sq_out_path)

    assert written_frame.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }
Ejemplo n.º 24
0
def test_failure_propagation():
    '''Check that a failure in one branch skips the solids downstream of it.

      B =========== C
     //             \\
    A                F (skipped)
     \\             //
      D (fails) == E (skipped)

    The pipeline is executed with ``RunConfig.nonthrowing_in_process()`` and
    the per-solid results are asserted afterwards.
    '''

    def failing_compute(_context, inputs):
        check.failed('user error')
        return inputs

    def passthrough_compute(_context, inputs):
        return inputs

    def build_solid(solid_name, upstream_names, compute_fn):
        # One input per upstream solid, each named after that solid.
        return single_output_transform(
            name=solid_name,
            inputs=[InputDefinition(name=upstream) for upstream in upstream_names],
            compute_fn=compute_fn,
            output=OutputDefinition(),
        )

    solid_a = create_root_success_solid('A')
    solid_b = build_solid('B', ['A'], passthrough_compute)
    solid_c = build_solid('C', ['B'], passthrough_compute)
    solid_d = build_solid('D', ['A'], failing_compute)
    solid_e = build_solid('E', ['D'], passthrough_compute)
    solid_f = build_solid('F', ['C', 'E'], passthrough_compute)

    dependency_map = {
        'B': {'A': DependencyDefinition(solid_a.name)},
        'D': {'A': DependencyDefinition(solid_a.name)},
        'C': {'B': DependencyDefinition(solid_b.name)},
        'E': {'D': DependencyDefinition(solid_d.name)},
        'F': {
            'C': DependencyDefinition(solid_c.name),
            'E': DependencyDefinition(solid_e.name),
        },
    }
    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d, solid_e, solid_f],
        dependencies=dependency_map,
    )

    run_config = RunConfig.nonthrowing_in_process()
    pipeline_result = execute_pipeline(pipeline_def, run_config=run_config)

    # The root and the upper branch succeed.
    for succeeded_name in ('A', 'B', 'C'):
        assert pipeline_result.result_for_solid(succeeded_name).success

    # D fails with the check error raised in its compute function.
    assert not pipeline_result.result_for_solid('D').success
    d_error = pipeline_result.result_for_solid('D').failure_data.error
    assert d_error.cls_name == 'CheckError'

    # E and F are reported as both unsuccessful and skipped.
    for skipped_name in ('E', 'F'):
        assert not pipeline_result.result_for_solid(skipped_name).success
        assert pipeline_result.result_for_solid(skipped_name).skipped