def test_execute_dep_solid_different_input_name():
    """Run a three-solid chain whose downstream inputs have different names.

    ``pass_to_first`` feeds ``first_solid`` through its ``a_thing`` input, and
    ``first_solid`` feeds ``second_solid`` through its ``an_input`` input. Each
    transform concatenates its input with itself, so the configured value
    ``'bar'`` is expected to become ``'barbar'`` and then ``'barbarbarbar'``.
    """
    source_solid = define_pass_value_solid('pass_to_first')

    doubler_one = single_output_transform(
        'first_solid',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    doubler_two = single_output_transform(
        'second_solid',
        inputs=[InputDefinition(name='an_input')],
        transform_fn=lambda context, inputs: inputs['an_input'] + inputs['an_input'],
        output=dagster.OutputDefinition(),
    )

    # Each entry maps a solid name to {input name: upstream solid}.
    wiring = {
        'first_solid': {'a_thing': DependencyDefinition('pass_to_first')},
        'second_solid': {'an_input': DependencyDefinition('first_solid')},
    }
    pipeline = dagster.PipelineDefinition(
        solids=[source_solid, doubler_one, doubler_two],
        dependencies=wiring,
    )

    environment = {'solids': {'pass_to_first': {'config': {'value': 'bar'}}}}
    result = dagster.execute_pipeline(pipeline, environment=environment)

    assert result.success
    assert len(result.result_list) == 3

    expected_values = (
        ('pass_to_first', 'bar'),
        ('first_solid', 'barbar'),
        ('second_solid', 'barbarbarbar'),
    )
    for solid_name, expected in expected_values:
        assert result.result_for_solid(solid_name).transformed_value() == expected
Example #2
0
def define_truncate_pipeline():
    """Return the single-solid ``truncate_all_derived_tables`` pipeline.

    The only solid is the project solid of the same name, looked up through
    ``_get_project_solid``.
    """
    pipeline_name = 'truncate_all_derived_tables'
    summary = 'Truncates all tables that are populated by the pipeline. Preserves source tables'

    return dagster.PipelineDefinition(
        name=pipeline_name,
        description=summary,
        solids=[_get_project_solid('truncate_all_derived_tables')],
    )
Example #3
0
def test_execute_two_solids_with_same_input_name():
    """Run two independent solids that both name their input ``a_thing``.

    ``solid_one`` is fed by ``pass_to_one`` (configured with ``'foo'``) and
    ``solid_two`` by ``pass_to_two`` (configured with ``'bar'``). Each compute
    function concatenates its input with itself, so the expected results are
    ``'foofoo'`` and ``'barbar'``.
    """
    # The same InputDefinition instance is deliberately shared by both solids.
    shared_input = InputDefinition(name='a_thing')

    solid_one = single_output_transform(
        'solid_one',
        input_defs=[shared_input],
        compute_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output_def=dagster.OutputDefinition(),
    )
    solid_two = single_output_transform(
        'solid_two',
        input_defs=[shared_input],
        compute_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output_def=dagster.OutputDefinition(),
    )

    all_solids = [
        define_pass_value_solid('pass_to_one'),
        define_pass_value_solid('pass_to_two'),
        solid_one,
        solid_two,
    ]
    wiring = {
        'solid_one': {'a_thing': DependencyDefinition('pass_to_one')},
        'solid_two': {'a_thing': DependencyDefinition('pass_to_two')},
    }
    pipeline = dagster.PipelineDefinition(solid_defs=all_solids, dependencies=wiring)

    # Give each pass-through solid its own configured value.
    configured_values = (('pass_to_one', 'foo'), ('pass_to_two', 'bar'))
    environment = {
        'solids': {
            solid_name: {'config': {'value': value}}
            for solid_name, value in configured_values
        }
    }

    result = execute_pipeline(pipeline, environment_dict=environment)

    assert result.success

    for solid_name, expected in (('solid_one', 'foofoo'), ('solid_two', 'barbar')):
        assert result.result_for_solid(solid_name).result_value() == expected
Example #4
0
def define_failure_pipeline():
    """Build the ``pandas_hello_world_fails`` pipeline definition.

    ``sum_sq_solid`` receives its ``sum_df`` input from ``sum_solid``, and
    ``always_fails_solid`` receives its ``sum_sq_solid`` input from
    ``sum_sq_solid``.
    """
    members = [sum_solid, sum_sq_solid, always_fails_solid]
    wiring = {
        'sum_sq_solid': {'sum_df': DependencyDefinition(sum_solid.name)},
        'always_fails_solid': {'sum_sq_solid': DependencyDefinition(sum_sq_solid.name)},
    }
    return dagster.PipelineDefinition(
        name='pandas_hello_world_fails',
        solids=members,
        dependencies=wiring,
    )
Example #5
0
def define_setup_pipeline():
    """Build ``setup_pipeline``: create all tables, then populate the num table.

    ``populate_num_table`` declares one input named after the
    ``create_all_tables`` solid and depends on that solid through it.
    """
    create_tables = _get_project_solid('create_all_tables')
    populate_num = _get_project_solid(
        'populate_num_table',
        inputs=[InputDefinition(create_tables.name)],
    )

    # The input name and the upstream solid name are the same string.
    wiring = {
        populate_num.name: {
            create_tables.name: DependencyDefinition(create_tables.name),
        },
    }

    return dagster.PipelineDefinition(
        name='setup_pipeline',
        description='Creates all tables and then populates source table',
        solids=[create_tables, populate_num],
        dependencies=wiring,
    )
Example #6
0
def define_failure_pipeline():
    """Build the ``pandas_hello_world_fails`` pipeline with a CSV loader solid.

    Dependency chain: ``load_num_csv`` -> ``sum_solid`` (input ``num``) ->
    ``sum_sq_solid`` (input ``sum_df``) -> ``always_fails_solid`` (input
    ``sum_sq_solid``).
    """
    members = [
        dagster_pd.load_csv_solid('load_num_csv'),
        sum_solid,
        sum_sq_solid,
        always_fails_solid,
    ]
    wiring = {
        'sum_solid': {'num': DependencyDefinition('load_num_csv')},
        'sum_sq_solid': {'sum_df': DependencyDefinition(sum_solid.name)},
        'always_fails_solid': {'sum_sq_solid': DependencyDefinition(sum_sq_solid.name)},
    }
    return dagster.PipelineDefinition(
        name='pandas_hello_world_fails',
        solids=members,
        dependencies=wiring,
    )
Example #7
0
def define_rerun_pipeline():
    """Build ``rerun_pipeline``: fill the sum table, then the sum-sq table.

    ``insert_into_sum_table`` is created with ``inputs=None``;
    ``insert_into_sum_sq_table`` declares one input named after the sum-table
    solid and depends on that solid through it.
    """
    sum_table = _get_project_solid('insert_into_sum_table', inputs=None)
    sum_sq_table = _get_project_solid(
        'insert_into_sum_sq_table',
        inputs=[InputDefinition(sum_table.name)],
    )

    summary = 'Rerun the pipeline, populating the derived tables. Assumes pipeline is setup'
    wiring = {
        sum_sq_table.name: {
            sum_table.name: DependencyDefinition(sum_table.name),
        },
    }

    return dagster.PipelineDefinition(
        name='rerun_pipeline',
        description=summary,
        solids=[sum_table, sum_sq_table],
        dependencies=wiring,
    )
Example #8
0
def define_full_pipeline():
    """Build ``full_pipeline``: table setup followed by the derived-table inserts.

    Four project solids are chained linearly:
    ``create_all_tables`` -> ``populate_num_table`` ->
    ``insert_into_sum_table`` -> ``insert_into_sum_sq_table``.
    Each downstream solid declares a single input named after its upstream
    solid, and the dependency map wires that input to the upstream solid.

    Returns:
        The ``dagster.PipelineDefinition`` for the full pipeline.
    """
    create_all_tables_solids = _get_project_solid('create_all_tables')

    populate_num_table_solid = _get_project_solid(
        'populate_num_table',
        inputs=[InputDefinition(create_all_tables_solids.name)])

    insert_into_sum_table_solid = _get_project_solid(
        'insert_into_sum_table',
        inputs=[InputDefinition(populate_num_table_solid.name)])

    insert_into_sum_sq_table_solid = _get_project_solid(
        'insert_into_sum_sq_table',
        inputs=[InputDefinition(insert_into_sum_table_solid.name)])

    return dagster.PipelineDefinition(
        name='full_pipeline',
        description=
        'Runs entire pipeline, both setup and running the transform',
        solids=[
            create_all_tables_solids,
            populate_num_table_solid,
            insert_into_sum_table_solid,
            insert_into_sum_sq_table_solid,
        ],
        dependencies={
            populate_num_table_solid.name: {
                create_all_tables_solids.name:
                DependencyDefinition(create_all_tables_solids.name)
            },
            insert_into_sum_table_solid.name: {
                populate_num_table_solid.name:
                DependencyDefinition(populate_num_table_solid.name)
            },
            insert_into_sum_sq_table_solid.name: {
                # Pass the upstream solid's *name*, as every other edge does;
                # the original passed the solid object itself here.
                insert_into_sum_table_solid.name:
                DependencyDefinition(insert_into_sum_table_solid.name)
            },
        },
    )