def test_execute_dep_solid_different_input_name():
    """Execute a three-solid chain whose downstream inputs are named differently.

    'pass_to_first' feeds 'first_solid' through the input 'a_thing', and
    'first_solid' feeds 'second_solid' through the input 'an_input'. Both
    downstream transforms add their input to itself, so the configured
    value 'bar' becomes 'barbar' and then 'barbarbarbar'.
    """

    def _add_to_itself(input_name):
        # Build a fresh transform that adds the named input to itself.
        return lambda context, inputs: inputs[input_name] + inputs[input_name]

    source_solid = define_pass_value_solid('pass_to_first')
    doubling_solid = single_output_transform(
        'first_solid',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=_add_to_itself('a_thing'),
        output=dagster.OutputDefinition(),
    )
    redoubling_solid = single_output_transform(
        'second_solid',
        inputs=[InputDefinition(name='an_input')],
        transform_fn=_add_to_itself('an_input'),
        output=dagster.OutputDefinition(),
    )

    dependency_map = {
        'first_solid': {'a_thing': DependencyDefinition('pass_to_first')},
        'second_solid': {'an_input': DependencyDefinition('first_solid')},
    }
    pipeline = dagster.PipelineDefinition(
        solids=[source_solid, doubling_solid, redoubling_solid],
        dependencies=dependency_map,
    )

    environment = {'solids': {'pass_to_first': {'config': {'value': 'bar'}}}}
    result = dagster.execute_pipeline(pipeline, environment=environment)

    assert result.success
    assert len(result.result_list) == 3

    # Checked in execution order: source value first, then each doubling.
    expected_by_solid = (
        ('pass_to_first', 'bar'),
        ('first_solid', 'barbar'),
        ('second_solid', 'barbarbarbar'),
    )
    for solid_name, expected_value in expected_by_solid:
        assert result.result_for_solid(solid_name).transformed_value() == expected_value
def define_truncate_pipeline():
    """Return a pipeline containing only the 'truncate_all_derived_tables' solid."""
    pipeline_name = 'truncate_all_derived_tables'
    # The project solid and the pipeline share the same name.
    only_solid = _get_project_solid(pipeline_name)
    return dagster.PipelineDefinition(
        name=pipeline_name,
        description='Truncates all tables that are populated by the pipeline. Preserves source tables',
        solids=[only_solid],
    )
def test_execute_two_solids_with_same_input_name():
    """Execute two independent solids that both declare an input named 'a_thing'.

    'solid_one' is fed by 'pass_to_one' and 'solid_two' by 'pass_to_two'.
    Each compute function adds its input to itself, so the configured
    values 'foo' and 'bar' come out as 'foofoo' and 'barbar'.
    """

    def _make_doubling_compute():
        # A new function object per solid, as in two separate lambdas.
        return lambda context, inputs: inputs['a_thing'] + inputs['a_thing']

    # The same InputDefinition instance is shared by both solids.
    input_def = InputDefinition(name='a_thing')

    solid_one = single_output_transform(
        'solid_one',
        input_defs=[input_def],
        compute_fn=_make_doubling_compute(),
        output_def=dagster.OutputDefinition(),
    )
    solid_two = single_output_transform(
        'solid_two',
        input_defs=[input_def],
        compute_fn=_make_doubling_compute(),
        output_def=dagster.OutputDefinition(),
    )

    all_solid_defs = [
        define_pass_value_solid('pass_to_one'),
        define_pass_value_solid('pass_to_two'),
        solid_one,
        solid_two,
    ]
    dependency_map = {
        'solid_one': {'a_thing': DependencyDefinition('pass_to_one')},
        'solid_two': {'a_thing': DependencyDefinition('pass_to_two')},
    }
    pipeline = dagster.PipelineDefinition(
        solid_defs=all_solid_defs,
        dependencies=dependency_map,
    )

    environment_dict = {
        'solids': {
            'pass_to_one': {'config': {'value': 'foo'}},
            'pass_to_two': {'config': {'value': 'bar'}},
        }
    }
    result = execute_pipeline(pipeline, environment_dict=environment_dict)

    assert result.success
    for solid_name, expected_value in (('solid_one', 'foofoo'), ('solid_two', 'barbar')):
        assert result.result_for_solid(solid_name).result_value() == expected_value
def define_failure_pipeline():
    """Return the 'pandas_hello_world_fails' pipeline.

    Wires sum_solid into the 'sum_df' input of 'sum_sq_solid', and
    sum_sq_solid into the 'sum_sq_solid' input of 'always_fails_solid'.
    """
    pipeline_solids = [sum_solid, sum_sq_solid, always_fails_solid]
    dependency_map = {
        'sum_sq_solid': {'sum_df': DependencyDefinition(sum_solid.name)},
        'always_fails_solid': {'sum_sq_solid': DependencyDefinition(sum_sq_solid.name)},
    }
    return dagster.PipelineDefinition(
        name='pandas_hello_world_fails',
        solids=pipeline_solids,
        dependencies=dependency_map,
    )
def define_setup_pipeline():
    """Return the 'setup_pipeline': create all tables, then populate the num table.

    The populate solid declares an input named after the create solid, and
    that input is wired to the create solid.
    """
    create_tables = _get_project_solid('create_all_tables')
    populate_num = _get_project_solid(
        'populate_num_table',
        inputs=[InputDefinition(create_tables.name)],
    )

    # Input name and upstream solid name are the same string.
    dependency_map = {
        populate_num.name: {
            create_tables.name: DependencyDefinition(create_tables.name),
        },
    }
    return dagster.PipelineDefinition(
        name='setup_pipeline',
        description='Creates all tables and then populates source table',
        solids=[create_tables, populate_num],
        dependencies=dependency_map,
    )
def define_failure_pipeline():
    """Return the 'pandas_hello_world_fails' pipeline including a CSV loader.

    Wiring: 'load_num_csv' feeds the 'num' input of 'sum_solid', sum_solid
    feeds the 'sum_df' input of 'sum_sq_solid', and sum_sq_solid feeds the
    'sum_sq_solid' input of 'always_fails_solid'.
    """
    csv_loader = dagster_pd.load_csv_solid('load_num_csv')
    pipeline_solids = [csv_loader, sum_solid, sum_sq_solid, always_fails_solid]

    dependency_map = {
        'sum_solid': {'num': DependencyDefinition('load_num_csv')},
        'sum_sq_solid': {'sum_df': DependencyDefinition(sum_solid.name)},
        'always_fails_solid': {'sum_sq_solid': DependencyDefinition(sum_sq_solid.name)},
    }
    return dagster.PipelineDefinition(
        name='pandas_hello_world_fails',
        solids=pipeline_solids,
        dependencies=dependency_map,
    )
def define_rerun_pipeline():
    """Return the 'rerun_pipeline': insert into the sum table, then the sum_sq table.

    The sum table solid is requested with ``inputs=None``. The sum_sq solid
    declares an input named after the sum solid and is wired to it.
    """
    sum_table = _get_project_solid('insert_into_sum_table', inputs=None)
    sum_sq_table = _get_project_solid(
        'insert_into_sum_sq_table',
        inputs=[InputDefinition(sum_table.name)],
    )

    # Input name and upstream solid name are the same string.
    dependency_map = {
        sum_sq_table.name: {
            sum_table.name: DependencyDefinition(sum_table.name),
        },
    }
    return dagster.PipelineDefinition(
        name='rerun_pipeline',
        description='Rerun the pipeline, populating the derived tables. Assumes pipeline is setup',
        solids=[sum_table, sum_sq_table],
        dependencies=dependency_map,
    )
def define_full_pipeline():
    """Return the 'full_pipeline' that chains all four project solids.

    Order: create_all_tables -> populate_num_table -> insert_into_sum_table
    -> insert_into_sum_sq_table. Each downstream solid declares one input
    named after the solid directly upstream of it, and that input is wired
    to the upstream solid by name.
    """
    create_all_tables_solids = _get_project_solid('create_all_tables')
    populate_num_table_solid = _get_project_solid(
        'populate_num_table',
        inputs=[InputDefinition(create_all_tables_solids.name)],
    )
    insert_into_sum_table_solid = _get_project_solid(
        'insert_into_sum_table',
        inputs=[InputDefinition(populate_num_table_solid.name)],
    )
    insert_into_sum_sq_table_solid = _get_project_solid(
        'insert_into_sum_sq_table',
        inputs=[InputDefinition(insert_into_sum_table_solid.name)],
    )

    # Every dependency refers to its upstream solid by name, never by the
    # solid object itself.
    dependencies = {
        populate_num_table_solid.name: {
            create_all_tables_solids.name:
                DependencyDefinition(create_all_tables_solids.name),
        },
        insert_into_sum_table_solid.name: {
            populate_num_table_solid.name:
                DependencyDefinition(populate_num_table_solid.name),
        },
        insert_into_sum_sq_table_solid.name: {
            insert_into_sum_table_solid.name:
                DependencyDefinition(insert_into_sum_table_solid.name),
        },
    }

    return dagster.PipelineDefinition(
        name='full_pipeline',
        description='Runs entire pipeline, both setup and running the transform',
        solids=[
            create_all_tables_solids,
            populate_num_table_solid,
            insert_into_sum_table_solid,
            insert_into_sum_sq_table_solid,
        ],
        dependencies=dependencies,
    )