def test_execute_dep_solid_different_input_name():
    """A value threads through two dependent solids whose input names differ.

    'pass_to_first' emits 'bar'; each downstream solid concatenates its single
    input with itself, so the chain yields 'barbar' then 'barbarbarbar'.
    """
    value_source = define_pass_value_solid('pass_to_first')

    doubler_one = single_output_transform(
        'first_solid',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    doubler_two = single_output_transform(
        'second_solid',
        inputs=[InputDefinition(name='an_input')],
        transform_fn=lambda context, inputs: inputs['an_input'] + inputs['an_input'],
        output=dagster.OutputDefinition(),
    )

    pipeline_def = dagster.PipelineDefinition(
        solids=[value_source, doubler_one, doubler_two],
        dependencies={
            'first_solid': {'a_thing': DependencyDefinition('pass_to_first')},
            'second_solid': {'an_input': DependencyDefinition('first_solid')},
        },
    )

    pipeline_result = dagster.execute_pipeline(
        pipeline_def,
        environment={'solids': {'pass_to_first': {'config': {'value': 'bar'}}}},
    )

    assert pipeline_result.success
    assert len(pipeline_result.result_list) == 3
    assert pipeline_result.result_for_solid('pass_to_first').transformed_value() == 'bar'
    assert pipeline_result.result_for_solid('first_solid').transformed_value() == 'barbar'
    assert pipeline_result.result_for_solid('second_solid').transformed_value() == 'barbarbarbar'
def test_execute_solid_with_dep_only_inputs_with_api():
    """Dependency-only inputs (no config, no value passed) still sequence execution.

    Each solid flips a flag in a shared dict; both flags must be set after the run.
    """
    executed = {}

    upstream = single_output_transform(
        name='step_one_solid',
        inputs=[],
        transform_fn=lambda context, args: _set_key_value(executed, 'step_one', True),
        output=OutputDefinition(),
    )

    # The downstream input is named after the upstream solid so the
    # dependency dict can key on upstream.name directly.
    downstream = single_output_transform(
        name='step_two_solid',
        transform_fn=lambda context, args: _set_key_value(executed, 'step_two', True),
        inputs=[InputDefinition(upstream.name)],
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[upstream, downstream],
        dependencies={
            'step_two_solid': {upstream.name: DependencyDefinition(upstream.name)}
        },
    )

    pipeline_result = execute_pipeline(pipeline)

    for solid_result in pipeline_result.solid_result_list:
        assert solid_result.success

    assert executed['step_one'] is True
    assert executed['step_two'] is True
def test_execute_two_solids_with_same_input_name():
    """Two solids sharing one InputDefinition get independent upstream values."""
    shared_input = InputDefinition(name='a_thing')

    doubler_a = single_output_transform(
        'solid_one',
        input_defs=[shared_input],
        compute_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output_def=dagster.OutputDefinition(),
    )

    doubler_b = single_output_transform(
        'solid_two',
        input_defs=[shared_input],
        compute_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output_def=dagster.OutputDefinition(),
    )

    pipeline = dagster.PipelineDefinition(
        solid_defs=[
            define_pass_value_solid('pass_to_one'),
            define_pass_value_solid('pass_to_two'),
            doubler_a,
            doubler_b,
        ],
        dependencies={
            'solid_one': {'a_thing': DependencyDefinition('pass_to_one')},
            'solid_two': {'a_thing': DependencyDefinition('pass_to_two')},
        },
    )

    pipeline_result = execute_pipeline(
        pipeline,
        environment_dict={
            'solids': {
                'pass_to_one': {'config': {'value': 'foo'}},
                'pass_to_two': {'config': {'value': 'bar'}},
            }
        },
    )

    assert pipeline_result.success
    # Each doubler saw only its own upstream value.
    assert pipeline_result.result_for_solid('solid_one').result_value() == 'foofoo'
    assert pipeline_result.result_for_solid('solid_two').result_value() == 'barbar'
def test_failure_midstream():
    '''A midstream failure fails that step and skips its downstream.

    Topology (A and B feed C; C feeds D):

        A
          \\
            C (fails) -- D (skipped)
          //
        B
    '''
    solid_a = create_root_success_solid('A')
    solid_b = create_root_success_solid('B')

    def fail_fn(_context, inputs):
        # check.failed raises CheckError before the return is reached.
        check.failed('user error')
        return [inputs['A'], inputs['B'], {'C': 'transform_called'}]

    def success_fn(_context, inputs):
        return [inputs['C'], {'D': 'transform_called'}]

    solid_c = single_output_transform(
        name='C',
        inputs=[InputDefinition(name='A'), InputDefinition(name='B')],
        compute_fn=fail_fn,
        output=OutputDefinition(),
    )

    solid_d = single_output_transform(
        name='D',
        inputs=[InputDefinition(name='C')],
        compute_fn=success_fn,
        output=OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d],
        dependencies={
            'C': {
                'A': DependencyDefinition(solid_a.name),
                'B': DependencyDefinition(solid_b.name),
            },
            'D': {'C': DependencyDefinition(solid_c.name)},
        },
    )

    # Non-throwing execution so we can inspect per-solid results after failure.
    pipeline_result = execute_pipeline(
        pipeline_def, run_config=RunConfig.nonthrowing_in_process()
    )

    assert pipeline_result.result_for_solid('A').success
    assert pipeline_result.result_for_solid('B').success
    assert not pipeline_result.result_for_solid('C').success
    assert pipeline_result.result_for_solid('C').failure_data.error.cls_name == 'CheckError'
    assert not pipeline_result.result_for_solid('D').success
    assert pipeline_result.result_for_solid('D').skipped
def test_execute_solid_with_input_same_name():
    """A solid whose input name equals its own solid name still resolves deps."""
    self_named = single_output_transform(
        'a_thing',
        input_defs=[InputDefinition(name='a_thing')],
        compute_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output_def=dagster.OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solid_defs=[define_pass_value_solid('pass_value'), self_named],
        dependencies={'a_thing': {'a_thing': DependencyDefinition('pass_value')}},
    )

    pipeline_result = execute_pipeline(
        pipeline,
        environment_dict={'solids': {'pass_value': {'config': {'value': 'foo'}}}},
    )

    assert pipeline_result.result_for_solid('a_thing').result_value() == 'foofoo'
def _dataframe_solid(name, input_defs, compute_fn):
    """Shorthand: a single-output solid whose output type is DataFrame.

    NOTE(review): a second `_dataframe_solid` with an older keyword signature
    appears later in this file; the later definition shadows this one at
    import time — confirm which is intended.
    """
    return single_output_transform(
        name=name,
        input_defs=input_defs,
        compute_fn=compute_fn,
        output_def=OutputDefinition(DataFrame),
    )
def _dataframe_solid(name, inputs, transform_fn):
    """Shorthand: a single-output transform producing a dagster_pd.DataFrame."""
    return single_output_transform(
        name=name,
        inputs=inputs,
        transform_fn=transform_fn,
        output=OutputDefinition(dagster_pd.DataFrame),
    )
def test_failure_midstream():
    """A and B succeed; C raises inside its transform and is reported as failed."""
    solid_a = create_root_success_solid('A')
    solid_b = create_root_success_solid('B')

    def failing_transform(_context, inputs):
        # check.failed raises before the return statement runs.
        check.failed('user error')
        return [inputs['A'], inputs['B'], {'C': 'transform_called'}]

    solid_c = single_output_transform(
        name='C',
        inputs=[InputDefinition(name='A'), InputDefinition(name='B')],
        transform_fn=failing_transform,
        output=OutputDefinition(),
    )

    pipeline = silencing_pipeline(
        solids=[solid_a, solid_b, solid_c],
        dependencies={
            'C': {
                'A': DependencyDefinition(solid_a.name),
                'B': DependencyDefinition(solid_b.name),
            }
        },
    )

    # Do not raise on user error so per-solid results can be inspected.
    pipeline_result = execute_pipeline(pipeline, throw_on_user_error=False)

    assert pipeline_result.result_for_solid('A').success
    assert pipeline_result.result_for_solid('B').success
    assert not pipeline_result.result_for_solid('C').success
    assert isinstance(
        pipeline_result.result_for_solid('C').dagster_error,
        DagsterExecutionStepExecutionError,
    )
def create_root_transform_failure_solid(name):
    """Return a no-input solid whose transform always raises."""

    def _always_fail(**_kwargs):
        raise Exception('Transform failed')

    return single_output_transform(
        name=name,
        inputs=[],
        transform_fn=_always_fail,
        output=OutputDefinition(),
    )
def create_sql_solid(name, inputs, sql_text):
    """Build a single-output solid whose transform executes *sql_text*."""
    check.str_param(name, 'name')
    check.list_param(inputs, 'inputs', of_type=InputDefinition)
    check.str_param(sql_text, 'sql_text')

    return single_output_transform(
        name,
        inputs=inputs,
        transform_fn=create_sql_transform(sql_text),
        output=OutputDefinition(),
    )
def create_solid_with_deps(name, *solid_deps):
    """Build a solid with one input per upstream solid, named after each dep."""
    dep_inputs = [InputDefinition(dep.name) for dep in solid_deps]
    return single_output_transform(
        name=name,
        inputs=dep_inputs,
        transform_fn=make_transform(name),
        output=OutputDefinition(),
    )
def test_single_transform_returning_result():
    """Returning a raw Output from a compute_fn is an invariant violation."""

    def _return_output(*_args, **_kwargs):
        return Output(None)

    solid_inst = single_output_transform(
        'test_return_result',
        input_defs=[],
        compute_fn=_return_output,
        output_def=OutputDefinition(),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_isolated_solid(solid_inst)
def create_root_solid(name):
    """Build a solid with a single input named '<name>_input'."""
    root_input = InputDefinition(name + '_input')
    return single_output_transform(
        name=name,
        inputs=[root_input],
        transform_fn=make_transform(name),
        output=OutputDefinition(),
    )
def test_single_transform_returning_result():
    """Returning a raw Result from a transform_fn is an invariant violation."""

    def _return_result(*_args, **_kwargs):
        return Result(None)

    solid_inst = single_output_transform(
        'test_return_result',
        inputs=[],
        transform_fn=_return_result,
        output=OutputDefinition(),
    )

    with pytest.raises(DagsterInvariantViolationError):
        execute_single_solid_in_isolation(ExecutionContext(), solid_inst)
def create_root_success_solid(name):
    """Return a no-input solid emitting [{name: 'transform_called'}]."""

    def _emit_marker(_context, _args):
        # One-row payload recording that this solid's transform ran.
        return [{name: 'transform_called'}]

    return single_output_transform(
        name=name,
        inputs=[],
        transform_fn=_emit_marker,
        output=OutputDefinition(),
    )
def test_execute_solid_with_dep_only_inputs_no_api():
    """Dependency-only inputs sequence two solids; both record that they ran."""
    executed = {}

    upstream = single_output_transform(
        name='step_one_solid',
        inputs=[],
        compute_fn=lambda context, args: _set_key_value(executed, 'step_one', True),
        output=OutputDefinition(),
    )

    downstream = single_output_transform(
        name='step_two_solid',
        inputs=[InputDefinition('step_one_solid')],
        compute_fn=lambda context, args: _set_key_value(executed, 'step_two', True),
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solid_defs=[upstream, downstream],
        dependencies={
            'step_two_solid': {'step_one_solid': DependencyDefinition('step_one_solid')}
        },
    )

    pipeline_result = execute_pipeline(pipeline)

    assert pipeline_result.success
    for solid_result in pipeline_result.solid_result_list:
        assert solid_result.success

    assert executed['step_one'] is True
    assert executed['step_two'] is True
def create_sql_statement_solid(name, sql_text, inputs=None):
    """Build a solid that runs *sql_text* through the SQLAlchemy transform.

    ``inputs`` defaults to an empty list when omitted.
    """
    check.str_param(name, 'name')
    check.str_param(sql_text, 'sql_text')
    check.opt_list_param(inputs, 'inputs', of_type=InputDefinition)

    return single_output_transform(
        name=name,
        transform_fn=_create_sql_alchemy_transform_fn(sql_text),
        inputs=[] if inputs is None else inputs,
        output=OutputDefinition(),
    )
def create_definition_based_solid():
    """Build a 'hello_world' solid that adds a 'sum' column to a num DataFrame."""
    table_input = InputDefinition('num_csv', dagster_pd.DataFrame)

    def _add_sum_column(_context, inputs):
        frame = inputs['num_csv']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    # DataFrame I/O supports CSV and PARQUET by default.
    return single_output_transform(
        name='hello_world',
        inputs=[table_input],
        transform_fn=_add_sum_column,
        output=OutputDefinition(dagster_pd.DataFrame),
    )
def test_hello_world_pipeline_no_api():
    """read_csv feeds hello_world, which adds a 'sum' column; verify the frame."""

    def hello_world_transform_fn(_context, inputs):
        frame = inputs['num_df']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    read_csv_solid = define_read_csv_solid('read_csv_solid')

    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=hello_world_transform_fn,
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[read_csv_solid, hello_world],
        dependencies={'hello_world': {'num_df': DependencyDefinition('read_csv_solid')}},
    )

    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={
                'read_csv_solid': config.Solid({'path': script_relative_path('num.csv')}),
            },
        ),
    )

    assert pipeline_result.success
    hello_result = pipeline_result.result_for_solid('hello_world')
    assert hello_result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
def create_hello_world_solid_composed_pipeline():
    """Compose read_hello_world -> hello_world into a pipeline and return it."""

    def _add_sum(_context, inputs):
        frame = inputs['num_df']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=_add_sum,
        output=OutputDefinition(),
    )

    return PipelineDefinition(
        solids=[define_read_csv_solid('read_hello_world'), hello_world],
        dependencies={'hello_world': {'num_df': DependencyDefinition('read_hello_world')}},
    )
def test_basic_pandas_solid():
    """load_csv feeds sum_table, which appends num1+num2 as 'sum'."""
    csv_input = InputDefinition('num_csv', dagster_pd.DataFrame)

    def _sum_columns(_context, inputs):
        frame = inputs['num_csv']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    sum_solid = single_output_transform(
        name='sum_table',
        inputs=[csv_input],
        transform_fn=_sum_columns,
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[dagster_pd.load_csv_solid('load_csv'), sum_solid],
        dependencies={sum_solid.name: {'num_csv': DependencyDefinition('load_csv')}},
    )

    pipeline_result = execute_pipeline(
        pipeline,
        environment=get_num_csv_environment(get_load_only_solids_config('load_csv')),
    )

    assert pipeline_result.success
    assert pipeline_result.result_for_solid('sum_table').transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
def test_pandas_csv_to_csv():
    """Round-trip a DataFrame transform through temporary CSV files."""
    csv_input = InputDefinition('num_csv', dagster_pd.DataFrame)

    def _sum_columns(context, inputs):
        # The context parameter is deliberately used here to exercise
        # the two-argument transform signature.
        check.inst_param(context, 'context', ExecutionContext)
        frame = inputs['num_csv']
        frame['sum'] = frame['num1'] + frame['num2']
        return frame

    solid_def = single_output_transform(
        name='sum_table',
        inputs=[csv_input],
        transform_fn=_sum_columns,
        output=OutputDefinition(dagster_pd.DataFrame),
    )

    output_df = execute_transform_in_temp_csv_files(solid_def)

    assert output_df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
def test_pandas_hello_no_library():
    """End-to-end pandas pipeline built without the dagster-pandas helpers.

    Phase 1 runs read_one -> solid_one -> solid_two and checks the in-memory
    DataFrame. Phase 2 appends a CSV-writing solid and verifies the file on
    disk. Fix over the original: the output path '/tmp/sum_sq.csv' was
    spelled out as a literal three times alongside the `sum_sq_out_path`
    variable; it is now defined once and reused, and the mid-function
    `import os` is hoisted to the top of the function.
    """
    import os

    def solid_one_transform(_context, inputs):
        num_df = inputs['num_df']
        num_df['sum'] = num_df['num1'] + num_df['num2']
        return num_df

    solid_one = single_output_transform(
        name='solid_one',
        inputs=[InputDefinition(name='num_df')],
        transform_fn=solid_one_transform,
        output=OutputDefinition(),
    )

    def solid_two_transform(_context, inputs):
        sum_df = inputs['sum_df']
        sum_df['sum_sq'] = sum_df['sum'] * sum_df['sum']
        return sum_df

    solid_two = single_output_transform(
        name='solid_two',
        inputs=[InputDefinition(name='sum_df')],
        transform_fn=solid_two_transform,
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[define_read_csv_solid('read_one'), solid_one, solid_two],
        dependencies={
            'solid_one': {'num_df': DependencyDefinition('read_one')},
            'solid_two': {'sum_df': DependencyDefinition('solid_one')},
        },
    )

    environment = config.Environment(
        solids={
            'read_one': config.Solid({'path': script_relative_path('num.csv')}),
        }
    )

    execute_pipeline_result = execute_pipeline(pipeline, environment=environment)

    assert execute_pipeline_result.result_for_solid('solid_two').transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }

    # Single source of truth for the on-disk output; remove any stale file so
    # the write below is genuinely exercised.
    sum_sq_out_path = '/tmp/sum_sq.csv'
    if os.path.exists(sum_sq_out_path):
        os.remove(sum_sq_out_path)

    environment_two = config.Environment(
        solids={
            'read_one': config.Solid({'path': script_relative_path('num.csv')}),
            'write_two': config.Solid({'path': sum_sq_out_path}),
        },
    )

    pipeline_two = PipelineDefinition(
        solids=[
            define_read_csv_solid('read_one'),
            solid_one,
            solid_two,
            define_to_csv_solid('write_two'),
        ],
        dependencies={
            'solid_one': {'num_df': DependencyDefinition('read_one')},
            'solid_two': {'sum_df': DependencyDefinition('solid_one')},
            'write_two': {'df': DependencyDefinition('solid_two')},
        },
    )

    execute_pipeline(pipeline_two, environment=environment_two)

    sum_sq_df = pd.read_csv(sum_sq_out_path)
    assert sum_sq_df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }
def test_failure_propagation():
    '''A failing branch skips all of its downstream, including the join solid.

    Topology:

            B =========== C
          //                \\
        A                     F (skipped)
          \\                //
            D (fails) == E (skipped)
    '''
    solid_a = create_root_success_solid('A')

    def fail_fn(_context, inputs):
        # check.failed raises CheckError; the return never executes.
        check.failed('user error')
        return inputs

    def success_fn(_context, inputs):
        return inputs

    solid_b = single_output_transform(
        name='B',
        inputs=[InputDefinition(name='A')],
        compute_fn=success_fn,
        output=OutputDefinition(),
    )
    solid_c = single_output_transform(
        name='C',
        inputs=[InputDefinition(name='B')],
        compute_fn=success_fn,
        output=OutputDefinition(),
    )
    solid_d = single_output_transform(
        name='D',
        inputs=[InputDefinition(name='A')],
        compute_fn=fail_fn,
        output=OutputDefinition(),
    )
    solid_e = single_output_transform(
        name='E',
        inputs=[InputDefinition(name='D')],
        compute_fn=success_fn,
        output=OutputDefinition(),
    )
    solid_f = single_output_transform(
        name='F',
        inputs=[InputDefinition(name='C'), InputDefinition(name='E')],
        compute_fn=success_fn,
        output=OutputDefinition(),
    )

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_a, solid_b, solid_c, solid_d, solid_e, solid_f],
        dependencies={
            'B': {'A': DependencyDefinition(solid_a.name)},
            'D': {'A': DependencyDefinition(solid_a.name)},
            'C': {'B': DependencyDefinition(solid_b.name)},
            'E': {'D': DependencyDefinition(solid_d.name)},
            'F': {
                'C': DependencyDefinition(solid_c.name),
                'E': DependencyDefinition(solid_e.name),
            },
        },
    )

    # Non-throwing execution so the per-solid outcomes can be asserted.
    pipeline_result = execute_pipeline(
        pipeline_def, run_config=RunConfig.nonthrowing_in_process()
    )

    assert pipeline_result.result_for_solid('A').success
    assert pipeline_result.result_for_solid('B').success
    assert pipeline_result.result_for_solid('C').success
    assert not pipeline_result.result_for_solid('D').success
    assert pipeline_result.result_for_solid('D').failure_data.error.cls_name == 'CheckError'
    assert not pipeline_result.result_for_solid('E').success
    assert pipeline_result.result_for_solid('E').skipped
    assert not pipeline_result.result_for_solid('F').success
    assert pipeline_result.result_for_solid('F').skipped