def test_custom_contexts():
    """Two context definitions with identical config schemas are each selectable by name."""

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
            'custom_two': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
        },
    )

    # Run once under each named context; the solid asserts the resources it sees.
    environment_one = config.Environment(
        context=config.Context('custom_one', {'field_one': 'value_two'}))
    execute_pipeline(pipeline, environment=environment_one)

    environment_two = config.Environment(
        context=config.Context('custom_two', {'field_one': 'value_two'}))
    execute_pipeline(pipeline, environment=environment_two)
def execute_single_solid(context, solid_def, environment=None, throw_on_error=True):
    """Execute a single solid as a one-solid pipeline with a passthrough context.

    Narrows the provided environment down to the config (if any) for just this
    solid before delegating to execute_pipeline.
    """
    check.inst_param(context, 'context', ExecutionContext)
    check.inst_param(solid_def, 'solid_def', SolidDefinition)
    environment = check.opt_inst_param(
        environment,
        'environment',
        config.Environment,
        config.Environment(),
    )
    check.bool_param(throw_on_error, 'throw_on_error')

    # Only carry over this solid's config entry, if the caller provided one.
    solids_subset = (
        {solid_def.name: environment.solids[solid_def.name]}
        if solid_def.name in environment.solids
        else None
    )
    single_solid_environment = config.Environment(
        expectations=environment.expectations,
        context=environment.context,
        solids=solids_subset,
    )

    return execute_pipeline(
        PipelineDefinition(
            solids=[solid_def],
            context_definitions=PipelineContextDefinition.passthrough_context_definition(context),
        ),
        environment=single_solid_environment,
    )
def test_invalid_context():
    """Invalid context names and mismatched context config raise the expected errors."""

    @lambda_solid
    def never_transform():
        raise Exception('should never execute')

    default_context_pipeline = PipelineDefinition(solids=[never_transform])

    # NOTE(review): pytest.raises(message=...) is a deprecated no-op argument in
    # modern pytest — consider match= here; left unchanged to preserve behavior.
    environment_context_not_found = config.Environment(
        context=config.Context('not_found', {}))
    with pytest.raises(DagsterInvariantViolationError,
                       message='Context not_found not found'):
        execute_pipeline(
            default_context_pipeline,
            environment=environment_context_not_found,
            throw_on_error=True,
        )

    environment_field_name_mismatch = config.Environment(
        context=config.Context(config={'unexpected': 'value'}))
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            default_context_pipeline,
            environment=environment_field_name_mismatch,
            throw_on_error=True,
        )

    with_argful_context_pipeline = PipelineDefinition(
        solids=[never_transform],
        context_definitions={
            'default': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'string_field': Field(types.String)}),
                context_fn=lambda info: info.config,
            )
        },
    )

    environment_no_config_error = config.Environment(
        context=config.Context(config={}))
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            with_argful_context_pipeline,
            environment=environment_no_config_error,
            throw_on_error=True,
        )

    environment_type_mismatch_error = config.Environment(
        context=config.Context(config={'string_field': 1}))
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            with_argful_context_pipeline,
            environment=environment_type_mismatch_error,
            throw_on_error=True,
        )
def test_execute_pipeline():
    """Happy-path run of the success pipeline; checks both downstream solid outputs."""
    pipeline = define_success_pipeline()
    environment = config.Environment(
        solids={'load_num_csv': config.Solid({'path': script_relative_path('num.csv')})},
    )

    result = execute_pipeline(pipeline, environment=environment)
    assert result.success

    sum_df = result.result_for_solid('sum_solid').transformed_value()
    assert sum_df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }

    sum_sq_df = result.result_for_solid('sum_sq_solid').transformed_value()
    assert sum_sq_df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }
def test_hello_world_composed():
    """The composed hello-world pipeline sums num1 and num2 from num.csv."""
    pipeline = create_hello_world_solid_composed_pipeline()

    pipeline_result = execute_pipeline(
        pipeline,
        environment=config.Environment(
            solids={
                'read_hello_world': config.Solid({
                    'path': script_relative_path('num.csv')
                }),
            },
        ),
    )
    assert pipeline_result.success

    result = pipeline_result.result_for_solid('hello_world')
    assert result.success
    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
def test_output_sql_sum_sq_solid():
    """Materializes the sum-sq expression into a SQL table and checks its rows."""
    create_sum_sq_table = define_create_table_solid('create_sum_sq_table')
    pipeline = create_sum_sq_pipeline(
        in_mem_context(),
        DagsterSqlTableExpression('num_table'),
        [create_sum_sq_table],
        {create_sum_sq_table.name: {'expr': DependencyDefinition('sum_sq_table')}},
    )

    environment = config.Environment(
        solids={'create_sum_sq_table': config.Solid({'table_name': 'sum_sq_table'})},
    )
    pipeline_result = execute_pipeline(pipeline=pipeline, environment=environment)
    assert pipeline_result.success
    assert len(pipeline_result.result_list) == 3

    # Read back the materialized table directly through the engine.
    engine = pipeline_result.context.resources.sa.engine
    rows = engine.connect().execute('SELECT * FROM sum_sq_table').fetchall()
    assert rows == [(1, 2, 3, 9), (3, 4, 7, 49)]
def execute_pipeline(
    pipeline,
    environment=None,
    throw_on_error=True,
):
    '''"Synchronous" version of :py:func:`execute_pipeline_iterator`.

    Note: throw_on_error is very useful in testing contexts when not testing for
    error conditions.

    Parameters:
      pipeline (PipelineDefinition): pipeline to run
      environment (config.Environment): optional environment (context and solid
        config) for the run; defaults to an empty Environment
      throw_on_error (bool): when True, throw when an error is encountered rather
        than returning the py:class:`SolidExecutionResult` in an error-state

    Returns:
      PipelineExecutionResult
    '''
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    environment = check.opt_inst_param(
        environment,
        'environment',
        config.Environment,
        config.Environment(),
    )
    execution_graph = ExecutionGraph.from_pipeline(pipeline)
    return _execute_graph(execution_graph, environment, throw_on_error)
def test_config_for_no_config():
    """Passing solid config to a solid with no config_def raises an invariant error."""

    def _t_fn(*_args):
        raise Exception('should not reach')

    solid_def = SolidDefinition(
        name='no_config_solid',
        inputs=[],
        outputs=[],
        transform_fn=_t_fn,
    )
    pipeline = PipelineDefinition(solids=[solid_def])

    environment = config.Environment(
        solids={'no_config_solid': config.Solid({'some_config': 1})})

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Solid no_config_solid was provided {'some_config': 1} but does not take config",
    ):
        execute_pipeline(pipeline, environment)
def test_basic_solid_with_config():
    """Solid config declared via config_def is delivered to the transform as info.config."""
    did_get = {}

    def _t_fn(info, _inputs):
        did_get['yep'] = info.config

    solid = SolidDefinition(
        name='solid_with_context',
        inputs=[],
        outputs=[],
        config_def=ConfigDefinition.config_dict(
            {'some_config': Field(types.String)}),
        transform_fn=_t_fn,
    )
    pipeline = PipelineDefinition(solids=[solid])

    execute_pipeline(
        pipeline,
        config.Environment(
            solids={'solid_with_context': config.Solid({'some_config': 'foo'})}),
    )

    assert 'yep' in did_get
    assert 'some_config' in did_get['yep']
def test_execute_solid_with_input_same_name():
    """A solid may declare an input whose name matches its own name."""
    a_thing_solid = single_output_transform(
        'a_thing',
        inputs=[InputDefinition(name='a_thing')],
        transform_fn=lambda context, inputs: inputs['a_thing'] + inputs['a_thing'],
        output=dagster.OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[define_pass_value_solid('pass_value'), a_thing_solid],
        dependencies={'a_thing': {'a_thing': DependencyDefinition('pass_value')}},
    )

    result = execute_pipeline(
        pipeline,
        config.Environment(solids={'pass_value': config.Solid({'value': 'foo'})}),
    )

    assert result.result_for_solid('a_thing').transformed_value() == 'foofoo'
def test_yield_context():
    """A generator context_fn runs setup before the transform and teardown after it."""
    events = []

    @solid(inputs=[], outputs=[OutputDefinition()])
    def custom_context_transform(info):
        assert info.context.resources == {'field_one': 'value_two'}
        assert info.context._context_dict['foo'] == 'bar'  # pylint: disable=W0212
        events.append('during')

    def _yield_context(info):
        events.append('before')
        context = ExecutionContext(resources=info.config)
        with context.value('foo', 'bar'):
            yield context
        events.append('after')

    pipeline = PipelineDefinition(
        solids=[custom_context_transform],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict(
                    {'field_one': Field(dagster_type=types.String)}),
                context_fn=_yield_context,
            ),
        },
    )

    environment_one = config.Environment(
        context=config.Context('custom_one', {'field_one': 'value_two'}))
    execute_pipeline(pipeline, environment=environment_one)

    # Verifies ordering: setup, transform body, teardown.
    assert events == ['before', 'during', 'after']
def test_multi_input_partial_execution():
    """Runs the multi-input pipeline end to end and checks each produced table."""
    pipeline = create_multi_input_pipeline()

    first_sum_table = 'first_sum_table'
    first_mult_table = 'first_mult_table'
    first_sum_mult_table = 'first_sum_mult_table'

    environment = config.Environment(
        solids={
            'sum_table': config.Solid({'sum_table': first_sum_table}),
            'mult_table': config.Solid({'mult_table': first_mult_table}),
            'sum_mult_table': config.Solid({
                'sum_table': first_sum_table,
                'mult_table': first_mult_table,
                'sum_mult_table': first_sum_mult_table,
            }),
        },
    )

    first_pipeline_result = execute_pipeline(pipeline, environment=environment)

    assert first_pipeline_result.success
    assert len(first_pipeline_result.result_list) == 3

    context = first_pipeline_result.context
    assert _load_table(context, first_sum_table) == [(1, 2, 3), (3, 4, 7)]
    assert _load_table(context, first_mult_table) == [(1, 2, 2), (3, 4, 12)]
    assert _load_table(context, first_sum_mult_table) == [(1, 3, 2), (3, 7, 12)]
def test_single_templated_sql_solid_double_table_with_api():
    """Templated SQL solid with two table arguments renders and executes correctly."""
    sum_table_arg = 'specific_sum_table'
    num_table_arg = 'specific_num_table'

    sql = '''CREATE TABLE {{sum_table}} AS SELECT num1, num2, num1 + num2 as sum FROM {{num_table}}'''

    sum_solid = create_templated_sql_transform_solid(
        name='sum_solid',
        sql=sql,
        table_arguments=['sum_table', 'num_table'],
    )

    pipeline = pipeline_test_def(
        solids=[sum_solid],
        context=in_mem_context(num_table_arg),
    )

    environment = config.Environment(
        solids={
            'sum_solid': config.Solid({
                'sum_table': sum_table_arg,
                'num_table': num_table_arg,
            })
        })

    result = execute_pipeline(pipeline, environment=environment)
    assert result.success
    assert _load_table(result.context, sum_table_arg) == [(1, 2, 3), (3, 4, 7)]
def test_with_from_through_specifying_all_solids():
    """Supplying config for every solid executes all three and produces all tables."""
    pipeline = create_multi_input_pipeline()

    first_sum_table = 'first_sum_table'
    first_mult_table = 'first_mult_table'
    first_sum_mult_table = 'first_sum_mult_table'

    environment = config.Environment(
        solids={
            'sum_table': config.Solid({'sum_table': first_sum_table}),
            'mult_table': config.Solid({'mult_table': first_mult_table}),
            'sum_mult_table': config.Solid({
                'sum_table': first_sum_table,
                'mult_table': first_mult_table,
                'sum_mult_table': first_sum_mult_table,
            }),
        },
    )

    pipeline_result = execute_pipeline(pipeline, environment=environment)

    assert len(pipeline_result.result_list) == 3

    context = pipeline_result.context
    assert _load_table(context, first_sum_table) == [(1, 2, 3), (3, 4, 7)]
    assert _load_table(context, first_mult_table) == [(1, 2, 2), (3, 4, 12)]
    assert _load_table(context, first_sum_mult_table) == [(1, 3, 2), (3, 7, 12)]
def test_pipeline_execution_graph_diamond():
    """Drives the shared _do_test harness over a diamond-shaped dependency graph."""
    pipeline = PipelineDefinition(
        solids=create_diamond_solids(),
        dependencies=diamond_deps(),
    )
    environment = config.Environment()
    return _do_test(
        pipeline,
        lambda: execute_pipeline_iterator(pipeline, environment=environment),
    )
def test_aliased_configs():
    """Two aliases of the same solid each receive their own config value."""

    @solid(inputs=[], config_def=ConfigDefinition(types.Int))
    def load_constant(info):
        return info.config

    pipeline = PipelineDefinition(
        solids=[load_constant],
        dependencies={
            SolidInstance(load_constant.name, 'load_a'): {},
            SolidInstance(load_constant.name, 'load_b'): {},
        },
    )

    result = execute_pipeline(
        pipeline,
        config.Environment(solids={
            'load_a': config.Solid(2),
            'load_b': config.Solid(3),
        }),
    )

    assert result.success
    assert result.result_for_solid('load_a').transformed_value() == 2
    assert result.result_for_solid('load_b').transformed_value() == 3
def test_default_context_with_log_level():
    """The default context accepts a string log_level and rejects a non-string one."""

    @solid(inputs=[], outputs=[OutputDefinition()])
    def default_context_transform(info):
        for logger in info.context._logger.loggers:
            assert logger.level == INFO

    pipeline = PipelineDefinition(solids=[default_context_transform])
    execute_pipeline(
        pipeline,
        environment=config.Environment(
            context=config.Context(config={'log_level': 'INFO'})),
    )

    # NOTE(review): pytest.raises(message=...) is a deprecated no-op argument in
    # modern pytest — consider match=; left unchanged to preserve behavior.
    with pytest.raises(DagsterTypeError,
                       message='Argument mismatch in context default'):
        execute_pipeline(
            pipeline,
            environment=config.Environment(
                context=config.Context(config={'log_level': 2})),
        )
def test_default_value():
    """An optional context config field falls back to its declared default_value."""

    def _get_config_test_solid(config_key, config_value):
        @solid(inputs=[], outputs=[OutputDefinition()])
        def config_test(info):
            assert info.context.resources == {config_key: config_value}

        return config_test

    pipeline = PipelineDefinition(
        solids=[_get_config_test_solid('field_one', 'heyo')],
        context_definitions={
            'custom_one': PipelineContextDefinition(
                config_def=ConfigDefinition.config_dict({
                    'field_one': Field(
                        dagster_type=types.String,
                        is_optional=True,
                        default_value='heyo',
                    )
                }),
                context_fn=lambda info: ExecutionContext(resources=info.config),
            ),
        },
    )

    # Empty config dict: the default for field_one should be applied.
    execute_pipeline(
        pipeline,
        environment=config.Environment(context=config.Context('custom_one', {})),
    )
def test_construct_full_environment_default_context_name():
    """An unnamed context in the YAML document maps to the 'default' context.

    Uses yaml.safe_load rather than yaml.load: plain yaml.load without an
    explicit Loader is deprecated and unsafe on untrusted input.
    """
    document = '''
context:
    config:
        context_arg: context_value
'''

    environment = config.construct_environment(yaml.safe_load(document))

    assert environment == config.Environment(
        context=config.Context('default', {'context_arg': 'context_value'}),
    )
def test_notebook_dag():
    """Notebook-backed DAG: downstream solids compute sum and product of the inputs."""
    pipeline_result = execute_pipeline(
        define_test_notebook_dag_pipeline(),
        environment=config.Environment(solids={
            'load_a': config.Solid(1),
            'load_b': config.Solid(2),
        }),
    )
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_two').transformed_value() == 3
    assert pipeline_result.result_for_solid('mult_two').transformed_value() == 6
def test_part_thirteen_step_two():
    """Tutorial pipeline: a_plus_b sums the two configured inputs."""
    pipeline_result = execute_pipeline(
        define_part_thirteen_step_two(),
        config.Environment(solids={
            'load_a': config.Solid(23),
            'load_b': config.Solid(38),
        }),
    )

    assert pipeline_result.success
    solid_result = pipeline_result.result_for_solid('a_plus_b')
    assert solid_result.transformed_value() == 23 + 38
def test_execute_two_solids_with_same_input_name():
    """Two solids sharing an InputDefinition name resolve their deps independently."""
    input_def = InputDefinition(name='a_thing')

    def _double(context, inputs):
        return inputs['a_thing'] + inputs['a_thing']

    solid_one = single_output_transform(
        'solid_one',
        inputs=[input_def],
        transform_fn=_double,
        output=dagster.OutputDefinition(),
    )
    solid_two = single_output_transform(
        'solid_two',
        inputs=[input_def],
        transform_fn=_double,
        output=dagster.OutputDefinition(),
    )

    pipeline = dagster.PipelineDefinition(
        solids=[
            define_pass_value_solid('pass_to_one'),
            define_pass_value_solid('pass_to_two'),
            solid_one,
            solid_two,
        ],
        dependencies={
            'solid_one': {'a_thing': DependencyDefinition('pass_to_one')},
            'solid_two': {'a_thing': DependencyDefinition('pass_to_two')},
        },
    )

    result = execute_pipeline(
        pipeline,
        environment=config.Environment(
            solids={
                'pass_to_one': config.Solid({'value': 'foo'}),
                'pass_to_two': config.Solid({'value': 'bar'}),
            },
        ),
    )

    assert result.success
    assert result.result_for_solid('solid_one').transformed_value() == 'foofoo'
    assert result.result_for_solid('solid_two').transformed_value() == 'barbar'
def test_two_input_solid():
    """A dataframe solid with two CSV inputs combines columns from both frames."""

    def transform(_context, inputs):
        num_csv1 = inputs['num_csv1']
        num_csv2 = inputs['num_csv2']
        check.inst_param(num_csv1, 'num_csv1', pd.DataFrame)
        check.inst_param(num_csv2, 'num_csv2', pd.DataFrame)
        num_csv1['sum'] = num_csv1['num1'] + num_csv2['num2']
        return num_csv1

    two_input_solid = _dataframe_solid(
        name='two_input_solid',
        inputs=[
            InputDefinition('num_csv1', dagster_pd.DataFrame),
            InputDefinition('num_csv2', dagster_pd.DataFrame),
        ],
        transform_fn=transform,
    )

    environment = config.Environment(
        solids={
            'load_csv1': config.Solid({'path': script_relative_path('num.csv')}),
            'load_csv2': config.Solid({'path': script_relative_path('num.csv')}),
        })

    pipeline = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_csv1'),
            dagster_pd.load_csv_solid('load_csv2'),
            two_input_solid,
        ],
        dependencies={
            'two_input_solid': {
                'num_csv1': DependencyDefinition('load_csv1'),
                'num_csv2': DependencyDefinition('load_csv2'),
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline, environment)
    assert pipeline_result.success

    df = pipeline_result.result_for_solid('two_input_solid').transformed_value()
    assert isinstance(df, pd.DataFrame)
    assert df.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }
def test_pandas_source_test_pipeline():
    """The pandas source solid loads num.csv identically to pd.read_csv."""
    pipeline = define_pandas_source_test_pipeline()

    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={
                'pandas_source_test':
                config.Solid(script_relative_path('num.csv')),
            },
        ),
    )
    assert pipeline_result.success

    solid_result = pipeline_result.result_for_solid('pandas_source_test')
    expected = pd.read_csv(script_relative_path('num.csv'))
    assert solid_result.transformed_value().equals(expected)
def test_intro_tutorial_part_nine_step_one():
    """Tutorial pipeline: add and multiply the two injested config values."""
    result = execute_pipeline(
        define_part_nine_step_one(),
        config.Environment(solids={
            'injest_a': config.Solid(2),
            'injest_b': config.Solid(3),
        }),
    )

    assert result.success
    assert result.result_for_solid('injest_a').transformed_value() == 2
    assert result.result_for_solid('injest_b').transformed_value() == 3
    assert result.result_for_solid('add_ints').transformed_value() == 5
    assert result.result_for_solid('mult_ints').transformed_value() == 6
def do_execute_command(pipeline, env, printer):
    """Execute a pipeline from the CLI, loading environment config from a YAML path.

    When env is falsy, runs with an empty Environment.
    """
    check.inst_param(pipeline, 'pipeline', PipelineDefinition)
    check.opt_str_param(env, 'env')
    check.callable_param(printer, 'printer')

    if env:
        environment = config.construct_environment(load_yaml_from_path(env))
    else:
        environment = config.Environment()

    process_results_for_console(execute_pipeline_iterator(pipeline, environment))
def test_hello_world_config():
    """A dagstermill solid receives its config and yields the expected output."""
    with_config_solid = dm.define_dagstermill_solid(
        'with_config',
        nb_test_path('hello_world_with_config'),
        [],
        [OutputDefinition()],
    )
    pipeline = PipelineDefinition(solids=[with_config_solid])

    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={'with_config': config.Solid(script_relative_path('num.csv'))}),
    )

    assert pipeline_result.success
    assert pipeline_result.result_for_solid('with_config').transformed_value() == 100
def test_any_config_definition():
    """A bare ConfigDefinition() accepts an arbitrary (Any-typed) config value."""
    called = {}
    conf_value = 234

    @solid(config_def=ConfigDefinition())
    def hello_world(info):
        assert info.config == conf_value
        called['yup'] = True

    execute_single_solid(
        create_test_context(),
        hello_world,
        environment=config.Environment(
            solids={'hello_world': config.Solid(conf_value)}),
    )

    assert called['yup']
def test_pandas_multiple_inputs():
    """Two CSV loads feed one solid that element-wise sums the frames."""
    environment = config.Environment(
        solids={
            'load_one': config.Solid({'path': script_relative_path('num.csv')}),
            'load_two': config.Solid({'path': script_relative_path('num.csv')}),
        },
    )

    def transform_fn(_context, inputs):
        return inputs['num_csv1'] + inputs['num_csv2']

    double_sum = _dataframe_solid(
        name='double_sum',
        inputs=[
            InputDefinition('num_csv1', dagster_pd.DataFrame),
            InputDefinition('num_csv2', dagster_pd.DataFrame),
        ],
        transform_fn=transform_fn,
    )

    pipeline = PipelineDefinition(
        solids=[
            dagster_pd.load_csv_solid('load_one'),
            dagster_pd.load_csv_solid('load_two'),
            double_sum,
        ],
        dependencies={
            'double_sum': {
                'num_csv1': DependencyDefinition('load_one'),
                'num_csv2': DependencyDefinition('load_two'),
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline, environment=environment)
    output_df = pipeline_result.result_for_solid('double_sum').transformed_value()

    assert not output_df.empty
    assert output_df.to_dict('list') == {
        'num1': [2, 6],
        'num2': [4, 8],
    }
def test_hello_world_pipeline_no_api():
    """Builds the hello-world pipeline by hand (no helper APIs) and checks the sum."""

    def hello_world_transform_fn(_context, inputs):
        num_df = inputs['num_df']
        num_df['sum'] = num_df['num1'] + num_df['num2']
        return num_df

    read_csv_solid = define_read_csv_solid('read_csv_solid')
    hello_world = single_output_transform(
        name='hello_world',
        inputs=[InputDefinition('num_df')],
        transform_fn=hello_world_transform_fn,
        output=OutputDefinition(),
    )

    pipeline = PipelineDefinition(
        solids=[read_csv_solid, hello_world],
        dependencies={
            'hello_world': {'num_df': DependencyDefinition('read_csv_solid')},
        },
    )

    pipeline_result = execute_pipeline(
        pipeline,
        config.Environment(
            solids={
                'read_csv_solid': config.Solid({
                    'path': script_relative_path('num.csv'),
                }),
            },
        ),
    )
    assert pipeline_result.success

    result = pipeline_result.result_for_solid('hello_world')
    assert result.transformed_value().to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }