import os
import tempfile
from unittest import mock

import pytest

from dagster import (
    DagsterExecutionStepExecutionError,
    DagsterTypeCheckError,
    DependencyDefinition,
    Field,
    InputDefinition,
    Int,
    ModeDefinition,
    OutputDefinition,
    PipelineDefinition,
    ResourceDefinition,
    SolidInvocation,
    lambda_solid,
    pipeline,
    resource,
    solid,
)
from dagster.utils import file_relative_path
from dagster.utils.test import execute_solid_within_pipeline

# These tests also rely on pyspark (SparkSession, Row, DataFrame), the airline_demo
# package (DbInfo, load_data_to_database_from_spark), and local test helpers
# (create_diamond_pipeline, create_root_solid, define_stub_solid, input_set);
# those names are assumed to be imported from their original modules.


def test_single_solid_with_context_config():
    @resource(config_field=Field(Int, is_optional=True, default_value=2))
    def num_resource(init_context):
        return init_context.resource_config

    ran = {'count': 0}

    @solid
    def check_context_config_for_two(context):
        assert context.resources.num == 2
        ran['count'] += 1

    pipeline_def = PipelineDefinition(
        solid_defs=[check_context_config_for_two],
        mode_defs=[ModeDefinition(resource_defs={'num': num_resource})],
    )

    result = execute_solid_within_pipeline(
        pipeline_def,
        'check_context_config_for_two',
        environment_dict={'resources': {'num': {'config': 2}}},
    )

    assert result.success
    assert ran['count'] == 1

    # The resource config defaults to 2, so the solid also passes with no environment.
    result = execute_solid_within_pipeline(pipeline_def, 'check_context_config_for_two')

    assert result.success
    assert ran['count'] == 2

def test_single_solid_type_checking_output_error():
    @lambda_solid(output_def=OutputDefinition(Int))
    def return_string():
        return 'ksjdfkjd'

    pipeline_def = PipelineDefinition(solid_defs=[return_string])

    with pytest.raises(DagsterTypeCheckError):
        execute_solid_within_pipeline(pipeline_def, 'return_string')

def test_failing_solid_in_isolation():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException('nope')

    pipeline_def = PipelineDefinition(solid_defs=[throw_an_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid_within_pipeline(pipeline_def, 'throw_an_error')

    assert isinstance(e_info.value.__cause__, ThisException)

def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    pipeline_def = PipelineDefinition(solid_defs=[throw_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid_within_pipeline(pipeline_def, 'throw_error')

    assert isinstance(e_info.value.__cause__, SomeError)

def test_execute_solid_in_diamond():
    solid_result = execute_solid_within_pipeline(
        create_diamond_pipeline(), 'A', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{'a key': 'a value'}, {'A': 'compute_called'}]

def test_single_solid_with_multiple_inputs():
    @lambda_solid
    def solid_one():
        return 1

    @lambda_solid(input_defs=[InputDefinition(name='num_one'), InputDefinition('num_two')])
    def add_solid(num_one, num_two):
        return num_one + num_two

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_one, add_solid],
        dependencies={
            'add_solid': {
                'num_one': DependencyDefinition('solid_one'),
                'num_two': DependencyDefinition('solid_one'),
            }
        },
    )

    result = execute_solid_within_pipeline(
        pipeline_def,
        'add_solid',
        inputs={'num_one': 2, 'num_two': 3},
        environment_dict={'loggers': {'console': {'config': {'log_level': 'DEBUG'}}}},
    )

    assert result.success
    assert result.output_value() == 5

def test_single_solid_in_isolation():
    @lambda_solid
    def solid_one():
        return 1

    pipeline_def = PipelineDefinition(solid_defs=[solid_one])

    result = execute_solid_within_pipeline(pipeline_def, 'solid_one')

    assert result.success
    assert result.output_value() == 1

def test_execute_aliased_solid_in_diamond():
    a_source = define_stub_solid('A_source', [input_set('A_input')])

    @pipeline
    def aliased_pipeline():
        create_root_solid('A').alias('aliased')(a_source())

    solid_result = execute_solid_within_pipeline(
        aliased_pipeline, 'aliased', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{'a key': 'a value'}, {'aliased': 'compute_called'}]

def test_execute_aliased_solid_in_diamond_with_explicit_deps():
    # Same scenario as above, but built with an explicit dependency dict and
    # SolidInvocation rather than the @pipeline composition function.
    a_source = define_stub_solid('A_source', [input_set('A_input')])

    pipeline_def = PipelineDefinition(
        name='aliased_pipeline',
        solid_defs=[a_source, create_root_solid('A')],
        dependencies={
            SolidInvocation('A', alias='aliased'): {'A_input': DependencyDefinition(a_source.name)}
        },
    )

    solid_result = execute_solid_within_pipeline(
        pipeline_def, 'aliased', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{'a key': 'a value'}, {'aliased': 'compute_called'}]

def test_single_solid_with_config():
    ran = {}

    @solid(config_field=Field(Int))
    def check_config_for_two(context):
        assert context.solid_config == 2
        ran['check_config_for_two'] = True

    pipeline_def = PipelineDefinition(solid_defs=[check_config_for_two])

    result = execute_solid_within_pipeline(
        pipeline_def,
        'check_config_for_two',
        environment_dict={'solids': {'check_config_for_two': {'config': 2}}},
    )

    assert result.success
    assert ran['check_config_for_two']

def test_single_solid_with_single_input():
    @lambda_solid
    def solid_one():
        return 1

    @lambda_solid(input_defs=[InputDefinition(name='num')])
    def add_one_solid(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_one, add_one_solid],
        dependencies={'add_one_solid': {'num': DependencyDefinition('solid_one')}},
    )

    result = execute_solid_within_pipeline(pipeline_def, 'add_one_solid', inputs={'num': 2})

    assert result.success
    assert result.output_value() == 3

def test_spark_dataframe_output_csv():
    spark = SparkSession.builder.getOrCreate()

    num_df = (
        spark.read.format('csv')
        .options(header='true', inferSchema='true')
        .load(file_relative_path(__file__, 'num.csv'))
    )

    assert num_df.collect() == [Row(num1=1, num2=2)]

    @solid(
        input_defs=[InputDefinition('df', DataFrame)], output_defs=[OutputDefinition(DataFrame)]
    )
    def passthrough_df(_context, df):
        return df

    @pipeline
    def passthrough():
        passthrough_df()  # pylint: disable=no-value-for-parameter

    with tempfile.TemporaryDirectory() as tempdir:
        file_name = os.path.join(tempdir, 'output.csv')

        # The output materialization config writes the passthrough dataframe to csv.
        result = execute_solid_within_pipeline(
            passthrough,
            'passthrough_df',
            inputs={'df': num_df},
            environment_dict={
                'solids': {
                    'passthrough_df': {
                        'outputs': [{'result': {'csv': {'path': file_name, 'header': True}}}]
                    }
                }
            },
        )

        from_file_df = (
            spark.read.format('csv').options(header='true', inferSchema='true').load(file_name)
        )

        assert result.output_value().collect() == from_file_df.collect()

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={'db_info': ResourceDefinition.hardcoded_resource(db_info_mock)}
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark()  # pylint: disable=no-value-for-parameter

    solid_result = execute_solid_within_pipeline(
        load_df_test,
        'load_data_to_database_from_spark',
        inputs={'data_frame': mock.MagicMock(spec=DataFrame)},
        environment_dict={
            'solids': {'load_data_to_database_from_spark': {'config': {'table_name': 'foo'}}}
        },
    )
    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2

    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'