Example 1
def test_single_solid_with_context_config():
    @resource(config_field=Field(Int, is_optional=True, default_value=2))
    def num_resource(init_context):
        return init_context.resource_config

    ran = {'count': 0}

    @solid
    def check_context_config_for_two(context):
        assert context.resources.num == 2
        ran['count'] += 1

    pipeline_def = PipelineDefinition(
        solid_defs=[check_context_config_for_two],
        mode_defs=[ModeDefinition(resource_defs={'num': num_resource})],
    )

    result = execute_solid_within_pipeline(
        pipeline_def,
        'check_context_config_for_two',
        environment_dict={'resources': {'num': {'config': 2}}},
    )

    assert result.success
    assert ran['count'] == 1

    result = execute_solid_within_pipeline(pipeline_def, 'check_context_config_for_two')

    assert result.success
    assert ran['count'] == 2
Example 2
def test_single_solid_type_checking_output_error():
    @lambda_solid(output_def=OutputDefinition(Int))
    def return_string():
        return 'ksjdfkjd'

    pipeline_def = PipelineDefinition(solid_defs=[return_string])

    with pytest.raises(DagsterTypeCheckError):
        execute_solid_within_pipeline(pipeline_def, 'return_string')
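
For contrast, a minimal sketch of the passing counterpart (not taken from the source; the test and solid names are hypothetical), built only from calls that already appear in these examples: when the returned value matches the declared Int output type, no DagsterTypeCheckError is raised and output_value() returns the result.

def test_single_solid_type_checking_output_ok():
    # Hypothetical counterpart to Example 2: the declared Int output type
    # now matches the returned value, so the type check passes.
    @lambda_solid(output_def=OutputDefinition(Int))
    def return_int():
        return 42

    pipeline_def = PipelineDefinition(solid_defs=[return_int])

    result = execute_solid_within_pipeline(pipeline_def, 'return_int')
    assert result.success
    assert result.output_value() == 42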
Example 3
def test_failing_solid_in_isolation():
    class ThisException(Exception):
        pass

    @lambda_solid
    def throw_an_error():
        raise ThisException('nope')

    pipeline_def = PipelineDefinition(solid_defs=[throw_an_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid_within_pipeline(pipeline_def, 'throw_an_error')

    assert isinstance(e_info.value.__cause__, ThisException)
Example 4
def test_single_solid_error():
    class SomeError(Exception):
        pass

    @lambda_solid
    def throw_error():
        raise SomeError()

    pipeline_def = PipelineDefinition(solid_defs=[throw_error])

    with pytest.raises(DagsterExecutionStepExecutionError) as e_info:
        execute_solid_within_pipeline(pipeline_def, 'throw_error')

    assert isinstance(e_info.value.__cause__, SomeError)
Example 5
def test_execute_solid_in_diamond():
    solid_result = execute_solid_within_pipeline(
        create_diamond_pipeline(), "A", inputs={"A_input": [{"a key": "a value"}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{"a key": "a value"}, {"A": "compute_called"}]
Example 6
def test_single_solid_with_multiple_inputs():
    @lambda_solid
    def solid_one():
        return 1

    @lambda_solid(input_defs=[InputDefinition(name='num_one'), InputDefinition('num_two')])
    def add_solid(num_one, num_two):
        return num_one + num_two

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_one, add_solid],
        dependencies={
            'add_solid': {
                'num_one': DependencyDefinition('solid_one'),
                'num_two': DependencyDefinition('solid_one'),
            }
        },
    )

    result = execute_solid_within_pipeline(
        pipeline_def,
        'add_solid',
        inputs={'num_one': 2, 'num_two': 3},
        environment_dict={'loggers': {'console': {'config': {'log_level': 'DEBUG'}}}},
    )

    assert result.success
    assert result.output_value() == 5
Example 7
def test_execute_solid_in_diamond():
    solid_result = execute_solid_within_pipeline(
        create_diamond_pipeline(), 'A', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{'a key': 'a value'}, {'A': 'compute_called'}]
Example 8
def test_single_solid_in_isolation():
    @lambda_solid
    def solid_one():
        return 1

    pipeline_def = PipelineDefinition(solid_defs=[solid_one])

    result = execute_solid_within_pipeline(pipeline_def, 'solid_one')
    assert result.success
    assert result.output_value() == 1
Example 9
def test_execute_aliased_solid_in_diamond():
    a_source = define_stub_solid("A_source", [input_set("A_input")])

    @pipeline
    def aliased_pipeline():
        create_root_solid("A").alias("aliased")(a_source())

    solid_result = execute_solid_within_pipeline(
        aliased_pipeline, "aliased", inputs={"A_input": [{"a key": "a value"}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{"a key": "a value"}, {"aliased": "compute_called"}]
Example 10
def test_execute_aliased_solid_in_diamond():
    a_source = define_stub_solid('A_source', [input_set('A_input')])

    @pipeline
    def aliased_pipeline():
        create_root_solid('A').alias('aliased')(a_source())

    solid_result = execute_solid_within_pipeline(
        aliased_pipeline, 'aliased', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.output_value() == [{'a key': 'a value'}, {'aliased': 'compute_called'}]
Example 11
def test_execute_aliased_solid_in_diamond():
    a_source = define_stub_solid('A_source', [input_set('A_input')])
    pipeline_def = PipelineDefinition(
        name='aliased_pipeline',
        solid_defs=[a_source, create_root_solid('A')],
        dependencies={
            SolidInvocation('A', alias='aliased'): {'A_input': DependencyDefinition(a_source.name)}
        },
    )

    solid_result = execute_solid_within_pipeline(
        pipeline_def, 'aliased', inputs={'A_input': [{'a key': 'a value'}]}
    )

    assert solid_result.success
    assert solid_result.result_value() == [{'a key': 'a value'}, {'aliased': 'transform_called'}]
Example 12
def test_single_solid_with_config():
    ran = {}

    @solid(config_field=Field(Int))
    def check_config_for_two(context):
        assert context.solid_config == 2
        ran['check_config_for_two'] = True

    pipeline_def = PipelineDefinition(solid_defs=[check_config_for_two])
    result = execute_solid_within_pipeline(
        pipeline_def,
        'check_config_for_two',
        environment_dict={'solids': {'check_config_for_two': {'config': 2}}},
    )

    assert result.success
    assert ran['check_config_for_two']
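
Example 12 only exercises the case where the config value is supplied through environment_dict. As a hedged variant (not from the source; the names are hypothetical), the is_optional/default_value Field arguments shown in Example 1 for resource config should apply the same way to solid config, letting the solid run without any environment_dict:

def test_single_solid_with_default_config():
    # Hypothetical variant combining Example 12's solid config with the
    # is_optional/default_value Field kwargs from Example 1.
    ran = {}

    @solid(config_field=Field(Int, is_optional=True, default_value=2))
    def check_default_config_for_two(context):
        assert context.solid_config == 2
        ran['check_default_config_for_two'] = True

    pipeline_def = PipelineDefinition(solid_defs=[check_default_config_for_two])

    # No environment_dict passed: the default_value of 2 is used.
    result = execute_solid_within_pipeline(pipeline_def, 'check_default_config_for_two')

    assert result.success
    assert ran['check_default_config_for_two']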
Example 13
def test_single_solid_with_single():
    @lambda_solid
    def solid_one():
        return 1

    @lambda_solid(input_defs=[InputDefinition(name='num')])
    def add_one_solid(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[solid_one, add_one_solid],
        dependencies={'add_one_solid': {'num': DependencyDefinition('solid_one')}},
    )

    result = execute_solid_within_pipeline(pipeline_def, 'add_one_solid', inputs={'num': 2})
    assert result.success
    assert result.output_value() == 3
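
Since execute_solid_within_pipeline runs a single solid in isolation, its upstream dependencies are not executed; their values must be provided via inputs, as Example 13 does with the literal 2. A hedged sketch of threading a real upstream result into the downstream call, written as if appended inside Example 13's test function (the variable names are hypothetical):

    # Hypothetical continuation of Example 13: run the upstream solid in
    # isolation, then feed its actual output into the downstream solid.
    upstream = execute_solid_within_pipeline(pipeline_def, 'solid_one')
    assert upstream.output_value() == 1

    downstream = execute_solid_within_pipeline(
        pipeline_def, 'add_one_solid', inputs={'num': upstream.output_value()}
    )
    assert downstream.success
    assert downstream.output_value() == 2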
Example 14
def test_spark_dataframe_output_csv():
    spark = SparkSession.builder.getOrCreate()
    num_df = (spark.read.format('csv').options(
        header='true',
        inferSchema='true').load(file_relative_path(__file__, 'num.csv')))

    assert num_df.collect() == [Row(num1=1, num2=2)]

    @solid(input_defs=[InputDefinition('df', DataFrame)],
           output_defs=[OutputDefinition(DataFrame)])
    def passthrough_df(_context, df):
        return df

    @pipeline
    def passthrough():
        passthrough_df()  # pylint: disable=no-value-for-parameter

    with tempfile.TemporaryDirectory() as tempdir:
        file_name = os.path.join(tempdir, 'output.csv')
        result = execute_solid_within_pipeline(
            passthrough,
            'passthrough_df',
            inputs={'df': num_df},
            environment_dict={
                'solids': {
                    'passthrough_df': {
                        'outputs': [{
                            'result': {
                                'csv': {
                                    'path': file_name,
                                    'header': True
                                }
                            }
                        }]
                    }
                }
            },
        )

        from_file_df = (spark.read.format('csv').options(
            header='true', inferSchema='true').load(file_name))

        assert result.result_value().collect() == from_file_df.collect()
Example 15
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                'db_info': ResourceDefinition.hardcoded_resource(db_info_mock)
            })
    ])
    def load_df_test():
        load_data_to_database_from_spark()  # pylint: disable=no-value-for-parameter

    solid_result = execute_solid_within_pipeline(
        load_df_test,
        'load_data_to_database_from_spark',
        inputs={'data_frame': mock.MagicMock(spec=DataFrame)},
        environment_dict={
            'solids': {
                'load_data_to_database_from_spark': {
                    'config': {
                        'table_name': 'foo'
                    }
                }
            }
        },
    )
    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'