예제 #1
0
def test_error_monster_wrong_mode():
    """Requesting a mode name that error_monster does not define must raise."""
    environment = {
        # Each solid gets its own fresh config dict, all with errors disabled.
        'solids': {
            name: {'config': {'throw_in_solid': False, 'return_wrong_type': False}}
            for name in ('start', 'middle', 'end')
        },
        'resources': {
            'errorable_resource': {
                'config': {'throw_on_resource_init': False}
            }
        },
    }
    with pytest.raises(DagsterInvariantViolationError):
        execute_pipeline_with_mode(
            pipeline=error_monster,
            mode='nope',
            environment_dict=environment,
        )
예제 #2
0
def test_execute_multi_mode_with_resources():
    """Each mode binds a different 'op' resource; check both arithmetic results."""
    pipeline_def = define_multi_mode_with_resources_pipeline()

    # (mode, resource config, expected output of apply_to_three)
    cases = (
        ('add_mode', 2, 5),
        ('mult_mode', 3, 9),
    )
    for mode_name, op_config, expected in cases:
        result = execute_pipeline_with_mode(
            pipeline=pipeline_def,
            mode=mode_name,
            environment_dict={'resources': {'op': {'config': op_config}}},
        )
        assert result.result_for_solid('apply_to_three').output_value() == expected
예제 #3
0
def test_execute_multi_mode_loggers_with_multiple_loggers_single_config():
    """Configuring only the 'foo' logger in a two-logger mode leaves 'bar' silent."""
    (
        pipeline_def,
        foo_captured,
        bar_captured,
    ) = define_multi_mode_with_loggers_pipeline()

    execute_pipeline_with_mode(
        pipeline_def,
        mode='foo_bar_mode',
        environment_dict={
            'loggers': {'foo': {'config': {'log_level': 'DEBUG'}}}
        },
    )

    foo_messages = parse_captured_results(foo_captured)

    # Exactly one captured foo message should carry the marker text.
    assert sum(1 for msg in foo_messages if 'Here we are' in msg) == 1

    # bar was never configured, so it should have captured nothing.
    assert not bar_captured
예제 #4
0
def test_execute_multi_mode_errors():
    """A multi-mode pipeline needs an explicit, valid mode to execute."""
    multi_mode_pipeline = define_multi_mode_pipeline()

    # No mode selected at all.
    with pytest.raises(DagsterInvariantViolationError):
        execute_pipeline(multi_mode_pipeline)

    # A mode name the pipeline does not define.
    with pytest.raises(DagsterInvariantViolationError):
        execute_pipeline_with_mode(pipeline=multi_mode_pipeline, mode='wrong_mode')
예제 #5
0
def test_execute_pipeline_with_non_existant_mode():
    """An unknown mode name raises even when the environment dict is valid."""
    env = {
        'solids': {
            'solid_that_uses_adder_resource': {'inputs': {'number': {'value': 4}}}
        }
    }
    with pytest.raises(DagsterInvariantViolationError):
        execute_pipeline_with_mode(pipeline_with_mode, 'BAD', environment_dict=env)
예제 #6
0
def test_execute_multi_mode():
    """Both declared modes run the same DAG and yield the same solid output."""
    multi_mode_pipeline = define_multi_mode_pipeline()

    for mode_name in ('mode_one', 'mode_two'):
        result = execute_pipeline_with_mode(
            pipeline=multi_mode_pipeline, mode=mode_name
        )
        assert result.result_for_solid('return_three').output_value() == 3
예제 #7
0
def test_local():
    """The example pipeline succeeds in 'local' mode with minimal solid config."""
    blah_config = {'config': {'foo': 'a string', 'bar': 123}}
    result = execute_pipeline_with_mode(
        pipeline=example_pipe,
        mode='local',
        environment_dict={'solids': {'blah': blah_config}},
    )
    assert result.success
예제 #8
0
def test_warehouse_resource(postgres):
    """Run the modes pipeline in 'dev' against a postgres-backed warehouse."""
    environment_dict = {
        'solids': {
            'read_csv': {'inputs': {'csv_path': {'value': 'cereal.csv'}}}
        },
        'resources': {
            'warehouse': {'config': {'conn_str': postgres}}
        },
    }
    tutorial_dir = script_relative_path('../../dagster_examples/intro_tutorial/')
    with pushd(tutorial_dir):
        result = execute_pipeline_with_mode(
            pipeline=modes_pipeline,
            mode='dev',
            environment_dict=environment_dict,
        )
    assert result.success

    # The preset-based variant is skipped on Buildkite CI.
    if not BUILDKITE:
        with pushd(tutorial_dir):
            preset_result = execute_pipeline_with_preset(
                presets_pipeline, preset_name='dev'
            )
        assert preset_result.success
예제 #9
0
def test_default_pyspark_decorator():
    """Two pyspark solids, each bound to its own named pyspark resource."""

    @pyspark_solid(pyspark_resource_key='first_pyspark')
    def first_pyspark_job(context):
        people = [('Michelle', 19), ('Austin', 29), ('Lydia', 35)]
        rdd = context.resources.first_pyspark.spark_context.parallelize(people)
        for name, age in rdd.take(2):
            print('%s: %d' % (name, age))

    @pyspark_solid(pyspark_resource_key='last_pyspark')
    def last_pyspark_job(context):
        people = [('John', 19), ('Jennifer', 29), ('Adam', 35), ('Henry', 50)]
        rdd = context.resources.last_pyspark.spark_context.parallelize(people)
        for name, age in rdd.take(2):
            print('%s: %d' % (name, age))

    mode = ModeDefinition(
        'default',
        resource_defs={
            'first_pyspark': pyspark_resource,
            'last_pyspark': pyspark_resource,
        },
    )

    @pipeline(mode_defs=[mode])
    def pipe():
        first_pyspark_job()
        last_pyspark_job()

    assert execute_pipeline_with_mode(pipeline=pipe, mode='default').success
예제 #10
0
def test_named_pyspark_decorator():
    """A pyspark solid with an explicit name, description, and config schema."""

    @pyspark_solid(name='blah', description='foo bar', config={'foo': Field(str)})
    def pyspark_job(context):
        rdd = context.resources.pyspark.spark_context.parallelize(range(10))
        for item in rdd.collect():
            print(item)

    mode = ModeDefinition('default', resource_defs={'pyspark': pyspark_resource})

    @pipeline(mode_defs=[mode])
    def pipe():
        pyspark_job()

    # Solid config is addressed under the explicit name 'blah'.
    environment = {'solids': {'blah': {'config': {'foo': 'baz'}}}}
    result = execute_pipeline_with_mode(
        pipeline=pipe,
        mode='default',
        environment_dict=environment,
    )
    assert result.success
예제 #11
0
def test_execute_single_mode():
    """A single-mode pipeline runs with or without naming its only mode."""
    single_mode_pipeline = define_single_mode_pipeline()
    assert single_mode_pipeline.is_single_mode is True

    # Implicit mode selection.
    implicit = execute_pipeline(single_mode_pipeline)
    assert implicit.result_for_solid('return_two').output_value() == 2

    # Explicitly naming the only mode gives the same result.
    explicit = execute_pipeline_with_mode(single_mode_pipeline, mode='the_mode')
    assert explicit.result_for_solid('return_two').output_value() == 2
예제 #12
0
def test_airline_pipeline_1_warehouse(postgres, pg_hostname):
    """The warehouse pipeline runs locally with layered YAML configuration."""
    warehouse_config = load_yaml_from_globs(
        config_path('test_base.yaml'),
        config_path('local_warehouse.yaml'),
    )
    result = execute_pipeline_with_mode(
        pipeline=warehouse_pipeline_def,
        mode='local',
        environment_dict=warehouse_config,
        instance=DagsterInstance.local_temp(),
    )
    assert result.success
예제 #13
0
def test_ingest_pipeline_fast(postgres, pg_hostname):
    """The ingest pipeline succeeds locally with the fast-ingest YAML config."""
    ingest_config = load_yaml_from_globs(
        config_path('test_base.yaml'),
        config_path('local_fast_ingest.yaml'),
    )
    result = execute_pipeline_with_mode(
        pipeline=ingest_pipeline_def,
        mode='local',
        environment_dict=ingest_config,
        instance=DagsterInstance.local_temp(),
    )
    assert result.success
예제 #14
0
def test_execute_pipeline_with_mode():
    """The selected mode determines which adder resource is applied to 4."""
    env = {
        'solids': {
            'solid_that_uses_adder_resource': {'inputs': {'number': {'value': 4}}}
        }
    }
    for mode_name, expected in (('add_one', 5), ('add_two', 6)):
        pipeline_result = execute_pipeline_with_mode(
            pipeline_with_mode,
            environment_dict=env,
            mode=mode_name,
        )
        assert pipeline_result.success
        solid_result = pipeline_result.result_for_solid('solid_that_uses_adder_resource')
        assert solid_result.output_value() == expected
예제 #15
0
def test_execute_multi_mode_loggers_with_single_logger_extra_config():
    """Config for a logger not present in the selected mode is rejected."""
    pipeline_def, _, __ = define_multi_mode_with_loggers_pipeline()

    # 'bar' is not a logger of foo_mode, so config validation must fail.
    with pytest.raises(DagsterInvalidConfigError):
        execute_pipeline_with_mode(
            pipeline=pipeline_def,
            mode='foo_mode',
            environment_dict={
                'loggers': {
                    'foo': {'config': {'log_level': 'DEBUG'}},
                    'bar': {'config': {'log_level': 'DEBUG'}},
                }
            },
        )
예제 #16
0
def test_bad_requirements_txt():
    """A nonexistent requirements file path fails fast at definition time."""
    environment = {
        'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}},
        'resources': {
            'pyspark': {
                'config': {
                    'requirements_file_path': 'DOES_NOT_EXIST',
                    'pipeline_file': __file__,
                    'pipeline_fn_name': 'example_pipe',
                    'cluster_id': 'some_cluster_id',
                    'staging_bucket': 'dagster-scratch-80542c2',
                    'region_name': 'us-west-1',
                }
            }
        },
    }
    with pytest.raises(DagsterInvalidDefinitionError) as exc_info:
        execute_pipeline_with_mode(
            pipeline=example_pipe,
            mode='prod',
            environment_dict=environment,
        )
    assert 'The requirements.txt file that was specified does not exist' in str(exc_info.value)
예제 #17
0
def test_simple_pyspark_decorator():
    """A bare @pyspark_solid defaults to the 'pyspark' resource key."""

    @pyspark_solid
    def pyspark_job(context):
        rdd = context.resources.pyspark.spark_context.parallelize(range(10))
        for item in rdd.collect():
            print(item)

    mode = ModeDefinition('default', resource_defs={'pyspark': pyspark_resource})

    @pipeline(mode_defs=[mode])
    def pipe():
        pyspark_job()

    result = execute_pipeline_with_mode(pipeline=pipe, mode='default')
    assert result.success
예제 #18
0
def test_pyspark_decorator_with_arguments():
    """A pyspark solid can consume an upstream solid's output as an input."""

    @solid
    def produce_number(_):
        return 10

    @pyspark_solid(input_defs=[InputDefinition('count', int)])
    def pyspark_job(context, count):
        rdd = context.resources.pyspark.spark_context.parallelize(range(count))
        for item in rdd.collect():
            print(item)

    mode = ModeDefinition('default', resource_defs={'pyspark': pyspark_resource})

    @pipeline(mode_defs=[mode])
    def pipe():
        pyspark_job(produce_number())

    result = execute_pipeline_with_mode(pipeline=pipe, mode='default')
    assert result.success
예제 #19
0
def test_pyspark_emr(mock_wait):
    """Run the example pipeline in 'prod' mode against a freshly created EMR cluster.

    ``mock_wait`` patches the EMR wait call, so the test does not block on a
    real cluster reaching a terminal state.
    """
    run_job_flow_args = dict(
        Instances={
            'InstanceCount': 1,
            'KeepJobFlowAliveWhenNoSteps': True,
            'MasterInstanceType': 'c3.medium',
            'Placement': {'AvailabilityZone': 'us-west-1a'},
            'SlaveInstanceType': 'c3.xlarge',
        },
        JobFlowRole='EMR_EC2_DefaultRole',
        LogUri='s3://mybucket/log',
        Name='cluster',
        ServiceRole='EMR_DefaultRole',
        VisibleToAllUsers=True,
    )

    # Doing cluster setup outside of a solid here, because run_job_flow is not yet plumbed through
    # to the pyspark EMR resource.
    job_runner = EmrJobRunner(region='us-west-1')
    context = create_test_pipeline_execution_context()
    cluster_id = job_runner.run_job_flow(context, run_job_flow_args)

    result = execute_pipeline_with_mode(
        pipeline=example_pipe,
        mode='prod',
        environment_dict={
            'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}},
            'resources': {
                'pyspark': {
                    'config': {
                        'pipeline_file': __file__,
                        'pipeline_fn_name': 'example_pipe',
                        'cluster_id': cluster_id,
                        'staging_bucket': 'dagster-scratch-80542c2',
                        'region_name': 'us-west-1',
                    }
                }
            },
        },
    )
    assert result.success
    # BUG FIX: `assert mock_wait.called_once` could never fail -- accessing
    # `called_once` on a Mock lazily creates a child mock, which is always
    # truthy. Use the real Mock assertion helper instead.
    mock_wait.assert_called_once()
예제 #20
0
def test_do_it_live_emr():
    """Execute the example pipeline against a live EMR cluster.

    The target cluster id is read from the AWS_EMR_JOB_FLOW_ID environment
    variable, and wait_for_logs makes the run block until logs are fetched.
    """
    resource_config = {
        'pipeline_file': __file__,
        'pipeline_fn_name': 'example_pipe',
        'cluster_id': os.environ.get('AWS_EMR_JOB_FLOW_ID'),
        'staging_bucket': 'dagster-scratch-80542c2',
        'region_name': 'us-west-1',
        'wait_for_logs': True,
    }
    result = execute_pipeline_with_mode(
        pipeline=example_pipe,
        mode='prod',
        environment_dict={
            'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}},
            'resources': {'pyspark': {'config': resource_config}},
        },
    )
    assert result.success
예제 #21
0
def test_error_monster_success():
    """error_monster succeeds both by default and via its explicit errorable_mode."""

    def _passing_environment():
        # Build a fresh dict per call so the two executions cannot share
        # (or observe mutations of) the same config objects.
        return {
            'solids': {
                name: {
                    'config': {
                        'throw_in_solid': False,
                        'return_wrong_type': False,
                    }
                }
                for name in ('start', 'middle', 'end')
            },
            'resources': {
                'errorable_resource': {
                    'config': {'throw_on_resource_init': False}
                }
            },
        }

    # Default execution path.
    assert execute_pipeline(
        error_monster,
        environment_dict=_passing_environment(),
    ).success

    # Explicit mode selection.
    assert execute_pipeline_with_mode(
        pipeline=error_monster,
        mode='errorable_mode',
        environment_dict=_passing_environment(),
    ).success
예제 #22
0
def test_generate_training_set(mocker):
    """End-to-end check of the bay_bikes training-set pipeline in 'testing' mode.

    read_sql_table is patched with canned frames (mock_read_sql), the pipeline
    is executed, and then the traffic/weather intermediate outputs, the final
    training set + labels, and the GCS materialization event are verified.
    """
    mocker.patch('dagster_examples.bay_bikes.solids.read_sql_table',
                 side_effect=mock_read_sql)

    # Execute Pipeline
    test_pipeline_result = execute_pipeline_with_mode(
        pipeline=generate_test_training_set_pipeline,
        mode='testing',
        environment_dict=compose_training_data_env_dict(),
    )
    assert test_pipeline_result.success

    # Check solids
    EXPECTED_TRAFFIC_RECORDS = [
        {
            'interval_date': date(2019, 7, 31),
            'peak_traffic_load': 1,
            'time': Timestamp('2019-07-31 00:00:00'),
        },
        {
            'interval_date': date(2019, 8, 31),
            'peak_traffic_load': 1,
            'time': Timestamp('2019-08-31 00:00:00'),
        },
    ]
    # Subset check: every expected record must appear in the produced frame.
    traffic_dataset = test_pipeline_result.output_for_solid(
        'transform_into_traffic_dataset',
        output_name='traffic_dataframe').to_dict('records')
    assert all(record in EXPECTED_TRAFFIC_RECORDS
               for record in traffic_dataset)

    EXPECTED_WEATHER_RECORDS = [
        {
            'time': Timestamp('2019-08-31 00:00:00'),
            'summary': 'Clear throughout the day.',
            'icon': 'clear-day',
            'sunriseTime': 1546269960,
            'sunsetTime': 1546304520,
            'precipIntensity': 0.0007,
            'precipIntensityMax': 0.0019,
            'precipProbability': 0.05,
            'precipType': 'rain',
            'temperatureHigh': 56.71,
            'temperatureHighTime': 1546294020,
            'temperatureLow': 44.75,
            'temperatureLowTime': 1546358040,
            'dewPoint': 28.34,
            'humidity': 0.43,
            'pressure': 1017.7,
            'windSpeed': 12.46,
            'windGust': 26.85,
            'windGustTime': 1546289220,
            'windBearing': 0,
            'cloudCover': 0.11,
            'uvIndex': 2,
            'uvIndexTime': 1546287180,
            'visibility': 10,
            'ozone': 314.4,
        },
        {
            'time': Timestamp('2019-07-31 00:00:00'),
            'summary': 'Clear throughout the day.',
            'icon': 'clear-day',
            'sunriseTime': 1546356420,
            'sunsetTime': 1546390920,
            'precipIntensity': 0.0005,
            'precipIntensityMax': 0.0016,
            'precipProbability': 0.02,
            'precipType': 'sunny',
            'temperatureHigh': 55.91,
            'temperatureHighTime': 1546382040,
            'temperatureLow': 41.18,
            'temperatureLowTime': 1546437660,
            'dewPoint': 20.95,
            'humidity': 0.33,
            'pressure': 1023.3,
            'windSpeed': 6.77,
            'windGust': 22.08,
            'windGustTime': 1546343340,
            'windBearing': 22,
            'cloudCover': 0.1,
            'uvIndex': 2,
            'uvIndexTime': 1546373580,
            'visibility': 10,
            'ozone': 305.3,
        },
    ]
    # Same subset check for the weather dataframe.
    weather_dataset = test_pipeline_result.output_for_solid(
        'produce_weather_dataset',
        output_name='weather_dataframe').to_dict('records')
    assert all(record in EXPECTED_WEATHER_RECORDS
               for record in weather_dataset)

    # Ensure we are generating the expected training set
    training_set, labels = test_pipeline_result.output_for_solid(
        'produce_training_set')
    assert len(labels) == 1 and labels[0] == 1
    # The two feature rows match the numeric fields of the two weather
    # records above (July record first, then August), in declaration order.
    assert array_equal(
        training_set,
        [[
            [
                1546356420.0,
                1546390920.0,
                0.0005,
                0.0016,
                0.02,
                55.91,
                1546382040.0,
                41.18,
                1546437660.0,
                20.95,
                0.33,
                1023.3,
                6.77,
                22.08,
                1546343340.0,
                22.0,
                0.1,
                2.0,
                1546373580.0,
                10.0,
                305.3,
            ],
            [
                1546269960.0,
                1546304520.0,
                0.0007,
                0.0019,
                0.05,
                56.71,
                1546294020.0,
                44.75,
                1546358040.0,
                28.34,
                0.43,
                1017.7,
                12.46,
                26.85,
                1546289220.0,
                0.0,
                0.11,
                2.0,
                1546287180.0,
                10.0,
                314.4,
            ],
        ]],
    )
    # Exactly one STEP_MATERIALIZATION event should come from the GCS upload.
    materialization_events = [
        event for event in test_pipeline_result.step_event_list
        if event.solid_name == 'upload_training_set_to_gcs'
        and event.event_type_value == 'STEP_MATERIALIZATION'
    ]
    assert len(materialization_events) == 1
    materialization = materialization_events[
        0].event_specific_data.materialization
    assert materialization.label == 'GCS Blob'
    materialization_event_metadata = materialization.metadata_entries
    assert len(materialization_event_metadata) == 1
    assert materialization_event_metadata[
        0].label == 'google cloud storage URI'
    assert materialization_event_metadata[0].entry_data.text.startswith(
        'gs://dagster-scratch-ccdfe1e/training_data')

    # Clean up
    shutil.rmtree(os.path.join(tempfile.gettempdir(), 'testing-storage'),
                  ignore_errors=True)
예제 #23
0
    mode_defs=[
        ModeDefinition(
            name='unittest',
            resource_defs={'warehouse': local_sqlite_warehouse_resource},
        ),
        ModeDefinition(
            name='dev',
            resource_defs={
                'warehouse': sqlachemy_postgres_warehouse_resource
            },
        ),
    ]
)
def modes_pipeline():
    normalize_calories(read_csv())


if __name__ == '__main__':
    # Smoke-run the modes pipeline in 'unittest' mode against an in-memory
    # sqlite warehouse when this file is executed as a script.
    run_config = {
        'solids': {
            'read_csv': {'inputs': {'csv_path': {'value': 'cereal.csv'}}}
        },
        'resources': {'warehouse': {'config': {'conn_str': ':memory:'}}},
    }
    outcome = execute_pipeline_with_mode(
        pipeline=modes_pipeline,
        mode='unittest',
        environment_dict=run_config,
    )
    assert outcome.success
예제 #24
0
def test_wrong_single_mode():
    """Naming a mode that the single-mode pipeline does not define raises."""
    with pytest.raises(DagsterInvariantViolationError):
        result = execute_pipeline_with_mode(
            pipeline=define_single_mode_pipeline(),
            mode='wrong_mode',
        )
        # Never reached: the call above raises before any result exists.
        assert result.result_for_solid('return_two').output_value() == 2