예제 #1
0
def test_presets():
    """Check preset construction and execution-by-preset on a two-solid pipeline.

    Covers presets built from YAML files and from an inline environment dict,
    the errors raised while building presets from unusable files, and the
    error raised when an unknown preset name is requested.
    """

    @solid(config={'error': Bool})
    def can_fail(context):
        # Only blows up when the solid config explicitly asks for it.
        if not context.solid_config['error']:
            return 'cool'
        raise Exception('I did an error')

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    def env_file(filename):
        # Fixture files are resolved relative to this test module.
        return file_relative_path(__file__, filename)

    presets = [
        PresetDefinition.from_files(
            'passing',
            environment_files=[env_file('pass_env.yaml')],
            solid_subset=['can_fail'],
        ),
        PresetDefinition(
            'passing_direct_dict',
            environment_dict={'solids': {'can_fail': {'config': {'error': False}}}},
            solid_subset=['can_fail'],
        ),
        PresetDefinition.from_files(
            'failing_1',
            environment_files=[env_file('fail_env.yaml')],
            solid_subset=['can_fail'],
        ),
        # No solid subset is given for this preset, unlike the ones above.
        PresetDefinition.from_files(
            'failing_2', environment_files=[env_file('pass_env.yaml')]
        ),
    ]
    simple_pipeline = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=presets,
    )

    # Building a preset from a path that does not exist must fail.
    with pytest.raises(DagsterInvalidDefinitionError):
        PresetDefinition.from_files(
            'invalid_1', environment_files=[env_file('not_a_file.yaml')]
        )

    # Building a preset from a Python source file must fail as well.
    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            environment_files=[env_file('test_repository_definition.py')],
        )

    for passing_name in ('passing', 'passing_direct_dict'):
        assert execute_pipeline_with_preset(simple_pipeline, passing_name).success

    for failing_name in ('failing_1', 'failing_2'):
        with pytest.raises(Exception):
            execute_pipeline_with_preset(simple_pipeline, failing_name)

    # Asking for a preset that was never registered is reported by name lookup.
    with pytest.raises(Exception, match="Could not find preset"):
        execute_pipeline_with_preset(simple_pipeline, 'not_failing')
예제 #2
0
def define_multi_mode_with_resources_pipeline():
    """Build the ``multi_mode_with_resources`` pipeline.

    The pipeline's single solid calls the ``op`` resource with the number 3.
    Each of the three modes binds ``op`` to a different resource, and both
    presets target ``add_mode``.
    """

    @resource(config_schema=Int)
    def adder_resource(init_context):
        # The callable reads the configured addend every time it is invoked.
        def add_configured(x):
            return x + init_context.resource_config

        return add_configured

    @resource(config_schema=Int)
    def multer_resource(init_context):
        # The callable reads the configured factor every time it is invoked.
        def multiply_configured(x):
            return x * init_context.resource_config

        return multiply_configured

    @resource(config_schema={"num_one": Int, "num_two": Int})
    def double_adder_resource(init_context):
        # Adds both configured numbers, in declaration order, to the argument.
        def add_both_configured(x):
            return (
                x
                + init_context.resource_config["num_one"]
                + init_context.resource_config["num_two"]
            )

        return add_both_configured

    @solid(required_resource_keys={"op"})
    def apply_to_three(context):
        return context.resources.op(3)

    modes = [
        ModeDefinition(
            name="add_mode",
            resource_defs={"op": adder_resource, "io_manager": fs_io_manager},
        ),
        ModeDefinition(name="mult_mode", resource_defs={"op": multer_resource}),
        ModeDefinition(
            name="double_adder_mode",
            resource_defs={"op": double_adder_resource},
            description="Mode that adds two numbers to thing",
        ),
    ]

    add_mode_yaml = file_relative_path(
        __file__, "../environments/multi_mode_with_resources/add_mode.yaml"
    )
    presets = [
        PresetDefinition.from_files("add", mode="add_mode", config_files=[add_mode_yaml]),
        PresetDefinition(
            "multiproc",
            mode="add_mode",
            run_config={
                "resources": {"op": {"config": 2}},
                "execution": {"multiprocess": {}},
            },
        ),
    ]

    return PipelineDefinition(
        name="multi_mode_with_resources",
        solid_defs=[apply_to_three],
        mode_defs=modes,
        preset_defs=presets,
    )
예제 #3
0
def define_multi_mode_with_resources_pipeline():
    """Assemble the ``multi_mode_with_resources`` pipeline.

    One solid applies the mode-specific ``op`` resource to the number 3.
    Three modes provide different ``op`` resources; the ``add`` preset loads
    its environment from YAML and the ``multiproc`` preset supplies it inline.
    """

    @resource(config=Int)
    def adder_resource(init_context):
        return lambda value: value + init_context.resource_config

    @resource(config=Int)
    def multer_resource(init_context):
        return lambda value: value * init_context.resource_config

    @resource(config={'num_one': Int, 'num_two': Int})
    def double_adder_resource(init_context):
        return lambda value: (
            value
            + init_context.resource_config['num_one']
            + init_context.resource_config['num_two']
        )

    @solid(required_resource_keys={'op'})
    def apply_to_three(context):
        return context.resources.op(3)

    available_modes = [
        ModeDefinition(name='add_mode', resource_defs={'op': adder_resource}),
        ModeDefinition(name='mult_mode', resource_defs={'op': multer_resource}),
        ModeDefinition(
            name='double_adder_mode',
            resource_defs={'op': double_adder_resource},
            description='Mode that adds two numbers to thing',
        ),
    ]

    add_mode_yaml = file_relative_path(
        __file__, './environments/multi_mode_with_resources/add_mode.yaml'
    )
    # Inline environment for the multiprocess preset.
    multiproc_environment = {
        'resources': {'op': {'config': 2}},
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }
    available_presets = [
        PresetDefinition.from_files(
            'add', mode='add_mode', environment_files=[add_mode_yaml]
        ),
        PresetDefinition(
            'multiproc', mode='add_mode', environment_dict=multiproc_environment
        ),
    ]

    return PipelineDefinition(
        name='multi_mode_with_resources',
        solid_defs=[apply_to_three],
        mode_defs=available_modes,
        preset_defs=available_presets,
    )
예제 #4
0
def define_multi_mode_with_resources_pipeline():
    """Build the ``multi_mode_with_resources`` pipeline.

    The pipeline has one solid, which calls the ``op`` resource with 3. Three
    modes bind ``op`` to different resources, and a single ``add`` preset
    targets ``add_mode`` with an environment loaded from a YAML file.
    """

    # Each resource returns a one-argument callable that reads its config
    # from ``init_context`` when invoked.
    @resource(config_field=Field(Int))
    def adder_resource(init_context):
        return lambda x: x + init_context.resource_config

    @resource(config_field=Field(Int))
    def multer_resource(init_context):
        return lambda x: x * init_context.resource_config

    @resource(config={'num_one': Field(Int), 'num_two': Field(Int)})
    def double_adder_resource(init_context):
        return (lambda x: x + init_context.resource_config['num_one'] +
                init_context.resource_config['num_two'])

    # NOTE(review): unlike the resources above, this solid declares nothing
    # about the ``op`` resource it uses — confirm that is valid for the
    # Dagster version this file targets.
    @solid
    def apply_to_three(context):
        return context.resources.op(3)

    return PipelineDefinition(
        name='multi_mode_with_resources',
        solid_defs=[apply_to_three],
        mode_defs=[
            ModeDefinition(name='add_mode',
                           resource_defs={'op': adder_resource}),
            ModeDefinition(name='mult_mode',
                           resource_defs={'op': multer_resource}),
            ModeDefinition(
                name='double_adder_mode',
                resource_defs={'op': double_adder_resource},
                description='Mode that adds two numbers to thing',
            ),
        ],
        preset_defs=[
            PresetDefinition.from_files(
                'add',
                mode='add_mode',
                environment_files=[
                    script_relative_path(
                        './environments/multi_mode_with_resources/add_mode.yaml'
                    )
                ],
            )
        ],
    )
예제 #5
0
    if context.solid_config['return_wrong_type']:
        return string + string

    return int(string)


# Pipeline with a single mode ('errorable_mode') that provides the
# 'errorable_resource' resource, and a single preset ('passing') whose
# environment is loaded from environments/error.yaml next to this file.
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='errorable_mode', resource_defs={'errorable_resource': define_errorable_resource()}
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'passing',
            environment_files=[file_relative_path(__file__, 'environments/error.yaml')],
            mode='errorable_mode',
        )
    ],
)
def error_monster():
    # Three aliased solids chained start -> middle -> end.
    start = emit_num.alias('start')()
    middle = num_to_str.alias('middle')(num=start)
    str_to_num.alias('end')(string=middle)


if __name__ == '__main__':
    result = execute_pipeline(
        error_monster,
        {
            'solids': {
예제 #6
0
        'db_info': redshift_db_info_resource,
        'tempfile': tempfile_resource,
        'file_cache': s3_file_cache,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)


@pipeline(
    # ordered so the local is first and therefore the default
    mode_defs=[local_mode, test_mode, prod_mode],
    preset_defs=[
        PresetDefinition.from_files(
            name='local_fast',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/local_base.yaml'),
                file_relative_path(__file__, 'environments/local_fast_ingest.yaml'),
            ],
        ),
        PresetDefinition.from_files(
            name='local_full',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/local_base.yaml'),
                file_relative_path(__file__, 'environments/local_full_ingest.yaml'),
            ],
        ),
    ],
)
def airline_demo_ingest_pipeline():
    # on time data
예제 #7
0
def test_presets():
    """Exercise ``PresetDefinition`` construction and preset-driven execution.

    Builds a two-solid pipeline whose presets come from YAML files, from an
    inline ``run_config`` dict and from ``with_additional_config``; checks the
    errors raised for unusable config files; then executes the pipeline by
    preset name, both alone and combined with an explicit ``run_config``.
    """

    @solid(config_schema={"error": Bool})
    def can_fail(context):
        # Fails only when the solid config asks for it.
        if context.solid_config["error"]:
            raise Exception("I did an error")
        return "cool"

    @lambda_solid
    def always_fail():
        raise Exception("I always do this")

    pipe = PipelineDefinition(
        name="simple",
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                "passing",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
                solid_selection=["can_fail"],
            ),
            # Same file as "passing", with extra config layered on that sets
            # error=True for can_fail.
            PresetDefinition.from_files(
                "passing_overide_to_fail",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
                solid_selection=["can_fail"],
            ).with_additional_config(
                {"solids": {"can_fail": {"config": {"error": True}}}}
            ),
            PresetDefinition(
                "passing_direct_dict",
                run_config={"solids": {"can_fail": {"config": {"error": False}}}},
                solid_selection=["can_fail"],
            ),
            PresetDefinition.from_files(
                "failing_1",
                config_files=[file_relative_path(__file__, "fail_env.yaml")],
                solid_selection=["can_fail"],
            ),
            # No solid selection is given for this preset.
            PresetDefinition.from_files(
                "failing_2",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
            ),
            # Selection only; the run config must be supplied at execution.
            PresetDefinition(
                "subset",
                solid_selection=["can_fail"],
            ),
        ],
    )

    # A config path that does not exist is rejected at construction time.
    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            "invalid_1",
            config_files=[file_relative_path(__file__, "not_a_file.yaml")],
        )

    # A Python source file is rejected as a config file.
    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            "invalid_2",
            config_files=[
                file_relative_path(__file__, "test_repository_definition.py")
            ],
        )

    assert execute_pipeline(pipe, preset="passing").success
    assert execute_pipeline(pipe, preset="passing_direct_dict").success

    # With raise_on_error=False the failure is read off the result object.
    assert not execute_pipeline(
        pipe, preset="failing_1", raise_on_error=False
    ).success
    assert not execute_pipeline(
        pipe, preset="failing_2", raise_on_error=False
    ).success

    with pytest.raises(DagsterInvariantViolationError,
                       match="Could not find preset"):
        execute_pipeline(pipe, preset="not_failing", raise_on_error=False)

    assert not execute_pipeline(
        pipe, preset="passing_overide_to_fail", raise_on_error=False
    ).success

    # An explicit run_config with error=False is accepted alongside the
    # "passing" preset.
    assert execute_pipeline(
        pipe,
        preset="passing",
        run_config={"solids": {"can_fail": {"config": {"error": False}}}},
    ).success

    # An explicit run_config with error=True is rejected for the same preset.
    with pytest.raises(
            check.CheckError,
            match=re.escape(
                "The environment set in preset 'passing' does not agree with the environment passed "
                "in the `run_config` argument."),
    ):
        execute_pipeline(
            pipe,
            preset="passing",
            run_config={"solids": {"can_fail": {"config": {"error": True}}}},
        )

    # The "subset" preset carries no config, so run_config supplies it.
    assert execute_pipeline(
        pipe,
        preset="subset",
        run_config={"solids": {"can_fail": {"config": {"error": False}}}},
    ).success
예제 #8
0
    output_def=OutputDefinition(dagster_pd.DataFrame),
)
def always_fails_solid(**_kwargs):
    raise Exception('I am a programmer and I make error')


# Pipeline that chains sum_solid -> sum_sq_solid -> always_fails_solid.
# always_fails_solid raises unconditionally.
@pipeline
def pandas_hello_world_fails():
    always_fails_solid(sum_sq_solid=sum_sq_solid(sum_df=sum_solid())  # pylint: disable=no-value-for-parameter
                       )


# Pipeline that feeds sum_solid into sum_sq_solid. It registers two presets,
# 'test' and 'prod', each loading one YAML environment file from the
# environments/ directory next to this module.
@pipeline(preset_defs=[
    PresetDefinition.from_files(
        'test',
        environment_files=[
            file_relative_path(__file__,
                               'environments/pandas_hello_world_test.yaml')
        ],
    ),
    PresetDefinition.from_files(
        'prod',
        environment_files=[
            file_relative_path(__file__,
                               'environments/pandas_hello_world_prod.yaml')
        ],
    ),
])
def pandas_hello_world():
    sum_sq_solid(sum_solid())  # pylint: disable=no-value-for-parameter
예제 #9
0
    ],
)
def merge_staging_to_delta_table(properties):

    prop_s3_coordinate = upload_to_s3(cache_properies_from_rest_api(properties))
    # return assets for property
    return merge_property_delta(input_dataframe=flatten_json(s3_to_df(prop_s3_coordinate)))


@pipeline(
    mode_defs=[local_mode],
    preset_defs=[
        PresetDefinition.from_files(
            name='local',
            mode='local',
            config_files=[
                file_relative_path(__file__, 'config_environments/local_base.yaml'),
                file_relative_path(__file__, 'config_pipelines/scrape_realestate.yaml'),
            ],
        ),
    ],
)
def scrape_realestate():

    merge_staging_to_delta_table.alias('merge_SO_buy')(
        list_changed_properties.alias('list_SO_buy_flat')()
    )

    data_exploration()
        list_changed_properties.alias('list_BE_buy_flat')()
    )
예제 #10
0
    return people.count()


# The 'prod' mode: binds the EMR PySpark step launcher, PySpark and S3
# resources, and uses s3_plus_default_storage_defs for system storage.
mode = ModeDefinition(
    name='prod',
    resource_defs={
        'pyspark_step_launcher': emr_pyspark_step_launcher,
        'pyspark': pyspark_resource,
        's3': s3_resource,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)

# The 'prod' preset: targets the 'prod' mode and loads its environment from
# two YAML files.
# NOTE(review): these are bare relative paths, so they are presumably resolved
# against the process working directory rather than this file's directory —
# confirm this is intended.
preset = PresetDefinition.from_files(
    name='prod',
    mode='prod',
    environment_files=['prod_resources.yaml', 's3_storage.yaml'],
)


# Pipeline with the single 'prod' mode and preset defined above; it chains
# make_people -> filter_over_50 -> count_people.
@pipeline(mode_defs=[mode], preset_defs=[preset])
def my_pipeline():
    everyone = make_people()
    over_50 = filter_over_50(everyone)
    count_people(over_50)


def define_repository():
    """Return the ``emr_pyspark`` repository, which contains ``my_pipeline``."""
    pipelines = [my_pipeline]
    return RepositoryDefinition('emr_pyspark', pipeline_defs=pipelines)
예제 #11
0
def test_presets():
    """Exercise ``PresetDefinition`` construction and preset-driven execution.

    Builds a two-solid pipeline whose presets come from YAML files, from an
    inline ``run_config`` dict and from ``with_additional_config``; checks the
    errors raised for unusable environment files; then executes the pipeline
    by preset name, both alone and combined with an explicit ``run_config``.
    """

    @solid(config_schema={'error': Bool})
    def can_fail(context):
        # Fails only when the solid config asks for it.
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipeline = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                'passing',
                environment_files=[
                    file_relative_path(__file__, 'pass_env.yaml')
                ],
                solid_selection=['can_fail'],
            ),
            # Same file as 'passing', with extra config layered on that sets
            # error=True for can_fail.
            PresetDefinition.from_files(
                'passing_overide_to_fail',
                environment_files=[
                    file_relative_path(__file__, 'pass_env.yaml')
                ],
                solid_selection=['can_fail'],
            ).with_additional_config(
                {'solids': {'can_fail': {'config': {'error': True}}}}
            ),
            PresetDefinition(
                'passing_direct_dict',
                run_config={'solids': {'can_fail': {'config': {'error': False}}}},
                solid_selection=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_1',
                environment_files=[
                    file_relative_path(__file__, 'fail_env.yaml')
                ],
                solid_selection=['can_fail'],
            ),
            # No solid selection is given for this preset.
            PresetDefinition.from_files(
                'failing_2',
                environment_files=[
                    file_relative_path(__file__, 'pass_env.yaml')
                ],
            ),
            # Selection only; the run config must be supplied at execution.
            PresetDefinition(
                'subset',
                solid_selection=['can_fail'],
            ),
        ],
    )

    # An environment path that does not exist is rejected at construction.
    with pytest.raises(DagsterInvalidDefinitionError):
        PresetDefinition.from_files(
            'invalid_1',
            environment_files=[
                file_relative_path(__file__, 'not_a_file.yaml')
            ],
        )

    # A Python source file is rejected as an environment file.
    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            environment_files=[
                file_relative_path(__file__, 'test_repository_definition.py')
            ],
        )

    assert execute_pipeline(pipeline, preset='passing').success
    assert execute_pipeline(pipeline, preset='passing_direct_dict').success

    # With raise_on_error=False the failure is read off the result object.
    assert not execute_pipeline(
        pipeline, preset='failing_1', raise_on_error=False
    ).success
    assert not execute_pipeline(
        pipeline, preset='failing_2', raise_on_error=False
    ).success

    with pytest.raises(DagsterInvariantViolationError,
                       match='Could not find preset'):
        execute_pipeline(pipeline, preset='not_failing', raise_on_error=False)

    assert not execute_pipeline(
        pipeline, preset='passing_overide_to_fail', raise_on_error=False
    ).success

    # An explicit run_config with error=False is accepted alongside the
    # 'passing' preset.
    assert execute_pipeline(
        pipeline,
        preset='passing',
        run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success

    # An explicit run_config with error=True is rejected for the same preset.
    with pytest.raises(
            check.CheckError,
            match=re.escape(
                'The environment set in preset \'passing\' does not agree with the environment passed '
                'in the `run_config` argument.'),
    ):
        execute_pipeline(
            pipeline,
            preset='passing',
            run_config={'solids': {'can_fail': {'config': {'error': True}}}},
        )

    # The 'subset' preset carries no config, so run_config supplies it.
    assert execute_pipeline(
        pipeline,
        preset='subset',
        run_config={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success
예제 #12
0
                        }
                    }
                },
                'resources': {
                    'warehouse': {
                        'config': {
                            'conn_str': ':memory:'
                        }
                    }
                },
            },
            mode='unittest',
        ),
        PresetDefinition.from_files(
            'dev',
            config_files=[
                file_relative_path(__file__, 'presets_dev_warehouse.yaml'),
                file_relative_path(__file__, 'presets_csv.yaml'),
            ],
            mode='dev',
        ),
    ],
)
def presets_pipeline():
    normalize_calories(read_csv())


# When run as a script, execute the pipeline with its 'unittest' preset and
# fail loudly if the run did not succeed.
if __name__ == '__main__':
    result = execute_pipeline(presets_pipeline, preset='unittest')
    assert result.success
예제 #13
0
# Solid with no inputs that emits the base food item as a string.
@solid
def make_burger(_):
    burger = 'cheese burger'
    return burger


# Solid that describes ``food`` served with the flavor of the given sauce.
@solid
def add_sauce(_, food, sauce: Sauce):
    template = '{} with {} sauce'
    return template.format(food, sauce.flavor)


# Solid that logs the sauce's flavor at info level and passes the sauce
# through unchanged.
@solid
def inspect_sauce(context, sauce: Sauce) -> Sauce:
    message = 'The sauce tastes {}'.format(sauce.flavor)
    context.log.info(message)
    return sauce


# Pipeline with two presets: 'test_input' loads one YAML file, 'test_output'
# loads the output file followed by the same input file.
@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            'test_input',
            [script_relative_path('./custom_type_input.yaml')],
        ),
        PresetDefinition.from_files(
            'test_output',
            [
                script_relative_path('./custom_type_output.yaml'),
                script_relative_path('./custom_type_input.yaml'),
            ],
        ),
    ]
)
def burger_time():
    # inspect_sauce is invoked without arguments here; its sauce input is
    # not wired to any upstream solid.
    sauce = inspect_sauce()
    burger = make_burger()
    add_sauce(burger, sauce)
예제 #14
0
파일: repo.py 프로젝트: varokas/dagster-1
# Pipeline whose only mode is declared without a name; it adds the S3
# resource and the celery-k8s job executor on top of the default executors.
# The 'example' preset loads two YAML files from ../run_config and targets
# mode 'default'.
# NOTE(review): presumably 'default' is the name an unnamed ModeDefinition
# receives — confirm.
@pipeline(
    mode_defs=[
        ModeDefinition(
            intermediate_storage_defs=s3_plus_default_intermediate_storage_defs,
            resource_defs={'s3': s3_resource},
            executor_defs=default_executors + [celery_k8s_job_executor],
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'example',
            config_files=[
                file_relative_path(
                    __file__,
                    os.path.join('..', 'run_config', 'celery_k8s.yaml')),
                file_relative_path(
                    __file__, os.path.join('..', 'run_config',
                                           'pipeline.yaml')),
            ],
            mode='default',
        ),
    ],
)
def example_pipe():
    # Feed the output of multiply_the_word into count_letters.
    count_letters(multiply_the_word())


# Repository exposing the single pipeline defined in this module.
@repository
def example_repo():
    pipelines = [example_pipe]
    return pipelines
예제 #15
0
            },
        ),
        ModeDefinition(
            name='production',
            resource_defs={
                'credentials_vault': credentials_vault,
                'transporter': production_transporter,
                'volume': mount,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'dev',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/weather_base.yaml'),
                file_relative_path(__file__, 'environments/weather_dev.yaml'),
            ],
        ),
        PresetDefinition.from_files(
            'production',
            mode='production',
            environment_files=[
                file_relative_path(__file__, 'environments/weather_base.yaml'),
                file_relative_path(__file__, 'environments/weather_production.yaml'),
            ],
        ),
    ],
)
def extract_daily_weather_data_pipeline():
    upload_weather_report = upload_file_to_bucket.alias('upload_weather_report')
예제 #16
0
def define_papermill_pandas_hello_world_solid():
    """Define the ``papermill_pandas_hello_world`` dagstermill solid.

    The solid has one ``DataFrame`` input named ``df`` and one ``DataFrame``
    output, and its notebook path comes from ``nb_test_path``.
    """
    solid_name = 'papermill_pandas_hello_world'
    notebook = nb_test_path('papermill_pandas_hello_world')
    df_input = InputDefinition(name='df', dagster_type=DataFrame)
    df_output = OutputDefinition(DataFrame)
    return dagstermill.define_dagstermill_solid(
        name=solid_name,
        notebook_path=notebook,
        input_defs=[df_input],
        output_defs=[df_output],
    )


@pipeline(preset_defs=[
    PresetDefinition.from_files(
        'test',
        config_files=[
            file_relative_path(
                __file__,
                'pandas_hello_world/environments/papermill_pandas_hello_world_test.yaml',
            )
        ],
    ),
    PresetDefinition.from_files(
        'prod',
        config_files=[
            file_relative_path(
                __file__,
                'pandas_hello_world/environments/papermill_pandas_hello_world_prod.yaml',
            )
        ],
    ),
])
def papermill_pandas_hello_world_pipeline():
예제 #17
0
                        }
                    }
                },
                'resources': {
                    'warehouse': {
                        'config': {
                            'conn_str': ':memory:'
                        }
                    }
                },
            },
            mode='unittest',
        ),
        PresetDefinition.from_files(
            'dev',
            environment_files=[
                script_relative_path('presets_dev_warehouse.yaml'),
                script_relative_path('presets_csv.yaml'),
            ],
            mode='dev',
        ),
    ],
)
def presets_pipeline():
    normalize_calories(read_csv())


# When run as a script, execute the pipeline with its 'unittest' preset and
# fail loudly if the run did not succeed.
if __name__ == '__main__':
    result = execute_pipeline_with_preset(presets_pipeline, 'unittest')
    assert result.success
예제 #18
0
# (use local disk instead of S3 for local runs, sqlite instead of Snowflake, etc)
# The 'prod' mode: binds the S3 and Snowflake resources and uses
# s3_plus_default_storage_defs for system storage.
prod_mode = ModeDefinition(
    name='prod',
    resource_defs={
        's3': s3_resource,
        'snowflake': snowflake_resource
    },
    system_storage_defs=s3_plus_default_storage_defs,
)

# A preset bundles a mode with environment config loaded from YAML files.
# This one targets the 'prod' mode and loads environments/shared.yaml.
# NOTE(review): despite the plural name, this is a single PresetDefinition,
# not a list. How values override each other when several files are listed,
# and whether secrets belong in these files, is not established here —
# confirm against the Dagster docs.
preset_defs = PresetDefinition.from_files(
    name='prod',
    mode='prod',
    environment_files=[
        file_relative_path(__file__, 'environments/shared.yaml'),
    ],
)


# Solids are the units of execution that are steps in a pipeline. Lambda solids are simpler and do not require
# configuration or context.
@lambda_solid(
    output_def=OutputDefinition(DataFrame),  # Describes what this solid will return
)
def generate_test_data():
    """
    Generate random data
    """
    df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
예제 #19
0
파일: pipeline.py 프로젝트: sd2k/dagster
    input_defs=[InputDefinition("sum_sq_solid", dagster_pd.DataFrame)],
    output_def=OutputDefinition(dagster_pd.DataFrame),
)
def always_fails_solid(**_kwargs):
    raise Exception("I am a programmer and I make error")


# Pipeline that chains sum_solid -> sum_sq_solid -> always_fails_solid.
# always_fails_solid raises unconditionally.
@pipeline
def pandas_hello_world_fails():
    summed = sum_solid()
    squared = sum_sq_solid(sum_df=summed)
    always_fails_solid(sum_sq_solid=squared)


# Pipeline that feeds sum_solid into sum_sq_solid. It registers two presets,
# "test" and "prod", each loading one YAML config file from the
# environments/ directory next to this module.
@pipeline(preset_defs=[
    PresetDefinition.from_files(
        "test",
        config_files=[
            file_relative_path(__file__,
                               "environments/pandas_hello_world_test.yaml")
        ],
    ),
    PresetDefinition.from_files(
        "prod",
        config_files=[
            file_relative_path(__file__,
                               "environments/pandas_hello_world_prod.yaml")
        ],
    ),
])
def pandas_hello_world():
    sum_sq_solid(sum_solid())
예제 #20
0
파일: pipelines.py 프로젝트: zkan/dagster
# Pipeline with a single 'development' mode that provides the
# 'credentials_vault' and 'postgres_db' resources. Its 'development' preset
# loads two YAML files from the environments/ directory next to this module.
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='development',
            resource_defs={
                'credentials_vault': credentials_vault,
                'postgres_db': postgres_db_info_resource,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'development',
            mode='development',
            environment_files=[
                file_relative_path(__file__,
                                   'environments/dev_database_resources.yaml'),
                file_relative_path(__file__, 'environments/weather.yaml'),
            ],
        ),
    ],
)
def extract_daily_weather_data_pipeline():
    # Alias insert_into_table under a descriptive name, then feed it the
    # output of the weather-report download solid.
    insert_weather_report_into_table = insert_into_table.alias(
        'insert_weather_report_into_table')
    insert_weather_report_into_table(
        download_weather_report_from_weather_api())


@pipeline(
    mode_defs=[
예제 #21
0
from dagster_aws.s3 import s3_system_storage, s3_resource

import numpy as np

# Single 'default' mode: provides the S3 resource and appends the Celery
# executor and S3 system storage to the corresponding default lists.
celery_mode_defs = [
    ModeDefinition(name='default',
                   resource_defs={'s3': s3_resource},
                   executor_defs=default_executors + [celery_executor],
                   system_storage_defs=default_system_storage_defs +
                   [s3_system_storage])
]

# The 'celery' preset loads celery_execution.yaml from this file's directory
# and targets the 'default' mode. The Path is converted to str before being
# handed to from_files.
celery_yaml_path = Path(__file__).parent / 'celery_execution.yaml'
presets = [
    PresetDefinition.from_files(name='celery',
                                environment_files=[str(celery_yaml_path)],
                                mode='default')
]


# Solid that prints a greeting and emits the result of
# np.random.randint(low=1, high=100).
@solid
def generate_number(_):
    print('hello')
    return np.random.randint(low=1, high=100)


# Solid that raises its input to the power of two with NumPy.
@solid
def square_number(_, number):
    return np.power(number, 2)
예제 #22
0
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='default',
            resource_defs={
                's3': s3_resource,
                'snowflake': snowflake_resource,
                'spark': spark_resource,
            },
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            name='default',
            mode='default',
            environment_files=[file_relative_path(__file__, 'environments/default.yaml')],
        )
    ],
)
def event_ingest_pipeline():
    event_ingest = create_spark_solid(
        name='event_ingest',
        main_class='io.dagster.events.EventPipeline',
        description='Ingest events from JSON to Parquet',
    )

    @solid(input_defs=[InputDefinition('start', Nothing)], required_resource_keys={'snowflake'})
    def snowflake_load(context):
        # TODO: express dependency of this solid on event_ingest
        context.resources.snowflake.load_table_from_local_parquet(
예제 #23
0
파일: repo.py 프로젝트: xsswhale/dagster
             "io_manager": s3_pickle_io_manager
         },
         executor_defs=default_executors + [celery_k8s_job_executor],
     ),
     ModeDefinition(
         name="test",
         executor_defs=default_executors + [celery_k8s_job_executor],
     ),
 ],
 preset_defs=[
     PresetDefinition.from_files(
         "celery_k8s",
         config_files=[
             file_relative_path(
                 __file__,
                 os.path.join("..", "run_config", "celery_k8s.yaml")),
             file_relative_path(
                 __file__, os.path.join("..", "run_config",
                                        "pipeline.yaml")),
         ],
         mode="default",
     ),
     PresetDefinition.from_files(
         "default",
         config_files=[
             file_relative_path(
                 __file__, os.path.join("..", "run_config",
                                        "pipeline.yaml")),
         ],
         mode="default",
     ),
 ],
예제 #24
0
# Pipeline with one 'development' mode (providing the 'credentials_vault' and
# 'postgres_db' resources) and one 'development' preset whose config is read
# from two YAML files resolved relative to this module.
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='development',
            resource_defs={
                'credentials_vault': credentials_vault,
                'postgres_db': postgres_db_info_resource,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'development',
            mode='development',
            environment_files=[
                file_relative_path(__file__, yaml_path)
                for yaml_path in (
                    'environments/dev_resources.yaml',
                    'environments/weather.yaml',
                )
            ],
        ),
    ],
)
def extract_daily_weather_data_pipeline():
    # Feed the result of download_weather_report_from_weather_api() into
    # insert_row_into_table, invoked under the alias
    # 'insert_weather_report_into_table'.
    insert_row_into_table.alias('insert_weather_report_into_table')(
        download_weather_report_from_weather_api()
    )


# TODO: Add Local Mode when tests are written
@pipeline(
    mode_defs=[
예제 #25
0
                        }
                    }
                },
                "resources": {
                    "warehouse": {
                        "config": {
                            "conn_str": ":memory:"
                        }
                    }
                },
            },
            mode="unittest",
        ),
        PresetDefinition.from_files(
            "dev",
            config_files=[
                file_relative_path(__file__, "presets_dev_warehouse.yaml"),
                file_relative_path(__file__, "presets_csv.yaml"),
            ],
            mode="dev",
        ),
    ],
)
def presets_pipeline():
    normalize_calories(read_csv())


if __name__ == "__main__":
    # When run as a script, execute presets_pipeline using its "unittest"
    # preset and assert that the run reports success.
    result = execute_pipeline(presets_pipeline, preset="unittest")
    assert result.success
예제 #26
0
파일: pipelines.py 프로젝트: mapbox/dagster
# Pipeline with one 'development' mode (providing the 'credentials_vault' and
# 'postgres_db' resources) and one 'development' preset whose config is read
# from two YAML files resolved relative to this module.
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='development',
            resource_defs={
                'credentials_vault': credentials_vault,
                'postgres_db': postgres_db_info_resource,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'development',
            mode='development',
            environment_files=[
                file_relative_path(__file__, yaml_path)
                for yaml_path in (
                    'environments/dev_resources.yaml',
                    'environments/weather.yaml',
                )
            ],
        ),
    ],
)
def extract_daily_weather_data_pipeline():
    # Bind insert_into_table under the alias 'insert_weather_report_into_table',
    # then call it with the result of download_weather_report_from_weather_api().
    aliased_insert = insert_into_table.alias('insert_weather_report_into_table')
    aliased_insert(download_weather_report_from_weather_api())


@pipeline(
    mode_defs=[
예제 #27
0
파일: setup.py 프로젝트: jmbrooks/dagster
                'field_six_nullable_int_list':
                Field([Noneable(int)], is_optional=True),
            },
        },
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(preset_defs=[
    PresetDefinition.from_files(
        name='prod',
        environment_files=[
            file_relative_path(__file__,
                               '../environments/csv_hello_world_prod.yaml')
        ],
    ),
    PresetDefinition.from_files(
        name='test',
        environment_files=[
            file_relative_path(__file__,
                               '../environments/csv_hello_world_test.yaml')
        ],
    ),
    PresetDefinition(
        name='test_inline',
        environment_dict={
            'solids': {
                'sum_solid': {
예제 #28
0
@pipeline(
    mode_defs=[
        ModeDefinition(
            "dev",
            resource_defs={
                "basedosdados_config": basedosdados_config,
                "timezone_config": timezone_config,
                "discord_webhook": discord_webhook,
                "keepalive_key": keepalive_key,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            "realized_trips",
            config_files=[str(Path(__file__).parent / "realized_trips.yaml")],
            mode="dev",
        ),
    ],
    tags={
        "pipeline": "br_rj_riodejaneiro_gtfs_realized_trips",
        "dagster-k8s/config": {
            "container_config": {
                "resources": {
                    "requests": {"cpu": "250m", "memory": "500Mi"},
                    "limits": {"cpu": "1500m", "memory": "1Gi"},
                },
            }
        },
    },
)
예제 #29
0
파일: setup.py 프로젝트: helloworld/dagster
            },
        },
    )
    def a_solid_with_multilayered_config(_):
        return None

    a_solid_with_multilayered_config()


@pipeline(
    mode_defs=[default_mode_def_for_test],
    preset_defs=[
        PresetDefinition.from_files(
            name="prod",
            config_files=[
                file_relative_path(
                    __file__, "../environments/csv_hello_world_prod.yaml")
            ],
        ),
        PresetDefinition.from_files(
            name="test",
            config_files=[
                file_relative_path(
                    __file__, "../environments/csv_hello_world_test.yaml")
            ],
        ),
        PresetDefinition(
            name="test_inline",
            run_config={
                "solids": {
                    "sum_solid": {
예제 #30
0
파일: pipelines.py 프로젝트: xhochy/dagster
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='local',
            resource_defs={'transporter': local_transporter, 'volume': temporary_directory_mount},
        ),
        ModeDefinition(
            name='production',
            resource_defs={'transporter': production_transporter, 'volume': mount},
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'dev',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/bay_bike_pipeline_base.yaml'),
                file_relative_path(__file__, 'environments/bay_bike_pipeline_dev.yaml'),
            ],
        ),
        PresetDefinition.from_files(
            'production',
            mode='production',
            environment_files=[
                file_relative_path(__file__, 'environments/bay_bike_pipeline_base.yaml'),
                file_relative_path(__file__, 'environments/bay_bike_pipeline_production.yaml'),
            ],
        ),
    ],
)
def extract_monthly_bay_bike_pipeline():
    upload_consolidated_csv = upload_file_to_bucket.alias('upload_consolidated_csv')