Exemple #1
0
def intro_tutorial_path(path):
    """Resolve *path* inside the dagster intro_tutorial directory, relative to this script."""
    relative = '../../../dagster/tutorials/intro_tutorial/{}'.format(path)
    return script_relative_path(relative)
Exemple #2
0
def build_docker_image(docker_client):
    """Build the test docker image by running test_project/build.sh.

    The ``docker_client`` argument is unused in the body but is kept to
    preserve the (likely fixture-injected) signature. Returns the
    module-level ``IMAGE`` name.

    Raises ``subprocess.CalledProcessError`` if the build script fails.
    """
    with pushd(script_relative_path('test_project')):
        # Execute the script directly. The original combined a list argv with
        # shell=True, which on POSIX runs `sh -c './build.sh'` and silently
        # drops any further list elements — a well-known subprocess misuse.
        subprocess.check_output(['./build.sh'])

    return IMAGE
Exemple #3
0
import dagstermill as dm
from dagster import InputDefinition, job
from dagster.utils import script_relative_path
from docs_snippets.legacy.data_science.download_file import download_file

# Dagstermill op that executes the adjacent "iris-kmeans_2.ipynb" notebook.
# It takes one string input "path" and also emits the executed notebook as
# an extra output named "iris_kmeans_output".
k_means_iris = dm.define_dagstermill_op(
    "k_means_iris",
    script_relative_path("iris-kmeans_2.ipynb"),
    output_notebook_name="iris_kmeans_output",
    input_defs=[
        InputDefinition("path",
                        str,
                        description="Local path to the Iris dataset")
    ],
)


@job(resource_defs={
    # dagstermill requires an IO manager resource to persist the executed notebook.
    "output_notebook_io_manager": dm.local_output_notebook_io_manager,
})
def iris_classify():
    """Download the Iris dataset and feed its local path into the k-means notebook op."""
    k_means_iris(download_file())
Exemple #4
0
def csv_hello_world_solids_config_fs_storage():
    """Environment dict wiring sum_solid's 'num' input to the sample CSV,
    with filesystem intermediate storage enabled."""
    num_csv = script_relative_path('../data/num.csv')
    config = {'solids': {'sum_solid': {'inputs': {'num': num_csv}}}}
    config['storage'] = {'filesystem': {}}
    return config
Exemple #5
0
import dagstermill as dm
from dagster_examples.util import download_file

from dagster import Field, InputDefinition, Int, pipeline
from dagster.utils import script_relative_path

# Dagstermill solid that executes the adjacent "iris-kmeans_2.ipynb" notebook.
# Takes a string input "path"; the number of clusters is configurable via an
# Int config field (defaults to 3).
k_means_iris = dm.define_dagstermill_solid(
    'k_means_iris',
    script_relative_path('iris-kmeans_2.ipynb'),
    input_defs=[
        InputDefinition('path',
                        str,
                        description='Local path to the Iris dataset')
    ],
    config=Field(Int,
                 default_value=3,
                 is_required=False,
                 description='The number of clusters to find'),
)


@pipeline
def iris_pipeline():
    """Download the Iris dataset, then run the k-means notebook solid on its local path."""
    k_means_iris(download_file())
Exemple #6
0
import dagstermill as dm

from dagster import PipelineDefinition
from dagster.utils import script_relative_path

# Solid that executes the iris-kmeans notebook (located next to this script)
# via dagstermill; no inputs or config.
k_means_iris_solid = dm.define_dagstermill_solid(
    'k_means_iris', script_relative_path('iris-kmeans.ipynb'))


def define_iris_pipeline():
    """Build the single-solid iris pipeline around the k-means notebook solid."""
    solids = [k_means_iris_solid]
    return PipelineDefinition(name='iris_pipeline', solid_defs=solids)
Exemple #7
0
                    ),
                }
            )
        ),
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(
    preset_defs=[
        # Prod and test presets each load their environment from a YAML file
        # that lives next to this module.
        PresetDefinition.from_files(
            name='prod',
            environment_files=[script_relative_path('../environments/csv_hello_world_prod.yaml')],
        ),
        PresetDefinition.from_files(
            name='test',
            environment_files=[script_relative_path('../environments/csv_hello_world_test.yaml')],
        ),
    ]
)
def csv_hello_world():
    """Two-solid pipeline: sum_solid's dataframe feeds into sum_sq_solid."""
    return sum_sq_solid(sum_df=sum_solid())


@pipeline
def csv_hello_world_with_expectations():
    """Same sum -> sum-squared DAG as csv_hello_world, defined without presets."""
    sum_sq_solid(sum_df=sum_solid())
Exemple #8
0
def test_notebook_view():
    """notebook_view renders the fixture notebook to HTML with a 200 status."""
    notebook_path = script_relative_path('render_uuid_notebook.ipynb')
    html_response, code = notebook_view({'path': notebook_path})
    # The fixture notebook embeds this UUID; it must survive rendering to HTML.
    assert '6cac0c38-2c97-49ca-887c-4ac43f141213' in html_response
    assert code == 200
Exemple #9
0
def test_successful_host_dagit_ui():
    """host_dagit_ui starts cleanly against the local repository.yaml."""
    # Patch the WSGI server class so no real socket is bound during the test.
    with mock.patch('gevent.pywsgi.WSGIServer'):
        handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
        host_dagit_ui(log=False, log_dir=None, handle=handle, use_sync=True, host=None, port=2343)
Exemple #10
0
                        'field_six_nullable_int_list':
                        Field(List[Optional[Int]], is_optional=True),
                    })),
            })),
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(preset_defs=[
    PresetDefinition.from_files(
        name='prod',
        environment_files=[
            script_relative_path('../environments/csv_hello_world_prod.yaml')
        ],
    ),
    PresetDefinition.from_files(
        name='test',
        environment_files=[
            script_relative_path('../environments/csv_hello_world_test.yaml')
        ],
    ),
    PresetDefinition(
        name='test_inline',
        environment_dict={
            'solids': {
                'sum_solid': {
                    'inputs': {
                        'num': script_relative_path("../data/num.csv")
Exemple #11
0
def test_create_app():
    """create_app builds a dagit app with both sync and async execution managers."""
    handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
    pipeline_run_storage = PipelineRunStorage()
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=True)
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=False)
Exemple #12
0
    PipelineDefinition,
    RepositoryDefinition,
    SolidDefinition,
)

from dagster.utils import script_relative_path

from dagster_graphql.implementation.context import DagsterGraphQLContext
from dagster_graphql.implementation.pipeline_execution_manager import SynchronousExecutionManager
from dagster_graphql.implementation.pipeline_run_storage import PipelineRunStorage
from dagster_graphql.test.utils import execute_dagster_graphql

from dagster_graphql_tests.graphql.setup import define_context, define_repository

# This is needed to find production query in all cases
sys.path.insert(0, os.path.abspath(script_relative_path('.')))

from production_query import (  # pylint: disable=wrong-import-position,wrong-import-order
    PRODUCTION_QUERY, )
from setup import PoorMansDataFrame  # pylint: disable=no-name-in-module


def test_enum_query():
    ENUM_QUERY = '''{
  pipeline(params: { name:"pipeline_with_enum_config" }){
    name
    configTypes {
      __typename
      name
      ... on EnumConfigType {
        values
Exemple #13
0
def test_intro_tutorial_cli_actual_dag():
    """The actual_dag tutorial pipeline executes successfully via the CLI helper."""
    check_cli_execute_file_pipeline(
        script_relative_path('../../dagster_examples/intro_tutorial/actual_dag.py'),
        'define_diamond_dag_pipeline',
    )
Exemple #14
0
from dagster_airflow_tests.marks import nettest

from dagster import ExecutionTargetHandle
from dagster.core.utils import make_new_run_id
from dagster.utils import load_yaml_from_glob_list, script_relative_path

from .utils import validate_pipeline_execution

# TODO (Nate): Will remove in follow-up diff
# Absolute path to the environment YAML files of the buildkite docker test
# project, resolved relative to this test module.
ENVIRONMENTS_PATH = script_relative_path(
    os.path.join(
        '..',
        '..',
        '..',
        '..',
        '.buildkite',
        'images',
        'docker',
        'test_project',
        'test_pipelines',
        'environments',
    ))


@nettest
def test_s3_storage(
    dagster_airflow_k8s_operator_pipeline,
    dagster_docker_image,
    environments_path,
):  # pylint: disable=redefined-outer-name
    # NOTE(review): this body looks truncated in this excerpt — only the
    # pipeline name is assigned and the fixtures are unused. Confirm against
    # the original test module before relying on this.
    pipeline_name = 'demo_pipeline'
def path_to_tutorial_file(path):
    """Resolve *path* relative to the intro_tutorial docs-snippets directory."""
    base = "../../docs_snippets/intro_tutorial/"
    return script_relative_path(os.path.join(base, path))
def test_list_command():
    """Exercise `pipeline list` through both the direct command helper and the
    click CLI runner, covering file/module/yaml targets plus usage-error cases
    where the target flags are over- or under-specified."""
    runner = CliRunner()

    # Valid: python file + repo function.
    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': script_relative_path('test_cli_commands.py'),
            'module_name': None,
            'fn_name': 'define_bar_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        [
            '-f',
            script_relative_path('test_cli_commands.py'), '-n',
            'define_bar_repo'
        ],
    )

    assert result.exit_code == 0
    assert result.output == ('Repository bar\n'
                             '**************\n'
                             'Pipeline: baz\n'
                             'Description:\n'
                             'Not much tbh\n'
                             'Solids: (Execution Order)\n'
                             '    do_input\n'
                             '*************\n'
                             'Pipeline: foo\n'
                             'Solids: (Execution Order)\n'
                             '    do_something\n')

    # Valid: module name + repo function.
    execute_list_command(
        {
            'repository_yaml': None,
            'python_file': None,
            'module_name': 'dagster_examples.intro_tutorial.repos',
            'fn_name': 'define_repo',
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-m', 'dagster_examples.intro_tutorial.repos', '-n', 'define_repo'])
    assert result.exit_code == 0
    assert result.output == ('Repository demo_repository\n'
                             '**************************\n'
                             'Pipeline: repo_demo_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    hello_world\n')

    # Valid: repository yaml alone.
    execute_list_command(
        {
            'repository_yaml': script_relative_path('repository.yaml'),
            'python_file': None,
            'module_name': None,
            'fn_name': None,
        },
        no_print,
    )

    result = runner.invoke(
        pipeline_list_command,
        ['-y', script_relative_path('repository.yaml')])
    assert result.exit_code == 0
    assert result.output == ('Repository demo_repository\n'
                             '**************************\n'
                             'Pipeline: repo_demo_pipeline\n'
                             'Solids: (Execution Order)\n'
                             '    hello_world\n')

    # Invalid: file and module are mutually exclusive.
    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': 'foo.py',
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': 'define_repo',
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        [
            '-f', 'foo.py', '-m', 'dagster_examples.intro_tutorial.repos',
            '-n', 'define_repo'
        ],
    )
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)

    # Invalid: module without a function name.
    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': None,
                'module_name': 'dagster_examples.intro_tutorial.repos',
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(pipeline_list_command,
                           ['-m', 'dagster_examples.intro_tutorial.repos'])
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)

    # Invalid: python file without a function name.
    with pytest.raises(CliUsageError):
        execute_list_command(
            {
                'repository_yaml': None,
                'python_file': script_relative_path('test_cli_commands.py'),
                'module_name': None,
                'fn_name': None,
            },
            no_print,
        )

    result = runner.invoke(
        pipeline_list_command,
        ['-f', script_relative_path('test_cli_commands.py')])
    assert result.exit_code == 1
    assert isinstance(result.exception, CliUsageError)
Exemple #17
0
def load_custom_trip_dataframe(_) -> DataFrame:
    """Load the e-bike trips CSV (adjacent to this script) with parsed
    start/end timestamps. The leading positional argument is ignored."""
    return read_csv(
        script_relative_path('./ebike_trips.csv'),
        parse_dates=['start_time', 'end_time'],
        # Explicit parser: the timestamps carry fractional seconds ('%f').
        date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f'),
    )
Exemple #18
0
def test_presets():
    """Presets built from files and from inline dicts select the right solid
    subsets; invalid files raise at definition time; pipeline executions
    succeed or fail according to each preset's config."""
    @solid(config_field=Field(Dict(fields={'error': Field(Bool)})))
    def can_fail(context):
        # Fails only when configured with error=True.
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipeline = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            # Passing preset: config from YAML, only can_fail runs.
            PresetDefinition.from_files(
                'passing',
                environment_files=[script_relative_path('pass_env.yaml')],
                solid_subset=['can_fail'],
            ),
            # Same behavior, config supplied as an inline dict.
            PresetDefinition(
                'passing_direct_dict',
                environment_dict={
                    'solids': {
                        'can_fail': {
                            'config': {
                                'error': False
                            }
                        }
                    }
                },
                solid_subset=['can_fail'],
            ),
            # Fails because fail_env.yaml sets error=True.
            PresetDefinition.from_files(
                'failing_1',
                environment_files=[script_relative_path('fail_env.yaml')],
                solid_subset=['can_fail'],
            ),
            # Fails because always_fail is included (no solid_subset).
            PresetDefinition.from_files(
                'failing_2',
                environment_files=[script_relative_path('pass_env.yaml')]),
        ],
    )

    # A nonexistent environment file is a definition error.
    with pytest.raises(DagsterInvalidDefinitionError):
        PresetDefinition.from_files(
            'invalid_1',
            environment_files=[script_relative_path('not_a_file.yaml')])

    # A file that is not valid YAML config is an invariant violation.
    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            environment_files=[
                script_relative_path('test_repository_definition.py')
            ])

    assert execute_pipeline_with_preset(pipeline, 'passing').success

    assert execute_pipeline_with_preset(pipeline,
                                        'passing_direct_dict').success

    with pytest.raises(DagsterExecutionStepExecutionError):
        execute_pipeline_with_preset(pipeline, 'failing_1')

    with pytest.raises(DagsterExecutionStepExecutionError):
        execute_pipeline_with_preset(pipeline, 'failing_2')

    # Unknown preset names are rejected.
    with pytest.raises(DagsterInvariantViolationError,
                       match="Could not find preset"):
        execute_pipeline_with_preset(pipeline, 'not_failing')
Exemple #19
0
def test_sort():
    """sort_by_calories executes successfully on the real cereal.csv fixture."""
    csv_path = script_relative_path("../../../docs_snippets/intro_tutorial/cereal.csv")
    with open(csv_path, "r") as fd:
        cereals = list(csv.DictReader(fd))

    execute_solid(sort_by_calories, input_values={"cereals": cereals})
Exemple #20
0
def create_persisted_context():
    """Create a SQLAlchemy context backed by an on-disk SQLite db next to this script."""
    db_path = script_relative_path('testdb.db')
    engine = sa.create_engine('sqlite:///' + db_path, echo=False)
    return dagster_sqlalchemy.common.create_sql_alchemy_context_params_from_engine(
        engine=engine)
Exemple #21
0
def csv_hello_world_solids_config():
    """Environment dict pointing sum_solid's 'num' input at the sample CSV."""
    num_csv = script_relative_path('../data/num.csv')
    return {'solids': {'sum_solid': {'inputs': {'num': num_csv}}}}
Exemple #22
0
def _get_sql_script_path(name):
    """Absolute path of the named .sql file in the sql_project_example tree."""
    relative = (
        '../../dagster_sqlalchemy/examples/sql_project_example/sql_files/{name}.sql'
        .format(name=name))
    return script_relative_path(relative)
Exemple #23
0
def path_to_file(path):
    """Resolve *path* relative to this script's own directory."""
    relative = os.path.join("./", path)
    return script_relative_path(relative)
Exemple #24
0
def test_import_module_from_path():
    """import_module_from_path loads foo_module.py and exposes its FOO constant."""
    foo_module = seven.import_module_from_path(
        'foo_module', script_relative_path('foo_module.py'))
    assert foo_module.FOO == 7
Exemple #25
0
def environment_dict(s3_bucket):
    """Yield the base environment from test_project/env.yaml with its storage
    section replaced by S3 storage in the given bucket."""
    env_dict = load_yaml_from_path(script_relative_path('test_project/env.yaml'))
    env_dict['storage'] = {'s3': {'s3_bucket': s3_bucket}}
    yield env_dict
Exemple #26
0
def test_execute_hammer_through_dagit():
    """Start the hammer pipeline through the GraphQL mutation, wait for it to
    finish, then assert the expected lifecycle events in the run's log stream."""
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        script_relative_path(
            '../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline')
    instance = DagsterInstance.local_temp()

    execution_manager = MultiprocessingExecutionManager()

    context = DagsterGraphQLContext(handle=handle,
                                    execution_manager=execution_manager,
                                    instance=instance)

    executor = SyncExecutor()

    # Execution params: filesystem intermediate storage, dask execution.
    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {}
                }
            },
            'selector': {
                'name': handle.build_pipeline_definition().name
            },
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run'][
        'runId']

    # Block until the multiprocess execution completes.
    context.execution_manager.join()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    # Full process/pipeline lifecycle must appear in the log stream.
    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages
Exemple #27
0
def test_tutorial_config_schema():
    """Run the config_pipeline tutorial from its snippets directory and check
    the exact least/most caloric cereal rows it computes."""
    # pushd so the relative "cereal.csv" in the run config resolves.
    with pushd(
            script_relative_path(
                "../../../docs_snippets/intro_tutorial/basics/e02_solids/")):
        result = execute_pipeline(
            config_pipeline,
            run_config={
                "solids": {
                    "read_csv": {
                        "inputs": {
                            "csv_path": {
                                "value": "cereal.csv"
                            }
                        }
                    },
                    "sort_by_calories": {
                        "config": {
                            "reverse": False
                        }
                    },
                }
            },
        )

    assert result.success
    assert len(result.solid_result_list) == 2
    assert isinstance(
        result.result_for_solid("sort_by_calories").output_value(), dict)
    # Expected extremes from the cereal dataset (all values are CSV strings).
    assert result.result_for_solid("sort_by_calories").output_value() == {
        "least_caloric":
        OrderedDict([
            ("name", "All-Bran with Extra Fiber"),
            ("mfr", "K"),
            ("type", "C"),
            ("calories", "50"),
            ("protein", "4"),
            ("fat", "0"),
            ("sodium", "140"),
            ("fiber", "14"),
            ("carbo", "8"),
            ("sugars", "0"),
            ("potass", "330"),
            ("vitamins", "25"),
            ("shelf", "3"),
            ("weight", "1"),
            ("cups", "0.5"),
            ("rating", "93.704912"),
        ]),
        "most_caloric":
        OrderedDict([
            ("name", "Mueslix Crispy Blend"),
            ("mfr", "K"),
            ("type", "C"),
            ("calories", "160"),
            ("protein", "3"),
            ("fat", "2"),
            ("sodium", "150"),
            ("fiber", "3"),
            ("carbo", "17"),
            ("sugars", "13"),
            ("potass", "160"),
            ("vitamins", "25"),
            ("shelf", "3"),
            ("weight", "1.5"),
            ("cups", "0.67"),
            ("rating", "30.313351"),
        ]),
    }
    return result
Exemple #28
0
"""isort:skip_file"""

import sys

from dagster import repository
from dagster.utils import script_relative_path

sys.path.append(script_relative_path("."))

from hello_cereal import hello_cereal_pipeline
from complex_pipeline import complex_pipeline


@repository
def hello_cereal_repository():
    """Repository exposing hello_cereal_pipeline and complex_pipeline lazily."""
    # Note that we can pass a dict of functions, rather than a list of
    # pipeline definitions. This allows us to construct pipelines lazily,
    # if, e.g., initializing a pipeline involves any heavy compute
    return {
        "pipelines": {
            "hello_cereal_pipeline": lambda: hello_cereal_pipeline,
            "complex_pipeline": lambda: complex_pipeline,
        }
    }
Exemple #29
0
def nb_test_path(name):
    """Path of notebooks/<name>.ipynb, resolved relative to this script."""
    relative = 'notebooks/{name}.ipynb'.format(name=name)
    return script_relative_path(relative)
Exemple #30
0
def read_sauce(_context, path):
    """Read the file at *path* (resolved relative to this script) into a Sauce."""
    resolved = script_relative_path(path)
    with open(resolved, 'r') as fd:
        contents = fd.read()
    return Sauce(contents)