def test_yaml_file():
    """Check the handle built from ``repository_module.yaml`` and a pipeline name.

    The handle's mode, entrypoint fields, back-reference and pipeline name are
    asserted, then three incomplete argument sets are each expected to raise
    ``UsageError``.
    """
    module = importlib.import_module('dagster_examples.intro_tutorial.repos')

    handle = handle_for_pipeline_cli_args(
        {
            'module_name': None,
            'pipeline_name': 'foobar',
            'python_file': None,
            'fn_name': None,
            'repository_yaml': script_relative_path('repository_module.yaml'),
        }
    )
    assert handle.mode == _ExecutionTargetMode.PIPELINE

    # The expected entrypoint is built from three arguments only, so it is
    # compared field by field; ``from_handle`` is checked separately below.
    expected = LoaderEntrypoint(module, 'dagster_examples.intro_tutorial.repos', 'define_repo')
    assert handle.entrypoint.module == expected.module
    assert handle.entrypoint.module_name == expected.module_name
    assert handle.entrypoint.fn_name == expected.fn_name
    assert handle.entrypoint.from_handle == handle

    assert handle.data.pipeline_name == 'foobar'

    invalid_arg_sets = (
        {'module_name': 'kdjfdk', 'pipeline_name': 'foobar'},
        {'fn_name': 'kjdfkd', 'pipeline_name': 'foobar'},
        {'pipeline_name': 'foobar', 'python_file': 'kjdfkdj'},
    )
    for invalid_args in invalid_arg_sets:
        # No ``assert`` around the call: pytest.raises already fails the test
        # when the expected exception is not raised.
        with pytest.raises(UsageError):
            handle_for_pipeline_cli_args(invalid_args)
def test_repository_python_file():
    """Build a handle from ``bar_repo.py`` / ``define_bar_repo`` and verify it.

    Afterwards, argument sets that add a ``module_name`` or a
    ``repository_yaml`` next to the python file are expected to raise
    ``UsageError``.
    """
    bar_repo_path = script_relative_path('bar_repo.py')
    bar_repo_module = imp.load_source('bar_repo', bar_repo_path)

    cli_args = {
        'pipeline_name': 'foo',
        'python_file': bar_repo_path,
        'fn_name': 'define_bar_repo',
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.PIPELINE
    expected_entrypoint = LoaderEntrypoint(bar_repo_module, 'bar_repo', 'define_bar_repo', handle)
    assert handle.entrypoint == expected_entrypoint
    assert handle.data.pipeline_name == 'foo'
    assert handle.entrypoint.from_handle == handle

    # (module_name, repository_yaml) pairs combined with the python file below.
    conflicting_extras = (
        ('kdjfkd', None),
        (None, 'kjdfkdjf'),
    )
    for module_name, repository_yaml in conflicting_extras:
        with pytest.raises(UsageError):
            handle_for_pipeline_cli_args(
                {
                    'module_name': module_name,
                    'pipeline_name': 'foo',
                    'python_file': script_relative_path('bar_repo.py'),
                    'fn_name': 'define_bar_repo',
                    'repository_yaml': repository_yaml,
                }
            )
Example #3
0
def test_in_pipeline_manager_solid_config():
    """Check ``manager.context.solid_config`` under three manager setups.

    With default arguments the solid config is ``None``; with the
    ``hello_world_config`` solid it is ``{'greeting': 'hello'}``; with an
    explicit environment dict it is the greeting supplied there.
    """

    def _config_pipeline_handle():
        # A fresh handle is built for every manager, one call per use.
        return handle_for_pipeline_cli_args(
            {
                'module_name': 'dagstermill.examples.repository',
                'fn_name': 'define_hello_world_config_pipeline',
            }
        )

    with in_pipeline_manager() as default_manager:
        assert default_manager.context.solid_config is None

    config_solid = SolidHandle('hello_world_config', 'hello_world_config', None)
    config_handle = _config_pipeline_handle()
    with in_pipeline_manager(solid_handle=config_solid, handle=config_handle) as config_manager:
        assert config_manager.context.solid_config == {'greeting': 'hello'}

    override_solid = SolidHandle('hello_world_config', 'hello_world_config', None)
    override_environment = {
        'solids': {'hello_world_config': {'config': {'greeting': 'bonjour'}}}
    }
    override_handle = _config_pipeline_handle()
    with in_pipeline_manager(
        solid_handle=override_solid,
        environment_dict=override_environment,
        handle=override_handle,
    ) as override_manager:
        assert override_manager.context.solid_config == {'greeting': 'bonjour'}
Example #4
0
def execute_execute_command_with_preset(preset_name, cli_args, _mode):
    """Build the pipeline described by ``cli_args`` and run it with a preset.

    ``cli_args`` is modified in place: its ``'pipeline_name'`` entry is removed
    once the pipeline definition has been built. ``_mode`` is not used.

    Returns whatever ``execute_pipeline_with_preset`` returns.
    """
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()
    cli_args.pop('pipeline_name')

    instance = DagsterInstance.get()
    return execute_pipeline_with_preset(
        pipeline_def, preset_name, instance=instance, raise_on_error=False
    )
Example #5
0
def in_pipeline_manager(pipeline_name='hello_world_pipeline',
                        solid_handle=SolidHandle('hello_world', 'hello_world',
                                                 None),
                        **kwargs):
    """Yield a ``Manager`` whose pipeline context has been reconstituted.

    This is a generator meant to be driven as a context manager.
    NOTE(review): the ``contextmanager`` decorator is not visible in this
    excerpt -- confirm it is applied where this function is defined.

    Args:
        pipeline_name: Pipeline name placed in the CLI args used to build the
            default handle.
        solid_handle: Solid handle passed to ``reconstitute_pipeline_context``.
        **kwargs: Extra keyword arguments for ``reconstitute_pipeline_context``;
            they take precedence over the defaults assembled here (for example
            ``handle`` or ``environment_dict``).

    Yields:
        The ``Manager`` instance after its context has been reconstituted.
    """
    manager = Manager()
    run_id = str(uuid.uuid4())

    # Resolve the handle before creating the scratch directory, so that a
    # failure here cannot leave a temporary directory behind.
    handle = handle_for_pipeline_cli_args({
        'pipeline_name': pipeline_name,
        'module_name': 'dagstermill.examples.repository',
        'fn_name': 'define_hello_world_pipeline',
    })

    marshal_dir = tempfile.mkdtemp()
    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'run_config': RunConfig(run_id=run_id, mode='default'),
                'solid_handle': solid_handle,
                'handle': handle,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
            }

            # Caller-supplied kwargs override the defaults above.
            manager.reconstitute_pipeline_context(
                **dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
Example #6
0
File: cli.py Project: nikie/dagster
def construct_environment_yaml(preset_name, env, pipeline_name, module_name):
    """Return the environment dict for a pipeline, adding a storage default.

    The environment comes either from a named preset on the pipeline or from
    a list of YAML file globs; the two sources are mutually exclusive.

    Args:
        preset_name: Name of a preset on the pipeline, or a falsy value to
            load from ``env`` instead.
        env: Iterable of YAML glob strings, or ``None``/empty for no files.
        pipeline_name: Used both as the pipeline name and as the function
            name when building the pipeline, and as the last component of the
            default storage directory.
        module_name: Module from which the pipeline is loaded.

    Returns:
        The environment dict, with a ``'storage'`` entry added when none was
        provided.

    Raises:
        click.UsageError: If both ``preset_name`` and ``env`` are given.
    """
    # Load environment dict from either a preset or yaml file globs
    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        cli_args = {
            'fn_name': pipeline_name,
            'pipeline_name': pipeline_name,
            'module_name': module_name,
        }
        pipeline = handle_for_pipeline_cli_args(
            cli_args).build_pipeline_definition()
        # Shallow-copy so that adding the storage default below does not
        # modify the dict object handed out by the preset.
        environment_dict = dict(
            pipeline.get_preset(preset_name).environment_dict)

    else:
        # Treat a missing env (None) the same as an empty one.
        env_globs = list(env) if env is not None else []
        environment_dict = (
            load_yaml_from_glob_list(env_globs) if env_globs else {})

    # If not provided by the user, ensure we have storage location defined
    if 'storage' not in environment_dict:
        system_tmp_path = seven.get_system_temp_directory()
        dagster_tmp_path = os.path.join(system_tmp_path, 'dagster-airflow',
                                        pipeline_name)
        environment_dict['storage'] = {
            'filesystem': {
                'config': {
                    'base_dir': six.ensure_str(dagster_tmp_path)
                }
            }
        }

    return environment_dict
Example #7
0
def test_papermill_pandas_hello_world_pipeline():
    """Run the papermill pandas hello-world pipeline on ``num_prod.csv``.

    The pipeline must succeed and the solid's output must equal the CSV
    contents with 1 added.
    """
    cli_args = {
        'module_name': 'dagster_pandas.examples',
        'fn_name': 'papermill_pandas_hello_world_pipeline',
    }
    pipeline = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()

    input_csv = file_relative_path(__file__, 'num_prod.csv')
    solid_inputs = {'df': {'csv': {'path': input_csv}}}
    environment = {
        'solids': {'papermill_pandas_hello_world': {'inputs': solid_inputs}}
    }

    pipeline_result = execute_pipeline(
        pipeline, environment, instance=DagsterInstance.local_temp()
    )
    assert pipeline_result.success

    solid_result = pipeline_result.result_for_solid('papermill_pandas_hello_world')
    expected_frame = pd.read_csv(file_relative_path(__file__, 'num_prod.csv')) + 1
    assert solid_result.output_value().equals(expected_frame)
Example #8
0
def test_event_callback_logging():
    """Check that a watched run reports pipeline success through the callback.

    Event records delivered to the callback are grouped by dagster event type.
    After execution the test polls up to five times, sleeping 0.333 seconds
    before each check, and passes only if a success event is seen before a
    failure event or the retries run out.
    """
    events_by_type = defaultdict(list)

    def _collect(record):
        assert isinstance(record, EventRecord)
        if not record.is_dagster_event:
            return
        events_by_type[record.dagster_event.event_type].append(record)

    cli_args = {
        'module_name': 'dagstermill.examples.repository',
        'fn_name': 'define_hello_logging_pipeline',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    run_config = RunConfig()
    instance = DagsterInstance.local_temp()
    instance.watch_event_logs(run_config.run_id, -1, _collect)

    pipeline_def = handle.build_pipeline_definition()
    execute_pipeline(pipeline_def, run_config=run_config, instance=instance)

    saw_success = False
    for _attempt in range(5):
        time.sleep(0.333)
        if DagsterEventType.PIPELINE_FAILURE in events_by_type:
            break
        if DagsterEventType.PIPELINE_SUCCESS in events_by_type:
            saw_success = True
            break

    assert saw_success
Example #9
0
def execute_execute_command_with_preset(preset, cli_args, _mode):
    """Execute the pipeline from ``cli_args`` using the kwargs of a preset.

    ``cli_args`` is modified in place: ``'pipeline_name'`` is removed after
    the pipeline definition is built. ``_mode`` is not used. The value
    returned by ``pipeline.get_preset(preset)`` is expanded as keyword
    arguments to ``execute_pipeline``.
    """
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()
    cli_args.pop('pipeline_name')

    preset_kwargs = pipeline_def.get_preset(preset)
    return execute_pipeline(**preset_kwargs)
Example #10
0
def test_logging():
    """Run the hello-logging pipeline with a DEBUG and a CRITICAL file logger.

    The message ``'Hello, there!'`` must appear in the DEBUG logger's file and
    must not appear in the CRITICAL logger's file.
    """

    def _logger_entry(name, file_path, log_level):
        return {'config': {'name': name, 'file_path': file_path, 'log_level': log_level}}

    def _read_json_lines(path):
        # One JSON document per non-empty line.
        with open(path, 'r') as log_file:
            raw_lines = log_file.read().strip('\n').split('\n')
            return [json.loads(raw) for raw in raw_lines if raw]

    handle = handle_for_pipeline_cli_args({
        'python_file': script_relative_path('./test_logging.py'),
        'fn_name': 'define_hello_logging_pipeline',
    })
    pipeline_def = handle.build_pipeline_definition()

    with safe_tempfile_path() as test_file_path, safe_tempfile_path() as critical_file_path:
        environment = {
            'loggers': {
                'test': _logger_entry('test', test_file_path, 'DEBUG'),
                'critical': _logger_entry('critical', critical_file_path, 'CRITICAL'),
            }
        }
        execute_pipeline(pipeline_def, environment, instance=DagsterInstance.local_temp())

        records = _read_json_lines(test_file_path)
        critical_records = _read_json_lines(critical_file_path)

    messages = [entry['dagster_meta']['orig_message'] for entry in records]
    assert 'Hello, there!' in messages

    critical_messages = [entry['dagster_meta']['orig_message'] for entry in critical_records]
    assert 'Hello, there!' not in critical_messages
def test_load_from_pipeline_file():
    """Load ``define_foo_pipeline`` from this file and check the result.

    The built object must be a ``PipelineDefinition`` named ``'foo'``.
    """
    cli_args = {'fn_name': 'define_foo_pipeline', 'python_file': __file__}
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'foo'
def test_load_from_pipeline_file():
    """Load ``define_foo_pipeline`` from this file and check the result.

    The built object must be a ``PipelineDefinition`` named ``'foo'``, and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    cli_args = {'fn_name': 'define_foo_pipeline', 'python_file': __file__}
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'foo'

    recovered = ExecutionTargetHandle.get_handle(pipeline_def)
    assert recovered == (handle, None)
def test_loader_from_default_repository_file_yaml():
    """Load pipeline ``'foo'`` through ``repository_file.yaml`` and check it.

    The built object must be a ``PipelineDefinition`` named ``'foo'``, and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    cli_args = {
        'pipeline_name': 'foo',
        'repository_yaml': script_relative_path('repository_file.yaml'),
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'foo'

    recovered = ExecutionTargetHandle.get_handle(pipeline_def)
    assert recovered == (handle, None)
def test_load_from_repository_module():
    """Load ``repo_demo_pipeline`` via ``define_repo`` in the tutorial module.

    The built object must be a ``PipelineDefinition`` with that name.
    """
    cli_args = {
        'module_name': 'dagster_examples.intro_tutorial.repos',
        'pipeline_name': 'repo_demo_pipeline',
        'fn_name': 'define_repo',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'repo_demo_pipeline'
Example #15
0
def pipeline_snapshot_command(solid_subset, **kwargs):
    """Echo the serialized active pipeline data for the selected pipeline.

    Args:
        solid_subset: Comma-separated solid names; when truthy, the pipeline
            is narrowed with ``build_sub_pipeline`` before serialization.
        **kwargs: CLI arguments passed to ``handle_for_pipeline_cli_args``.
    """
    pipeline_def = handle_for_pipeline_cli_args(kwargs).build_pipeline_definition()

    if solid_subset:
        solid_names = solid_subset.split(",")
        pipeline_def = pipeline_def.build_sub_pipeline(solid_names)

    serialized = serialize_dagster_namedtuple(active_pipeline_data_from_def(pipeline_def))
    click.echo(serialized)
def test_load_from_pipeline_module():
    """Load ``hello_cereal_pipeline`` from the tutorial module and check it.

    The built object must be a ``PipelineDefinition`` with that name, and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    cli_args = {
        'module_name': 'dagster_examples.intro_tutorial.repos',
        'fn_name': 'hello_cereal_pipeline',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'hello_cereal_pipeline'

    recovered = ExecutionTargetHandle.get_handle(pipeline_def)
    assert recovered == (handle, None)
def test_load_from_repository_file():
    """Load pipeline ``'foo'`` via ``define_bar_repo`` in this file.

    The built object must be a ``PipelineDefinition`` named ``'foo'``.
    """
    cli_args = {
        'pipeline_name': 'foo',
        'python_file': __file__,
        'fn_name': 'define_bar_repo',
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'foo'
Example #18
0
def test_logging():
    """Run the hello-logging pipeline with a DEBUG and a CRITICAL file logger.

    Both loggers write to named temporary files. The message
    ``'Hello, there!'`` must appear in the DEBUG file and must not appear in
    the CRITICAL file.
    """

    def _logger_entry(name, file_path, log_level):
        return {'config': {'name': name, 'file_path': file_path, 'log_level': log_level}}

    def _decode_json_lines(open_file):
        # The temporary files yield bytes; one JSON document per non-empty line.
        text = open_file.read().decode('utf-8')
        return [json.loads(raw) for raw in text.strip('\n').split('\n') if raw]

    handle = handle_for_pipeline_cli_args({
        'python_file': script_relative_path('./test_logging.py'),
        'fn_name': 'define_hello_logging_pipeline',
    })
    pipeline_def = handle.build_pipeline_definition()

    with tempfile.NamedTemporaryFile() as test_file, \
            tempfile.NamedTemporaryFile() as critical_file:
        environment = {
            'loggers': {
                'test': _logger_entry('test', test_file.name, 'DEBUG'),
                'critical': _logger_entry('critical', critical_file.name, 'CRITICAL'),
            }
        }
        execute_pipeline(pipeline_def, environment)

        # Rewind both handles before reading what the loggers wrote.
        test_file.seek(0)
        critical_file.seek(0)

        records = _decode_json_lines(test_file)
        critical_records = _decode_json_lines(critical_file)

    messages = [entry['dagster_meta']['orig_message'] for entry in records]
    assert 'Hello, there!' in messages

    critical_messages = [entry['dagster_meta']['orig_message'] for entry in critical_records]
    assert 'Hello, there!' not in critical_messages
Example #19
0
def execute_execute_command_with_preset(preset, raise_on_error, cli_args, _mode):
    """Execute the pipeline from ``cli_args`` using a preset's kwargs.

    ``cli_args`` is modified in place: ``'pipeline_name'`` is removed after
    the pipeline definition is built. The preset's ``'run_config'`` entry is
    replaced with a copy carrying an in-process executor config that uses
    ``raise_on_error``. ``_mode`` is not used.
    """
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()
    cli_args.pop('pipeline_name')

    preset_kwargs = pipeline_def.get_preset(preset)
    base_run_config = preset_kwargs['run_config']
    executor_config = InProcessExecutorConfig(raise_on_error=raise_on_error)
    preset_kwargs['run_config'] = base_run_config.with_executor_config(executor_config)
    return execute_pipeline(**preset_kwargs)
def test_loader_from_default_repository_module_yaml():
    """Load ``repo_demo_pipeline`` through ``repository_module.yaml``.

    The built object must be a ``PipelineDefinition`` with that name.
    """
    cli_args = {
        'pipeline_name': 'repo_demo_pipeline',
        'repository_yaml': script_relative_path('repository_module.yaml'),
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'repo_demo_pipeline'
def test_pipeline_module():
    """Check the handle built from a module name plus a function name.

    The handle must be in PIPELINE mode and its entrypoint must equal a
    ``LoaderEntrypoint`` for ``dagster`` / ``define_pipeline``.
    """
    cli_args = {
        'module_name': 'dagster',
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': None,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    assert handle.mode == _ExecutionTargetMode.PIPELINE

    dagster_module = importlib.import_module('dagster')
    expected_entrypoint = LoaderEntrypoint(dagster_module, 'dagster', 'define_pipeline')
    assert handle.entrypoint == expected_entrypoint
Example #22
0
def test_loader_from_default_repository_module_yaml():
    """Load ``hello_cereal_pipeline`` through ``repository_module.yaml``.

    The built object must be a ``PipelineDefinition`` with that name, and
    ``ExecutionTargetHandle.get_handle`` must return ``(handle, None)`` for it.
    """
    cli_args = {
        'pipeline_name': 'hello_cereal_pipeline',
        'repository_yaml': file_relative_path(__file__, 'repository_module.yaml'),
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    pipeline_def = handle.build_pipeline_definition()

    assert isinstance(pipeline_def, PipelineDefinition)
    assert pipeline_def.name == 'hello_cereal_pipeline'

    recovered = ExecutionTargetHandle.get_handle(pipeline_def)
    assert recovered == (handle, None)
Example #23
0
def get_pipeline_snapshot_from_cli_args(cli_args):
    """Return a pipeline snapshot for the pipeline selected by ``cli_args``.

    ``cli_args['pipeline_name']`` must not be ``None``. When ``'image'`` is
    set, the snapshot is looked up in the repository data obtained from that
    image, using the first element of ``'pipeline_name'``; this path requires
    the ``docker`` module to be available. Otherwise the pipeline definition
    is built from ``cli_args`` and snapshotted directly.
    """
    _cli_load_invariant(cli_args.get('pipeline_name') is not None)

    image = cli_args.get('image')
    if not image:
        pipeline_definition = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()
        return PipelineSnapshot.from_pipeline_def(pipeline_definition)

    _cli_load_invariant(
        is_module_available('docker'),
        msg='--image is not supported without dagster[docker] or the Python package docker installed.',
    )
    active_repo_data = get_active_repository_data_from_image(image)
    first_pipeline_name = cli_args.get('pipeline_name')[0]
    return active_repo_data.get_pipeline_snapshot(first_pipeline_name)
Example #24
0
def execute_execute_command_with_preset(preset_name, cli_args, _mode):
    """Execute the pipeline from ``cli_args`` with a named preset and tags.

    ``cli_args`` is modified in place: ``'pipeline_name'`` is removed after
    the pipeline definition is built, and the tags are then read from the
    remaining arguments. ``_mode`` is not used.
    """
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()
    cli_args.pop('pipeline_name')
    run_tags = get_tags_from_args(cli_args)

    execute_kwargs = {
        'preset': preset_name,
        'instance': DagsterInstance.get(),
        'raise_on_error': False,
        'run_config': RunConfig(tags=run_tags),
    }
    return execute_pipeline(pipeline_def, **execute_kwargs)
def test_repository_module():
    """Check the handle built from a module, a repo function and a pipeline name.

    The handle must be in REPOSITORY mode, its entrypoint must equal a
    ``LoaderEntrypoint`` for ``dagster`` / ``define_bar_repo``, and its data
    must carry the pipeline name ``'foo'``.
    """
    dagster_module = importlib.import_module('dagster')

    cli_args = {
        'module_name': 'dagster',
        'pipeline_name': 'foo',
        'python_file': None,
        'fn_name': 'define_bar_repo',
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.REPOSITORY
    expected_entrypoint = LoaderEntrypoint(dagster_module, 'dagster', 'define_bar_repo')
    assert handle.entrypoint == expected_entrypoint
    assert handle.data.pipeline_name == 'foo'
def test_pipeline_python_file():
    """Check the handle built from ``foo_pipeline.py`` / ``define_pipeline``.

    The handle must be in PIPELINE mode and its entrypoint must equal a
    ``LoaderEntrypoint`` built from the separately loaded module.
    """
    pipeline_path = script_relative_path('foo_pipeline.py')
    pipeline_module = imp.load_source('foo_pipeline', pipeline_path)

    cli_args = {
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': pipeline_path,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)

    assert handle.mode == _ExecutionTargetMode.PIPELINE
    expected_entrypoint = LoaderEntrypoint(pipeline_module, 'foo_pipeline', 'define_pipeline')
    assert handle.entrypoint == expected_entrypoint
Example #27
0
def execute_execute_command_with_preset(preset, raise_on_error, cli_args, mode):
    """Execute a repository preset for the pipeline selected by ``cli_args``.

    ``cli_args`` is modified in place: ``'pipeline_name'`` is removed after
    the pipeline definition is built, and the repository is then resolved
    from the remaining arguments. The preset kwargs are passed to
    ``execute_pipeline`` together with a run config carrying ``mode`` and an
    in-process executor config that uses ``raise_on_error``.
    """
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()
    cli_args.pop('pipeline_name')
    repository_def = handle_for_repo_cli_args(cli_args).build_repository_definition()
    preset_kwargs = repository_def.get_preset_pipeline(pipeline_def.name, preset)

    executor_config = InProcessExecutorConfig(raise_on_error=raise_on_error)
    run_config = RunConfig(mode=mode, executor_config=executor_config)
    return execute_pipeline(run_config=run_config, **preset_kwargs)
def exec_for_test(fn_name, env=None, **kwargs):
    """Execute a dagstermill example pipeline and yield its result.

    Generator: the pipeline is loaded from ``dagstermill.examples.repository``
    by ``fn_name``, executed with ``env`` and ``**kwargs``, and the result is
    yielded. When the generator finishes or is closed, a truthy result is
    handed to ``cleanup_result_notebook``.
    """
    pipeline_result = None

    cli_args = {'module_name': 'dagstermill.examples.repository', 'fn_name': fn_name}
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()

    try:
        pipeline_result = execute_pipeline(pipeline_def, env, **kwargs)
        yield pipeline_result
    finally:
        # Nothing to clean up when execution never produced a result.
        if pipeline_result:
            cleanup_result_notebook(pipeline_result)
Example #29
0
def test_yield_unserializable_result():
    """Check ``yield_result`` with a lock, outside and inside a pipeline.

    On a bare ``Manager`` the call must return a truthy value; inside a
    manager set up for the ``hello_world_output`` solid it must raise
    ``TypeError``.
    """
    bare_manager = Manager()
    assert bare_manager.yield_result(threading.Lock())

    output_solid = SolidHandle('hello_world_output', 'hello_world_output', None)
    output_handle = handle_for_pipeline_cli_args(
        {
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_with_output_pipeline',
        }
    )
    with in_pipeline_manager(solid_handle=output_solid, handle=output_handle) as manager:
        with pytest.raises(TypeError):
            manager.yield_result(threading.Lock())
Example #30
0
def test_pipeline_python_file():
    """Check the handle built from ``foo_pipeline.py`` / ``define_pipeline``.

    The handle must be in PIPELINE mode; its entrypoint's module, module name
    and function name must match a ``LoaderEntrypoint`` built from the
    separately imported module, and ``from_handle`` must be the handle itself.
    """
    pipeline_path = file_relative_path(__file__, 'foo_pipeline.py')
    pipeline_module = import_module_from_path('foo_pipeline', pipeline_path)

    cli_args = {
        'module_name': None,
        'fn_name': 'define_pipeline',
        'pipeline_name': None,
        'python_file': pipeline_path,
        'repository_yaml': None,
    }
    handle = handle_for_pipeline_cli_args(cli_args)
    assert handle.mode == _ExecutionTargetMode.PIPELINE

    expected_entrypoint = LoaderEntrypoint(pipeline_module, 'foo_pipeline', 'define_pipeline')
    actual_entrypoint = handle.entrypoint
    assert actual_entrypoint.module == expected_entrypoint.module
    assert actual_entrypoint.module_name == expected_entrypoint.module_name
    assert actual_entrypoint.fn_name == expected_entrypoint.fn_name
    assert handle.entrypoint.from_handle == handle