Example #1
def define_context(raise_on_error=True):
    return DagsterGraphQLContext(
        RepositoryContainer(repository=define_repository()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
Example #2
def define_context(raise_on_error=True, log_dir=None):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_repository),
        pipeline_runs=PipelineRunStorage(log_dir),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
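For orientation, a minimal sketch of how a context helper like this is consumed by the tests below; execute_dagster_graphql and the pipelines query are borrowed from Examples #5 and #11, and the result shape is an assumption based on Example #6:

context = define_context(raise_on_error=False)
# Assumed result shape, mirroring Example #6's result.data access.
result = execute_dagster_graphql(context, '{ pipelines { nodes { name } } }')
assert result.data['pipelines']['nodes']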
Example #3
def host_dagit_ui(log, log_dir, repository_container, sync, host, port):
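    # With file logging enabled, each run is created as a LogFilePipelineRun that
    # writes its event log under log_dir; otherwise runs are kept in memory only.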
    if log:

        def create_pipeline_run(*args, **kwargs):
            return LogFilePipelineRun(log_dir, *args, **kwargs)

    else:
        create_pipeline_run = InMemoryPipelineRun

    pipeline_run_storage = PipelineRunStorage(
        create_pipeline_run=create_pipeline_run)

    app = create_app(repository_container,
                     pipeline_run_storage,
                     use_synchronous_execution_manager=sync)
    server = pywsgi.WSGIServer((host, port),
                               app,
                               handler_class=WebSocketHandler)
    print('Serving on http://{host}:{port}'.format(host=host, port=port))
    try:
        server.serve_forever()
    except OSError as os_error:
        if 'Address already in use' in str(os_error):
            six.raise_from(
                Exception((
                    'Another process on your machine is already listening on port {port}. '
                    'It is possible that you have another instance of dagit '
                    'running somewhere using the same port. Or it could be another '
                    'random process. Either kill that process or use the -p option to '
                    'select another port.').format(port=port)),
                os_error,
            )
        else:
            raise os_error
Example #4
def host_dagit_ui(log, log_dir, handle, use_sync, host, port):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)

    # Passing log_dir makes run event logs persistent on disk (cf. Example #3);
    # passing None keeps runs in memory.
    pipeline_run_storage = PipelineRunStorage(log_dir if log else None)

    app = create_app(handle,
                     pipeline_run_storage,
                     use_synchronous_execution_manager=use_sync)
    server = pywsgi.WSGIServer((host, port),
                               app,
                               handler_class=WebSocketHandler)
    print('Serving on http://{host}:{port}'.format(host=host, port=port))
    try:
        server.serve_forever()
    except OSError as os_error:
        if 'Address already in use' in str(os_error):
            six.raise_from(
                Exception((
                    'Another process on your machine is already listening on port {port}. '
                    'It is possible that you have another instance of dagit '
                    'running somewhere using the same port. Or it could be another '
                    'random process. Either kill that process or use the -p option to '
                    'select another port.').format(port=port)),
                os_error,
            )
        else:
            raise os_error
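A hedged invocation sketch, reusing the handle construction from Example #9; the host and port values are illustrative assumptions, and the call blocks in serve_forever until interrupted:

# handle built as in Example #9; '127.0.0.1' and 3000 are illustrative values.
handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
host_dagit_ui(log=False, log_dir=None, handle=handle, use_sync=True,
              host='127.0.0.1', port=3000)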
Example #5
def test_smoke_app():
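    # Smoke-test the main dagit endpoints (GraphQL queries, notebook rendering,
    # static assets) against a single-pipeline repository.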
    repository_container = RepositoryContainer(repository=define_repo())
    pipeline_run_storage = PipelineRunStorage()
    flask_app = app.create_app(repository_container, pipeline_run_storage)
    client = flask_app.test_client()

    result = client.post('/graphql', data={'query': 'query { pipelines { nodes { name }}}'})
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['data']['pipelines']['nodes']) == 1
    assert {node_data['name'] for node_data in data['data']['pipelines']['nodes']} == set(
        ['repo_demo_pipeline']
    )

    result = client.get('/graphql')
    assert result.status_code == 400
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['errors']) == 1
    assert data['errors'][0]['message'] == 'Must provide query string.'

    result = client.get('/dagit/notebook?path=foo.bar')
    assert result.status_code == 400
    assert result.data.decode('utf-8') == 'Invalid Path'

    result = client.post('/graphql', data={'query': 'query { version { slkjd } }'})
    data = json.loads(result.data.decode('utf-8'))
    assert 'errors' in data
    assert len(data['errors']) == 1
    assert 'must not have a sub selection' in data['errors'][0]['message']

    result = client.get('static/foo/bar')
    assert result.status_code == 404
Example #6
def define_examples_context(raise_on_error=True):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_module('dagster_examples', 'define_demo_repo'),
        pipeline_runs=PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
Example #7
def test_pipelines_python_error():
    ctx = DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_error_pipeline_repo),
        pipeline_runs=PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    assert result.data['pipelinesOrError']['__typename'] == "PythonError"
Example #8
def test_pipelines_python_error():
    ctx = DagsterGraphQLContext(
        RepositoryContainer(repository=define_error_pipeline_repo()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    assert result.data['pipelinesOrError']['__typename'] == "PythonError"
Example #9
def test_index_view():
    with create_app(
        ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml')),
        PipelineRunStorage(),
    ).test_client() as client:
        res = client.get('/')

    assert res.status_code == 200, res.data
    assert b'You need to enable JavaScript to run this app' in res.data
Example #10
def test_notebook_view():
    notebook_path = script_relative_path('render_uuid_notebook.ipynb')

    with create_app(
        ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml')),
        PipelineRunStorage(),
    ).test_client() as client:
        res = client.get('/dagit/notebook?path={}'.format(notebook_path))

    assert res.status_code == 200
    # This magic guid is hardcoded in the notebook
    assert b'6cac0c38-2c97-49ca-887c-4ac43f141213' in res.data
Example #11
def test_pipelines_or_error_invalid():

    context = DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_test_repository),
        pipeline_runs=PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )

    result = execute_dagster_graphql(
        context,
        '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }')
    msg = result.data['pipelinesOrError']['message']
    assert 'circular reference detected in solid "csolid"' in msg
Example #12
def test_pipelines_or_error_invalid():
    repository = RepositoryDefinition(
        name='test',
        pipeline_dict={'pipeline': define_circular_dependency_pipeline})
    context = DagsterGraphQLContext(
        RepositoryContainer(repository=repository),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(
        context,
        '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }')
    msg = result.data['pipelinesOrError']['message']
    assert "Circular reference detected in solid csolid" in msg
Example #13
def load_dagit_for_repo_cli_args(n_pipelines=1, **kwargs):
    handle = handle_for_repo_cli_args(kwargs)
    pipeline_run_storage = PipelineRunStorage()

    app = create_app(handle, pipeline_run_storage)

    client = app.test_client()

    res = client.get('/graphql?query={query_string}'.format(
        query_string=PIPELINES_OR_ERROR_QUERY))
    json_res = json.loads(res.data.decode('utf-8'))
    assert 'data' in json_res
    assert 'pipelinesOrError' in json_res['data']
    assert 'nodes' in json_res['data']['pipelinesOrError']
    assert len(json_res['data']['pipelinesOrError']['nodes']) == n_pipelines

    return res
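A hedged usage sketch: the kwargs are forwarded to handle_for_repo_cli_args, so they mirror dagit's CLI target flags; the kwarg names below are borrowed from Example #14 and are assumptions, not confirmed by this example:

# module_name / fn_name are assumed kwarg names, taken from Example #14.
res = load_dagit_for_repo_cli_args(
    n_pipelines=1,
    module_name='dagster_examples.intro_tutorial.repos',
    fn_name='define_repo',
)
assert res.status_code == 200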
Example #14
def test_smoke_app():
    pipeline_run_storage = PipelineRunStorage()
    flask_app = app.create_app(
        ExecutionTargetHandle.for_repo_module(
            module_name='dagster_examples.intro_tutorial.repos',
            fn_name='define_repo'),
        pipeline_run_storage,
    )
    client = flask_app.test_client()

    result = client.post(
        '/graphql', data={'query': 'query { pipelines { nodes { name }}}'})
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['data']['pipelines']['nodes']) == 1
    assert {
        node_data['name']
        for node_data in data['data']['pipelines']['nodes']
    } == set(['repo_demo_pipeline'])

    result = client.get('/graphql')
    assert result.status_code == 400
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['errors']) == 1
    assert data['errors'][0]['message'] == 'Must provide query string.'

    result = client.get('/dagit/notebook?path=foo.bar')
    assert result.status_code == 400
    assert result.data.decode('utf-8') == 'Invalid Path'

    result = client.post('/graphql',
                         data={'query': 'query { version { slkjd } }'})
    data = json.loads(result.data.decode('utf-8'))
    assert 'errors' in data
    assert len(data['errors']) == 1
    assert 'must not have a sub selection' in data['errors'][0]['message']

    result = client.get('static/foo/bar')
    assert result.status_code == 404

    result = client.get('vendor/foo/bar')
    assert result.status_code == 404
Example #15
def test_create_app():
    handle = ExecutionTargetHandle.for_repo_yaml(script_relative_path('./repository.yaml'))
    pipeline_run_storage = PipelineRunStorage(create_pipeline_run=InMemoryPipelineRun)
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=True)
    assert create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=False)
Example #16
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    # Execute the plan one step at a time, in topological order, via the GraphQL
    # endpoint, so every step event type is exercised.
    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            pipeline_run_storage = PipelineRunStorage()

            res = execute_query(handle, query, variables, pipeline_run_storage=pipeline_run_storage)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events