Example #1
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, passing_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
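
The get_events_of_type helper used above is imported from the test suite and not shown. A minimal sketch, assuming the event-log records expose is_dagster_event and a structured dagster_event carrying an event_type:

def get_events_of_type(logs, event_type):
    # Keep only structured Dagster events of the requested type.
    return [
        record for record in logs
        if record.is_dagster_event and record.dagster_event.event_type == event_type
    ]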
Example #2
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': file_relative_path(__file__, 'data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run,
                                       instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    last_log = instance.all_logs(run_id)[-1]

    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'
        .format(run_id=run_id))
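
crashy_pipeline itself is defined elsewhere in the test file. A hypothetical sketch of the shape the test implies: a solid that hard-kills its own subprocess, so the parent manager records the run as FAILURE and logs the 'unexpectedly exited' message. The real pipeline evidently also contains the sum_solid configured above; the names and wiring here are assumptions:

import os

from dagster import pipeline, solid


@solid
def crashy_solid(context):
    # os._exit skips Python cleanup entirely, simulating a crashed worker process.
    os._exit(1)


@pipeline
def crashy_pipeline():
    crashy_solid()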
Example #3
def test_two_runs_running():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_one
                        }
                    }
                }
            },
        )
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run_one, instance)

        pipeline_run_two = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_two
                        }
                    }
                }
            },
        )

        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run_two, instance)

        # ensure both runs have begun execution
        while not (os.path.exists(file_one) and os.path.exists(file_two)):
            time.sleep(0.1)

        assert execution_manager.is_process_running(pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_one.run_id)

        assert not execution_manager.is_process_running(
            pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_two.run_id)

        assert not execution_manager.is_process_running(
            pipeline_run_one.run_id)
        assert not execution_manager.is_process_running(
            pipeline_run_two.run_id)
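
infinite_loop_pipeline is likewise defined elsewhere in the test file. A plausible sketch, assuming the config schema implied by the environment_dict above: the solid first creates its configured file (which the polling loop waits for) and then blocks until the process is terminated:

import time

from dagster import Field, String, pipeline, solid


@solid(config={'file': Field(String)})
def loop(context):
    # Touch the configured file so the test can observe that the run has started.
    with open(context.solid_config['file'], 'w') as f:
        f.write('running')
    # Then spin forever; only terminate() can end this run.
    while True:
        time.sleep(0.1)


@pipeline
def infinite_loop_pipeline():
    loop()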
Example #4
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }

    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run, instance)

    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS
Example #5
def test_execute_hammer_through_dagit():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        file_relative_path(
            __file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(handle=handle,
                                    execution_manager=execution_manager,
                                    instance=instance)

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {}
                }
            },
            'selector': {
                'name': handle.build_pipeline_definition().name
            },
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run'][
        'runId']

    context.execution_manager.join()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages
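
SUBSCRIPTION_QUERY and START_PIPELINE_EXECUTION_MUTATION are GraphQL documents imported from the test suite; their text is not shown here. Judging from the pipelineRunLogs / messages accesses above, the subscription is shaped roughly like the following guess (the real document may select more fields):

SUBSCRIPTION_QUERY = '''
subscription subscribeTest($runId: ID!) {
    pipelineRunLogs(runId: $runId) {
        __typename
        ... on PipelineRunLogsSubscriptionSuccess {
            messages {
                __typename
            }
        }
    }
}
'''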
Example #6
File: app.py  Project: nikie/dagster
def create_app(handle, instance, reloader=None):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(reloader, 'reloader', Reloader)

    app = Flask('dagster-ui')
    sockets = Sockets(app)
    app.app_protocol = lambda environ_path_info: 'graphql-ws'

    schema = create_schema()
    subscription_server = DagsterSubscriptionServer(schema=schema)

    execution_manager_settings = instance.dagit_settings.get(
        'execution_manager')
    if execution_manager_settings and execution_manager_settings.get(
            'max_concurrent_runs'):
        execution_manager = QueueingSubprocessExecutionManager(
            instance, execution_manager_settings.get('max_concurrent_runs'))
    else:
        execution_manager = SubprocessExecutionManager(instance)

    warn_if_compute_logs_disabled()

    print('Loading repository...')
    context = DagsterGraphQLContext(
        handle=handle,
        instance=instance,
        execution_manager=execution_manager,
        reloader=reloader,
        version=__version__,
    )

    # Automatically initialize the scheduler every time Dagit loads
    scheduler_handle = context.scheduler_handle
    scheduler = instance.scheduler

    if scheduler_handle:
        if scheduler:
            handle = context.get_handle()
            python_path = sys.executable
            repository_path = handle.data.repository_yaml
            repository = context.get_repository()
            scheduler_handle.up(python_path,
                                repository_path,
                                repository=repository,
                                instance=instance)
        else:
            warnings.warn(MISSING_SCHEDULER_WARNING)

    app.add_url_rule(
        '/graphql',
        'graphql',
        DagsterGraphQLView.as_view(
            'graphql',
            schema=schema,
            graphiql=True,
            # XXX(freiksenet): Pass proper ws url
            graphiql_template=PLAYGROUND_TEMPLATE,
            executor=Executor(),
            context=context,
        ),
    )
    sockets.add_url_rule(
        '/graphql', 'graphql',
        dagster_graphql_subscription_view(subscription_server, context))

    app.add_url_rule(
        # should match the `build_local_download_url`
        '/download/<string:run_id>/<string:step_key>/<string:file_type>',
        'download_view',
        download_view(context),
    )

    # These routes are specific to the Dagit UI and are not part of the GraphQL
    # API that we want other people to consume, so they're kept separate for now.
    # We also grab the magic global request.args dict here so that notebook_view
    # stays testable.
    app.add_url_rule('/dagit/notebook', 'notebook',
                     lambda: notebook_view(request.args))

    app.add_url_rule('/static/<path:path>/<string:file>', 'static_view',
                     static_view)
    app.add_url_rule('/vendor/<path:path>/<string:file>', 'vendor_view',
                     vendor_view)
    app.add_url_rule('/<string:worker_name>.worker.js', 'worker_view',
                     worker_view)
    app.add_url_rule('/dagit_info', 'sanity_view', info_view)
    app.add_url_rule('/<path:_path>', 'index_catchall', index_view)
    app.add_url_rule('/', 'index', index_view, defaults={'_path': ''})

    CORS(app)

    return app
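
Because of the Flask-Sockets 'graphql-ws' route registered above, the app returned by create_app must be served by a websocket-capable server rather than plain app.run(). A minimal launch sketch, assuming a gevent stack; the host, port, and the handle/instance values are placeholders:

from gevent.pywsgi import WSGIServer
from geventwebsocket.handler import WebSocketHandler

app = create_app(handle, instance)
# WebSocketHandler upgrades the websocket /graphql route; a plain WSGI server would reject it.
server = WSGIServer(('127.0.0.1', 3000), app, handler_class=WebSocketHandler)
server.serve_forever()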
Example #7
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        },
        'execution': {
            'multiprocess': {}
        },
        'storage': {
            'filesystem': {}
        },
    }

    run_id = make_new_run_id()

    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       composite_pipeline_with_config_mapping,
                                       pipeline_run, instance)

    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
Example #8
def test_two_runs_running():
    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with get_temp_file_locations(2) as files:
        file_one, file_two = files  # pylint: disable=unbalanced-tuple-unpacking

        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_one
                            }
                        }
                    }
                },
            ))
        execution_manager.execute_pipeline(handle,
                                           infinite_loop_pipeline,
                                           pipeline_run_one,
                                           instance,
                                           raise_on_error=False)

        pipeline_run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': file_two
                            }
                        }
                    }
                },
            ))

        execution_manager.execute_pipeline(handle,
                                           infinite_loop_pipeline,
                                           pipeline_run_two,
                                           instance,
                                           raise_on_error=False)

        # ensure both runs have begun execution
        while not (os.path.exists(file_one) and os.path.exists(file_two)):
            time.sleep(0.1)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_one)

        assert not execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_two)

        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.is_process_running(run_id_two)
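
get_temp_file_locations is a test utility not shown here. A sketch of the contract this test depends on: it must yield paths that do not yet exist, since the polling loop above waits for the solids to create them (the implementation details are assumptions):

import contextlib
import os
import tempfile


@contextlib.contextmanager
def get_temp_file_locations(count):
    paths = []
    try:
        for _ in range(count):
            fd, path = tempfile.mkstemp()
            os.close(fd)
            os.unlink(path)  # hand back a fresh path, not an existing file
            paths.append(path)
        yield tuple(paths)
    finally:
        for path in paths:
            if os.path.exists(path):
                os.unlink(path)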
Example #9
def define_subprocess_context(instance):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_repository),
        instance=instance,
        execution_manager=SubprocessExecutionManager(instance),
    )
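
A hypothetical use of this factory in a test; the query string is illustrative, not taken from the suite:

instance = DagsterInstance.local_temp()
context = define_subprocess_context(instance)
result = execute_dagster_graphql(context, '{ pipelines { nodes { name } } }')
assert not result.errors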
Example #10
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(
            __file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(
        environments=[
            InProcessDagsterEnvironment(
                recon_repo,
                execution_manager=execution_manager,
            )
        ],
        instance=instance,
    )

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {}
                }
            },
            'selector': {
                'name': 'hammer_pipeline'
            },
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['startPipelineExecution']['run'][
        'runId']

    context.legacy_environment.execution_manager.join()

    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
Example #11
def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline, environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(composite_pipeline), pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline, environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(composite_pipeline), pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS
Example #12
def define_subprocess_context_for_file(python_file, fn_name, instance=None):
    # Resolve the default first so the context and its execution manager
    # share the same instance.
    instance = instance or DagsterInstance.ephemeral()
    return DagsterGraphQLInProcessRepositoryContext(
        handle=ExecutionTargetHandle.for_repo_python_file(python_file, fn_name),
        instance=instance,
        execution_manager=SubprocessExecutionManager(instance),
    )