def create_app(handle, instance):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.inst_param(instance, 'instance', DagsterInstance)

    app = Flask('dagster-ui')
    sockets = Sockets(app)
    app.app_protocol = lambda environ_path_info: 'graphql-ws'

    schema = create_schema()
    subscription_server = DagsterSubscriptionServer(schema=schema)

    execution_manager = MultiprocessingExecutionManager()

    print('Loading repository...')
    context = DagsterGraphQLContext(
        handle=handle, instance=instance, execution_manager=execution_manager, version=__version__
    )

    app.add_url_rule(
        '/graphql',
        'graphql',
        DagsterGraphQLView.as_view(
            'graphql',
            schema=schema,
            graphiql=True,
            # XXX(freiksenet): Pass proper ws url
            graphiql_template=PLAYGROUND_TEMPLATE,
            executor=Executor(),
            context=context,
        ),
    )
    sockets.add_url_rule(
        '/graphql', 'graphql', dagster_graphql_subscription_view(subscription_server, context)
    )

    app.add_url_rule(
        # should match the `build_local_download_url`
        '/download/<string:run_id>/<string:step_key>/<string:file_type>',
        'download_view',
        download_view(context),
    )

    # these routes are specifically for the Dagit UI and are not part of the graphql
    # API that we want other people to consume, so they're separate for now.
    # Also grabbing the magic global request args dict so that notebook_view is testable
    app.add_url_rule('/dagit/notebook', 'notebook', lambda: notebook_view(request.args))

    app.add_url_rule('/static/<path:path>/<string:file>', 'static_view', static_view)
    app.add_url_rule('/vendor/<path:path>/<string:file>', 'vendor_view', vendor_view)
    app.add_url_rule('/<path:_path>', 'index_catchall', index_view)
    app.add_url_rule('/', 'index', index_view, defaults={'_path': ''})

    CORS(app)

    return app

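# A minimal sketch of how an app produced by create_app(handle, instance) might be served.
# The Flask-Sockets route registered above ('/graphql' over 'graphql-ws') needs a
# websocket-capable WSGI server, and the usual pattern for flask_sockets is gevent's
# WSGIServer with the geventwebsocket handler. The handle construction, host, and port
# below are assumptions for illustration, not part of the function above.
from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler

handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
app = create_app(handle, DagsterInstance.local_temp())
server = pywsgi.WSGIServer(('127.0.0.1', 3000), app, handler_class=WebSocketHandler)
server.serve_forever()
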
def create_app(handle, pipeline_run_storage, use_synchronous_execution_manager=False):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.inst_param(pipeline_run_storage, 'pipeline_run_storage', PipelineRunStorage)
    check.bool_param(use_synchronous_execution_manager, 'use_synchronous_execution_manager')

    app = Flask('dagster-ui')
    sockets = Sockets(app)
    app.app_protocol = lambda environ_path_info: 'graphql-ws'

    schema = create_schema()
    subscription_server = DagsterSubscriptionServer(schema=schema)

    if use_synchronous_execution_manager:
        execution_manager = SynchronousExecutionManager()
    else:
        execution_manager = MultiprocessingExecutionManager()

    context = DagsterGraphQLContext(
        handle=handle,
        pipeline_runs=pipeline_run_storage,
        execution_manager=execution_manager,
        version=__version__,
    )

    app.add_url_rule(
        '/graphql',
        'graphql',
        DagsterGraphQLView.as_view(
            'graphql',
            schema=schema,
            graphiql=True,
            # XXX(freiksenet): Pass proper ws url
            graphiql_template=PLAYGROUND_TEMPLATE,
            executor=Executor(),
            context=context,
        ),
    )
    sockets.add_url_rule(
        '/graphql', 'graphql', dagster_graphql_subscription_view(subscription_server, context)
    )

    # these routes are specifically for the Dagit UI and are not part of the graphql
    # API that we want other people to consume, so they're separate for now.
    # Also grabbing the magic global request args dict so that notebook_view is testable
    app.add_url_rule('/dagit/notebook', 'notebook', lambda: notebook_view(request.args))

    app.add_url_rule('/static/<path:path>/<string:file>', 'static_view', static_view)
    app.add_url_rule('/<path:_path>', 'index_catchall', index_view)
    app.add_url_rule('/', 'index', index_view, defaults={'_path': ''})

    CORS(app)

    return app

def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

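# get_events_of_type is not shown in these snippets. A plausible implementation, assuming each
# structured log record exposes an is_dagster_event flag and a dagster_event whose event_type is
# a DagsterEventType (as the assertions above rely on), could simply filter the run's logs.
# Hypothetical helper, for illustration only:
def get_events_of_type(events, event_type):
    return [
        event
        for event in events
        if event.is_dagster_event and event.dagster_event.event_type == event_type
    ]
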
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=failing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()

def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        execution_plan=create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()

def create_app(repository_container, pipeline_runs, use_synchronous_execution_manager=False):
    app = Flask('dagster-ui')
    sockets = Sockets(app)
    app.app_protocol = lambda environ_path_info: 'graphql-ws'

    schema = create_schema()
    subscription_server = DagsterSubscriptionServer(schema=schema)

    if use_synchronous_execution_manager:
        execution_manager = SynchronousExecutionManager()
    else:
        execution_manager = MultiprocessingExecutionManager()

    context = DagsterGraphQLContext(
        repository_container=repository_container,
        pipeline_runs=pipeline_runs,
        execution_manager=execution_manager,
        version=__version__,
    )

    app.add_url_rule(
        '/graphql',
        'graphql',
        DagsterGraphQLView.as_view(
            'graphql',
            schema=schema,
            graphiql=True,
            # XXX(freiksenet): Pass proper ws url
            graphiql_template=PLAYGROUND_TEMPLATE,
            executor=Executor(),
            context=context,
        ),
    )
    sockets.add_url_rule(
        '/graphql', 'graphql', dagster_graphql_subscription_view(subscription_server, context)
    )

    # these routes are specifically for the Dagit UI and are not part of the graphql
    # API that we want other people to consume, so they're separate for now.
    app.add_url_rule('/dagit/notebook', 'notebook', notebook_view)
    app.add_url_rule('/static/<path:path>/<string:file>', 'static_view', static_view)
    app.add_url_rule('/<path:_path>', 'index_catchall', index_view)
    app.add_url_rule('/', 'index', index_view, defaults={'_path': ''})

    CORS(app)

    return app

def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, passing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1

def test_running():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_passing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

def test_execution_crash():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_crashy_pipeline',
            module_name=None,
        )
    )
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, crashy_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

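# crashy_pipeline is defined elsewhere in the test module. A hypothetical stand-in that would
# reproduce what this test asserts on (the spawned execution process dying without reporting a
# normal failure) could hard-exit inside a solid via os._exit. This sketch is illustrative only:
# it assumes decorator-based definitions are available in this version, and it omits the
# sum_solid that the environment_dict above evidently configures in the real pipeline.
import os

from dagster import pipeline, solid


@solid
def crashy_solid(_context):
    os._exit(1)  # kill the child process abruptly so the parent observes an unexpected exit


@pipeline
def crashy_pipeline():
    crashy_solid()
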
def test_failing():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_failing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()

def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, failing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)

def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'composite_pipeline')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=composite_pipeline.name,
        run_id=run_id,
        selector=ExecutionSelector('nonce'),
        env_config=environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=composite_pipeline.name,
        run_id=run_id,
        selector=ExecutionSelector('nonce'),
        env_config=environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

def test_execute_hammer_through_dagit():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        script_relative_path('../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = MultiprocessingExecutionManager()

    context = DagsterGraphQLContext(
        handle=handle, execution_manager=execution_manager, instance=instance
    )

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {'storage': {'filesystem': {}}, 'execution': {'dask': {}}},
            'selector': {'name': handle.build_pipeline_definition().name},
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run']['runId']

    context.execution_manager.join()

    subscription = execute_dagster_graphql(context, SUBSCRIPTION_QUERY, variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [x['__typename'] for x in subscribe_results[0].data['pipelineRunLogs']['messages']]

    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages