def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, passing_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1

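
# `get_events_of_type` is used above but not defined in this section. A minimal
# sketch, assuming `instance.all_logs(run_id)` yields event records exposing
# `is_dagster_event` and `dagster_event.event_type` (as dagster's EventRecord does):
def get_events_of_type(events, event_type):
    # keep only structured dagster events matching the requested DagsterEventType
    return [
        event
        for event in events
        if event.is_dagster_event and event.dagster_event.event_type == event_type
    ]
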
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()

    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )

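
# `crashy_pipeline` is referenced but not defined in this section. Judging from the
# config above it shares `sum_solid` with `passing_pipeline` and adds a solid that
# hard-kills its own process, so the parent observes an unexpected exit. A sketch
# only; the solid body and names are assumptions:
@solid
def crashy_solid(_, sum_df):
    os._exit(1)  # bypass normal interpreter shutdown to simulate a crash


@pipeline
def crashy_pipeline():
    crashy_solid(sum_solid())
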
def test_two_runs_running():
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')
    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={'solids': {'loop': {'config': {'file': file_one}}}},
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_one, instance
        )

        pipeline_run_two = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={'solids': {'loop': {'config': {'file': file_two}}}},
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_two, instance
        )

        # ensure both runs have begun execution
        while not os.path.exists(file_one) or not os.path.exists(file_two):
            time.sleep(0.1)

        assert execution_manager.is_process_running(pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_one.run_id)

        assert not execution_manager.is_process_running(pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_two.run_id)

        assert not execution_manager.is_process_running(pipeline_run_one.run_id)
        assert not execution_manager.is_process_running(pipeline_run_two.run_id)

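
# `infinite_loop_pipeline` is referenced but not defined in this section. A sketch
# consistent with how it is used above: the 'loop' solid touches the configured file
# to signal that the run has started, then blocks until the process is terminated.
# The exact config schema is an assumption; `solid`, `pipeline`, `Field`, and
# `String` are assumed to be imported from dagster.
@solid(config={'file': Field(String)})
def loop(context):
    with open(context.solid_config['file'], 'w') as ff:
        ff.write('touched')  # signal file polled by the test's readiness loop
    while True:
        time.sleep(0.1)  # spin until the execution manager terminates the process


@pipeline
def infinite_loop_pipeline():
    loop()
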
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance
    )
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline_with_config_mapping, environment_dict=environment_dict
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance
    )
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

def test_execute_hammer_through_dagit():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        file_relative_path(__file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(
        handle=handle, execution_manager=execution_manager, instance=instance
    )

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {'storage': {'filesystem': {}}, 'execution': {'dask': {}}},
            'selector': {'name': handle.build_pipeline_definition().name},
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    run_id = start_pipeline_result.data['startPipelineExecution']['run']['runId']

    context.execution_manager.join()

    subscription = execute_dagster_graphql(context, SUBSCRIPTION_QUERY, variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [x['__typename'] for x in subscribe_results[0].data['pipelineRunLogs']['messages']]

    assert 'PipelineProcessStartEvent' in messages
    assert 'PipelineProcessStartedEvent' in messages
    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
    assert 'PipelineProcessExitedEvent' in messages

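
# `SUBSCRIPTION_QUERY` is not shown in this section. A minimal form consistent with
# how the results are read above (`data['pipelineRunLogs']['messages'][...]['__typename']`);
# the operation name and success fragment are assumptions:
SUBSCRIPTION_QUERY = '''
subscription subscribeTest($runId: ID!) {
    pipelineRunLogs(runId: $runId) {
        __typename
        ... on PipelineRunLogsSubscriptionSuccess {
            messages {
                __typename
            }
        }
    }
}
'''
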
def create_app(handle, instance, reloader=None):
    check.inst_param(handle, 'handle', ExecutionTargetHandle)
    check.inst_param(instance, 'instance', DagsterInstance)
    check.opt_inst_param(reloader, 'reloader', Reloader)

    app = Flask('dagster-ui')
    sockets = Sockets(app)
    app.app_protocol = lambda environ_path_info: 'graphql-ws'

    schema = create_schema()
    subscription_server = DagsterSubscriptionServer(schema=schema)

    execution_manager_settings = instance.dagit_settings.get('execution_manager')
    if execution_manager_settings and execution_manager_settings.get('max_concurrent_runs'):
        execution_manager = QueueingSubprocessExecutionManager(
            instance, execution_manager_settings.get('max_concurrent_runs')
        )
    else:
        execution_manager = SubprocessExecutionManager(instance)

    warn_if_compute_logs_disabled()

    print('Loading repository...')
    context = DagsterGraphQLContext(
        handle=handle,
        instance=instance,
        execution_manager=execution_manager,
        reloader=reloader,
        version=__version__,
    )

    # Automatically initialize scheduler every time Dagit loads
    scheduler_handle = context.scheduler_handle
    scheduler = instance.scheduler

    if scheduler_handle:
        if scheduler:
            handle = context.get_handle()
            python_path = sys.executable
            repository_path = handle.data.repository_yaml
            repository = context.get_repository()
            scheduler_handle.up(
                python_path, repository_path, repository=repository, instance=instance
            )
        else:
            warnings.warn(MISSING_SCHEDULER_WARNING)

    app.add_url_rule(
        '/graphql',
        'graphql',
        DagsterGraphQLView.as_view(
            'graphql',
            schema=schema,
            graphiql=True,
            # XXX(freiksenet): Pass proper ws url
            graphiql_template=PLAYGROUND_TEMPLATE,
            executor=Executor(),
            context=context,
        ),
    )
    sockets.add_url_rule(
        '/graphql', 'graphql', dagster_graphql_subscription_view(subscription_server, context)
    )

    app.add_url_rule(
        # should match the `build_local_download_url`
        '/download/<string:run_id>/<string:step_key>/<string:file_type>',
        'download_view',
        download_view(context),
    )

    # these routes are specifically for the Dagit UI and are not part of the graphql
    # API that we want other people to consume, so they're separate for now.
    # Also grabbing the magic global request args dict so that notebook_view is testable
    app.add_url_rule('/dagit/notebook', 'notebook', lambda: notebook_view(request.args))

    app.add_url_rule('/static/<path:path>/<string:file>', 'static_view', static_view)
    app.add_url_rule('/vendor/<path:path>/<string:file>', 'vendor_view', vendor_view)
    app.add_url_rule('/<string:worker_name>.worker.js', 'worker_view', worker_view)
    app.add_url_rule('/dagit_info', 'sanity_view', info_view)
    app.add_url_rule('/<path:_path>', 'index_catchall', index_view)
    app.add_url_rule('/', 'index', index_view, defaults={'_path': ''})

    CORS(app)

    return app

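
# A usage sketch for `create_app` (not from this section): the /graphql sockets route
# registered above needs a websocket-capable WSGI server such as gevent's, which is
# how dagit serves the app. Host and port here are placeholders.
from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler


def serve(handle, instance, host='127.0.0.1', port=3000):
    app = create_app(handle, instance)
    server = pywsgi.WSGIServer((host, port), app, handler_class=WebSocketHandler)
    print('Serving on http://{host}:{port}'.format(host=host, port=port))
    server.serve_forever()
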
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance
    )
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }
    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance
    )
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

def test_two_runs_running():
    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')
    with get_temp_file_locations(2) as files:
        file_one, file_two = files  # pylint: disable=unbalanced-tuple-unpacking

        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={'solids': {'loop': {'config': {'file': file_one}}}},
            )
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_one, instance, raise_on_error=False
        )

        pipeline_run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={'solids': {'loop': {'config': {'file': file_two}}}},
            )
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_two, instance, raise_on_error=False
        )

        # ensure both runs have begun execution
        while not os.path.exists(file_one) or not os.path.exists(file_two):
            time.sleep(0.1)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_one)

        assert not execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_two)

        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.is_process_running(run_id_two)

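
# `get_temp_file_locations` is used above but not defined in this section. A sketch,
# assuming it is a context manager yielding N paths that do not yet exist on disk
# (so the test can poll for their creation) and cleaning them up afterwards:
import contextlib
import tempfile


@contextlib.contextmanager
def get_temp_file_locations(num):
    paths = []
    try:
        for _ in range(num):
            fd, path = tempfile.mkstemp()
            os.close(fd)
            os.unlink(path)  # hand back a fresh path, not an existing file
            paths.append(path)
        yield paths
    finally:
        for path in paths:
            if os.path.exists(path):
                os.unlink(path)
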
def define_subprocess_context(instance):
    return DagsterGraphQLContext(
        handle=ExecutionTargetHandle.for_repo_fn(define_repository),
        instance=instance,
        execution_manager=SubprocessExecutionManager(instance),
    )

def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(__file__, '../../../../examples/dagster_examples/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    execution_manager = SubprocessExecutionManager(instance)

    context = DagsterGraphQLContext(
        environments=[
            InProcessDagsterEnvironment(recon_repo, execution_manager=execution_manager)
        ],
        instance=instance,
    )

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'environmentConfigData': {'storage': {'filesystem': {}}, 'execution': {'dask': {}}},
            'selector': {'name': 'hammer_pipeline'},
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=START_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['startPipelineExecution']['run']['runId']

    context.legacy_environment.execution_manager.join()

    subscription = execute_dagster_graphql(context, SUBSCRIPTION_QUERY, variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [x['__typename'] for x in subscribe_results[0].data['pipelineRunLogs']['messages']]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages

def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline, environment_dict=environment_dict
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(composite_pipeline), pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=composite_pipeline, environment_dict=environment_dict
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(reconstructable(composite_pipeline), pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

def define_subprocess_context_for_file(python_file, fn_name, instance=None):
    # bind the fallback instance once so the context and the execution manager
    # share it (previously the manager could receive None while the context got
    # a fresh ephemeral instance)
    instance = instance or DagsterInstance.ephemeral()
    return DagsterGraphQLInProcessRepositoryContext(
        handle=ExecutionTargetHandle.for_repo_python_file(python_file, fn_name),
        instance=instance,
        execution_manager=SubprocessExecutionManager(instance),
    )