def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
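# The tests in this file filter the event log by DagsterEventType. A minimal
# sketch of the get_events_of_type helper they assume (the real helper may
# live in a shared test-utils module rather than here):
def get_events_of_type(events, event_type):
    # Keep only structured Dagster events whose type matches; raw log
    # records without a dagster_event attached are skipped.
    return [
        event
        for event in events
        if event.is_dagster_event and event.dagster_event.event_type == event_type
    ]
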
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    run_storage = InMemoryRunStorage()
    pipeline_run = run_storage.create_run(
        run_storage=run_storage,
        pipeline_name=failing_pipeline.name,
        run_id=run_id,
        selector=selector,
        env_config=env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
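# Hedged sketch of what 'failing_pipeline' is presumed to look like (the real
# definition lives elsewhere in this file; the decorator names and kwargs are
# assumptions about the dagster version in use). The point: the solid raises
# inside its compute function, so the child process reports a clean FAILURE
# run rather than dying outright.
from dagster import InputDefinition, PipelineDefinition, solid


@solid(input_defs=[InputDefinition('num')])
def sum_solid_fails_sketch(_context, num):
    raise Exception('simulated compute failure')


def define_failing_pipeline_sketch():
    return PipelineDefinition(name='failing_sketch', solid_defs=[sum_solid_fails_sketch])
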
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
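# Hedged sketch of how define_crashy_pipeline (defined elsewhere in this file)
# is presumed to simulate a crash: os._exit skips Python exception handling
# and interpreter cleanup entirely, so the parent process sees the child
# vanish without any failure event, which is exactly the 'unexpectedly
# exited' message asserted above.
import os


@solid(input_defs=[InputDefinition('num')])
def crashy_solid_sketch(_context, num):
    os._exit(1)  # hard-kill the worker mid-step; nothing after this runs
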
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        execution_plan=create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, raise_on_error=False
    )
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }
    run_id = make_new_run_id()
    pipeline_run = InMemoryPipelineRun(
        run_id,
        ExecutionSelector('nonce'),
        environment_dict,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'passing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, passing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
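# The instance-based tests in this file all repeat the same execute/join/
# assert dance. A sketch of a helper that could factor it out (the helper
# name is hypothetical, not part of the dagster API):
def run_to_completion(handle, pipeline, pipeline_run, instance):
    execution_manager = MultiprocessingExecutionManager()
    # execute_pipeline returns immediately; the run proceeds in a child
    # process, and join() blocks until every managed process has exited.
    execution_manager.execute_pipeline(
        handle, pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()
    return instance.get_run(pipeline_run.run_id)
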
def test_running():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_passing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
def test_execution_crash():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_crashy_pipeline',
            module_name=None,
        )
    )
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE

    last_log = pipeline_run.all_logs()[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, crashy_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE

    last_log = instance.all_logs(run_id)[-1]
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def test_failing():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_failing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'failing_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    instance = DagsterInstance.ephemeral()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, failing_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()

    assert instance.get_run(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)
def test_multiprocessing_execution_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        }
    }
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'composite_pipeline_with_config_mapping'
    )
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline_with_config_mapping.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline_with_config_mapping, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {'foo': 'baz', 'bar': 3}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }
    run_id = make_new_run_id()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        handle, composite_pipeline, pipeline_run, instance, raise_on_error=False
    )
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
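# Why the second case above opts into executor and storage config: the
# multiprocess executor hands intermediates between solid processes through
# persistent storage, so purely in-memory intermediates are not an option.
# The minimal opt-in fragment, lifted from the environment_dict above (the
# constant name here is just for illustration):
MULTIPROCESS_ENV_FRAGMENT = {
    'execution': {'multiprocess': {}},
    'storage': {'filesystem': {}},
}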