def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception("Uh oh")

    @solid(required_resource_keys={"failing_resource"})
    def failing_resource_solid(_context):
        pass

    the_pipeline = PipelineDefinition(
        name="test_resource_init_failure",
        solid_defs=[failing_resource_solid],
        mode_defs=[ModeDefinition(resource_defs={"failing_resource": failing_resource})],
    )

    res = execute_pipeline(the_pipeline, raise_on_error=False)

    event_types = [event.event_type_value for event in res.event_list]

    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types

    instance = DagsterInstance.ephemeral()

    execution_plan = create_execution_plan(the_pipeline)

    pipeline_run = instance.create_run_for_pipeline(the_pipeline, execution_plan=execution_plan)

    step_events = execute_plan(
        execution_plan, InMemoryPipeline(the_pipeline), pipeline_run=pipeline_run, instance=instance
    )

    event_types = [event.event_type_value for event in step_events]

    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types

    # Test the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(the_pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    event_types = [event.event_type_value for event in events]
    assert DagsterEventType.PIPELINE_INIT_FAILURE.value in event_types

def test_terminate_pipeline_on_celery(dagster_celery_worker, instance, tempdir):
    pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

    run_config = {
        "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
        "execution": {"celery": {}},
    }

    results = []
    result_types = []
    interrupt_thread = None

    for result in execute_pipeline_iterator(
        pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
    ):
        # Interrupt once the first step starts
        if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
            interrupt_thread = Thread(target=send_interrupt, args=())
            interrupt_thread.start()

        results.append(result)
        result_types.append(result.event_type)

    interrupt_thread.join()

    # At least one step succeeded (the one that was running when the interrupt fired)
    assert DagsterEventType.STEP_SUCCESS in result_types

    # At least one step was revoked (and there were no step failure events)
    revoke_steps = [
        result
        for result in results
        if result.event_type == DagsterEventType.ENGINE_EVENT and "was revoked." in result.message
    ]

    assert len(revoke_steps) > 0

    # The overall pipeline failed
    assert DagsterEventType.PIPELINE_FAILURE in result_types

def run(self):
    """Runs this engine."""
    self._state = SpineEngineState.RUNNING
    run_config = {
        "loggers": {"console": {"config": {"log_level": "CRITICAL"}}},
        "execution": {"multithread": {}},
    }
    for event in execute_pipeline_iterator(self._pipeline, run_config=run_config):
        self._process_event(event)
    if self._state == SpineEngineState.RUNNING:
        self._state = SpineEngineState.COMPLETED
    self._queue.put(("dag_exec_finished", str(self._state)))

def test_bad_broker():
    event_stream = execute_pipeline_iterator(
        ExecutionTargetHandle.for_pipeline_python_file(
            __file__, 'test_diamond_pipeline'
        ).build_pipeline_definition(),
        environment_dict={
            'storage': {'filesystem': {}},
            'execution': {
                'celery': {'config': {'config_source': {'broker_url': '*****@*****.**'}}}
            },
        },
        instance=DagsterInstance.local_temp(),
    )

    # ensure an engine event with an error is yielded if we can't connect to the broker
    saw_engine_error = False
    try:
        for event in event_stream:
            if event.is_engine_event:
                saw_engine_error = bool(event.engine_event_data.error)
    except Exception:  # pylint: disable=broad-except
        pass

    assert saw_engine_error

def test_resource_init_failure():
    @resource
    def failing_resource(_init_context):
        raise Exception('Uh oh')

    @solid(required_resource_keys={'failing_resource'})
    def failing_resource_solid(_context):
        pass

    pipeline = PipelineDefinition(
        name='test_resource_init_failure',
        solid_defs=[failing_resource_solid],
        mode_defs=[ModeDefinition(resource_defs={'failing_resource': failing_resource})],
    )

    res = execute_pipeline(pipeline, raise_on_error=False)
    assert res.event_list[0].event_type_value == 'PIPELINE_INIT_FAILURE'

    execution_plan = create_execution_plan(pipeline)
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, run_id)

    step_events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )
    assert step_events[0].event_type_value == 'PIPELINE_INIT_FAILURE'

    # Test the pipeline init failure event fires even if we are raising errors
    events = []
    try:
        for event in execute_pipeline_iterator(pipeline):
            events.append(event)
    except DagsterResourceFunctionError:
        pass

    assert len(events) == 1
    assert events[0].event_type_value == 'PIPELINE_INIT_FAILURE'

def test_bad_broker():
    with pytest.raises(check.CheckError) as exc_info:
        event_stream = execute_pipeline_iterator(
            ExecutionTargetHandle.for_pipeline_python_file(
                __file__, 'test_diamond_pipeline'
            ).build_pipeline_definition(),
            environment_dict={
                'storage': {'filesystem': {}},
                'execution': {'celery': {'config': {'broker': 'notlocal.bad'}}},
            },
            instance=DagsterInstance.local_temp(),
        )
        list(event_stream)

    assert 'Must use S3 or GCS storage with non-local Celery' in str(exc_info.value)

def test_single_proc_interrupt():
    @pipeline
    def write_a_file_pipeline():
        write_a_file()

    with safe_tempfile_path() as success_tempfile:

        # launch a thread that waits until the file is written to launch an interrupt
        Thread(target=_send_kbd_int, args=([success_tempfile],)).start()

        result_types = []
        result_messages = []

        # next time the launched thread wakes up it will send a keyboard
        # interrupt
        for result in execute_pipeline_iterator(
            write_a_file_pipeline,
            run_config={"solids": {"write_a_file": {"config": {"tempfile": success_tempfile}}}},
        ):
            result_types.append(result.event_type)
            result_messages.append(result.message)

        assert DagsterEventType.STEP_FAILURE in result_types
        assert DagsterEventType.PIPELINE_FAILURE in result_types

        assert any(
            [
                "Execution was interrupted unexpectedly. "
                "No user initiated termination request was found, treating as failure."
                in message
                for message in result_messages
            ]
        )

def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        file_1 = os.path.join(tempdir, 'file_1')
        file_2 = os.path.join(tempdir, 'file_2')
        file_3 = os.path.join(tempdir, 'file_3')
        file_4 = os.path.join(tempdir, 'file_4')

        # launch a thread that waits until the file is written to launch an interrupt
        Thread(target=_send_kbd_int, args=([file_1, file_2, file_3, file_4],)).start()

        results = []
        try:
            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                ExecutionTargetHandle.for_pipeline_python_file(
                    __file__, 'write_files_pipeline'
                ).build_pipeline_definition(),
                environment_dict={
                    'solids': {
                        'write_1': {'config': {'tempfile': file_1}},
                        'write_2': {'config': {'tempfile': file_2}},
                        'write_3': {'config': {'tempfile': file_3}},
                        'write_4': {'config': {'tempfile': file_4}},
                    },
                    'execution': {'multiprocess': {'config': {'max_concurrent': 4}}},
                    'storage': {'filesystem': {}},
                },
                instance=DagsterInstance.local_temp(tempdir=tempdir),
            ):
                results.append(result)
            assert False  # should never reach
        except (DagsterSubprocessError, KeyboardInterrupt):
            pass

        assert [result.event_type for result in results].count(DagsterEventType.STEP_FAILURE) == 4
        assert DagsterEventType.PIPELINE_FAILURE in [result.event_type for result in results]

def test_interrupt_multiproc():
    with tempfile.TemporaryDirectory() as tempdir:
        with instance_for_test(temp_dir=tempdir) as instance:
            file_1 = os.path.join(tempdir, "file_1")
            file_2 = os.path.join(tempdir, "file_2")
            file_3 = os.path.join(tempdir, "file_3")
            file_4 = os.path.join(tempdir, "file_4")

            # launch a thread that waits until the file is written to launch an interrupt
            Thread(target=_send_kbd_int, args=([file_1, file_2, file_3, file_4],)).start()

            results = []

            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                reconstructable(write_files_pipeline),
                run_config={
                    "solids": {
                        "write_1": {"config": {"tempfile": file_1}},
                        "write_2": {"config": {"tempfile": file_2}},
                        "write_3": {"config": {"tempfile": file_3}},
                        "write_4": {"config": {"tempfile": file_4}},
                    },
                    "execution": {"multiprocess": {"config": {"max_concurrent": 4}}},
                    "intermediate_storage": {"filesystem": {}},
                },
                instance=instance,
            ):
                results.append(result)

            assert [result.event_type for result in results].count(
                DagsterEventType.STEP_FAILURE
            ) == 4
            assert DagsterEventType.PIPELINE_FAILURE in [result.event_type for result in results]

def test_pipeline_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').result_value() == 2

    env_config = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    # execute only the 'add_one' subset, feeding its 'num' input from config
    subset_result = execute_pipeline(
        pipeline_def.build_sub_pipeline(['add_one']), environment_dict=env_config
    )

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    assert subset_result.result_for_solid('add_one').result_value() == 4

    events = execute_pipeline_iterator(
        pipeline_def.build_sub_pipeline(['add_one']), environment_dict=env_config
    )

    for step_event in step_output_event_filter(events):
        assert step_event.is_step_success

def test_pipeline_subset():
    @lambda_solid
    def return_one():
        return 1

    @lambda_solid(inputs=[InputDefinition('num')])
    def add_one(num):
        return num + 1

    pipeline_def = PipelineDefinition(
        solids=[return_one, add_one],
        dependencies={'add_one': {'num': DependencyDefinition('return_one')}},
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('add_one').transformed_value() == 2

    env_config = {'solids': {'add_one': {'inputs': {'num': {'value': 3}}}}}

    subset_result = execute_pipeline(
        pipeline_def, environment_dict=env_config, solid_subset=['add_one']
    )

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    assert subset_result.result_for_solid('add_one').transformed_value() == 4

    step_events = execute_pipeline_iterator(
        pipeline_def, environment_dict=env_config, solid_subset=['add_one']
    )

    for step_event in step_events:
        assert step_event.is_step_success

def test_terminate_pipeline_on_celery(rabbitmq):
    with start_celery_worker():
        with tempfile.TemporaryDirectory() as tempdir:
            pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

            with instance_for_test_tempdir(tempdir) as instance:
                run_config = {
                    "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                    "execution": {"celery": {}},
                }

                results = []
                result_types = []
                interrupt_thread = None
                received_interrupt = False
                try:
                    for result in execute_pipeline_iterator(
                        pipeline=pipeline_def,
                        run_config=run_config,
                        instance=instance,
                    ):
                        # Interrupt once the first step starts
                        if (
                            result.event_type == DagsterEventType.STEP_START
                            and not interrupt_thread
                        ):
                            interrupt_thread = Thread(target=send_interrupt, args=())
                            interrupt_thread.start()

                        results.append(result)
                        result_types.append(result.event_type)

                    assert False
                except DagsterExecutionInterruptedError:
                    received_interrupt = True

                interrupt_thread.join()

                assert received_interrupt

                # At least one step succeeded (the one that was running when the interrupt fired)
                assert DagsterEventType.STEP_SUCCESS in result_types

                # At least one step was revoked (and there were no step failure events)
                revoke_steps = [
                    result
                    for result in results
                    if result.event_type == DagsterEventType.ENGINE_EVENT
                    and "was revoked." in result.message
                ]

                assert len(revoke_steps) > 0

                # The overall pipeline failed
                assert DagsterEventType.PIPELINE_FAILURE in result_types

def test_pipeline_execution_graph_diamond():
    pipeline = PipelineDefinition(solids=create_diamond_solids(), dependencies=diamond_deps())
    return _do_test(pipeline, lambda: execute_pipeline_iterator(pipeline))

def execute_pipeline_through_queue(
    handle,
    pipeline_name,
    solid_subset,
    environment_dict,
    mode,
    run_id,
    message_queue,
    reexecution_config,
    step_keys_to_execute,
):
    """
    Execute pipeline using message queue as a transport
    """
    check.opt_str_param(mode, 'mode')

    message_queue.put(ProcessStartedSentinel(os.getpid()))

    run_config = RunConfig(
        run_id,
        mode=mode,
        event_callback=message_queue.put,
        reexecution_config=reexecution_config,
        step_keys_to_execute=step_keys_to_execute,
    )

    if 'execution' not in environment_dict or not environment_dict['execution']:
        environment_dict['execution'] = {'in_process': {'config': {'raise_on_error': False}}}

    try:
        handle.build_repository_definition()
        pipeline_def = handle.with_pipeline_name(pipeline_name).build_pipeline_definition()
    except Exception:  # pylint: disable=broad-except
        repo_error = sys.exc_info()
        message_queue.put(MultiprocessingError(serializable_error_info_from_exc_info(repo_error)))
        return

    try:
        event_list = []
        for event in execute_pipeline_iterator(
            pipeline_def.build_sub_pipeline(solid_subset), environment_dict, run_config=run_config
        ):
            # message_queue.put(event)
            event_list.append(event)
        return PipelineExecutionResult(pipeline_def, run_config.run_id, event_list, lambda: None)
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        message_queue.put(MultiprocessingError(error_info))
    finally:
        message_queue.put(MultiprocessingDone())
        message_queue.close()