def test_server_down():
    """Check that a run cannot be terminated while its gRPC tag points at a dead port.

    Launches ``sleepy_pipeline`` against an ephemeral gRPC server, overwrites the
    run's ``GRPC_INFO_TAG`` with a free (unused) port and asserts that the
    launcher reports the run as non-terminable. The original tag is then
    restored and the run is terminated.
    """
    with grpc_instance() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        recon_repo = ReconstructableRepository.from_legacy_repository_yaml(repo_yaml)
        server_process = GrpcServerProcess(
            loadable_target_origin=recon_repo.get_origin().loadable_target_origin,
            max_workers=4,
            force_port=True,
        )

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)
            repository = repository_location.get_repository("nope")
            external_pipeline = repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = pipeline_run.run_id

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)
            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)

            # Remember the real connection info so it can be restored below.
            original_run_tags = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            unreachable_endpoint = merge_dicts(
                {"host": "localhost"}, {"port": find_free_port()}
            )
            instance.add_run_tags(
                run_id, {GRPC_INFO_TAG: seven.json.dumps(unreachable_endpoint)}
            )
            assert not launcher.can_terminate(run_id)

            # Put the original tag back; termination must succeed again.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: original_run_tags})
            assert launcher.terminate(run_id)

        # Runs once the ephemeral client context has been exited.
        server_process.wait()
def test_server_down():
    """Check that a run cannot be terminated while its gRPC tag points at a dead port.

    Starts a gRPC server for the ``nope`` attribute of
    ``test_default_run_launcher.py``, launches ``sleepy_pipeline`` through the
    instance, then swaps the run's ``GRPC_INFO_TAG`` for a free (unused) port and
    asserts the launcher refuses to terminate. After restoring the original tag
    the run is terminated.
    """
    with instance_for_test() as instance:
        target_file = file_relative_path(__file__, "test_default_run_launcher.py")
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=target_file,
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin,
            max_workers=4,
            force_port=True,
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(
                    location_origin
                )
            )
            repository = repository_location.get_repository("nope")
            external_pipeline = repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)

            # Remember the real connection info so it can be restored below.
            original_run_tags = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            unreachable_endpoint = merge_dicts(
                {"host": "localhost"}, {"port": find_free_port()}
            )
            instance.add_run_tags(
                run_id, {GRPC_INFO_TAG: seven.json.dumps(unreachable_endpoint)}
            )
            assert not launcher.can_terminate(run_id)

            # Put the original tag back; termination must succeed again.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: original_run_tags})
            assert launcher.terminate(run_id)

        # Runs once the ephemeral client context has been exited.
        server_process.wait()
def test_terminated_run():
    """Terminate a running ``sleepy_pipeline`` run and verify the full event log.

    The run is launched through ``instance.run_launcher`` and terminated once
    its first step has started. A second ``terminate`` call must return a falsy
    value, the run must end in ``FAILURE``, and both the sequence of event types
    and the launcher termination messages must match exactly.
    """
    with instance_for_test() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        external_pipeline = get_full_external_pipeline(
            repo_yaml, pipeline_run.pipeline_name
        )
        launcher = instance.run_launcher
        launcher.launch_run(instance, pipeline_run, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert launcher.can_terminate(run_id)
        assert launcher.terminate(run_id)

        # Returns false if the run has already been terminated.
        assert not launcher.terminate(run_id)

        # The poll result is not bound: the run is re-read from the instance on
        # the next line, so keeping the return value was a dead store.
        poll_for_finished_run(instance, run_id, timeout=2)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

        all_logs = instance.all_logs(run_id)

        events = [event.dagster_event.event_type_value for event in all_logs]
        assert events == [
            "ENGINE_EVENT",
            "ENGINE_EVENT",
            "PIPELINE_START",
            "ENGINE_EVENT",
            "STEP_START",
            "ENGINE_EVENT",
            "STEP_FAILURE",
            "PIPELINE_FAILURE",
            "ENGINE_EVENT",
            "ENGINE_EVENT",
            "ENGINE_EVENT",
        ]

        expected_termination_events = [
            "[CliApiRunLauncher] Received pipeline termination request.",
            "[CliApiRunLauncher] Pipeline was terminated successfully.",
            "[DefaultRunLauncher] Pipeline was not terminated since CliApiRunLauncher and GrpcRunLauncher could not find in-progress run.",
        ]
        # Keep only the logged messages that are termination messages, in log
        # order, so both content and ordering are compared.
        actual_termination_events = [
            event.dagster_event.message
            for event in all_logs
            if event.dagster_event.message in expected_termination_events
        ]
        assert expected_termination_events == actual_termination_events
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    """Terminate a running ``sleepy_pipeline`` run and expect it to end as ``CANCELED``.

    Args:
        get_external_pipeline: Callable taking a pipeline name and returning a
            context manager that yields the external pipeline to launch.
        run_config: Run config for the run; ``_is_multiprocess(run_config)``
            selects which sequence of log events is expected.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # The poll result is not bound: the run is re-read from the instance
            # on the next line, so keeping the return value was a dead store.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                expected_events = [
                    ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: received termination signal - forwarding to active child process",
                    ),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: interrupted all active child processes",
                    ),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                    (
                        "PIPELINE_CANCELED",
                        'Execution of pipeline "sleepy_pipeline" canceled.',
                    ),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]
            else:
                expected_events = [
                    ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                    (
                        "PIPELINE_CANCELED",
                        'Execution of pipeline "sleepy_pipeline" canceled.',
                    ),
                    ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]

            _check_event_log_contains(run_logs, expected_events)
def test_terminated_run(get_external_pipeline, in_process):  # pylint: disable=redefined-outer-name
    """Terminate a running ``sleepy_pipeline`` run and compare the exact event types.

    Args:
        get_external_pipeline: Callable taking a pipeline name and returning a
            context manager that yields the external pipeline to launch.
        in_process: Selects which of the two expected event-type sequences the
            run's log is compared against.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # The poll result is not bound: the run is re-read from the instance
            # on the next line, so keeping the return value was a dead store.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)
            event_types = [event.dagster_event.event_type_value for event in run_logs]

            if in_process:
                expected_event_types = [
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                ]
            else:
                expected_event_types = [
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                ]

            assert event_types == expected_event_types
def test_terminate_after_shutdown():
    """Check that a running run stays terminable after its gRPC server is told to shut down.

    Launches ``sleepy_pipeline`` from a workspace, asks the location's gRPC
    server to shut down, asserts that launching a second run (``math_diamond``)
    raises ``DagsterLaunchFailedError``, and finally asserts that the first run
    can still be terminated.
    """
    with instance_for_test() as instance:
        target = PythonFileTarget(
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
            attribute="nope",
            working_directory=None,
            location_name="test",
        )
        with WorkspaceProcessContext(instance, target) as workspace_process_context:
            workspace = workspace_process_context.create_request_context()

            sleepy_external = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("sleepy_pipeline")
            )
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline,
                run_config=None,
                external_pipeline_origin=sleepy_external.get_external_origin(),
                pipeline_code_origin=sleepy_external.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, workspace)
            poll_for_step_start(instance, run_id)

            repository_location = workspace.get_repository_location("test")

            # Tell the server to shut down once executions finish
            endpoint = repository_location.grpc_server_registry.get_grpc_endpoint(
                repository_location.origin
            )
            endpoint.create_client().shutdown_server()

            # A second launch against the same location is expected to fail.
            diamond_external = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("math_diamond")
            )
            doomed_to_fail_pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=math_diamond,
                run_config=None,
                external_pipeline_origin=diamond_external.get_external_origin(),
                pipeline_code_origin=diamond_external.get_python_origin(),
            )
            with pytest.raises(DagsterLaunchFailedError):
                instance.launch_run(doomed_to_fail_pipeline_run.run_id, workspace)

            launcher = instance.run_launcher

            # Can terminate the run even after the shutdown event has been received
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)
def test_terminate_launched_docker_run():
    """Terminate a ``hanging_pipeline`` run launched via ``DockerRunLauncher``.

    Configures the instance's run launcher as ``dagster_docker.DockerRunLauncher``,
    launches the run, terminates it after the first step has started, and checks
    that the run ends as ``CANCELED`` with the expected cancellation events in
    its log.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )
        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(
                "hanging_pipeline", container_image=docker_image
            ),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        # The poll result is not bound: the run is re-read from the instance on
        # the next line, so keeping the return value was a dead store.
        poll_for_finished_run(instance, run_id, timeout=30)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )
def test_cleanup_after_force_terminate(run_config):
    """Check that resources are cleaned up after a run is force-marked as canceled.

    The run is reported as canceling and then canceled on the instance before
    the gRPC server is asked to cancel execution. The test then polls the event
    log (for up to 30 seconds) until the cleanup message appears and asserts
    that the run is still ``CANCELED``.

    Args:
        run_config: Run config passed to ``create_run_for_pipeline``.

    Raises:
        Exception: If the cleanup message is not logged within 30 seconds.
    """
    cleanup_message = (
        "Computational resources were cleaned up after the run was forcibly marked as canceled."
    )

    with instance_for_test() as instance:
        with get_managed_grpc_server_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("sleepy_pipeline")
            )
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline,
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            instance.launch_run(pipeline_run.run_id, workspace)
            poll_for_step_start(instance, run_id)

            # simulate the sequence of events that happen during force-termination:
            # run moves immediately into canceled status while termination happens
            instance.report_run_canceling(pipeline_run)
            instance.report_run_canceled(pipeline_run)

            # The run's tags hold the connection info of the server executing it.
            reloaded_run = instance.get_run_by_id(run_id)
            grpc_info = json.loads(reloaded_run.tags.get(GRPC_INFO_TAG))
            client = DagsterGrpcClient(
                port=grpc_info.get("port"),
                socket=grpc_info.get("socket"),
                host=grpc_info.get("host"),
            )
            client.cancel_execution(CancelExecutionRequest(run_id=run_id))

            # Wait for the run worker to clean up
            start_time = time.time()
            while True:
                if time.time() - start_time > 30:
                    raise Exception("Timed out waiting for cleanup message")

                logs = instance.all_logs(run_id)
                # Generator instead of a list: stops at the first matching
                # event and avoids building a throwaway list on every poll.
                if any(cleanup_message in str(event) for event in logs):
                    break

                time.sleep(1)

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.CANCELED
def test_terminate_after_shutdown():
    """Check that a running run stays terminable after ``cleanup_server`` is called.

    Launches ``sleepy_pipeline`` on a managed gRPC location, calls
    ``cleanup_server`` on the location handle's client, asserts that launching
    ``math_diamond`` afterwards raises ``DagsterLaunchFailedError``, and finally
    asserts that the first run can still be terminated.
    """
    with instance_for_test() as instance:
        location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable,
                attribute="nope",
                python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
            ),
            location_name="nope",
        )

        with RepositoryLocationHandle.create_from_repository_location_origin(
            location_origin
        ) as repository_location_handle:
            repository_location = GrpcServerRepositoryLocation(repository_location_handle)

            sleepy_external = repository_location.get_repository(
                "nope"
            ).get_full_external_pipeline("sleepy_pipeline")
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, sleepy_external)
            poll_for_step_start(instance, run_id)

            # Tell the server to shut down once executions finish
            repository_location_handle.client.cleanup_server()

            # Trying to start another run fails
            doomed_to_fail_external_pipeline = repository_location.get_repository(
                "nope"
            ).get_full_external_pipeline("math_diamond")
            doomed_to_fail_pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=math_diamond, run_config=None
            )
            with pytest.raises(DagsterLaunchFailedError):
                instance.launch_run(
                    doomed_to_fail_pipeline_run.run_id, doomed_to_fail_external_pipeline
                )

            launcher = instance.run_launcher

            # Can terminate the run even after the shutdown event has been received
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)
def test_terminated_run():
    """Terminate a running ``sleepy_pipeline`` run and compare the exact event types.

    The run is launched through ``instance.run_launcher`` and terminated once
    its first step has started. A second ``terminate`` call must return a falsy
    value, the run must end in ``FAILURE``, and the logged event types must
    match the expected sequence exactly.
    """
    with instance_for_test() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        external_pipeline = get_full_external_pipeline(
            repo_yaml, pipeline_run.pipeline_name
        )
        launcher = instance.run_launcher
        launcher.launch_run(instance, pipeline_run, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert launcher.can_terminate(run_id)
        assert launcher.terminate(run_id)

        # Returns false if the run has already been terminated.
        assert not launcher.terminate(run_id)

        # The poll result is not bound: the run is re-read from the instance on
        # the next line, so keeping the return value was a dead store.
        poll_for_finished_run(instance, run_id, timeout=2)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

        events = [
            event.dagster_event.event_type_value for event in instance.all_logs(run_id)
        ]
        assert events == [
            "ENGINE_EVENT",
            "ENGINE_EVENT",
            "PIPELINE_START",
            "ENGINE_EVENT",
            "STEP_START",
            "STEP_FAILURE",
            "PIPELINE_FAILURE",
            "ENGINE_EVENT",
        ]
def test_terminate_kills_subproc():
    """Check that terminating a run also gets rid of the run's subprocess.

    Launches ``sleepy_pipeline`` from a managed gRPC workspace, looks up the pid
    reported by ``poll_for_pid`` and asserts that it exists, terminates the run,
    and asserts that the run is ``CANCELED`` and the pid no longer exists after
    a short delay.
    """
    with instance_for_test() as instance:
        with get_managed_grpc_server_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("sleepy_repo")
                .get_full_external_pipeline("sleepy_pipeline")
            )
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, workspace)
            poll_for_step_start(instance, run_id)

            # find pid of subprocess
            subproc_pid = poll_for_pid(instance, run_id)
            assert psutil.pid_exists(subproc_pid)

            # simulate waiting a bit to terminate the pipeline
            time.sleep(0.5)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # The poll result is not bound: the run is re-read from the instance
            # on the next line, so keeping the return value was a dead store.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            # make sure the subprocess is killed after a short delay
            time.sleep(0.5)
            assert not psutil.pid_exists(subproc_pid)
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    """Terminate a running ``sleepy_pipeline`` run and verify the logged events.

    Args:
        get_external_pipeline: Callable taking a pipeline name and returning a
            context manager that yields the external pipeline to launch.
        run_config: Run config for the run; ``_is_multiprocess(run_config)``
            selects which list of (event type, message) pairs is expected.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # The poll result is not bound: the run is re-read from the instance
            # on the next line, so keeping the return value was a dead store.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                expected_events = [
                    ("ENGINE_EVENT", "Started process for pipeline"),
                    ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                    ("ENGINE_EVENT", "Executing steps using multiprocess executor"),
                    ("ENGINE_EVENT", "Launching subprocess for sleepy_solid.compute"),
                    ("ENGINE_EVENT", "Executing step sleepy_solid.compute in subprocess"),
                    ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                    ("ENGINE_EVENT", "Received pipeline termination request"),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: received termination signal - forwarding to active child process",
                    ),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                    (
                        "PIPELINE_FAILURE",
                        # NOTE(review): the message contains a line break after
                        # "failed. " in the text under review; written here as
                        # an escape. Confirm against the emitted event message.
                        'Execution of pipeline "sleepy_pipeline" failed. \nAn exception was thrown during execution.',
                    ),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]
            else:
                expected_events = [
                    ("ENGINE_EVENT", "Started process for pipeline"),
                    ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                    ("ENGINE_EVENT", "Executing steps in process"),
                    ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                    ("ENGINE_EVENT", "Received pipeline termination request"),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                    ("PIPELINE_FAILURE", 'Execution of pipeline "sleepy_pipeline" failed.'),
                    ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]

            _check_event_log(run_logs, expected_events)
def test_cancel_run():
    """Cancel a streaming run through the gRPC client and check it stopped early.

    Starts ``streaming_pipeline`` (configured with ``length: 20``) on a
    background thread via ``_stream_events_target``, cancels it once the first
    step has started, and asserts that fewer than 20 ``STEP_MATERIALIZATION``
    events and at least one ``STEP_FAILURE`` event were logged.
    """
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )
        server_process = GrpcServerProcess(loadable_target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            run_id = pipeline_run.run_id

            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name="streaming_pipeline",
                    repository_origin=RepositoryGrpcServerOrigin(
                        host="localhost",
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name="test_repository",
                    ),
                ),
                pipeline_run_id=run_id,
                instance_ref=instance.get_ref(),
            )

            # Daemon thread, so a stuck stream cannot keep the interpreter alive.
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
                daemon=True,
            )
            stream_events_result_thread.start()

            poll_for_step_start(instance, run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=run_id)
            )
            assert res.success is True

            poll_for_finished_run(instance, run_id)

            logs = instance.all_logs(run_id)
            event_types = [ev.dagster_event.event_type_value for ev in logs]

            # Fewer materializations than the configured length of 20 shows the
            # run was interrupted before it finished.
            assert event_types.count("STEP_MATERIALIZATION") < 20

            # soft termination
            assert "STEP_FAILURE" in event_types

        # Runs once the ephemeral client context has been exited.
        server_process.wait()