def terminate(self, run_id):
    """Ask the gRPC server that is executing ``run_id`` to cancel it.

    Args:
        run_id (str): Identifier of the run to terminate.

    Returns:
        ``False`` when the launcher has no instance, the run cannot be found,
        or no gRPC client is available for the run; otherwise the ``success``
        attribute of the cancellation response.
    """
    check.str_param(run_id, "run_id")

    instance = self._instance
    if not instance:
        return False

    run = instance.get_run_by_id(run_id)
    if not run:
        return False

    # Record that a termination was requested before attempting to reach the server.
    instance.report_engine_event(
        message="Received pipeline termination request.",
        pipeline_run=run,
        cls=self.__class__,
    )

    grpc_client = self._get_grpc_client_for_termination(run_id)
    if grpc_client:
        response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
        return response.success

    # No client: surface the reason in the run's event log and report failure.
    instance.report_engine_event(
        message="Unable to get grpc client to send termination request to.",
        pipeline_run=run,
        cls=self.__class__,
    )
    return False
def test_terminate_failed(self, graphql_context):
    """Exercise run cancellation when the run launcher refuses to terminate.

    The launcher's ``terminate`` is temporarily replaced with a stub that
    returns ``False``. A plain cancellation request must then produce
    ``TerminatePipelineExecutionFailure``, while a request using the
    ``MARK_AS_CANCELED_IMMEDIATELY`` policy must still produce
    ``TerminatePipelineExecutionSuccess`` for the same run.
    """
    selector = infer_pipeline_selector(graphql_context, "infinite_loop_pipeline")
    with safe_tempfile_path() as path:
        old_terminate = graphql_context.instance.run_launcher.terminate
        graphql_context.instance.run_launcher.terminate = lambda _run_id: False
        run_id = None
        try:
            result = execute_dagster_graphql(
                graphql_context,
                LAUNCH_PIPELINE_EXECUTION_MUTATION,
                variables={
                    "executionParams": {
                        "selector": selector,
                        "mode": "default",
                        "runConfigData": {"solids": {"loop": {"config": {"file": path}}}},
                    }
                },
            )

            assert not result.errors
            assert result.data

            # just test existence
            assert (
                result.data["launchPipelineExecution"]["__typename"] == "LaunchPipelineRunSuccess"
            )
            run_id = result.data["launchPipelineExecution"]["run"]["runId"]

            # ensure the execution has happened
            while not os.path.exists(path):
                time.sleep(0.1)

            result = execute_dagster_graphql(
                graphql_context, RUN_CANCELLATION_QUERY, variables={"runId": run_id}
            )
            assert (
                result.data["terminatePipelineExecution"]["__typename"]
                == "TerminatePipelineExecutionFailure"
            )
            assert result.data["terminatePipelineExecution"]["message"].startswith(
                "Unable to terminate run"
            )

            result = execute_dagster_graphql(
                graphql_context,
                RUN_CANCELLATION_QUERY,
                variables={"runId": run_id, "terminatePolicy": "MARK_AS_CANCELED_IMMEDIATELY"},
            )
            assert (
                result.data["terminatePipelineExecution"]["__typename"]
                == "TerminatePipelineExecutionSuccess"
            )
            assert result.data["terminatePipelineExecution"]["run"]["runId"] == run_id
        finally:
            # Always undo the monkeypatch, even when an assertion above fails,
            # so the stubbed terminate cannot leak out of this test.
            graphql_context.instance.run_launcher.terminate = old_terminate

            if run_id is not None:
                # Clean up the run process on the gRPC server
                repository_location_handle = graphql_context.repository_locations[
                    0
                ].location_handle
                repository_location_handle.client.cancel_execution(
                    CancelExecutionRequest(run_id=run_id)
                )
def test_cancel_run():
    """Cancel a streaming run over gRPC and check that it stopped early.

    The run is configured with a ``length`` of 20; after cancellation there
    must be fewer than 20 ``STEP_MATERIALIZATION`` events and at least one
    ``STEP_FAILURE`` event in the run's logs.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )
        server_process = GrpcServerProcess(target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []
            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            run_id = pipeline_run.run_id

            repository_origin = RepositoryGrpcServerOrigin(
                host="localhost",
                socket=api_client.socket,
                port=api_client.port,
                repository_name="test_repository",
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name="streaming_pipeline",
                    repository_origin=repository_origin,
                ),
                pipeline_run_id=run_id,
                instance_ref=instance.get_ref(),
            )

            # Consume the event stream on a background thread so this thread
            # stays free to issue the cancellation request.
            stream_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_thread.daemon = True
            stream_thread.start()

            poll_for_step_start(instance, run_id)

            cancel_request = CancelExecutionRequest(run_id=run_id)
            res = api_client.cancel_execution(cancel_execution_request=cancel_request)
            assert res.success is True

            poll_for_run(instance, run_id)

            event_types = [
                ev.dagster_event.event_type_value for ev in instance.all_logs(run_id)
            ]
            assert event_types.count("STEP_MATERIALIZATION") < 20

            # soft termination
            assert "STEP_FAILURE" in event_types

        server_process.wait()
def terminate(self, run_id):
    """Send a cancellation request for ``run_id`` to its gRPC server.

    Args:
        run_id (str): Identifier of the run to terminate.

    Returns:
        ``False`` when no gRPC client is available for the run; otherwise the
        ``success`` attribute of the cancellation response.
    """
    check.str_param(run_id, "run_id")

    grpc_client = self._get_grpc_client_for_termination(run_id)
    if grpc_client:
        response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
        return response.success

    return False
def terminate(self, run_id):
    """Cancel ``run_id`` through the repository location handle cached for it.

    Args:
        run_id (str): Identifier of the run to terminate.

    Returns:
        ``False`` when no handle is cached for the run; otherwise the
        ``success`` attribute of the cancellation response.
    """
    check.str_param(run_id, 'run_id')

    handle_cache = self._run_id_to_repository_location_handle_cache
    if run_id in handle_cache:
        grpc_client = handle_cache[run_id].client
        response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
        return response.success

    return False
def test_cleanup_after_force_terminate(run_config):
    """Check that a force-terminated run still logs its cleanup message.

    The run is marked canceling and then canceled directly on the instance
    before the gRPC cancellation is sent. The test then polls the run's logs
    for up to 30 seconds for the cleanup message and finally asserts the run
    status is ``CANCELED``.
    """
    cleanup_message = (
        "Computational resources were cleaned up after the run was forcibly marked as canceled."
    )

    with instance_for_test() as instance, get_managed_grpc_server_workspace(
        instance
    ) as workspace:
        repository = workspace.get_repository_location("test").get_repository("nope")
        external_pipeline = repository.get_full_external_pipeline("sleepy_pipeline")

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
            external_pipeline_origin=external_pipeline.get_external_origin(),
            pipeline_code_origin=external_pipeline.get_python_origin(),
        )
        run_id = pipeline_run.run_id

        instance.launch_run(pipeline_run.run_id, workspace)
        poll_for_step_start(instance, run_id)

        # simulate the sequence of events that happen during force-termination:
        # run moves immediately into canceled status while termination happens
        instance.report_run_canceling(pipeline_run)
        instance.report_run_canceled(pipeline_run)

        # The connection details are read from the GRPC_INFO_TAG tag on the
        # reloaded run.
        reloaded_run = instance.get_run_by_id(run_id)
        grpc_info = json.loads(reloaded_run.tags.get(GRPC_INFO_TAG))
        client = DagsterGrpcClient(
            port=grpc_info.get("port"),
            socket=grpc_info.get("socket"),
            host=grpc_info.get("host"),
        )
        client.cancel_execution(CancelExecutionRequest(run_id=run_id))

        # Wait for the run worker to clean up
        started_at = time.time()
        while True:
            if time.time() - started_at > 30:
                raise Exception("Timed out waiting for cleanup message")

            if any(cleanup_message in str(event) for event in instance.all_logs(run_id)):
                break

            time.sleep(1)

        final_run = instance.get_run_by_id(run_id)
        assert final_run.status == PipelineRunStatus.CANCELED
def terminate(self, run_id):
    """Mark ``run_id`` as canceling and ask its gRPC server to cancel it.

    Args:
        run_id (str): Identifier of the run to terminate.

    Returns:
        ``False`` when the launcher has no instance, the run cannot be found,
        or no gRPC client is available for the run; otherwise the ``success``
        attribute of the deserialized cancellation response.
    """
    check.str_param(run_id, "run_id")

    instance = self._instance
    if not instance:
        return False

    run = instance.get_run_by_id(run_id)
    if not run:
        return False

    grpc_client = self._get_grpc_client_for_termination(run_id)
    if not grpc_client:
        # Surface the reason in the run's event log before reporting failure.
        instance.report_engine_event(
            message="Unable to get grpc client to send termination request to.",
            pipeline_run=run,
            cls=self.__class__,
        )
        return False

    # The run is only reported as canceling once a client is known to exist.
    instance.report_run_canceling(run)

    serialized_response = grpc_client.cancel_execution(CancelExecutionRequest(run_id=run_id))
    response = deserialize_json_to_dagster_namedtuple(serialized_response)
    return response.success
def _ephemeral_launched_run_client(
    instance_ref, pipeline_origin, pipeline_run_id, cancellation_event
):
    """Execute a run against an ephemeral two-worker gRPC server, honoring cancellation.

    Two workers are used so that a cancellation request can be processed as an
    interrupt while the launched run is still executing, rather than waiting
    for the run to complete.

    Args:
        instance_ref (InstanceRef): Reference used to load the Dagster instance.
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline to execute.
        pipeline_run_id (str): Identifier of the run to execute.
        cancellation_event (multiprocessing.synchronize.Event): When set, a
            cancellation request is sent for the run.
    """
    check.inst_param(instance_ref, 'instance_ref', InstanceRef)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.str_param(pipeline_run_id, 'pipeline_run_id')
    check.inst_param(cancellation_event, 'cancellation_event', multiprocessing.synchronize.Event)

    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(pipeline_run_id)

    target_origin = LoadableTargetOrigin.from_python_origin(pipeline_origin.repository_origin)

    with GrpcServerProcess(target_origin, max_workers=2) as server_process:
        api_client = server_process.create_ephemeral_client()

        thread_kwargs = {
            'api_client': api_client,
            'instance_ref': instance_ref,
            'pipeline_origin': pipeline_origin,
            'pipeline_run': pipeline_run,
        }
        run_thread = threading.Thread(target=sync_execute_run_grpc, kwargs=thread_kwargs)
        run_thread.start()

        # Poll once per tick until the run thread exits; on cancellation, send
        # the request and block until the run thread has finished.
        while run_thread.is_alive():
            if cancellation_event.is_set():
                api_client.cancel_execution(CancelExecutionRequest(run_id=pipeline_run_id))
                run_thread.join()
            time.sleep(SUBPROCESS_TICK)