def __init__(self, start_services=True):
    """Initialize server status records, service flags and the communicator.

    One status record is created for every rank returned by
    MPIEnvironment.mpi_server_rank_list(). Both the monitor service and the
    ping status response handler service start out switched off, not
    running and without a thread.

    Args:
        start_services: When true, self.start_services() is invoked once
            all state has been initialized.
    """
    # One status record per MPI server rank, keyed by rank
    self.__server_status_list = {}
    for server_rank in MPIEnvironment.mpi_server_rank_list():
        self.__server_status_list[server_rank] = {
            'rank': server_rank,
            'processor': None,
            'pid': None,
            'busy': False,
            'command': None,
            'command_start_time': None,
            'pong_pending': False,
            'ping_time': None,
            'pong_time': None,
            'timeout': False,
        }

    # Monitor service state
    self.__monitor_status_service_on = False
    self.__monitor_status_service_running = False
    self.__monitor_status_service_thread = None

    # Ping status response handler service state
    self.__ping_status_response_handler_service_on = False
    self.__ping_status_response_handler_service_running = False
    self.__ping_status_response_handler_service_thread = None

    # Communicator used by the services to talk to the servers
    self.__communicator = MPICommunicator()

    # Services are started right away unless the caller opts out
    if start_services:
        self.start_services()
def __init__(self, start_services=True):
    """Create the default status record of every server and reset services.

    Args:
        start_services: When true, self.start_services() is called as the
            last construction step.
    """
    # Default status values shared by all servers; 'rank' is added per server
    default_status = {
        'processor': None,
        'pid': None,
        'busy': False,
        'command': None,
        'command_start_time': None,
        'pong_pending': False,
        'ping_time': None,
        'pong_time': None,
        'timeout': False,
    }

    # Each rank gets its own copy so records never share state
    self.__server_status_list = {}
    for server_rank in MPIEnvironment.mpi_server_rank_list():
        record = dict(default_status)
        record['rank'] = server_rank
        self.__server_status_list[server_rank] = record

    # Monitor service: off, not running, no thread
    self.__monitor_status_service_on = False
    self.__monitor_status_service_running = False
    self.__monitor_status_service_thread = None

    # Ping status response handler service: off, not running, no thread
    self.__ping_status_response_handler_service_on = False
    self.__ping_status_response_handler_service_running = False
    self.__ping_status_response_handler_service_thread = None

    # MPICommunicator reference
    self.__communicator = MPICommunicator()

    # Automatic start of the services
    if start_services:
        self.start_services()
def __validate_target_servers(self, target_server):
    """Validate a target server specification and normalize it to a list.

    Args:
        target_server: Either a single int rank or a non-empty list of int
            ranks.

    Returns:
        The input list itself when a valid list was given, a one-element
        list when a valid single int was given, or None when the format is
        wrong, a rank is not in MPIEnvironment.mpi_server_rank_list(), or
        the monitor client flags a server as timed out. Every rejection is
        logged with SEVERE priority.
    """
    casalog_call_origin = "MPICommandClient::validate_target_servers"

    # Get list of valid MPIServer ranks
    mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

    # Normalize the accepted formats (list of int / single int) to a list
    if (isinstance(target_server, list) and (len(target_server) >= 1)
            and all(isinstance(server, int) for server in target_server)):
        candidate_servers = target_server
    elif isinstance(target_server, int):
        candidate_servers = [target_server]
    else:
        casalog.post("target_server has wrong format (%s), accepted formats are int and list(int)"
                     % str(type(target_server)), "SEVERE", casalog_call_origin)
        return None

    # Both formats go through the same checks. Previously a single int
    # target skipped the timeout check that list targets were subject to.
    for server in candidate_servers:
        if server not in mpi_server_rank_list:
            casalog.post("Server #%s does not exist" % str(server), "SEVERE", casalog_call_origin)
            return None
        if self.__monitor_client.get_server_status_keyword(server, 'timeout'):
            casalog.post("Server #%s has timed out" % str(server), "SEVERE", casalog_call_origin)
            return None

    return candidate_servers
def control_service_request_broadcast(self, request, logger=None):
    """Send the same control service request to every MPI server rank.

    Args:
        request: Control service request; its 'signal' entry is read only
            when a logger is given.
        logger: Optional object with a post(message, priority, origin)
            method; when given, a DEBUG message is posted before each send.
    """
    log_origin = "MPICommunicator::control_service_request_send_all"
    for server_rank in MPIEnvironment.mpi_server_rank_list():
        if logger is not None:
            message = "Sending %s service signal to server %s" % (request['signal'], str(server_rank))
            logger.post(message, "DEBUG", log_origin)
        self.control_service_request_send(request=request, server=server_rank)
def __init__(self):
    """Create the channel ids and communicators used by the MPI services.

    The command, ping status and control service channels are numbered 2, 1
    and 0. A separate communicator is obtained from
    MPIEnvironment.mpi_comm_world_factory() for the request and response
    side of each channel.

    Raises:
        Exception: If any step fails. Channels are then set to -1 and all
            communicators to None, and the message names the processor
            rank and the original exception.
    """
    # Initialize communicators
    try:
        self.__command_channel = 2
        self.__ping_status_channel = 1
        self.__control_service_channel = 0
        self.__command_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__ping_status_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__command_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__ping_status_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__control_service_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__control_service_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        # Register exit handler in case MPICommandClient exit handler is not initialized
        # NOTE: It is not guaranteed that __del__() methods are called
        #       for objects that still exist when the interpreter exits.
        self.__finalize_mpi_environment = True
        # jagonzal: This exit function must be registered only for the client
        if MPIEnvironment.is_mpi_client:
            atexit.register(self.finalize_server_and_client_mpi_environment)
    # "except X as name" and "raise X(msg)" are valid on Python 2.6+ and 3,
    # unlike the former "except X, name" / "raise X, msg" forms
    except Exception as instance:
        # Leave the object in a recognizable "not initialized" state
        self.__command_channel = -1
        self.__ping_status_channel = -1
        self.__control_service_channel = -1
        self.__command_request_communicator = None
        self.__ping_status_request_communicator = None
        self.__command_response_communicator = None
        self.__ping_status_response_communicator = None
        self.__control_service_request_communicator = None
        self.__control_service_response_communicator = None
        msg = "Exception initializing MPICommunicator at processor with rank "
        msg = msg + "%s: %s" % (str(MPIEnvironment.mpi_processor_rank), str(instance))
        raise Exception(msg)
def __init__(self):
    """Create the channel ids and communicators used by the MPI services.

    The command, ping status and control service channels are numbered 2, 1
    and 0. A separate communicator is obtained from
    MPIEnvironment.mpi_comm_world_factory() for the request and response
    side of each channel.

    Raises:
        Exception: If any step fails. Channels are then set to -1 and all
            communicators to None, and the message names the processor
            rank and the original exception.
    """
    # Initialize communicators
    try:
        self.__command_channel = 2
        self.__ping_status_channel = 1
        self.__control_service_channel = 0
        self.__command_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__ping_status_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__command_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__ping_status_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__control_service_request_communicator = MPIEnvironment.mpi_comm_world_factory()
        self.__control_service_response_communicator = MPIEnvironment.mpi_comm_world_factory()
        # Register exit handler in case MPICommandClient exit handler is not initialized
        # NOTE: It is not guaranteed that __del__() methods are called
        #       for objects that still exist when the interpreter exits.
        self.__finalize_mpi_environment = True
        # jagonzal: This exit function must be registered only for the client
        if MPIEnvironment.is_mpi_client:
            atexit.register(self.finalize_server_and_client_mpi_environment)
    # "except X as name" is valid on Python 2.6+ and 3, unlike "except X, name"
    except Exception as instance:
        # Leave the object in a recognizable "not initialized" state
        self.__command_channel = -1
        self.__ping_status_channel = -1
        self.__control_service_channel = -1
        self.__command_request_communicator = None
        self.__ping_status_request_communicator = None
        self.__command_response_communicator = None
        self.__ping_status_response_communicator = None
        self.__control_service_request_communicator = None
        self.__control_service_response_communicator = None
        msg = "Exception initializing MPICommunicator at processor with rank "
        msg = msg + "%s: %s" % (str(MPIEnvironment.mpi_processor_rank), str(instance))
        raise Exception(msg)
def __monitor_status_service(self):
    """Ping every server periodically and flag the ones that stop answering.

    Runs until self.__monitor_status_service_on becomes false. In every
    round a ping status request is sent to each server with no pong
    pending. For servers with a pending pong the number of rounds waited is
    counted, and once the estimated wait exceeds the configured timeout
    (and timeouts are enabled) the server is flagged as 'timeout' and a
    SEVERE message is logged.
    """
    casalog_call_origin = "MPIMonitorClient::monitor_status_service"

    # Mark service as running
    self.__monitor_status_service_running = True

    server_ranks = MPIEnvironment.mpi_server_rank_list()

    while self.__monitor_status_service_on:
        for server_rank in server_ranks:
            if self.__server_status_list[server_rank]['pong_pending']:
                # Still waiting for a pong: the wait is estimated as the
                # number of rounds checked times the heartbeat, not from
                # the wall clock time of the ping
                self.__server_status_list[server_rank]['pong_checks'] += 1
                elapsed_time = MPIEnvironment.mpi_monitor_status_service_heartbeat
                elapsed_time *= self.__server_status_list[server_rank]['pong_checks']
                # Notify only once, when the server first reaches timeout
                if MPIEnvironment.mpi_monitor_status_service_timeout_enabled:
                    if elapsed_time > MPIEnvironment.mpi_monitor_status_service_timeout:
                        if not self.__server_status_list[server_rank]['timeout']:
                            casalog.post("Ping status response from server %s not received "
                                         "in the last %ss. Setting its status to 'timeout'"
                                         % (str(server_rank), str(int(elapsed_time))),
                                         "SEVERE", casalog_call_origin)
                            self.__server_status_list[server_rank]['timeout'] = True
                continue

            # No pong pending: send a new ping status request
            try:
                self.__communicator.ping_status_request_send(server=server_rank)
                self.__server_status_list[server_rank]['ping_time'] = time.time()
                self.__server_status_list[server_rank]['pong_pending'] = True
                self.__server_status_list[server_rank]['pong_checks'] = 0
            except:
                formatted_traceback = traceback.format_exc()
                casalog.post("Exception sending ping status request to server %s: %s"
                             % (str(server_rank), str(formatted_traceback)),
                             "SEVERE", casalog_call_origin)

        # Sleep before next round
        time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)

    # Mark service as not running
    self.__monitor_status_service_running = False
def get_nodes(self):
    """Return the distinct 'processor' values recorded for all servers.

    The 'processor' status keyword of every rank in
    MPIEnvironment.mpi_server_rank_list() is read from the monitor client
    and duplicates are dropped. The order of the result is whatever the
    intermediate set yields.
    """
    # One entry per server, possibly repeated when servers share a host
    processors = [
        self.__monitor_client.get_server_status_keyword(server_rank, 'processor')
        for server_rank in MPIEnvironment.mpi_server_rank_list()
    ]
    # Collapse repeated entries
    return list(set(processors))
def push(self, variables, targets=None):
    """Set variables in a sub-set of engines.

    Args:
        variables: Mapping, or iterable of key/value pairs, of the
            variables to set; it is converted with dict() before sending.
        targets: None or 'all' to address every rank in
            MPIEnvironment.mpi_server_rank_list(), a single int rank, or
            an iterable of ranks.

    Returns:
        Whatever the command client's push_command_request() returns for
        the blocking "push" request.
    """
    # Determine target servers
    if targets is None or targets == 'all':
        target_server = MPIEnvironment.mpi_server_rank_list()
    elif isinstance(targets, int):
        # A single rank is not iterable, so list(targets) would raise TypeError
        target_server = [targets]
    else:
        target_server = list(targets)

    # Push variables and hand the request result back for further processing
    return self.__command_client.push_command_request(
        "push",
        block=True,
        target_server=target_server,
        parameters=dict(variables))
def __send_start_service_signal(self):
    """Broadcast the start signal and wait for every server to answer.

    The request carries the global casa dictionary and the log mode. The
    method then polls for control service responses, storing the
    'processor' and 'pid' reported by each server in the monitor client,
    and returns only after every rank in
    MPIEnvironment.mpi_server_rank_list() has answered.
    """
    casalog_call_origin = "MPICommandClient::send_start_service_signal"
    casalog.post("Sending start service signal to all servers", "INFO", casalog_call_origin)

    # Prepare start service request; it contains the global casa
    # dictionary to be used by the servers
    request = {
        "signal": "start",
        "casa": casa,
        "logmode": self.__log_mode,
    }

    # Send request to all servers
    self.__communicator.control_service_request_broadcast(request, casalog)

    # Ranks are removed from this list as their responses arrive
    pending_ranks = MPIEnvironment.mpi_server_rank_list()
    while pending_ranks:
        if not self.__communicator.control_service_response_probe():
            # Nothing received yet: wait a bit before probing again
            time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)
            continue

        # Receive start service response to know what server has started
        response = self.__communicator.control_service_response_recv()
        rank = response["rank"]
        # Store processor name and PID info in the MPIMonitorClient
        self.__monitor_client.set_server_status_keyword(rank, "processor", response["processor"])
        self.__monitor_client.set_server_status_keyword(rank, "pid", response["pid"])
        pending_ranks.remove(rank)
        # Communicate that server response to start service signal has been received
        casalog.post(
            "Server with rank %s started at %s with PID %s"
            % (str(rank), str(response["processor"]), str(response["pid"])),
            "INFO",
            casalog_call_origin,
        )

    casalog.post("Received response from all servers to start service signal", "INFO", casalog_call_origin)
def __monitor_status_service(self):
    """Ping every server periodically and flag the ones that stop answering.

    Runs until self.__monitor_status_service_on becomes false. In every
    round a ping status request is sent to each server with no pong
    pending. For servers with a pending pong the number of rounds waited is
    counted, and once the estimated wait exceeds the configured timeout the
    server is flagged as 'timeout' and a SEVERE message is logged.
    """
    casalog_call_origin = "MPIMonitorClient::monitor_status_service"

    # Mark service as running
    self.__monitor_status_service_running = True

    server_ranks = MPIEnvironment.mpi_server_rank_list()

    while self.__monitor_status_service_on:
        for server_rank in server_ranks:
            if self.__server_status_list[server_rank]['pong_pending']:
                # Still waiting for a pong: the wait is estimated as the
                # number of rounds checked times the heartbeat, not from
                # the wall clock time of the ping
                self.__server_status_list[server_rank]['pong_checks'] += 1
                elapsed_time = MPIEnvironment.mpi_monitor_status_service_heartbeat
                elapsed_time *= self.__server_status_list[server_rank]['pong_checks']
                # Notify only once, when the server first reaches timeout
                timed_out = ((elapsed_time > MPIEnvironment.mpi_monitor_status_service_timeout)
                             and (not self.__server_status_list[server_rank]['timeout']))
                if timed_out:
                    casalog.post("Ping status response from server %s not received in the last %ss"
                                 % (str(server_rank), str(int(elapsed_time))),
                                 "SEVERE", casalog_call_origin)
                    self.__server_status_list[server_rank]['timeout'] = True
            else:
                # No pong pending: send a new ping status request
                try:
                    self.__communicator.ping_status_request_send(server=server_rank)
                    self.__server_status_list[server_rank]['ping_time'] = time.time()
                    self.__server_status_list[server_rank]['pong_pending'] = True
                    self.__server_status_list[server_rank]['pong_checks'] = 0
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception sending ping status request to server %s: %s"
                                 % (str(server_rank), str(formatted_traceback)),
                                 "SEVERE", casalog_call_origin)

        # Sleep before next round
        time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)

    # Mark service as not running
    self.__monitor_status_service_running = False
def __validate_target_servers(self, target_server):
    """Validate a target server specification and normalize it to a list.

    Args:
        target_server: Either a single int rank or a non-empty list of int
            ranks.

    Returns:
        The input list itself when a valid list was given, a one-element
        list when a valid single int was given, or None when the format is
        wrong, a rank is not in MPIEnvironment.mpi_server_rank_list(), or
        the monitor client flags a server as timed out. Every rejection is
        logged with SEVERE priority.
    """
    casalog_call_origin = "MPICommandClient::validate_target_servers"

    # Get list of valid MPIServer ranks
    mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

    # Normalize the accepted formats (list of int / single int) to a list
    if (
        isinstance(target_server, list)
        and (len(target_server) >= 1)
        and all(isinstance(server, int) for server in target_server)
    ):
        candidate_servers = target_server
    elif isinstance(target_server, int):
        candidate_servers = [target_server]
    else:
        casalog.post(
            "target_server has wrong format (%s), accepted formats are int and list(int)"
            % str(type(target_server)),
            "SEVERE",
            casalog_call_origin,
        )
        return None

    # Both formats go through the same checks. Previously a single int
    # target skipped the timeout check that list targets were subject to.
    for server in candidate_servers:
        if server not in mpi_server_rank_list:
            casalog.post("Server #%s does not exist" % str(server), "SEVERE", casalog_call_origin)
            return None
        if self.__monitor_client.get_server_status_keyword(server, "timeout"):
            casalog.post("Server #%s has timed out" % str(server), "SEVERE", casalog_call_origin)
            return None

    return candidate_servers
def pgc(self, commands, block=True):
    """Execute commands on the servers, in one of two modes.

    - When commands is a dictionary, each value is sent as a command to the
      server given by its key (equivalent to various calls to odo/execute).
      The requests are pushed in non-blocking mode and the responses are
      then collected with the requested blocking mode.
    - Otherwise commands is treated as a single command and executed in all
      engines.

    Returns:
        The result of get_command_response() in dictionary mode, or of
        push_command_request() in single-command mode.
    """
    if not isinstance(commands, dict):
        # Single command: execute it in all servers
        every_server = MPIEnvironment.mpi_server_rank_list()
        return self.__command_client.push_command_request(
            commands,
            block=block,
            target_server=every_server)

    # One non-blocking job per (server, command) pair; the first element of
    # each push result is kept as the job id
    pending_jobs = [
        self.__command_client.push_command_request(
            commands[target],
            block=False,
            target_server=target)[0]
        for target in commands
    ]

    # If user requests blocking mode this waits until execution is completed
    return self.__command_client.get_command_response(pending_jobs, block=block, verbose=True)
def __send_start_service_signal(self):
    """Broadcast the start signal and wait for every server to answer.

    The request carries the global casa dictionary and the log mode. The
    method then polls for control service responses, storing the
    'processor' and 'pid' reported by each server in the monitor client,
    and returns only after every rank in
    MPIEnvironment.mpi_server_rank_list() has answered.
    """
    casalog_call_origin = "MPICommandClient::send_start_service_signal"
    casalog.post("Sending start service signal to all servers", "INFO", casalog_call_origin)

    # Prepare start service request; it contains the global casa
    # dictionary to be used by the servers
    request = {
        'signal': 'start',
        'casa': casa,
        'logmode': self.__log_mode,
    }

    # Send request to all servers
    self.__communicator.control_service_request_broadcast(request, casalog)

    # Ranks are removed from this list as their responses arrive
    awaited_ranks = MPIEnvironment.mpi_server_rank_list()
    while awaited_ranks:
        response_available = self.__communicator.control_service_response_probe()
        if not response_available:
            # Nothing received yet: wait a bit before probing again
            time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)
            continue

        # Receive start service response to know what server has started
        response = self.__communicator.control_service_response_recv()
        rank = response['rank']
        # Store processor name and PID info in the MPIMonitorClient
        self.__monitor_client.set_server_status_keyword(rank, 'processor', response['processor'])
        self.__monitor_client.set_server_status_keyword(rank, 'pid', response['pid'])
        awaited_ranks.remove(rank)
        # Communicate that server response to start service signal has been received
        casalog.post("Server with rank %s started at %s with PID %s"
                     % (str(rank), str(response['processor']), str(response['pid'])),
                     "INFO", casalog_call_origin)

    casalog.post("Received response from all servers to start service signal", "INFO", casalog_call_origin)
def serve(self):
    """Run the server main loop until a stop signal or a client timeout.

    Services are started and the server status is sent as a control service
    response. The method then polls for control service requests; the
    'command' string of each request is compiled and executed, and the
    status is sent back when the request asks for a response. The loop ends
    when stop_service_requested becomes true or the monitor server reports
    a client timeout. Services are then stopped and, when requested by the
    stop signal, the MPI environment is finalized.
    """
    casalog_call_origin = "MPICommandServer::serve"

    # First start command and ping status services
    casalog.post("Starting services...","INFO",casalog_call_origin)
    self.start_services()

    # Notify to MPICommandClient that service is up and running
    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

    # Keep serving until a stop signal service is received
    control_service_request = {}
    stop_service_requested = False
    while ((not stop_service_requested) and (not self.__monitor_server.get_client_timeout())):

        # Check if there is an incoming control service msg
        msg_available = False
        try:
            msg_available = self.__communicator.control_service_request_probe()
        except:
            msg_available = False
            formatted_traceback = traceback.format_exc()
            casalog.post("Exception checking if control service msg is available: %s"
                         % str(formatted_traceback),"SEVERE",casalog_call_origin)

        if msg_available:

            # Receive control service msg
            msg_received = False
            control_service_request = {}
            try:
                control_service_request = self.__communicator.control_service_request_recv()
                msg_received = True
            except:
                msg_received = False
                formatted_traceback = traceback.format_exc()
                casalog.post("Exception receiving control service msg: %s"
                             % str(formatted_traceback),"SEVERE",casalog_call_origin)
                continue

            # Process control service msg
            cmd = None
            send_response = False
            if msg_received:
                try:
                    cmd = control_service_request['command']
                    send_response = control_service_request['send_response']
                    code = compile(cmd, casalog_call_origin, 'exec')
                    # NOTE(review): the loop exit depends on the executed
                    # command being able to rebind the local
                    # stop_service_requested. That holds with Python 2 exec
                    # semantics; on Python 3 exec() cannot rebind function
                    # locals - confirm before porting.
                    # NOTE(review): this executes code received over the
                    # control channel; presumably only the trusted
                    # MPICommandClient can send requests - verify.
                    exec(code)
                    casalog.post("Control signal %s successfully handled by server %s"
                                 % (str(cmd),str(MPIEnvironment.mpi_processor_rank)),
                                 "INFO",casalog_call_origin)
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception handling control signal command %s in server %s: %s"
                                 % (str(control_service_request),
                                    str(MPIEnvironment.mpi_processor_rank),
                                    str(formatted_traceback)),
                                 "SEVERE",casalog_call_origin)

            # Notify to MPICommandClient that control signal has been processed
            if send_response:
                try:
                    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception sending response to control signal command %s in server %s: %s"
                                 % (str(cmd),str(MPIEnvironment.mpi_processor_rank),str(formatted_traceback)),
                                 "SEVERE",casalog_call_origin)

        time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

    # Process stop service request
    if stop_service_requested:

        # Check if force mode is needed
        force_command_request_interruption = control_service_request['force_command_request_interruption']
        finalize_mpi_environment = control_service_request['finalize_mpi_environment']
        busy = self.__monitor_server.get_status('busy')
        if force_command_request_interruption and busy:
            casalog.post("force-stop service signal received, stopping services, " +
                         "command request handler service will be interrupted...","INFO",casalog_call_origin)
        else:
            force_command_request_interruption = False
            casalog.post("stop service signal received, stopping services...","INFO",casalog_call_origin)
    else:
        force_command_request_interruption = True
        # Without a stop request there is no finalize flag to read. It was
        # previously left unassigned here, so the check below raised
        # NameError. Finalization is skipped on client timeout.
        finalize_mpi_environment = False
        casalog.post("client timeout, forcing disconnection, " +
                     "command request handler service will be interrupted.." ,"INFO",casalog_call_origin)

    # Stop services
    self.stop_services(force_command_request_interruption)

    # Finalize MPI environment
    if finalize_mpi_environment:
        try:
            casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
            MPIEnvironment.finalize_mpi_environment()
        except:
            formatted_traceback = traceback.format_exc()
            casalog.post("Exception finalizing MPI environment %s"
                         % str(formatted_traceback),"SEVERE",casalog_call_origin)

    # Exit
    casalog.post("Exiting","INFO",casalog_call_origin)
def get_engines(self):
    """Return the rank list reported by MPIEnvironment.mpi_server_rank_list()."""
    engine_ranks = MPIEnvironment.mpi_server_rank_list()
    return engine_ranks
def stop_services(self, force_command_request_interruption=False):
    """Stop all client services and signal the servers to stop.

    Nothing is done (beyond a WARN log message) when services were not
    started (life cycle state 0) or were already finalized (state 2).
    Otherwise the monitoring and command request/response services are
    stopped, pending command requests are reported as aborted, control
    signals are sent to the servers, and the MPI environment is finalized
    unless some server is in timeout condition.

    Args:
        force_command_request_interruption: Forwarded to the servers in
            the stop signal; forced to True when any server has timed out.

    This method never raises: any exception is caught and printed.
    """
    # jagonzal: This method is called by the atexit module and if it fails it
    # causes ipython to crash, producing a report and waiting for user input
    # so we cannot risk under any circumstances such an event
    try:
        casalog_call_origin = "MPICommandClient::stop_services"

        if self.__life_cycle_state == 0:
            casalog.post("Services not started", "WARN", casalog_call_origin)
            return
        elif self.__life_cycle_state == 2:
            casalog.post("MPICommandClient life cycle finalized", "WARN", casalog_call_origin)
            return

        # Check if any server is in timeout condition before stopping the monitoring service
        server_rank_timeout = self.__monitor_client.get_server_timeout()
        finalize_mpi_environment = True
        if len(server_rank_timeout) > 0:
            finalize_mpi_environment = False
            force_command_request_interruption = True

        # Stop client monitoring services
        self.__monitor_client.stop_services()

        # Notify command requests which are going to be interrupted
        # ("not in" replaces dict.has_key(), which does not exist on Python 3)
        for command_request_id in self.__command_request_list:
            if command_request_id not in self.__command_response_list:
                casalog.post(
                    "Aborting command request with id# %s: %s"
                    % (str(command_request_id), str(self.__command_request_list[command_request_id])),
                    "SEVERE",
                    casalog_call_origin,
                )

        # Stop client command request-response services
        self.__stop_command_request_queue_service()
        self.__stop_command_response_handler_service()

        # Shutdown plotms process
        self.__send_control_signal(
            {"command": "pm.killApp()", "signal": "process_control"}, check_response=True
        )

        # Shutdown virtual frame buffer
        self.__send_control_signal(
            {"command": "self.stop_virtual_frame_buffer()", "signal": "process_control"}, check_response=True
        )

        # Send stop signal to servers
        self.__send_control_signal(
            {
                "command": "stop_service_requested = True",
                "signal": "stop",
                "force_command_request_interruption": force_command_request_interruption,
                "finalize_mpi_environment": finalize_mpi_environment,
            },
            check_response=False,
        )

        # Finalize MPI environment
        if finalize_mpi_environment:
            try:
                casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
                MPIEnvironment.finalize_mpi_environment()
            except:
                formatted_traceback = traceback.format_exc()
                casalog.post(
                    "Exception finalizing MPI environment %s" % str(formatted_traceback),
                    "SEVERE",
                    casalog_call_origin,
                )
        else:
            casalog.post(
                "MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                % str(server_rank_timeout),
                "SEVERE",
                casalog_call_origin,
            )

        # UnMark MPI environment to be finalized by the MPICommunicator destructor
        # (Either because it is already finalized or due to a
        # server not responsive that prevents graceful finalization)
        self.__communicator.set_finalize_mpi_environment(False)

        # Set life cycle state
        self.__life_cycle_state = 2

        casalog.post("All services stopped", "INFO", casalog_call_origin)

    except:
        formatted_traceback = traceback.format_exc()
        # Single-argument call form prints the same text on Python 2 and 3
        print("Unhandled exception in MPICommandClient::stop_services %s" % (formatted_traceback))
def serve(self):
    """Run the server main loop until a stop signal or a client timeout.

    Services are started and the server status is sent as a control service
    response. The method then polls for control service requests; the
    'command' string of each request is compiled and executed, and the
    status is sent back when the request asks for a response. The loop ends
    when stop_service_requested becomes true or the monitor server reports
    a client timeout. Services are then stopped and, when requested by the
    stop signal, the MPI environment is finalized.
    """
    casalog_call_origin = "MPICommandServer::serve"

    # First start command and ping status services
    casalog.post("Starting services...", "INFO", casalog_call_origin)
    self.start_services()

    # Notify to MPICommandClient that service is up and running
    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

    # Keep serving until a stop signal service is received
    control_service_request = {}
    stop_service_requested = False
    while (not stop_service_requested) and (not self.__monitor_server.get_client_timeout()):

        # Check if there is an incoming control service msg
        msg_available = False
        try:
            msg_available = self.__communicator.control_service_request_probe()
        except:
            msg_available = False
            formatted_traceback = traceback.format_exc()
            casalog.post(
                "Exception checking if control service msg is available: %s" % str(formatted_traceback),
                "SEVERE",
                casalog_call_origin,
            )

        if msg_available:

            # Receive control service msg
            msg_received = False
            control_service_request = {}
            try:
                control_service_request = self.__communicator.control_service_request_recv()
                msg_received = True
            except:
                msg_received = False
                formatted_traceback = traceback.format_exc()
                casalog.post(
                    "Exception receiving control service msg: %s" % str(formatted_traceback),
                    "SEVERE",
                    casalog_call_origin,
                )
                continue

            # Process control service msg
            cmd = None
            send_response = False
            if msg_received:
                try:
                    cmd = control_service_request["command"]
                    send_response = control_service_request["send_response"]
                    code = compile(cmd, casalog_call_origin, "exec")
                    # NOTE(review): the loop exit depends on the executed
                    # command being able to rebind the local
                    # stop_service_requested. That holds with Python 2 exec
                    # semantics; on Python 3 exec() cannot rebind function
                    # locals - confirm before porting.
                    # NOTE(review): this executes code received over the
                    # control channel; presumably only the trusted
                    # MPICommandClient can send requests - verify.
                    exec(code)
                    casalog.post(
                        "Control signal %s successfully handled by server %s"
                        % (str(cmd), str(MPIEnvironment.mpi_processor_rank)),
                        "INFO",
                        casalog_call_origin,
                    )
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception handling control signal command %s in server %s: %s"
                        % (
                            str(control_service_request),
                            str(MPIEnvironment.mpi_processor_rank),
                            str(formatted_traceback),
                        ),
                        "SEVERE",
                        casalog_call_origin,
                    )

            # Notify to MPICommandClient that control signal has been processed
            if send_response:
                try:
                    self.__communicator.control_service_response_send(
                        response=self.__monitor_server.get_status()
                    )
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception sending response to control signal command %s in server %s: %s"
                        % (str(cmd), str(MPIEnvironment.mpi_processor_rank), str(formatted_traceback)),
                        "SEVERE",
                        casalog_call_origin,
                    )

        time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

    # Process stop service request
    if stop_service_requested:

        # Check if force mode is needed
        force_command_request_interruption = control_service_request["force_command_request_interruption"]
        finalize_mpi_environment = control_service_request["finalize_mpi_environment"]
        busy = self.__monitor_server.get_status("busy")
        if force_command_request_interruption and busy:
            casalog.post(
                "force-stop service signal received, stopping services, "
                + "command request handler service will be interrupted...",
                "INFO",
                casalog_call_origin,
            )
        else:
            force_command_request_interruption = False
            casalog.post("stop service signal received, stopping services...", "INFO", casalog_call_origin)
    else:
        force_command_request_interruption = True
        # Without a stop request there is no finalize flag to read. It was
        # previously left unassigned here, so the check below raised
        # NameError. Finalization is skipped on client timeout.
        finalize_mpi_environment = False
        casalog.post(
            "client timeout, forcing disconnection, " + "command request handler service will be interrupted..",
            "INFO",
            casalog_call_origin,
        )

    # Stop services
    self.stop_services(force_command_request_interruption)

    # Finalize MPI environment
    if finalize_mpi_environment:
        try:
            casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
            MPIEnvironment.finalize_mpi_environment()
        except:
            formatted_traceback = traceback.format_exc()
            casalog.post(
                "Exception finalizing MPI environment %s" % str(formatted_traceback),
                "SEVERE",
                casalog_call_origin,
            )

    # Exit
    casalog.post("Exiting", "INFO", casalog_call_origin)
def stop_services(self, force_command_request_interruption=False):
    """Stop all client services and signal the servers to stop.

    Nothing is done (beyond a WARN log message) when services were not
    started (life cycle state 0) or were already finalized (state 2).
    Otherwise the monitoring and command request/response services are
    stopped, pending command requests are reported as aborted, control
    signals are sent to the servers, and the MPI environment is finalized
    unless some server is in timeout condition.

    Args:
        force_command_request_interruption: Forwarded to the servers in
            the stop signal; forced to True when any server has timed out.

    This method never raises: any exception is caught and printed.
    """
    # jagonzal: This method is called by the atexit module and if it fails it
    # causes ipython to crash, producing a report and waiting for user input
    # so we cannot risk under any circumstances such an event
    try:
        casalog_call_origin = "MPICommandClient::stop_services"

        if self.__life_cycle_state == 0:
            casalog.post("Services not started","WARN",casalog_call_origin)
            return
        elif self.__life_cycle_state == 2:
            casalog.post("MPICommandClient life cycle finalized","WARN",casalog_call_origin)
            return

        # Check if any server is in timeout condition before stopping the monitoring service
        server_rank_timeout = self.__monitor_client.get_server_timeout()
        finalize_mpi_environment = True
        if len(server_rank_timeout) > 0:
            finalize_mpi_environment = False
            force_command_request_interruption = True

        # Stop client monitoring services
        self.__monitor_client.stop_services()

        # Notify command requests which are going to be interrupted
        # ("not in" replaces dict.has_key(), which does not exist on Python 3)
        for command_request_id in self.__command_request_list:
            if command_request_id not in self.__command_response_list:
                casalog.post("Aborting command request with id# %s: %s"
                             % (str(command_request_id),str(self.__command_request_list[command_request_id])),
                             "SEVERE",casalog_call_origin)

        # Stop client command request-response services
        self.__stop_command_request_queue_service()
        self.__stop_command_response_handler_service()

        # Shutdown plotms process
        self.__send_control_signal({'command':'pm.killApp()',
                                    'signal':'process_control'},
                                   check_response=True)

        # Shutdown virtual frame buffer
        self.__send_control_signal({'command':'self.stop_virtual_frame_buffer()',
                                    'signal':'process_control'},
                                   check_response=True)

        # Send stop signal to servers
        self.__send_control_signal({'command':'stop_service_requested = True',
                                    'signal':'stop',
                                    'force_command_request_interruption':force_command_request_interruption,
                                    'finalize_mpi_environment':finalize_mpi_environment},
                                   check_response=False)

        # Finalize MPI environment
        if finalize_mpi_environment:
            try:
                casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
                MPIEnvironment.finalize_mpi_environment()
            except:
                formatted_traceback = traceback.format_exc()
                casalog.post("Exception finalizing MPI environment %s"
                             % str(formatted_traceback),"SEVERE",casalog_call_origin)
        else:
            casalog.post("MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                         % str(server_rank_timeout),"SEVERE",casalog_call_origin)

        # UnMark MPI environment to be finalized by the MPICommunicator destructor
        # (Either because it is already finalized or due to a
        # server not responsive that prevents graceful finalization)
        self.__communicator.set_finalize_mpi_environment(False)

        # Set life cycle state
        self.__life_cycle_state = 2

        casalog.post("All services stopped","INFO",casalog_call_origin)

    except:
        formatted_traceback = traceback.format_exc()
        # Single-argument call form prints the same text on Python 2 and 3
        print("Unhandled exception in MPICommandClient::stop_services %s" %(formatted_traceback))