Esempio n. 1
0
 def __init__(self,start_services=True):
     """Initialize server status records, service state and the communicator.

     Keyword arguments:
     start_services -- when true (default), self.start_services() is
                       invoked once all the state below has been set up
     """

     # One status record per MPI server rank, every field idle/unknown
     self.__server_status_list = {}
     for server_rank in MPIEnvironment.mpi_server_rank_list():
         self.__server_status_list[server_rank] = {
             'rank': server_rank,
             'processor': None,
             'pid': None,
             'busy': False,
             'command': None,
             'command_start_time': None,
             'pong_pending': False,
             'ping_time': None,
             'pong_time': None,
             'timeout': False,
         }

     # Monitor service: not requested, not running, no thread yet
     self.__monitor_status_service_on = False
     self.__monitor_status_service_running = False
     self.__monitor_status_service_thread = None

     # Ping status response handler service: same initial state
     self.__ping_status_response_handler_service_on = False
     self.__ping_status_response_handler_service_running = False
     self.__ping_status_response_handler_service_thread = None

     # Communicator used by the services
     self.__communicator = MPICommunicator()

     # Optionally bring the services up right away
     if start_services:
         self.start_services()
Esempio n. 2
0
        def __init__(self, start_services=True):
            """Create the status table, reset service flags, build the communicator.

            start_services: if true (the default), self.start_services() is
            called at the end of the initialization.
            """

            # Fields of a server status record that start out unknown (None)
            unknown_fields = ('processor', 'pid', 'command',
                              'command_start_time', 'ping_time', 'pong_time')

            # Server status state: one record per MPI server rank
            self.__server_status_list = {}
            for rank in MPIEnvironment.mpi_server_rank_list():
                status = dict.fromkeys(unknown_fields)
                status.update(rank=rank, busy=False,
                              pong_pending=False, timeout=False)
                self.__server_status_list[rank] = status

            # Monitor service state
            self.__monitor_status_service_on = False
            self.__monitor_status_service_running = False
            self.__monitor_status_service_thread = None

            # Ping status response handler service state
            self.__ping_status_response_handler_service_on = False
            self.__ping_status_response_handler_service_running = False
            self.__ping_status_response_handler_service_thread = None

            # MPICommunicator reference
            self.__communicator = MPICommunicator()

            # Start services unless the caller opted out
            if start_services:
                self.start_services()
Esempio n. 3
0
 def __validate_target_servers(self,target_server):
     """Validate a target server specification.

     target_server -- either a single int rank or a non-empty list of
                      int ranks

     Returns the list of validated ranks (a single int is wrapped in a
     one-element list, a list is returned as the same object), or None
     when the format is wrong, a rank is not in the MPI server rank list,
     or a server is flagged as timed out by the monitor client. Every
     rejection is reported through casalog with SEVERE priority.
     """

     casalog_call_origin = "MPICommandClient::validate_target_servers"

     # Get list of valid MPIServer ranks
     mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

     # Normalize the accepted formats to a list, so that a single int goes
     # through exactly the same checks (including timeout) as a list
     if isinstance(target_server,list) and (len(target_server)>=1) and all(isinstance(server, int) for server in target_server):
         servers_to_check = target_server
     elif isinstance(target_server,int):
         servers_to_check = [target_server]
     else:
         casalog.post("target_server has wrong format (%s), accepted formats are int and list(int)"
                      % str(type(target_server)),"SEVERE",casalog_call_origin)
         return None

     # Each server must exist and must not be in timeout condition
     for server in servers_to_check:
         if server not in mpi_server_rank_list:
             casalog.post("Server #%s does not exist" % str(server),"SEVERE",casalog_call_origin)
             return None
         if self.__monitor_client.get_server_status_keyword(server,'timeout'):
             casalog.post("Server #%s has timed out" % str(server),"SEVERE",casalog_call_origin)
             return None

     return servers_to_check
Esempio n. 4
0
 def control_service_request_broadcast(self,request,logger=None):
     """Send the given control service request to every MPI server.

     request -- passed unchanged to control_service_request_send(); its
                'signal' entry is only read when a logger is given
     logger  -- optional object exposing post(message, priority, origin);
                when given, one DEBUG message is posted per server
     """

     log_origin = "MPICommunicator::control_service_request_send_all"

     for server_rank in MPIEnvironment.mpi_server_rank_list():
         if logger is not None:
             message = "Sending %s service signal to server %s" % (request['signal'],str(server_rank))
             logger.post(message,"DEBUG",log_origin)
         self.control_service_request_send(request=request,server=server_rank)
Esempio n. 5
0
 def control_service_request_broadcast(self,request,logger=None):
     """Forward one control service request to all MPI servers, one by one.

     When logger is not None, a DEBUG message naming the request signal
     and the destination rank is posted before each send.
     """

     must_log = logger is not None
     target_ranks = MPIEnvironment.mpi_server_rank_list()

     for target_rank in target_ranks:
         if must_log:
             logger.post(
                 "Sending %s service signal to server %s" % (request['signal'],str(target_rank)),
                 "DEBUG",
                 "MPICommunicator::control_service_request_send_all")
         self.control_service_request_send(request=request,server=target_rank)
Esempio n. 6
0
 def __init__(self):
     """Set up channel identifiers and one communicator per message flow.

     On any failure all channels are set to -1, all communicators to
     None, and an Exception is raised whose message contains the
     processor rank and the original error text.
     """

     # Initialize communicators
     try:
         self.__command_channel = 2
         self.__ping_status_channel = 1
         self.__control_service_channel = 0
         self.__command_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__ping_status_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__command_response_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__ping_status_response_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__control_service_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__control_service_response_communicator = MPIEnvironment.mpi_comm_world_factory()

         # Register exit handler in case MPICommandClient exit handler is not initialized
         # NOTE: It is not guaranteed that __del__() methods are called
         #       for objects that still exist when the interpreter exits.
         self.__finalize_mpi_environment = True
         # jagonzal: This exit function must be registered only for the client
         if MPIEnvironment.is_mpi_client:
             atexit.register(self.finalize_server_and_client_mpi_environment)

     # "except ... as" is accepted by Python 2.6+ and Python 3 alike,
     # unlike the comma form which is a syntax error on Python 3
     except Exception as instance:
         # Leave the object in a recognizably unusable state
         self.__command_channel = -1
         self.__ping_status_channel = -1
         self.__control_service_channel = -1
         self.__command_request_communicator = None
         self.__ping_status_request_communicator = None
         self.__command_response_communicator = None
         self.__ping_status_response_communicator = None
         self.__control_service_request_communicator = None
         self.__control_service_response_communicator = None
         msg = "Exception initializing MPICommunicator at processor with rank "
         msg = msg + "%s: %s" % (str(MPIEnvironment.mpi_processor_rank),str(instance))
         raise Exception(msg)
Esempio n. 7
0
 def __init__(self):
     """Create the channel identifiers and the request/response communicators.

     If anything fails, channels are reset to -1 and communicators to
     None before raising an Exception that reports the processor rank
     together with the original error text.
     """

     # Initialize communicators
     try:
         self.__command_channel = 2
         self.__ping_status_channel = 1
         self.__control_service_channel = 0
         self.__command_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__ping_status_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__command_response_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__ping_status_response_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__control_service_request_communicator = MPIEnvironment.mpi_comm_world_factory()
         self.__control_service_response_communicator = MPIEnvironment.mpi_comm_world_factory()

         # Register exit handler in case MPICommandClient exit handler is not initialized
         # NOTE: It is not guaranteed that __del__() methods are called
         #       for objects that still exist when the interpreter exits.
         self.__finalize_mpi_environment = True
         # jagonzal: This exit function must be registered only for the client
         if MPIEnvironment.is_mpi_client:
             atexit.register(self.finalize_server_and_client_mpi_environment)

     # The "as" form parses on Python 2.6+ and Python 3; the comma form
     # is rejected by Python 3
     except Exception as instance:
         # Mark every channel/communicator as unavailable
         self.__command_channel = -1
         self.__ping_status_channel = -1
         self.__control_service_channel = -1
         self.__command_request_communicator = None
         self.__ping_status_request_communicator = None
         self.__command_response_communicator = None
         self.__ping_status_response_communicator = None
         self.__control_service_request_communicator = None
         self.__control_service_response_communicator = None
         msg = "Exception initializing MPICommunicator at processor with rank "
         msg = msg + "%s: %s" % (str(MPIEnvironment.mpi_processor_rank),str(instance))
         raise Exception(msg)
Esempio n. 8
0
        def __monitor_status_service(self):
            """Ping all MPI servers in rounds and flag the ones that stay silent.

            Runs while self.__monitor_status_service_on is true. In each
            round a server without a pending pong gets a new ping request;
            a server with a pending pong has its check counter increased and
            is marked as 'timeout' (once) when timeouts are enabled and the
            estimated waiting time exceeds the configured timeout. The
            running flag is raised on entry and lowered on exit.
            """

            casalog_call_origin = "MPIMonitorClient::monitor_status_service"

            # Mark service as running
            self.__monitor_status_service_running = True

            server_ranks = MPIEnvironment.mpi_server_rank_list()

            while self.__monitor_status_service_on:
                for server_rank in server_ranks:

                    if self.__server_status_list[server_rank]['pong_pending']:
                        # Still waiting for an answer: the waiting time is
                        # estimated as (number of checks) x (heartbeat period)
                        self.__server_status_list[server_rank]['pong_checks'] += 1
                        elapsed_time = (
                            MPIEnvironment.mpi_monitor_status_service_heartbeat
                            * self.__server_status_list[server_rank]['pong_checks'])
                        timeout_reached = (
                            MPIEnvironment.mpi_monitor_status_service_timeout_enabled
                            and (elapsed_time > MPIEnvironment.mpi_monitor_status_service_timeout)
                            and (not self.__server_status_list[server_rank]['timeout']))
                        # Notify only the first time the timeout condition is met
                        if timeout_reached:
                            casalog.post(
                                "Ping status response from server %s not received "
                                "in the last %ss. Setting its status to 'timeout'"
                                % (str(server_rank), str(int(elapsed_time))),
                                "SEVERE", casalog_call_origin)
                            self.__server_status_list[server_rank]['timeout'] = True
                        continue

                    # No ping outstanding: send a new one and start counting
                    try:
                        self.__communicator.ping_status_request_send(server=server_rank)
                        self.__server_status_list[server_rank]['ping_time'] = time.time()
                        self.__server_status_list[server_rank]['pong_pending'] = True
                        self.__server_status_list[server_rank]['pong_checks'] = 0
                    except:
                        formatted_traceback = traceback.format_exc()
                        casalog.post(
                            "Exception sending ping status request to server %s: %s"
                            % (str(server_rank), str(formatted_traceback)),
                            "SEVERE", casalog_call_origin)

                # Wait one heartbeat period before the next round
                time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)

            # Mark service as not running
            self.__monitor_status_service_running = False
Esempio n. 9
0
 def get_nodes(self):
     """Return the distinct 'processor' values reported for all servers.

     The 'processor' status keyword of every server rank is read from the
     monitor client; duplicates are dropped via a set, so the order of
     the returned list is not defined.
     """

     hostnames = [self.__monitor_client.get_server_status_keyword(server_id,'processor')
                  for server_id in MPIEnvironment.mpi_server_rank_list()]

     return list(set(hostnames))
Esempio n. 10
0
 def push(self, variables, targets=None):
     """Set variables in a sub-set of engines

     variables -- anything accepted by dict(); sent as the request parameters
     targets   -- None or 'all' to address every server, otherwise an
                  iterable of server ranks

     Returns whatever the blocking "push" command request returns.
     """

     # Resolve the destination servers
     use_all_servers = targets is None or targets == 'all'
     destination = MPIEnvironment.mpi_server_rank_list() if use_all_servers else list(targets)

     # Issue the push request and hand its result back for further processing
     return self.__command_client.push_command_request("push",
                                                       block=True,
                                                       target_server=destination,
                                                       parameters=dict(variables))
Esempio n. 11
0
        def __send_start_service_signal(self):
            """Broadcast the start signal and wait for every server to answer.

            The request carries the global casa dictionary and the log mode.
            For each response the server's processor name and PID are stored
            in the monitor client. The method polls, sleeping between probes,
            until a response has been received from every server rank.
            """

            casalog_call_origin = "MPICommandClient::send_start_service_signal"

            casalog.post("Sending start service signal to all servers", "INFO", casalog_call_origin)

            # Prepare start service request
            start_request = {
                "signal": "start",
                "casa": casa,  # global casa dictionary to be used by the servers
                "logmode": self.__log_mode,
            }

            # Send request to all servers
            self.__communicator.control_service_request_broadcast(start_request, casalog)

            # Ranks whose response is still outstanding
            pending_ranks = MPIEnvironment.mpi_server_rank_list()
            while len(pending_ranks) > 0:

                # Nothing to read yet: wait a bit and probe again
                if not self.__communicator.control_service_response_probe():
                    time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)
                    continue

                # The response tells which server has started
                response = self.__communicator.control_service_response_recv()
                rank = response["rank"]

                # Store processor name and PID info in the MPIMonitorClient
                for keyword in ("processor", "pid"):
                    self.__monitor_client.set_server_status_keyword(rank, keyword, response[keyword])

                # This server is done
                pending_ranks.remove(rank)

                started_msg = "Server with rank %s started at %s with PID %s" % (
                    str(rank), str(response["processor"]), str(response["pid"]))
                casalog.post(started_msg, "INFO", casalog_call_origin)

            casalog.post("Received response from all servers to start service signal", "INFO", casalog_call_origin)
Esempio n. 12
0
 def __monitor_status_service(self):
     """Periodically ping every MPI server and detect missing answers.

     Loops while self.__monitor_status_service_on is true. A server with
     no pending pong receives a new ping request; otherwise its check
     counter grows and, once the estimated waiting time exceeds the
     configured timeout, the server is flagged as 'timeout' (reported
     only the first time). The running flag is set on entry and cleared
     on exit.
     """

     casalog_call_origin = "MPIMonitorClient::monitor_status_service"

     # Mark service as running
     self.__monitor_status_service_running = True

     server_ranks = MPIEnvironment.mpi_server_rank_list()

     while self.__monitor_status_service_on:
         for server_rank in server_ranks:

             if self.__server_status_list[server_rank]['pong_pending']:
                 # Waiting time is estimated as checks x heartbeat period
                 self.__server_status_list[server_rank]['pong_checks'] += 1
                 elapsed_time = (MPIEnvironment.mpi_monitor_status_service_heartbeat
                                 * self.__server_status_list[server_rank]['pong_checks'])
                 # Notify when a server reaches timeout condition
                 if elapsed_time > MPIEnvironment.mpi_monitor_status_service_timeout:
                     if not self.__server_status_list[server_rank]['timeout']:
                         casalog.post("Ping status response from server %s not received in the last %ss" %
                                      (str(server_rank),str(int(elapsed_time))),"SEVERE",casalog_call_origin)
                         self.__server_status_list[server_rank]['timeout'] = True
                 continue

             # No ping outstanding: send one and reset the check counter
             try:
                 self.__communicator.ping_status_request_send(server=server_rank)
                 self.__server_status_list[server_rank]['ping_time'] = time.time()
                 self.__server_status_list[server_rank]['pong_pending'] = True
                 self.__server_status_list[server_rank]['pong_checks'] = 0
             except:
                 formatted_traceback = traceback.format_exc()
                 casalog.post("Exception sending ping status request to server %s: %s" %
                              (str(server_rank),str(formatted_traceback)),"SEVERE",casalog_call_origin)

         # Sleep before next round
         time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)

     # Mark service as not running
     self.__monitor_status_service_running = False
Esempio n. 13
0
        def __validate_target_servers(self, target_server):
            """Validate a target server specification.

            target_server: a single int rank, or a non-empty list of int ranks.

            Returns the validated ranks as a list (a single int is wrapped in
            a one-element list, a list is returned as the same object), or
            None when the format is wrong, a rank is not in the MPI server
            rank list, or the monitor client reports the server as timed out.
            Every rejection is posted to casalog with SEVERE priority.
            """

            casalog_call_origin = "MPICommandClient::validate_target_servers"

            # Get list of valid MPIServer ranks
            mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

            # Normalize both accepted formats to a list, so a single int is
            # subject to the same checks (including timeout) as a list
            if (
                isinstance(target_server, list)
                and (len(target_server) >= 1)
                and all(isinstance(server, int) for server in target_server)
            ):
                servers_to_check = target_server
            elif isinstance(target_server, int):
                servers_to_check = [target_server]
            else:
                casalog.post(
                    "target_server has wrong format (%s), accepted formats are int and list(int)"
                    % str(type(target_server)),
                    "SEVERE",
                    casalog_call_origin,
                )
                return None

            # Each server must exist and must not be in timeout condition
            for server in servers_to_check:
                if server not in mpi_server_rank_list:
                    casalog.post("Server #%s does not exist" % str(server), "SEVERE", casalog_call_origin)
                    return None
                if self.__monitor_client.get_server_status_keyword(server, "timeout"):
                    casalog.post("Server #%s has timed out" % str(server), "SEVERE", casalog_call_origin)
                    return None

            return servers_to_check
Esempio n. 14
0
 def pgc(self,commands,block=True):
     """Run commands on the engines, in one of two modes:

        - commands is a dictionary: each value is submitted as a command
          to the server given by its key (non-blocking submission), then
          the responses for all submitted jobs are retrieved, honouring
          the block argument

        - commands is anything else: it is submitted as a single command
          to all servers, honouring the block argument
     """

     # Single command: run it everywhere
     if not isinstance(commands,dict):
         every_server = MPIEnvironment.mpi_server_rank_list()
         return self.__command_client.push_command_request(commands,
                                                           block=block,
                                                           target_server=every_server)

     # Dictionary of commands: spawn one non-blocking job per target server
     submitted_jobs = []
     for target, command in commands.items():
         job = self.__command_client.push_command_request(command,
                                                          block=False,
                                                          target_server=target)
         submitted_jobs.append(job[0])

     # If user requests blocking mode wait until execution is completed
     return self.__command_client.get_command_response(submitted_jobs,block=block,verbose=True)
Esempio n. 15
0
 def __send_start_service_signal(self):
     """Send the start signal to all servers and collect their responses.

     The request includes the global casa dictionary and the log mode.
     Each response provides the rank, processor name and PID of a started
     server; processor and PID are recorded in the monitor client. The
     method returns once every server rank has responded.
     """

     casalog_call_origin = "MPICommandClient::send_start_service_signal"

     casalog.post("Sending start service signal to all servers","INFO",casalog_call_origin)

     # Prepare start service request
     request = {
         'signal': 'start',
         'casa': casa, # global casa dictionary to be used by the servers
         'logmode': self.__log_mode,
     }

     # Send request to all servers
     self.__communicator.control_service_request_broadcast(request,casalog)

     # Servers that have not answered yet
     waiting_for = MPIEnvironment.mpi_server_rank_list()
     while len(waiting_for)>0:

         # No response available: sleep and poll again
         if not self.__communicator.control_service_response_probe():
             time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)
             continue

         # Receive start service response to know what server has started
         response = self.__communicator.control_service_response_recv()
         rank = response['rank']

         # Store processor name and PID info in the MPIMonitorClient
         for keyword in ('processor','pid'):
             self.__monitor_client.set_server_status_keyword(rank,keyword,response[keyword])

         # Remove server from the waiting list
         waiting_for.remove(rank)

         # Communicate that server response to start service signal has been received
         started_details = (str(rank),str(response['processor']),str(response['pid']))
         casalog.post("Server with rank %s started at %s with PID %s" % started_details,
                      "INFO",casalog_call_origin)

     casalog.post("Received response from all servers to start service signal","INFO",casalog_call_origin)
Esempio n. 16
0
        def serve(self):
            """Run the server main loop until a stop signal or a client timeout.

            Starts the services, reports the server status to the client and
            then polls the control service channel. Every control request
            carries a 'command' string that is compiled and executed here,
            plus a 'send_response' flag telling whether the server status
            must be sent back afterwards. The loop ends when an executed
            command sets stop_service_requested or when the monitor server
            reports a client timeout. Services are then stopped and, if
            requested by the stop signal, the MPI environment is finalized.
            """

            casalog_call_origin = "MPICommandServer::serve"

            # First start command and ping status services
            casalog.post("Starting services...","INFO",casalog_call_origin)
            self.start_services()

            # Notify to MPICommandClient that service is up and running
            self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

            # Keep serving until a stop signal service is received
            control_service_request = {}
            stop_service_requested = False
            while ((not stop_service_requested) and (not self.__monitor_server.get_client_timeout())):

                # Check if there is an incoming control service msg
                msg_available = False
                try:
                    msg_available = self.__communicator.control_service_request_probe()
                except:
                    msg_available = False
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception checking if control service msg is available: %s"
                                 % str(formatted_traceback),"SEVERE",casalog_call_origin)

                # Handle the control service msg, if any
                if msg_available:

                    # Receive control service msg
                    msg_received = False
                    control_service_request = {}
                    try:
                        control_service_request = self.__communicator.control_service_request_recv()
                        msg_received = True
                    except:
                        msg_received = False
                        formatted_traceback = traceback.format_exc()
                        casalog.post("Exception receiving control service msg: %s"
                                     % str(formatted_traceback),"SEVERE",casalog_call_origin)
                        continue

                    # Process control service msg
                    cmd = None
                    send_response = False
                    if msg_received:
                        try:
                            cmd = control_service_request['command']
                            send_response = control_service_request['send_response']
                            code = compile(cmd, casalog_call_origin, 'exec')
                            # NOTE(review): the command text comes from the control
                            # channel and is executed verbatim; it must only ever
                            # originate from a trusted client.
                            # NOTE(review): the loop condition depends on the executed
                            # code rebinding the local stop_service_requested, so the
                            # exec has to stay inside this function. Under Python 3
                            # exec() cannot rebind function locals -- confirm before
                            # porting.
                            exec(code)
                            casalog.post("Control signal %s successfully handled by server %s"
                                         % (str(cmd),str(MPIEnvironment.mpi_processor_rank)),
                                         "INFO",casalog_call_origin)
                        except:
                            formatted_traceback = traceback.format_exc()
                            casalog.post("Exception handling control signal command %s in server %s: %s"
                                         % (str(control_service_request),
                                            str(MPIEnvironment.mpi_processor_rank),
                                            str(formatted_traceback)),
                                         "SEVERE",casalog_call_origin)

                    # Notify to MPICommandClient that control signal has been processed
                    if send_response:
                        try:
                            self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())
                        except:
                            formatted_traceback = traceback.format_exc()
                            casalog.post("Exception sending response to control signal command %s in server %s: %s"
                                         % (str(cmd),str(MPIEnvironment.mpi_processor_rank),str(formatted_traceback)),
                                         "SEVERE",casalog_call_origin)

                time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

            # Process stop service request
            if stop_service_requested:

                # Check if force mode is needed
                force_command_request_interruption = control_service_request['force_command_request_interruption']
                finalize_mpi_environment = control_service_request['finalize_mpi_environment']
                busy = self.__monitor_server.get_status('busy')
                if force_command_request_interruption and busy:
                    casalog.post("force-stop service signal received, stopping services, " +
                                 "command request handler service will be interrupted...","INFO",casalog_call_origin)
                else:
                    force_command_request_interruption = False
                    casalog.post("stop service signal received, stopping services...","INFO",casalog_call_origin)
            else:
                force_command_request_interruption = True
                # No stop request was received on this path, so there is no
                # 'finalize_mpi_environment' value to read; without a default the
                # check below fails with an unbound name. Finalization is skipped
                # because the client is not responding.
                finalize_mpi_environment = False
                casalog.post("client timeout, forcing disconnection, " +
                             "command request handler service will be interrupted.." ,"INFO",casalog_call_origin)

            # Stop services
            self.stop_services(force_command_request_interruption)

            # Finalize MPI environment
            if finalize_mpi_environment:
                try:
                    casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
                    MPIEnvironment.finalize_mpi_environment()
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception finalizing MPI environment %s"
                                 % str(formatted_traceback),"SEVERE",casalog_call_origin)

            # Exit
            casalog.post("Exiting","INFO",casalog_call_origin)
Esempio n. 17
0
        def get_engines(self):
            """Return the MPI server rank list as provided by MPIEnvironment."""

            engine_ranks = MPIEnvironment.mpi_server_rank_list()
            return engine_ranks
Esempio n. 18
0
        def stop_services(self, force_command_request_interruption=False):
            """Stop the client-side services and send the stop signal to the servers.

            When the life cycle state is 0 (services not started) or 2 (already
            finalized) only a WARN entry is logged and the method returns.
            Otherwise the monitoring and command request/response services are
            stopped, pending command requests are reported as aborted, the
            process-control and stop signals are sent, the MPI environment is
            finalized (unless a server is in timeout condition) and the life
            cycle state is set to 2.

            Args:
                force_command_request_interruption: forwarded to the servers in
                    the stop signal; forced to True when any server is in
                    timeout condition.

            Any exception is caught and printed instead of being propagated.
            """

            # jagonzal: This method is called by the atexit module and if it fails it
            # causes ipython to crash, producing a report and waiting for user input
            # so we cannot risk under any circumstances such an event
            try:

                casalog_call_origin = "MPICommandClient::stop_services"

                if self.__life_cycle_state == 0:
                    casalog.post("Services not started", "WARN", casalog_call_origin)
                    return
                elif self.__life_cycle_state == 2:
                    casalog.post("MPICommandClient life cycle finalized", "WARN", casalog_call_origin)
                    return

                # Check if any server is in timeout condition before stopping the monitoring service
                server_rank_timeout = self.__monitor_client.get_server_timeout()
                finalize_mpi_environment = True
                if len(server_rank_timeout) > 0:
                    # A timed-out server disables MPI finalization and forces
                    # interruption of whatever command it is running
                    finalize_mpi_environment = False
                    force_command_request_interruption = True

                # Stop client monitoring services
                self.__monitor_client.stop_services()

                # Notify command requests which are going to be interrupted,
                # i.e. those without an entry in the response list
                # (assumes the response list is a dict, so `in` matches has_key)
                for command_request_id in self.__command_request_list:
                    if command_request_id not in self.__command_response_list:
                        casalog.post(
                            "Aborting command request with id# %s: %s"
                            % (str(command_request_id), str(self.__command_request_list[command_request_id])),
                            "SEVERE",
                            casalog_call_origin,
                        )

                # Stop client command request-response services
                self.__stop_command_request_queue_service()
                self.__stop_command_response_handler_service()

                # Shutdown plotms process
                self.__send_control_signal(
                    {"command": "pm.killApp()", "signal": "process_control"}, check_response=True
                )

                # Shutdown virtual frame buffer
                self.__send_control_signal(
                    {"command": "self.stop_virtual_frame_buffer()", "signal": "process_control"}, check_response=True
                )

                # Send stop signal to servers
                self.__send_control_signal(
                    {
                        "command": "stop_service_requested = True",
                        "signal": "stop",
                        "force_command_request_interruption": force_command_request_interruption,
                        "finalize_mpi_environment": finalize_mpi_environment,
                    },
                    check_response=False,
                )

                # Finalize MPI environment
                if finalize_mpi_environment:
                    try:
                        casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
                        MPIEnvironment.finalize_mpi_environment()
                    except:
                        formatted_traceback = traceback.format_exc()
                        casalog.post(
                            "Exception finalizing MPI environment %s" % str(formatted_traceback),
                            "SEVERE",
                            casalog_call_origin,
                        )
                else:
                    casalog.post(
                        "MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                        % str(server_rank_timeout),
                        "SEVERE",
                        casalog_call_origin,
                    )

                # UnMark MPI environment to be finalized by the MPICommunicator destructor
                # (Either because it is already finalized or due to a
                # server not responsive that prevents graceful finalization)
                self.__communicator.set_finalize_mpi_environment(False)

                # Set life cycle state
                self.__life_cycle_state = 2

                casalog.post("All services stopped", "INFO", casalog_call_origin)

            except:
                # Deliberately catch everything: see the note at the top of the method.
                # Parenthesised form prints the same text under the Python 2 print statement.
                formatted_traceback = traceback.format_exc()
                print("Unhandled exception in MPICommandClient::stop_services %s" % (formatted_traceback))
Esempio n. 19
0
 def get_engines(self):
     """Return whatever MPIEnvironment.mpi_server_rank_list() reports.

     Thin pass-through; no instance state is read.
     """
     engine_ranks = MPIEnvironment.mpi_server_rank_list()
     return engine_ranks
Esempio n. 20
0
        def serve(self):
            """Run the server control loop until a stop is requested or the client times out.

            Starts the services, sends the monitor status to the client, then
            polls the control service channel. Each received request's
            'command' is compiled and exec'd in this method's scope and, when
            the request's 'send_response' is true, the monitor status is sent
            back. After the loop the services are stopped and, if the stop
            request asked for it, the MPI environment is finalized.

            NOTE(review): the stop signal is expected to end the loop by having
            the exec'd command assign ``stop_service_requested``; this relies on
            Python 2 exec semantics for function locals, so the local names in
            this method must not be renamed.
            NOTE(review): commands received over the control channel are
            exec'd as-is; this assumes a trusted client.
            """

            casalog_call_origin = "MPICommandServer::serve"

            # First start command and ping status services
            casalog.post("Starting services...", "INFO", casalog_call_origin)
            self.start_services()

            # Notify to MPICommandClient that service is up and running
            self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

            # Keep serving until a stop signal service is received
            control_service_request = {}
            stop_service_requested = False
            while (not stop_service_requested) and (not self.__monitor_server.get_client_timeout()):

                # Check if there is an incoming control service msg
                msg_available = False
                try:
                    msg_available = self.__communicator.control_service_request_probe()
                except:
                    msg_available = False
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception checking if control service msg is available: %s" % str(formatted_traceback),
                        "SEVERE",
                        casalog_call_origin,
                    )

                # Receive and process the pending control service msg, if any
                if msg_available:

                    # Receive control service msg
                    msg_received = False
                    control_service_request = {}
                    try:
                        control_service_request = self.__communicator.control_service_request_recv()
                        msg_received = True
                    except:
                        msg_received = False
                        formatted_traceback = traceback.format_exc()
                        casalog.post(
                            "Exception receiving control service msg: %s" % str(formatted_traceback),
                            "SEVERE",
                            casalog_call_origin,
                        )
                        # Nothing to process; go straight back to the loop condition
                        continue

                    # Process control service msg
                    cmd = None
                    send_response = False
                    if msg_received:
                        try:
                            cmd = control_service_request["command"]
                            send_response = control_service_request["send_response"]
                            code = compile(cmd, casalog_call_origin, "exec")
                            # Runs in this scope so the command can reach self and
                            # the loop's local variables
                            exec(code)
                            casalog.post(
                                "Control signal %s successfully handled by server %s"
                                % (str(cmd), str(MPIEnvironment.mpi_processor_rank)),
                                "INFO",
                                casalog_call_origin,
                            )
                        except:
                            formatted_traceback = traceback.format_exc()
                            casalog.post(
                                "Exception handling control signal command %s in server %s: %s"
                                % (
                                    str(control_service_request),
                                    str(MPIEnvironment.mpi_processor_rank),
                                    str(formatted_traceback),
                                ),
                                "SEVERE",
                                casalog_call_origin,
                            )

                    # Notify to MPICommandClient that control signal has been processed
                    if send_response:
                        try:
                            self.__communicator.control_service_response_send(
                                response=self.__monitor_server.get_status()
                            )
                        except:
                            formatted_traceback = traceback.format_exc()
                            casalog.post(
                                "Exception sending response to control signal command %s in server %s: %s"
                                % (str(cmd), str(MPIEnvironment.mpi_processor_rank), str(formatted_traceback)),
                                "SEVERE",
                                casalog_call_origin,
                            )

                time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

            # Process stop service request
            if stop_service_requested:

                # Check if force mode is needed
                force_command_request_interruption = control_service_request["force_command_request_interruption"]
                finalize_mpi_environment = control_service_request["finalize_mpi_environment"]
                busy = self.__monitor_server.get_status("busy")
                if force_command_request_interruption and busy:
                    casalog.post(
                        "force-stop service signal received, stopping services, "
                        + "command request handler service will be interrupted...",
                        "INFO",
                        casalog_call_origin,
                    )
                else:
                    force_command_request_interruption = False
                    casalog.post("stop service signal received, stopping services...", "INFO", casalog_call_origin)
            else:
                force_command_request_interruption = True
                # No stop request was received on this path, so there is no
                # 'finalize_mpi_environment' value to read; without a default the
                # check below raised NameError. Skip finalization, presumably the
                # safe choice with an unresponsive client (TODO confirm).
                finalize_mpi_environment = False
                casalog.post(
                    "client timeout, forcing disconnection, " + "command request handler service will be interrupted..",
                    "INFO",
                    casalog_call_origin,
                )

            # Stop services
            self.stop_services(force_command_request_interruption)

            # Finalize MPI environment
            if finalize_mpi_environment:
                try:
                    casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
                    MPIEnvironment.finalize_mpi_environment()
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception finalizing MPI environment %s" % str(formatted_traceback),
                        "SEVERE",
                        casalog_call_origin,
                    )

            # Exit
            casalog.post("Exiting", "INFO", casalog_call_origin)
Esempio n. 21
0
 def stop_services(self,force_command_request_interruption=False):
     """Stop the client-side services and send the stop signal to the servers.

     When the life cycle state is 0 (services not started) or 2 (already
     finalized) only a WARN entry is logged and the method returns.
     Otherwise the monitoring and command request/response services are
     stopped, pending command requests are reported as aborted, the
     process-control and stop signals are sent, the MPI environment is
     finalized (unless a server is in timeout condition) and the life
     cycle state is set to 2.

     Args:
         force_command_request_interruption: forwarded to the servers in
             the stop signal; forced to True when any server is in
             timeout condition.

     Any exception is caught and printed instead of being propagated.
     """

     # jagonzal: This method is called by the atexit module and if it fails it
     # causes ipython to crash, producing a report and waiting for user input
     # so we cannot risk under any circumstances such an event
     try:

         casalog_call_origin = "MPICommandClient::stop_services"

         if self.__life_cycle_state == 0:
             casalog.post("Services not started","WARN",casalog_call_origin)
             return
         elif self.__life_cycle_state == 2:
             casalog.post("MPICommandClient life cycle finalized","WARN",casalog_call_origin)
             return

         # Check if any server is in timeout condition before stopping the monitoring service
         server_rank_timeout = self.__monitor_client.get_server_timeout()
         finalize_mpi_environment = True
         if len(server_rank_timeout) > 0:
             # A timed-out server disables MPI finalization and forces
             # interruption of whatever command it is running
             finalize_mpi_environment = False
             force_command_request_interruption = True

         # Stop client monitoring services
         self.__monitor_client.stop_services()

         # Notify command requests which are going to be interrupted,
         # i.e. those without an entry in the response list
         # (assumes the response list is a dict, so `in` matches has_key)
         for command_request_id in self.__command_request_list:
             if command_request_id not in self.__command_response_list:
                 casalog.post("Aborting command request with id# %s: %s"
                              % (str(command_request_id),str(self.__command_request_list[command_request_id])),
                              "SEVERE",casalog_call_origin)

         # Stop client command request-response services
         self.__stop_command_request_queue_service()
         self.__stop_command_response_handler_service()

         # Shutdown plotms process
         self.__send_control_signal({'command':'pm.killApp()',
                                     'signal':'process_control'},
                                    check_response=True)

         # Shutdown virtual frame buffer
         self.__send_control_signal({'command':'self.stop_virtual_frame_buffer()',
                                     'signal':'process_control'},
                                    check_response=True)

         # Send stop signal to servers
         self.__send_control_signal({'command':'stop_service_requested = True',
                                     'signal':'stop',
                                     'force_command_request_interruption':force_command_request_interruption,
                                     'finalize_mpi_environment':finalize_mpi_environment},
                                    check_response=False)

         # Finalize MPI environment
         if finalize_mpi_environment:
             try:
                 casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
                 MPIEnvironment.finalize_mpi_environment()
             except:
                 formatted_traceback = traceback.format_exc()
                 casalog.post("Exception finalizing MPI environment %s"
                              % str(formatted_traceback),"SEVERE",casalog_call_origin)
         else:
             casalog.post("MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                          % str(server_rank_timeout),"SEVERE",casalog_call_origin)

         # UnMark MPI environment to be finalized by the MPICommunicator destructor
         # (Either because it is already finalized or due to a
         # server not responsive that prevents graceful finalization)
         self.__communicator.set_finalize_mpi_environment(False)

         # Set life cycle state
         self.__life_cycle_state = 2

         casalog.post("All services stopped","INFO",casalog_call_origin)

     except:
         # Deliberately catch everything: see the note at the top of the method.
         # Parenthesised form prints the same text under the Python 2 print statement.
         formatted_traceback = traceback.format_exc()
         print("Unhandled exception in MPICommandClient::stop_services %s" %(formatted_traceback))