class __MPIInterfaceCoreImpl:
    """Implementation of the MPIInterfaceCore singleton interface.

    Holds an ``MPICommandClient`` (used to start/stop services and to push
    command requests / fetch command responses) and an ``MPIMonitorClient``
    (used to query per-server status keywords).
    """

    # NOTE(review): a class with this same name appears to be defined a
    # second time later in this file; if both live in the same scope the
    # later definition rebinds the name. Confirm the duplicate is intended.

    def __init__(self):
        self.__command_client = MPICommandClient()
        self.__monitor_client = MPIMonitorClient()

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list, wrapping a non-iterable scalar."""
        try:
            return list(value)
        except TypeError:
            # e.g. a single integer job id or server rank
            return [value]

    def start_cluster(self, cl_file=None):
        """Apply ``MPIInterface._log_mode`` and start the client services.

        ``cl_file`` is accepted for interface compatibility but is not used
        by this implementation.
        """
        self.__command_client.set_log_mode(MPIInterface._log_mode)
        self.__command_client.start_services()

    def stop_cluster(self):
        """Stop the command client services."""
        self.__command_client.stop_services()

    def get_engines(self):
        """Return the list of server ranks reported by ``MPIEnvironment``."""
        return MPIEnvironment.mpi_server_rank_list()

    def get_nodes(self):
        """Return the distinct 'processor' status values of all servers.

        Duplicates are removed. ``dict.fromkeys`` is used instead of ``set``
        so that, on Python 3.7+, hosts come back in first-seen server order
        rather than in arbitrary order.
        """
        hostnames = [
            self.__monitor_client.get_server_status_keyword(server, 'processor')
            for server in MPIEnvironment.mpi_server_rank_list()
        ]
        return list(dict.fromkeys(hostnames))

    def pgc(self, commands, block=True):
        """Execute commands on the engines.

        This method has two modes:

        - When ``commands`` is a dictionary, each value is executed taking
          its key as the target server (equivalent to several calls to
          odo/execute). The requests are always pushed in non-blocking
          mode; ``block`` only controls whether the responses are waited
          for.
        - When ``commands`` is a single command, it is executed on all
          servers, with ``block`` forwarded to the request.

        Returns whatever the command client returns for the response query
        (dictionary mode) or for the pushed request (single-command mode).
        """
        if isinstance(commands, dict):
            # Spawn one non-blocking job per target server
            job_ids = []
            for server, cmd in commands.items():
                request_ids = self.__command_client.push_command_request(
                    cmd, block=False, target_server=server)
                job_ids.append(request_ids[0])

            # If the user requests blocking mode, wait until completion
            return self.__command_client.get_command_response(
                job_ids, block=block, verbose=True)

        # Single command: execute it on every server
        return self.__command_client.push_command_request(
            commands, block=block,
            target_server=MPIEnvironment.mpi_server_rank_list())

    def odo(self, job, nodes):
        """Execute a job on a subset of engines in non-blocking mode.

        Returns the value of the non-blocking ``push_command_request`` call.
        """
        return self.__command_client.push_command_request(
            job, block=False, target_server=nodes)

    def execute(self, job, nodes):
        """Execute a job on a subset of engines in blocking mode.

        Returns the value of the blocking ``push_command_request`` call.
        """
        return self.__command_client.push_command_request(
            job, block=True, target_server=nodes)

    def push(self, variables, targets=None):
        """Set variables in a sub-set of engines.

        ``targets`` may be ``None`` or ``'all'`` (every server), an iterable
        of server ranks, or a single non-iterable rank. ``variables`` is
        copied into a new dict before being sent as the request parameters.

        Returns the result of the blocking "push" request.
        """
        if targets is None or targets == 'all':
            target_server = MPIEnvironment.mpi_server_rank_list()
        else:
            target_server = self._as_list(targets)

        return self.__command_client.push_command_request(
            "push", block=True, target_server=target_server,
            parameters=dict(variables))

    def pull(self, varname="", targets=None):
        """Retrieve a variable from a sub-set of engines.

        ``targets`` of ``None`` or ``'all'`` (same convention as ``push``)
        evaluates ``varname`` on every server via ``pgc``; any other value
        is forwarded unchanged to ``execute``.

        Returns a dict mapping each result's ``'server'`` entry to its
        ``'ret'`` entry.
        """
        if targets is None or targets == 'all':
            result_list = self.pgc(varname)
        else:
            result_list = self.execute(varname, nodes=targets)

        return {result['server']: result['ret'] for result in result_list}

    def check_job(self, jobId, verbose=True):
        """Check the status of a non-blocking job.

        ``jobId`` may be an iterable of job ids or a single non-iterable id.

        Returns ``True`` when every requested id has a successful response
        available (also for an empty id list), ``False`` otherwise.

        Raises ``Exception`` with the aggregated per-server messages (also
        posted to ``casalog`` as SEVERE) if any response is unsuccessful.
        """
        job_ids = self._as_list(jobId)
        responses = self.__command_client.get_command_response(
            job_ids, block=False, verbose=verbose)

        # Aggregate exceptions and completed job ids
        errors = []
        completed_jobs = []
        for response in responses:
            if response['successful']:
                completed_jobs.append(response['id'])
            else:
                errors.append(
                    "Exception executing command in server %s: %s"
                    % (response['server'], response['traceback']))

        # Re-throw aggregated exception
        if errors:
            error_msg = "\n".join(errors)
            casalog.post(error_msg, "SEVERE", "MPIInterfaceCore::check_job")
            raise Exception(error_msg)

        # Completed only if every requested job has a response
        return all(job_id in completed_jobs for job_id in job_ids)

    def get_server_status(self):
        """Return the command client's server status."""
        return self.__command_client.get_server_status()

    def get_command_request_list(self):
        """Return the command client's command request list."""
        return self.__command_client.get_command_request_list()

    def get_command_response_list(self):
        """Return the command client's command response list."""
        return self.__command_client.get_command_response_list()
class __MPIInterfaceCoreImpl:
    """Implementation of the MPIInterfaceCore singleton interface.

    Holds an ``MPICommandClient`` (used to start/stop services and to push
    command requests / fetch command responses) and an ``MPIMonitorClient``
    (used to query per-server status keywords).
    """

    # NOTE(review): a class with this same name appears to be defined
    # earlier in this file as well; if both live in the same scope this
    # later definition rebinds the name. Confirm the duplicate is intended.

    def __init__(self):
        self.__command_client = MPICommandClient()
        self.__monitor_client = MPIMonitorClient()

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list, wrapping a non-iterable scalar."""
        try:
            return list(value)
        except TypeError:
            # e.g. a single integer job id or server rank
            return [value]

    def start_cluster(self, cl_file=None):
        """Apply ``MPIInterface._log_mode`` and start the client services.

        ``cl_file`` is accepted for interface compatibility but is not used
        by this implementation.
        """
        self.__command_client.set_log_mode(MPIInterface._log_mode)
        self.__command_client.start_services()

    def stop_cluster(self):
        """Stop the command client services."""
        self.__command_client.stop_services()

    def get_engines(self):
        """Return the list of server ranks reported by ``MPIEnvironment``."""
        return MPIEnvironment.mpi_server_rank_list()

    def get_nodes(self):
        """Return the distinct 'processor' status values of all servers.

        Duplicates are removed. ``dict.fromkeys`` is used instead of ``set``
        so that, on Python 3.7+, hosts come back in first-seen server order
        rather than in arbitrary order.
        """
        hostnames = [
            self.__monitor_client.get_server_status_keyword(server, 'processor')
            for server in MPIEnvironment.mpi_server_rank_list()
        ]
        return list(dict.fromkeys(hostnames))

    def pgc(self, commands, block=True):
        """Execute commands on the engines.

        This method has two modes:

        - When ``commands`` is a dictionary, each value is executed taking
          its key as the target server (equivalent to several calls to
          odo/execute). The requests are always pushed in non-blocking
          mode; ``block`` only controls whether the responses are waited
          for.
        - When ``commands`` is a single command, it is executed on all
          servers, with ``block`` forwarded to the request.

        Returns whatever the command client returns for the response query
        (dictionary mode) or for the pushed request (single-command mode).
        """
        if isinstance(commands, dict):
            # Spawn one non-blocking job per target server
            job_ids = []
            for server, cmd in commands.items():
                request_ids = self.__command_client.push_command_request(
                    cmd, block=False, target_server=server)
                job_ids.append(request_ids[0])

            # If the user requests blocking mode, wait until completion
            return self.__command_client.get_command_response(
                job_ids, block=block, verbose=True)

        # Single command: execute it on every server
        return self.__command_client.push_command_request(
            commands, block=block,
            target_server=MPIEnvironment.mpi_server_rank_list())

    def odo(self, job, nodes):
        """Execute a job on a subset of engines in non-blocking mode.

        Returns the value of the non-blocking ``push_command_request`` call.
        """
        return self.__command_client.push_command_request(
            job, block=False, target_server=nodes)

    def execute(self, job, nodes):
        """Execute a job on a subset of engines in blocking mode.

        Returns the value of the blocking ``push_command_request`` call.
        """
        return self.__command_client.push_command_request(
            job, block=True, target_server=nodes)

    def push(self, variables, targets=None):
        """Set variables in a sub-set of engines.

        ``targets`` may be ``None`` or ``'all'`` (every server), an iterable
        of server ranks, or a single non-iterable rank. ``variables`` is
        copied into a new dict before being sent as the request parameters.

        Returns the result of the blocking "push" request.
        """
        if targets is None or targets == 'all':
            target_server = MPIEnvironment.mpi_server_rank_list()
        else:
            target_server = self._as_list(targets)

        return self.__command_client.push_command_request(
            "push", block=True, target_server=target_server,
            parameters=dict(variables))

    def pull(self, varname="", targets=None):
        """Retrieve a variable from a sub-set of engines.

        ``targets`` of ``None`` or ``'all'`` (same convention as ``push``)
        evaluates ``varname`` on every server via ``pgc``; any other value
        is forwarded unchanged to ``execute``.

        Returns a dict mapping each result's ``'server'`` entry to its
        ``'ret'`` entry.
        """
        if targets is None or targets == 'all':
            result_list = self.pgc(varname)
        else:
            result_list = self.execute(varname, nodes=targets)

        return {result['server']: result['ret'] for result in result_list}

    def check_job(self, jobId, verbose=True):
        """Check the status of a non-blocking job.

        ``jobId`` may be an iterable of job ids or a single non-iterable id.

        Returns ``True`` when every requested id has a successful response
        available (also for an empty id list), ``False`` otherwise.

        Raises ``Exception`` with the aggregated per-server messages (also
        posted to ``casalog`` as SEVERE) if any response is unsuccessful.
        """
        job_ids = self._as_list(jobId)
        responses = self.__command_client.get_command_response(
            job_ids, block=False, verbose=verbose)

        # Aggregate exceptions and completed job ids
        errors = []
        completed_jobs = []
        for response in responses:
            if response['successful']:
                completed_jobs.append(response['id'])
            else:
                errors.append(
                    "Exception executing command in server %s: %s"
                    % (response['server'], response['traceback']))

        # Re-throw aggregated exception
        if errors:
            error_msg = "\n".join(errors)
            casalog.post(error_msg, "SEVERE", "MPIInterfaceCore::check_job")
            raise Exception(error_msg)

        # Completed only if every requested job has a response
        return all(job_id in completed_jobs for job_id in job_ids)

    def get_server_status(self):
        """Return the command client's server status."""
        return self.__command_client.get_server_status()

    def get_command_request_list(self):
        """Return the command client's command request list."""
        return self.__command_client.get_command_request_list()

    def get_command_response_list(self):
        """Return the command client's command response list."""
        return self.__command_client.get_command_response_list()