def init_handlers(self):
    """Build the message dispatch table and create the record handler.

    After this call ``self.msg_handlers`` maps each handled message-type
    constant to the bound method that processes it, and
    ``self.record_handler`` holds a fresh ``GDBMIRecordHandler``.
    """
    # Message dispatch table, keyed by message type.
    dispatch = {}
    dispatch[DIE_MSG] = self.die_handler
    dispatch[QUIT_MSG] = self.quit_handler
    dispatch[OUT_MSG] = self.out_handler
    dispatch[VARPRINT_RES_MSG] = self.varprint_res_handler
    dispatch[LOAD_FILE] = self.load_file_handler
    self.msg_handlers = dispatch

    # Record handler used for GDB/MI records.
    self.record_handler = GDBMIRecordHandler()
def init_handlers(self):
    """Build the message dispatch table and create the record handler.

    After this call ``self.msg_handlers`` maps each handled message-type
    constant to the bound method that processes it, and
    ``self.record_handler`` holds a ``GDBMIRecordHandler`` constructed
    with ``self.identifier``.
    """
    # Message dispatch table, keyed by message type.
    dispatch = {}
    dispatch[DIE_MSG] = self.die_handler
    dispatch[QUIT_MSG] = self.quit_handler
    dispatch[OUT_MSG] = self.out_handler
    dispatch[VARPRINT_RES_MSG] = self.varprint_res_handler
    self.msg_handlers = dispatch

    # Record handler used for GDB/MI records.
    self.record_handler = GDBMIRecordHandler(self.identifier)
class GDBFE (GDBMICmd):
    """The front-end to PGDB.

    Runs two threads: a remote thread (remote_body) that brings up the
    communication layer and receives messages from the back-end daemons, and
    the main thread (local_body) that runs the interactive command loop.

    """

    def init_handlers(self):
        """Initialize the message handlers and the record handler."""
        # Set up message handlers.
        self.msg_handlers = {
            DIE_MSG: self.die_handler,
            QUIT_MSG: self.quit_handler,
            OUT_MSG: self.out_handler,
            VARPRINT_RES_MSG: self.varprint_res_handler,
        }
        # Now record handlers.
        self.record_handler = GDBMIRecordHandler(self.identifier)

    def remote_init(self):
        """Initialize things related to the remote communication and back-end daemons.

        Returns True on success. On failure, sets the remote_up event,
        interrupts the main thread and returns False.

        """
        self.comm = CommunicatorFE(True) # Initialize with locking.
        # One of {pid} and {launcher, launcher_args} will not be none, based
        # upon the command line input parsing.
        ret = self.comm.init_lmon(self.lmon_attach, pid = self.lmon_pid,
                                  launcher = self.lmon_launcher,
                                  launcher_args = self.lmon_launcher_argv)
        if not ret:
            # Terminate. Note at this point main is still waiting on the
            # remote_up event, so we have to set it.
            self.remote_up.set()
            self.interrupt_main()
            return False
        ret = self.comm.init_mrnet(local = self.local_launch)
        if not ret:
            # Terminate. See prior comment about remote_up.
            self.remote_up.set()
            self.interrupt_main()
            return False
        self.identifier = GDBMIRecordIdentifier()
        self.varobjs = {}
        for rank in self.comm.get_mpiranks():
            self.varobjs[rank] = VariableObjectManager()
        self.init_handlers()
        self.pprinter = GDBMIPrettyPrinter(self.identifier)
        self.sleep_time = 0.1
        self.blocks = []
        try:
            self.blocks += gdbconf.default_blocks
        except AttributeError:
            # gdbconf does not define default_blocks; start with none.
            pass
        return True

    def __init__(self):
        """Initialize some local things; the remote initialization must be done separately."""
        GDBMICmd.__init__(self)
        self.quit = False
        self.is_shutdown = False
        # Need to disable readline.
        self.completekey = None
        # Event triggered when remote_init completes in the remote thread.
        self.remote_up = threading.Event()
        # Temporary list for building up aggregated records from OUT messages.
        self.arec_list = []
        # Output history for expanding commands.
        self.output_history = []
        # Get our PID for signals.
        self.my_pid = os.getpid()

    def interrupt_main(self):
        """Interrupt the main thread.

        This works because in Python, the main thread is the one that processes signals.
        If using Python 3, this could be replaced with signal.pthread_kill (but this will
        work in Python 3).

        """
        os.kill(self.my_pid, signal.SIGINT)

    def parse_args(self):
        """Parse the command-line arguments and set appropriate variables.

        Sets lmon_attach, lmon_pid, lmon_launcher, lmon_launcher_argv and
        local_launch. Prints usage and exits if neither -p/--pid nor -a was
        given, or if an option that requires a value is missing one.

        """
        # Optparse unfortunately doesn't work here.
        self.lmon_attach = None
        self.lmon_pid = None
        self.lmon_launcher = None
        self.lmon_launcher_argv = None
        self.local_launch = False
        argv = sys.argv
        # A while loop is used so that an option's value can be skipped;
        # rebinding the index inside a for/range loop has no effect.
        i = 1
        while i < len(argv):
            arg = argv[i]
            if arg == "-p" or arg == "--pid":
                self.lmon_attach = True
                if i + 1 >= len(argv):
                    print("Must provide a PID with {0}.".format(arg))
                    sys.exit(0)
                try:
                    self.lmon_pid = int(argv[i + 1])
                except ValueError:
                    print("Must provide a valid PID.")
                    sys.exit(0)
                i += 1
            elif arg == "--launcher":
                if i + 1 >= len(argv):
                    print("Must provide a launcher with --launcher.")
                    sys.exit(0)
                self.lmon_launcher = argv[i + 1]
                i += 1
            elif arg == "--local":
                self.local_launch = True
            elif arg == "-a":
                # lmon_launcher always exists (set to None above), so test
                # its value rather than its presence.
                if self.lmon_launcher is None:
                    self.lmon_launcher = "srun"
                self.lmon_attach = False
                # Everything after -a is passed through verbatim.
                self.lmon_launcher_argv = argv[i + 1:]
                break
            i += 1
        if self.lmon_attach is None:
            print("Arguments: (one of -p/--pid and -a is required)")
            print("-p, --pid <pid>: attach to srun process <pid>")
            print("-a <options>: pass <options> verbatim to the resource manager for launching.")
            print("--launcher <launcher>: use binary <launcher> to launch.")
            print("--local: deploy for debugging just on the local node")
            sys.exit(0)

    def shutdown(self):
        """Shut down the network if not already shut down."""
        if not self.comm.is_shutdown():
            self.comm.shutdown()

    def __del__(self):
        """Invoke shutdown()."""
        # Need to catch a potential exception when comm does not exist.
        # This occurs if there is an error before comm init.
        try:
            self.shutdown()
        except AttributeError:
            pass

    def die_handler(self, msg):
        """Handle a die message. Presently does nothing."""
        pass

    def quit_handler(self, msg):
        """Handle a quit message. Presently does nothing."""
        pass

    def out_handler(self, msg):
        """Handle an out message by adding the arec to the temporary list."""
        if self.arec_list:
            self.arec_list = combine_aggregation_lists(self.arec_list, msg.record)
        else:
            self.arec_list = msg.record

    def process_out_messages(self):
        """Go through the temporary arec_list and pretty-print records.

        Each aggregated record is pushed onto the front of output_history
        (trimmed to gdbconf.history_length). Only the smallest substitution
        class of each record is printed; the rest can be seen with expand.

        """
        for arec in self.arec_list:
            # Add the record to the history (newest first).
            self.output_history.insert(0, arec)
            if len(self.output_history) > gdbconf.history_length:
                # Remove the last (oldest) element.
                self.output_history.pop()
            subst_classes = arec.get_substitution_classes()
            subst_key = 0
            if len(subst_classes) > 1:
                # For multiple substitution classes, we only print the smallest.
                # This gets the key of the smallest substitution class.
                subst_key = min(enumerate(subst_classes), key = lambda x: len(x[1]))[0]
            # Just get the first VID, since all substitutions for it are the same.
            record = arec.get_record(subst_classes[subst_key][0])
            ranks = Interval(lis = subst_classes[subst_key])
            # Note that this may not work if things don't support lists of ranks.
            if self.record_handler.handle(record, rank = ranks):
                self.pprinter.pretty_print(record, ranks)
                if len(subst_classes) > 1:
                    print("Some results from {0} omitted; use expand to view.".format(arec.get_ids()))
            # for subst_class in subst_classes:
            #     # Just get first VID, since all subsitutions for it are the same.
            #     record = arec.get_record(subst_class[0])
            #     ranks = Interval(lis = subst_class)
            #     # Note that this may not work if things don't support lists of ranks.
            #     if self.record_handler.handle(record, rank = ranks):
            #         self.pprinter.pretty_print(record, ranks)
        self.arec_list = []

    def varprint_res_handler(self, msg):
        """Handle a varprint result message by pretty-printing the variable objects."""
        if msg.err:
            print("[{0}] {1}".format(msg.rank, msg.msg))
        elif msg.varobj:
            self.varobjs[msg.rank].add_var_obj(msg.varobj)
            # The last character of the pretty-printed text is dropped.
            print(self.pprinter.varobj_pretty_print(msg.varobj, tag = msg.rank)[:-1])
        else:
            print("[{0}] Received a bad varobj!".format(msg.rank))

    def parse_filter_spec(self, spec):
        """Parse a filter specification into a record type and class.

        Returns (record_type, record_class); record_class is None when only
        one word was given. Returns (None, None) for an empty specification.

        """
        split = spec.lower().split()
        if len(split) == 0:
            print("Bad filter specification.")
            return None, None
        record_type = split[0]
        record_class = None
        if len(split) > 1:
            record_class = split[1]
        return record_type, record_class

    def do_filter(self, cmd, targets = None):
        """Tell the back-end daemons to filter something."""
        record_type, record_class = self.parse_filter_spec(cmd)
        if not record_type:
            return
        self.comm.send(GDBMessage(FILTER_MSG, filter_type = record_type,
                                  filter_class = record_class),
                       self.comm.broadcast)

    def do_unfilter(self, cmd, targets = None):
        """Tell the back-end daemons to unfilter something."""
        record_type, record_class = self.parse_filter_spec(cmd)
        if not record_type:
            return
        self.comm.send(GDBMessage(UNFILTER_MSG, filter_type = record_type,
                                  filter_class = record_class),
                       self.comm.broadcast)

    def parse_proc_spec(self, proc_spec):
        """Parse a processor specification.

        The specification is a comma-separated list of ranks or lo-hi ranges.
        Returns an Interval for the listed ranks, -1 if the specification is
        "all" or "-1" (meaning every processor), or None (after printing a
        message) if the specification cannot be parsed.

        """
        targets = []
        # Handle some special cases for sending to all processors.
        if proc_spec.lower() == "all" or proc_spec == "-1":
            return -1
        for group in proc_spec.split(","):
            tup = group.split("-")
            try:
                if len(tup) == 1:
                    targets.append((int(tup[0]), int(tup[0])))
                else:
                    targets.append((int(tup[0]), int(tup[1])))
            except ValueError:
                print("Bad processor specification.")
                return None
        return Interval(intervals = targets)

    def do_proc(self, cmd, targets = None):
        """Handle the "proc" command to send commands to a subset of remote nodes based on MPI rank."""
        if targets:
            print("Recursive proc is not recursive.")
            return
        # The processor specification is everything before the first space.
        proc_spec, sep, line = cmd.partition(" ")
        line = line.strip()
        if not sep or not proc_spec:
            print("Bad processor specification.")
            return
        targets = self.parse_proc_spec(proc_spec)
        if targets is None:
            # parse_proc_spec already reported the problem.
            return
        if isinstance(targets, int):
            # parse_proc_spec returns -1 for "all processors".
            targets = self.comm.get_mpiranks()
        if not (targets - self.comm.get_mpiranks()).empty():
            print("Out-of-range processor specification.")
            return
        if not line:
            print("No command given to proc.")
            return
        cmd = self.resolve_gdbmi_command(line, err = False)
        if cmd:
            self.comm.send(GDBMessage(CMD_MSG, command = cmd, ranks = targets), targets)
        else:
            # Not a GDB/MI command; try one of our own do_* commands.
            split = line.split()
            cmd = split[0]
            rest = " ".join(split[1:])
            if hasattr(self, "do_" + cmd):
                func = getattr(self, "do_" + cmd)
                func(rest, targets = targets)

    def do_block(self, cmd, targets = None):
        """Block all output from a subset of nodes."""
        to_block = self.parse_proc_spec(cmd)
        if not to_block:
            return
        all_ranks = self.comm.get_mpiranks()
        if isinstance(to_block, int):
            # parse_proc_spec returns -1 for "all processors".
            to_block = all_ranks
        # This is quite inefficient and will not scale.
        for target in to_block.members():
            if target not in self.blocks and target in all_ranks:
                self.blocks.append(target)

    def do_unblock(self, cmd, targets = None):
        """Unblock output from a subset of nodes."""
        to_unblock = self.parse_proc_spec(cmd)
        if not to_unblock:
            return
        if isinstance(to_unblock, int):
            # parse_proc_spec returns -1 for "all processors".
            to_unblock = self.comm.get_mpiranks()
        # Rebuild the list rather than deleting by index: deleting ascending
        # indices shifts the remaining elements and removes the wrong ones.
        self.blocks = [rank for rank in self.blocks if rank not in to_unblock]

    def do_varprint(self, cmd, targets = None):
        """Run the varprint command."""
        if not targets:
            targets = self.comm.get_mpiranks()
        var = cmd
        # Strip quotes, if present.
        if var.startswith('"') and var.endswith('"'):
            var = var[1:-1]
        self.comm.send(GDBMessage(VARPRINT_MSG, name = var, ranks = targets), targets)

    def do_varassign(self, cmd, targets = None):
        """Run the varassign command."""
        if not targets:
            targets = self.comm.get_mpiranks()
        split = cmd.split("=")
        if len(split) != 2:
            print("varassign format is: var = val")
            return
        var = split[0].strip()
        # Strip quotes, if present.
        if var.startswith('"') and var.endswith('"'):
            var = var[1:-1]
        val = split[1].strip()
        for rank in targets.members():
            full_name = self.varobjs[rank].get_full_name(var)
            if not full_name:
                print("Variable not found on rank {0}.".format(rank))
                continue
            self.comm.send(GDBMessage(CMD_MSG,
                                      command = Command("var-assign",
                                                        args = ('"' + full_name + '"',
                                                                '"' + val + '"')),
                                      ranks = rank),
                           rank)

    def do_help(self, cmd, targets = None):
        """Run the help command."""
        if not targets:
            # Because this makes the most sense, unless told otherwise, we run this on one processor.
            targets = 0
        self.comm.send(GDBMessage(CMD_MSG,
                                  command = Command("interpreter-exec",
                                                    args = ("console", '"help ' + cmd + '"')),
                                  ranks = targets),
                       targets)

    def do_kill(self, cmd, targets = None):
        """Kill all targets being debugged."""
        # This always sends to all targets, for now.
        print("Sending SIGTERM to all inferiors. (May need to step them for them to die.)")
        self.comm.send(GDBMessage(KILL_MSG), self.comm.broadcast)

    def do_quit(self, cmd, targets = None):
        """Gracefully quit PGDB."""
        self.quit = True
        self.comm.send(GDBMessage(CMD_MSG, command = Command("gdb-exit")), self.comm.broadcast)

    def do_expand(self, cmd, targets = None):
        """Expand output.

        Use: [proc <processor-spec>] expand [history-item]
        Expand history-item for the given processors. history-item defaults
        to 0, the most recent output.

        """
        if not targets:
            targets = self.comm.get_mpiranks()
        # cmd holds only the arguments (the command name is not included),
        # so the history item, if given, is the first word.
        args = cmd.split()
        history_item = 0
        if args:
            if not args[0].isdigit():
                print("Incorrect history specification.")
                return
            history_item = int(args[0])
        if history_item >= len(self.output_history):
            print("No such history item {0}".format(history_item))
            return
        arec = self.output_history[history_item]
        # We only care about the IDs that are present in both.
        ids = targets.intersect(arec.get_ids())
        for vid in ids:
            self.pprinter.pretty_print(arec.get_record(vid), Interval(lis = [vid]))

    def dispatch_gdbmi_command(self, command):
        """Send a GDB command to all back-ends; returns False if the network is shut down."""
        if self.comm.is_shutdown():
            return False
        return self.comm.send(GDBMessage(CMD_MSG, command = command), self.comm.broadcast)

    def handle_msg(self, msg):
        """Handle a received message by dispatching on its message type."""
        if msg.msg_type in self.msg_handlers:
            self.msg_handlers[msg.msg_type](msg)
        else:
            print("Got a message {0} with no handler.".format(msg.msg_type))

    def remote_body(self):
        """The main remote body thread.

        This initializes the remote infrastructure, and receives and processes data.

        """
        # Must do the init inside of this thread, or else LaunchMON steals stdin.
        if not self.remote_init():
            return False
        # Signal main thread we can use stdin.
        self.remote_up.set()
        print("GDB deployed to {0} hosts and {1} processors.".format(
            self.comm.get_mrnet_network_size(), self.comm.get_proctab_size()))
        while not self.quit and not self.comm.all_nodes_exited():
            # Receive data, if any.
            msg = self.comm.recv(blocking = False)
            if msg is not None:
                # Received data.
                self.handle_msg(msg)
            else:
                # Nothing pending: flush accumulated output, then sleep to
                # keep from beating up the CPU too much.
                self.process_out_messages()
                time.sleep(self.sleep_time)
        self.shutdown()
        print("Remote shut down.")
        self.interrupt_main()

    def local_body(self):
        """The local command input loop."""
        # Wait until we can use stdin.
        try:
            self.remote_up.wait()
            # Restore the stdin descriptor saved by run().
            os.dup2(self.stdin_copy, 0)
            os.close(self.stdin_copy)
            self.cmdloop()
        except KeyboardInterrupt:
            print("Terminating.")
            sys.exit(0)

    def run(self):
        """Start the remote thread and run the local command input loop."""
        self.parse_args()
        # This is part of a hack to keep LaunchMON from stealing stdin.
        self.stdin_copy = os.dup(0)
        os.close(0)
        self.remote_thread = threading.Thread(target = self.remote_body)
        self.remote_thread.daemon = True
        self.remote_thread.start()
        self.local_body()
def init_gdb(self):
    """Initialize GDB-related things, and launch the GDB process.

    Sets up the record handler and its startup watchers, optionally enables
    SBD (per gdbconf.use_sbd), launches GDB, configures it, creates one
    inferior per entry in the process table and attaches to every process.

    Raises RuntimeError if the process table is empty or if any GDB setup,
    add-inferior or attach command fails.

    """
    # Indexed by MPI rank.
    self.varobjs = {}
    # Maps tokens to MPI rank.
    self.token_rank_map = {}
    self.record_handler = GDBMIRecordHandler()
    self.record_handler.add_type_handler(
        self._watch_thread_created,
        set([mi.gdbmi_records.ASYNC_NOTIFY_THREAD_CREATED]))
    self.startup_stop_hid = self.record_handler.add_type_handler(
        self._watch_startup_stop,
        set([mi.gdbmi_records.ASYNC_EXEC_STOPPED]))
    gdb_env = {}
    if gdbconf.use_sbd:
        self.sbd = SBDBE(self.comm)
        gdb_env["LD_PRELOAD"] = gdbconf.sbd_bin
    else:
        self.sbd = None

    # The Command objects are built up front, in this order, before GDB is
    # launched; each is paired with the error raised if GDB rejects it.
    setup_cmds = [
        (Command("enable-pretty-printing"),
         "Could not enable pretty printing!"),
        (Command("gdb-set", args=["target-async", "on"]),
         "Could not enable target-async!"),
        (Command("gdb-set", args=["pagination", "off"]),
         "Could not disable pagination!"),
        (Command("gdb-set", args=["non-stop", "on"]),
         "Could not enable non-stop!"),
    ]
    add_inferior_cmd = Command("add-inferior")
    self.gdb = GDBMachineInterface(gdb=gdbconf.gdb_path,
                                   gdb_args=["-x", gdbconf.gdb_init_path],
                                   env=gdb_env)
    procs = self.comm.get_proctab()
    if len(procs) == 0:
        # Fail with a clear message instead of an IndexError on procs[0].
        raise RuntimeError("Process table is empty; nothing to debug!")
    # Set up GDB.
    for setup_cmd, failure_msg in setup_cmds:
        if not self.run_gdb_command(setup_cmd):
            raise RuntimeError(failure_msg)

    # Create inferiors and set up MPI rank/inferior map.
    # First inferior is created by default.
    self.rank_inferior_map = {procs[0].mpirank: 'i1'}
    self.inferior_rank_map = {'i1': procs[0].mpirank}
    i = 2
    for proc in procs[1:]:
        # Hackish: Assume that the inferiors follow the iN naming scheme.
        self.rank_inferior_map[proc.mpirank] = 'i' + str(i)
        self.inferior_rank_map['i' + str(i)] = proc.mpirank
        i += 1
        if not self.run_gdb_command(add_inferior_cmd, no_thread=True):
            raise RuntimeError('Could not add inferior i{0}!'.format(i - 1))

    # Maps MPI ranks to associated threads and vice-versa.
    self.rank_thread_map = {}
    self.thread_rank_map = {}

    if self.sbd:
        # Set up the list of executables for load file checking.
        self.sbd.set_executable_names(
            [os.path.basename(proc.pd.executable_name) for proc in procs])

    # Attach processes.
    for proc in procs:
        attach_cmd = Command(
            "target-attach",
            opts={'--thread-group': self.rank_inferior_map[proc.mpirank]},
            args=[proc.pd.pid])
        if not self.run_gdb_command(attach_cmd, proc.mpirank,
                                    no_thread=True):
            raise RuntimeError("Could not attach to rank {0}!".format(
                proc.mpirank))
        self.varobjs[proc.mpirank] = VariableObjectManager()
        # Kludge to fix GDB not outputting records for the i1 attach.
        if self.rank_inferior_map[proc.mpirank] == 'i1':
            time.sleep(0.1)
class GDBBE:
    """The back-end GDB daemon process.

    Drives one GDB process that debugs the processes in this daemon's process
    table (one GDB inferior per process), runs commands received from the
    front-end and sends the resulting records back.

    """

    def init_gdb(self):
        """Initialize GDB-related things, and launch the GDB process.

        Sets up the record handler and its startup watchers, optionally
        enables SBD (per gdbconf.use_sbd), launches GDB, configures it,
        creates one inferior per entry in the process table and attaches to
        every process.

        Raises RuntimeError if the process table is empty or if any GDB
        setup, add-inferior or attach command fails.

        """
        # Indexed by MPI rank.
        self.varobjs = {}
        # Maps tokens to MPI rank.
        self.token_rank_map = {}
        self.record_handler = GDBMIRecordHandler()
        self.record_handler.add_type_handler(
            self._watch_thread_created,
            set([mi.gdbmi_records.ASYNC_NOTIFY_THREAD_CREATED]))
        self.startup_stop_hid = self.record_handler.add_type_handler(
            self._watch_startup_stop,
            set([mi.gdbmi_records.ASYNC_EXEC_STOPPED]))
        gdb_env = {}
        if gdbconf.use_sbd:
            self.sbd = SBDBE(self.comm)
            gdb_env["LD_PRELOAD"] = gdbconf.sbd_bin
        else:
            self.sbd = None

        # The Command objects are built up front, in this order, before GDB
        # is launched; each is paired with the error raised on failure.
        setup_cmds = [
            (Command("enable-pretty-printing"),
             "Could not enable pretty printing!"),
            (Command("gdb-set", args=["target-async", "on"]),
             "Could not enable target-async!"),
            (Command("gdb-set", args=["pagination", "off"]),
             "Could not disable pagination!"),
            (Command("gdb-set", args=["non-stop", "on"]),
             "Could not enable non-stop!"),
        ]
        add_inferior_cmd = Command("add-inferior")
        self.gdb = GDBMachineInterface(gdb=gdbconf.gdb_path,
                                       gdb_args=["-x", gdbconf.gdb_init_path],
                                       env=gdb_env)
        procs = self.comm.get_proctab()
        if len(procs) == 0:
            # Fail with a clear message instead of an IndexError on procs[0].
            raise RuntimeError("Process table is empty; nothing to debug!")
        # Set up GDB.
        for setup_cmd, failure_msg in setup_cmds:
            if not self.run_gdb_command(setup_cmd):
                raise RuntimeError(failure_msg)

        # Create inferiors and set up MPI rank/inferior map.
        # First inferior is created by default.
        self.rank_inferior_map = {procs[0].mpirank: 'i1'}
        self.inferior_rank_map = {'i1': procs[0].mpirank}
        i = 2
        for proc in procs[1:]:
            # Hackish: Assume that the inferiors follow the iN naming scheme.
            self.rank_inferior_map[proc.mpirank] = 'i' + str(i)
            self.inferior_rank_map['i' + str(i)] = proc.mpirank
            i += 1
            if not self.run_gdb_command(add_inferior_cmd, no_thread=True):
                raise RuntimeError(
                    'Could not add inferior i{0}!'.format(i - 1))

        # Maps MPI ranks to associated threads and vice-versa.
        self.rank_thread_map = {}
        self.thread_rank_map = {}

        if self.sbd:
            # Set up the list of executables for load file checking.
            self.sbd.set_executable_names(
                [os.path.basename(proc.pd.executable_name) for proc in procs])

        # Attach processes.
        for proc in procs:
            attach_cmd = Command(
                "target-attach",
                opts={'--thread-group': self.rank_inferior_map[proc.mpirank]},
                args=[proc.pd.pid])
            if not self.run_gdb_command(attach_cmd, proc.mpirank,
                                        no_thread=True):
                raise RuntimeError("Could not attach to rank {0}!".format(
                    proc.mpirank))
            self.varobjs[proc.mpirank] = VariableObjectManager()
            # Kludge to fix GDB not outputting records for the i1 attach.
            if self.rank_inferior_map[proc.mpirank] == 'i1':
                time.sleep(0.1)

    def _watch_thread_created(self, record, **kwargs):
        """Handle watching thread creation.

        Records the new thread under the rank that owns the record's thread
        group, in both rank_thread_map and thread_rank_map. Thread IDs are
        stored as ints.

        """
        inferior = record.thread_group_id
        thread_id = int(record.thread_id)
        rank = self.inferior_rank_map[inferior]
        if rank in self.rank_thread_map:
            self.rank_thread_map[rank].append(thread_id)
        else:
            self.rank_thread_map[rank] = [thread_id]
        # Always ensure smallest thread is first.
        self.rank_thread_map[rank].sort()
        self.thread_rank_map[thread_id] = rank

    def _watch_startup_stop(self, record, **kwargs):
        """Handle watching for initial inferior stops during startup.

        Once as many stops have been seen as there are processes in the
        process table, startup is marked done and this handler removes itself.

        """
        self.startup_done_count += 1
        if self.startup_done_count == self.comm.get_proctab_size():
            self.doing_startup = False
            self.record_handler.remove_handler(self.startup_stop_hid)
            # Reset token counts to sync with front-end.
            self.token_rank_map = {}
            Command._cur_token = 0

    def kill_inferiors(self):
        """Terminate all targets being debugged.

        This sends SIGTERM. A failure to signal one process (for example
        because it has already exited) is reported and does not stop the
        remaining processes from being signalled.

        """
        for proc in self.comm.get_proctab():
            try:
                os.kill(proc.pd.pid, signal.SIGTERM)
            except OSError as err:
                print("Could not send SIGTERM to PID {0}: {1}".format(
                    proc.pd.pid, err))

    def run_gdb_command(self, command, ranks=None, no_thread=False):
        """Run a GDB command.

        command is a Command object representing the command.
        ranks is an Interval of the ranks to run the command on (a single int
        rank is also accepted).
        If ranks is None, run on the current inferior.
        If no_thread is True, this does not specify a particular thread.

        Returns True on success, False on error.

        """
        if isinstance(ranks, int):
            # Special case for a single int: wrap it so it can be iterated.
            ranks = Interval(ranks)
        if ranks is None:
            self.token_rank_map[command.token] = self.comm.get_mpiranks()
            return self.gdb.send(command)
        if command.get_opt('--thread') is not None:
            # If --thread provided, don't override it.
            no_thread = True
        for rank in ranks:
            if rank in self.rank_inferior_map:
                # Most recent option with same name takes precedence.
                if not no_thread and rank in self.rank_thread_map:
                    # Target the smallest known thread of this rank.
                    command.add_opt('--thread',
                                    self.rank_thread_map[rank][0])
                if not self.gdb.send(command):
                    return False
                self.token_rank_map[command.token] = rank
        return True

    def init_handlers(self):
        """Initialize message handlers used on data we receive over MRNet."""
        self.msg_handlers = {
            DIE_MSG: self.die_handler,
            CMD_MSG: self.cmd_handler,
            FILTER_MSG: self.filter_handler,
            UNFILTER_MSG: self.unfilter_handler,
            VARPRINT_MSG: self.varprint_handler,
            KILL_MSG: self.kill_handler,
            FILE_DATA: self.file_data_handler,
        }

    def init_filters(self):
        """Initialize default filters (currently none)."""
        self.filters = set()
        #an_lower = ASYNC_NOTIFY.lower()
        #self.filters = [
        #    (an_lower, "shlibs-updated"),
        #    (an_lower, "shlibs-added"),
        #    (an_lower, "shlibs-removed"),
        #    (an_lower, "library-loaded"),
        #    (an_lower, "thread-created"),
        #    (an_lower, "thread-group-added"),
        #    (an_lower, "thread-group-started"),
        #    (RESULT.lower(), "exit")
        #    ]

    def __init__(self):
        """Initialize LaunchMON, MRNet, GDB, and other things.

        Exits the process with status 1 if LaunchMON or MRNet initialization
        fails.

        """
        self.is_shutdown = False
        self.quit = False
        self.doing_startup = True
        self.startup_done_count = 0
        self.startup_arecs = []
        self.token_handlers = {}
        self.comm = CommunicatorBE()
        if not self.comm.init_lmon(sys.argv):
            sys.exit(1)
        if not self.comm.init_mrnet():
            # TODO: This should cleanly terminate LaunchMON, but does not.
            sys.exit(1)
        self.init_gdb()
        self.init_handlers()
        self.init_filters()
        self.variable_printer = VariablePrinter(self)

    def shutdown(self):
        """Cleanly shut things down if we have not already done so."""
        if not self.comm.is_shutdown():
            self.comm.shutdown()
        if self.sbd:
            self.sbd.cleanup()

    def __del__(self):
        """Invoke shutdown()."""
        # Exception guard if we have an error before comm init.
        try:
            self.shutdown()
        except AttributeError:
            pass

    def die_handler(self, msg):
        """Handle a die message by exiting."""
        sys.exit("Told to die.")

    def cmd_handler(self, msg):
        """Handle a CMD message by running the command.

        The message contains the following fields:
        command - A Command object to run.
        ranks - An optional interval of ranks on which to run.

        Commands received while startup is still in progress are ignored.

        """
        if self.doing_startup:
            print("Ignoring command during startup.")
            return
        if msg.command.command == "gdb-exit":
            # Special case for quit.
            self.quit = True
        ranks = self.comm.get_mpiranks()
        if hasattr(msg, "ranks"):
            ranks = msg.ranks
        if not self.run_gdb_command(msg.command, ranks):
            # TODO: Send die message.
            print("Managed to get a bad command '{0}'.".format(msg.command))

    def kill_handler(self, msg):
        """Handle a kill message, killing all processes."""
        self.kill_inferiors()

    def filter_handler(self, msg):
        """Handle a filter message by adding the filter."""
        self.filters.update(msg.filter_types)

    def unfilter_handler(self, msg):
        """Handle an unfilter message by removing the filter."""
        self.filters.difference_update(msg.filter_types)

    def varprint_handler(self, msg):
        """Handle the varprint message and begin sequence."""
        self.variable_printer.varprint_handler(msg)

    def is_filterable(self, record):
        """Check whether a given record can be filtered.

        Returns True if the record's type or any of its subtypes is in the
        current filter set, False otherwise.

        """
        record_set = record.record_subtypes.union([record.record_type])
        if record_set.intersection(self.filters):
            return True
        return False

    def file_data_handler(self, msg):
        """Handle a response with file data."""
        if self.sbd:
            self.sbd.file_data_handler(msg)
        else:
            print("Got SBD file data when SBD is not enabled")

    def _ranks_for_record(self, record):
        """Return the rank(s) a record read from GDB should be attributed to.

        Uses the record's token if it maps to a rank, then its thread ID, and
        otherwise falls back to all MPI ranks handled by this daemon.

        """
        if (record.token is not None and
                record.token in self.token_rank_map):
            return self.token_rank_map[record.token]
        if hasattr(record, "thread_id"):
            thread_id = record.thread_id
            if thread_id in self.thread_rank_map:
                return self.thread_rank_map[thread_id]
            # thread_rank_map is keyed by int (see _watch_thread_created),
            # so also try the integer form in case the record carries the
            # thread ID in another form, e.g. as a string.
            try:
                thread_id = int(thread_id)
            except (TypeError, ValueError):
                thread_id = None
            if thread_id is not None and thread_id in self.thread_rank_map:
                return self.thread_rank_map[thread_id]
        return self.comm.get_mpiranks()

    def main(self):
        """Main send/receive loop.

        This receives data on MRNet (non-blocking), processes the messages,
        and then sends any data that was read from GDB. This then sleeps for a
        short while to avoid heavy CPU use.

        Records produced while startup is in progress are held back and sent
        together with the first batch after startup completes. When told to
        quit, waits for GDB to exit and then shuts down.

        """
        while not self.quit:
            if self.sbd:
                # Check for data from the GDB process for LOAD_FILE.
                self.sbd.sbd_check()

            msg = self.comm.recv(blocking=False)
            if msg is not None:
                # Received data.
                if msg.msg_type in self.msg_handlers:
                    self.msg_handlers[msg.msg_type](msg)
                else:
                    print("Got a message {0} with no handler.".format(
                        msg.msg_type))

            # records and ranks are parallel lists.
            records = []
            ranks = []
            for record in self.gdb.read():
                self.record_handler.handle(record)
                if not self.is_filterable(record):
                    records.append(record)
                    ranks.append(self._ranks_for_record(record))
            if records:
                arecs = combine_records(records, ranks)
                if self.doing_startup:
                    # Hold output back until startup has finished.
                    self.startup_arecs = combine_aggregated_records(
                        self.startup_arecs + arecs)
                elif self.startup_arecs:
                    # First send after startup: include the held-back output.
                    arecs = combine_aggregated_records(
                        self.startup_arecs + arecs)
                    self.comm.send(GDBMessage(OUT_MSG, record=arecs),
                                   self.comm.frontend)
                    self.startup_arecs = None
                else:
                    self.comm.send(GDBMessage(OUT_MSG, record=arecs),
                                   self.comm.frontend)
            # Sleep a bit to reduce banging on the CPU.
            time.sleep(0.01)
        # Wait for GDB to exit, sleeping between polls rather than spinning.
        while self.gdb.is_running():
            time.sleep(0.01)
        # Shut everything else down.
        self.shutdown()
def init_gdb(self):
    """Initialize GDB-related things, and launch the GDB process.

    Sets up the record handler and its startup watchers, optionally enables
    SBD (per gdbconf.use_sbd), launches GDB, configures it, creates one
    inferior per entry in the process table and attaches to every process.

    Raises RuntimeError if the process table is empty or if any GDB setup,
    add-inferior or attach command fails.

    """
    # Indexed by MPI rank.
    self.varobjs = {}
    # Maps tokens to MPI rank.
    self.token_rank_map = {}
    self.record_handler = GDBMIRecordHandler()
    self.record_handler.add_type_handler(
        self._watch_thread_created,
        set([mi.gdbmi_records.ASYNC_NOTIFY_THREAD_CREATED]))
    self.startup_stop_hid = self.record_handler.add_type_handler(
        self._watch_startup_stop,
        set([mi.gdbmi_records.ASYNC_EXEC_STOPPED]))
    gdb_env = {}
    if gdbconf.use_sbd:
        self.sbd = SBDBE(self.comm)
        gdb_env["LD_PRELOAD"] = gdbconf.sbd_bin
    else:
        self.sbd = None

    # The Command objects are built up front, in this order, before GDB is
    # launched; each is paired with the error raised if GDB rejects it.
    setup_cmds = [
        (Command("enable-pretty-printing"),
         "Could not enable pretty printing!"),
        (Command("gdb-set", args=["target-async", "on"]),
         "Could not enable target-async!"),
        (Command("gdb-set", args=["pagination", "off"]),
         "Could not disable pagination!"),
        (Command("gdb-set", args=["non-stop", "on"]),
         "Could not enable non-stop!"),
    ]
    add_inferior_cmd = Command("add-inferior")
    self.gdb = GDBMachineInterface(gdb=gdbconf.gdb_path,
                                   gdb_args=["-x", gdbconf.gdb_init_path],
                                   env=gdb_env)
    procs = self.comm.get_proctab()
    if len(procs) == 0:
        # Fail with a clear message instead of an IndexError on procs[0].
        raise RuntimeError("Process table is empty; nothing to debug!")
    # Set up GDB.
    for setup_cmd, failure_msg in setup_cmds:
        if not self.run_gdb_command(setup_cmd):
            raise RuntimeError(failure_msg)

    # Create inferiors and set up MPI rank/inferior map.
    # First inferior is created by default.
    self.rank_inferior_map = {procs[0].mpirank: 'i1'}
    self.inferior_rank_map = {'i1': procs[0].mpirank}
    i = 2
    for proc in procs[1:]:
        # Hackish: Assume that the inferiors follow the iN naming scheme.
        self.rank_inferior_map[proc.mpirank] = 'i' + str(i)
        self.inferior_rank_map['i' + str(i)] = proc.mpirank
        i += 1
        if not self.run_gdb_command(add_inferior_cmd, no_thread=True):
            raise RuntimeError('Could not add inferior i{0}!'.format(i - 1))

    # Maps MPI ranks to associated threads and vice-versa.
    self.rank_thread_map = {}
    self.thread_rank_map = {}

    if self.sbd:
        # Set up the list of executables for load file checking.
        self.sbd.set_executable_names(
            [os.path.basename(proc.pd.executable_name) for proc in procs])

    # Attach processes.
    for proc in procs:
        attach_cmd = Command(
            "target-attach",
            opts={'--thread-group': self.rank_inferior_map[proc.mpirank]},
            args=[proc.pd.pid])
        if not self.run_gdb_command(attach_cmd, proc.mpirank,
                                    no_thread=True):
            raise RuntimeError("Could not attach to rank {0}!".format(
                proc.mpirank))
        self.varobjs[proc.mpirank] = VariableObjectManager()
        # Kludge to fix GDB not outputting records for the i1 attach.
        if self.rank_inferior_map[proc.mpirank] == 'i1':
            time.sleep(0.1)
class GDBBE:
    """The back-end GDB daemon process."""

    def init_gdb(self):
        """Initialize GDB-related state, launch GDB and attach to processes.

        Creates the record handler and registers the thread-created and
        startup-stop watchers, launches the GDB machine interface
        (preloading the SBD library when gdbconf.use_sbd is set), configures
        GDB, creates one inferior per process-table entry and attaches each
        inferior to its process.

        Raises:
            RuntimeError: if run_gdb_command reports failure for a set-up
                command, an add-inferior command or a target-attach command.
        """
        # Indexed by MPI rank.
        self.varobjs = {}
        # Maps tokens to MPI rank.
        self.token_rank_map = {}
        self.record_handler = GDBMIRecordHandler()
        self.record_handler.add_type_handler(
            self._watch_thread_created,
            set([mi.gdbmi_records.ASYNC_NOTIFY_THREAD_CREATED]))
        # Keep the handler id; _watch_startup_stop removes it when done.
        self.startup_stop_hid = self.record_handler.add_type_handler(
            self._watch_startup_stop,
            set([mi.gdbmi_records.ASYNC_EXEC_STOPPED]))

        gdb_env = {}
        if gdbconf.use_sbd:
            self.sbd = SBDBE(self.comm)
            gdb_env["LD_PRELOAD"] = gdbconf.sbd_bin
        else:
            self.sbd = None

        # The commands are built before GDB is launched, in this fixed
        # order, each paired with the error raised when it cannot be run.
        setup_cmds = [
            (Command("enable-pretty-printing"),
             "Could not enable pretty printing!"),
            (Command("gdb-set", args=["target-async", "on"]),
             "Could not enable target-async!"),
            (Command("gdb-set", args=["pagination", "off"]),
             "Could not disable pagination!"),
            (Command("gdb-set", args=["non-stop", "on"]),
             "Could not enable non-stop!"),
        ]
        add_inferior_cmd = Command("add-inferior")

        self.gdb = GDBMachineInterface(gdb=gdbconf.gdb_path,
                                       gdb_args=["-x", gdbconf.gdb_init_path],
                                       env=gdb_env)
        procs = self.comm.get_proctab()

        # Set up GDB.
        for setup_cmd, failure in setup_cmds:
            if not self.run_gdb_command(setup_cmd):
                raise RuntimeError(failure)

        # Create inferiors and set up MPI rank/inferior map.
        # First inferior is created by default.
        self.rank_inferior_map = {procs[0].mpirank: 'i1'}
        self.inferior_rank_map = {'i1': procs[0].mpirank}
        for index, proc in enumerate(procs[1:], 2):
            # Hackish: Assume that the inferiors follow the iN naming scheme.
            inferior = 'i' + str(index)
            self.rank_inferior_map[proc.mpirank] = inferior
            self.inferior_rank_map[inferior] = proc.mpirank
            if not self.run_gdb_command(add_inferior_cmd, no_thread=True):
                raise RuntimeError(
                    'Could not add inferior {0}!'.format(inferior))

        # Maps MPI ranks to associated threads and vice-versa.
        self.rank_thread_map = {}
        self.thread_rank_map = {}

        if self.sbd:
            # Set up the list of executables for load file checking.
            self.sbd.set_executable_names(
                [os.path.basename(proc.pd.executable_name)
                 for proc in procs])

        # Attach processes.
        for proc in procs:
            rank = proc.mpirank
            attach_cmd = Command(
                "target-attach",
                opts={'--thread-group': self.rank_inferior_map[rank]},
                args=[proc.pd.pid])
            if not self.run_gdb_command(attach_cmd, rank, no_thread=True):
                raise RuntimeError(
                    "Could not attach to rank {0}!".format(rank))
            self.varobjs[rank] = VariableObjectManager()
            # Kludge to fix GDB not outputting records for the i1 attach.
            if self.rank_inferior_map[rank] == 'i1':
                time.sleep(0.1)

    def _watch_thread_created(self, record, **kwargs):
        """Record a newly created thread in the rank/thread maps.

        The thread id is stored as an int; each rank's thread list is kept
        sorted so that its smallest thread id is always first.
        """
        inferior = record.thread_group_id
        thread_id = int(record.thread_id)
        rank = self.inferior_rank_map[inferior]
        if rank in self.rank_thread_map:
            self.rank_thread_map[rank].append(thread_id)
        else:
            self.rank_thread_map[rank] = [thread_id]
        # Always ensure smallest thread is first.
        self.rank_thread_map[rank].sort()
        self.thread_rank_map[thread_id] = rank

    def _watch_startup_stop(self, record, **kwargs):
        """Count initial inferior stops and finish startup after the last.

        Once as many stops as there are process-table entries were seen,
        startup ends, this watcher is removed and the token state is reset.
        """
        self.startup_done_count += 1
        if self.startup_done_count == self.comm.get_proctab_size():
            self.doing_startup = False
            self.record_handler.remove_handler(self.startup_stop_hid)
            # Reset token counts to sync with front-end.
            self.token_rank_map = {}
            Command._cur_token = 0

    def kill_inferiors(self):
        """Terminate all targets being debugged.

        This sends SIGTERM to every process in the process table. A process
        that cannot be signalled is reported and skipped so the remaining
        ones are still signalled.
        """
        for proc in self.comm.get_proctab():
            try:
                os.kill(proc.pd.pid, signal.SIGTERM)
            except OSError as err:
                # One failed signal must not stop the others or take the
                # daemon down.
                print("Could not send SIGTERM to pid {0}: {1}".format(
                    proc.pd.pid, err))

    def run_gdb_command(self, command, ranks=None, no_thread=False):
        """Run a GDB command.

        command is a Command object representing the command.
        ranks is an Interval of the ranks to run the command on (a single
        int is also accepted). If ranks is None, run on the current inferior.
        If no_thread is True, this does not specify a particular thread.

        Returns True on success, False on error.
        """
        if isinstance(ranks, int):
            # Special case for a single int: wrap it so that it can be
            # iterated below like any other set of ranks.
            ranks = Interval(ranks)
        if ranks is None:
            self.token_rank_map[command.token] = self.comm.get_mpiranks()
            return self.gdb.send(command)
        if command.get_opt('--thread') is not None:
            # If --thread provided, don't override it.
            no_thread = True
        for rank in ranks:
            # Ranks without an inferior in this daemon are skipped.
            if rank not in self.rank_inferior_map:
                continue
            # Most recent option with same name takes precedence.
            if not no_thread and rank in self.rank_thread_map:
                command.add_opt('--thread', self.rank_thread_map[rank][0])
            if not self.gdb.send(command):
                return False
            self.token_rank_map[command.token] = rank
        return True

    def init_handlers(self):
        """Initialize message handlers used on data we receive over MRNet."""
        self.msg_handlers = {
            DIE_MSG: self.die_handler,
            CMD_MSG: self.cmd_handler,
            FILTER_MSG: self.filter_handler,
            UNFILTER_MSG: self.unfilter_handler,
            VARPRINT_MSG: self.varprint_handler,
            KILL_MSG: self.kill_handler,
            FILE_DATA: self.file_data_handler,
        }

    def init_filters(self):
        """Initialize default filters (none are enabled by default)."""
        self.filters = set()
        # Record types that were once filtered by default, kept for
        # reference: async-notify shlibs-updated, shlibs-added,
        # shlibs-removed, library-loaded, thread-created,
        # thread-group-added, thread-group-started; and result exit.

    def __init__(self):
        """Initialize LaunchMON, MRNet, GDB, and other things.

        Exits the process with status 1 when LaunchMON or MRNet cannot be
        initialized.
        """
        self.is_shutdown = False
        self.quit = False
        self.doing_startup = True
        self.startup_done_count = 0
        self.startup_arecs = []
        self.token_handlers = {}
        self.comm = CommunicatorBE()
        if not self.comm.init_lmon(sys.argv):
            sys.exit(1)
        if not self.comm.init_mrnet():
            # TODO: This should cleanly terminate LaunchMON, but does not.
            sys.exit(1)
        self.init_gdb()
        self.init_handlers()
        self.init_filters()
        self.variable_printer = VariablePrinter(self)

    def shutdown(self):
        """Cleanly shut things down if we have not already done so."""
        if not self.comm.is_shutdown():
            self.comm.shutdown()
        if self.sbd:
            self.sbd.cleanup()

    def __del__(self):
        """Invoke shutdown()."""
        # Exception guard if we have an error before comm init.
        try:
            self.shutdown()
        except AttributeError:
            pass

    def die_handler(self, msg):
        """Handle a die message by exiting."""
        sys.exit("Told to die.")

    def cmd_handler(self, msg):
        """Handle a CMD message by running the command.

        The message contains the following fields:
        command - A Command object to run.
        ranks - An optional interval of ranks on which to run.

        Commands received while startup is still in progress are ignored.
        """
        if self.doing_startup:
            print("Ignoring command during startup.")
            return
        if msg.command.command == "gdb-exit":
            # Special case for quit.
            self.quit = True
        ranks = self.comm.get_mpiranks()
        if hasattr(msg, "ranks"):
            ranks = msg.ranks
        if not self.run_gdb_command(msg.command, ranks):
            # TODO: Send die message.
            print("Managed to get a bad command '{0}'.".format(msg.command))

    def kill_handler(self, msg):
        """Handle a kill message, killing all processes."""
        self.kill_inferiors()

    def filter_handler(self, msg):
        """Handle a filter message by adding the filter."""
        self.filters.update(msg.filter_types)

    def unfilter_handler(self, msg):
        """Handle an unfilter message by removing the filter."""
        self.filters.difference_update(msg.filter_types)

    def varprint_handler(self, msg):
        """Handle the varprint message and begin sequence."""
        self.variable_printer.varprint_handler(msg)

    def is_filterable(self, record):
        """Check whether a given record can be filtered.

        Returns True when the record's type or any of its subtypes is in
        the current filter set.
        """
        record_set = record.record_subtypes.union([record.record_type])
        return bool(record_set.intersection(self.filters))

    def file_data_handler(self, msg):
        """Handle a response with file data."""
        if self.sbd:
            self.sbd.file_data_handler(msg)
        else:
            print("Got SBD file data when SBD is not enabled")

    def main(self):
        """Main send/receive loop.

        This receives data on MRNet (non-blocking), processes the messages,
        and then sends any data that was read from GDB. This then sleeps for
        a short while to avoid heavy CPU use. Records produced during
        startup are accumulated and sent together with the first records
        that follow startup. After quit is requested, this waits for GDB to
        exit and shuts down.
        """
        while not self.quit:
            if self.sbd:
                # Check for data from the GDB process for LOAD_FILE.
                self.sbd.sbd_check()
            msg = self.comm.recv(blocking=False)
            if msg is not None:
                # Received data.
                if msg.msg_type in self.msg_handlers:
                    self.msg_handlers[msg.msg_type](msg)
                else:
                    print("Got a message {0} with no handler.".format(
                        msg.msg_type))

            # records and ranks are parallel lists.
            records = []
            ranks = []
            for record in self.gdb.read():
                self.record_handler.handle(record)
                if self.is_filterable(record):
                    continue
                records.append(record)
                if (record.token is not None and
                        record.token in self.token_rank_map):
                    ranks.append(self.token_rank_map[record.token])
                # NOTE(review): thread_rank_map is keyed by int thread ids
                # (see _watch_thread_created) while record.thread_id is used
                # unconverted here -- confirm the lookup can match.
                elif (hasattr(record, "thread_id") and
                      record.thread_id in self.thread_rank_map):
                    ranks.append(self.thread_rank_map[record.thread_id])
                else:
                    ranks.append(self.comm.get_mpiranks())

            if records:
                arecs = combine_records(records, ranks)
                if self.doing_startup:
                    # Hold everything back until startup has finished.
                    self.startup_arecs = combine_aggregated_records(
                        self.startup_arecs + arecs)
                else:
                    if self.startup_arecs:
                        # First send after startup: flush what was held.
                        arecs = combine_aggregated_records(
                            self.startup_arecs + arecs)
                        self.startup_arecs = None
                    self.comm.send(GDBMessage(OUT_MSG, record=arecs),
                                   self.comm.frontend)
            # Sleep a bit to reduce banging on the CPU.
            time.sleep(0.01)

        # Wait for GDB to exit, sleeping between polls instead of spinning.
        while self.gdb.is_running():
            time.sleep(0.01)
        # Shut everything else down.
        self.shutdown()
class GDBFE(GDBMICmd):
    """The front-end to PGDB."""

    def init_handlers(self):
        """Initialize the message handlers and the record handler."""
        # Set up message handlers.
        self.msg_handlers = {
            DIE_MSG: self.die_handler,
            QUIT_MSG: self.quit_handler,
            OUT_MSG: self.out_handler,
            VARPRINT_RES_MSG: self.varprint_res_handler,
            LOAD_FILE: self.load_file_handler,
        }
        # Now record handlers.
        self.record_handler = GDBMIRecordHandler()

    def remote_init(self):
        """Initialize remote communication and the back-end daemons.

        Returns True on success. When LaunchMON or MRNet initialization
        fails, the main thread is released and interrupted and False is
        returned.
        """
        self.comm = CommunicatorFE(True)  # Initialize with locking.
        # One of {pid} and {launcher, launcher_args} will not be none, based
        # upon the command line input parsing.
        ret = self.comm.init_lmon(self.lmon_attach,
                                  pid=self.lmon_pid,
                                  launcher=self.lmon_launcher,
                                  launcher_args=self.lmon_launcher_argv,
                                  host=self.lmon_host)
        if not ret:
            # Terminate. Note at this point main is still waiting on the
            # remote_up event, so we have to set it.
            self.remote_up.set()
            self.interrupt_main()
            return False
        ret = self.comm.init_mrnet(local=self.local_launch)
        if not ret:
            # Terminate. See prior comment about remote_up.
            self.remote_up.set()
            self.interrupt_main()
            return False
        self.varobjs = {}
        for rank in self.comm.get_mpiranks():
            self.varobjs[rank] = VariableObjectManager()
        self.init_handlers()
        self.pprinter = GDBMIPrettyPrinter()
        self.sleep_time = 0.1
        self.blocks = []
        try:
            self.blocks += gdbconf.default_blocks
        except AttributeError:
            # The configuration does not define default_blocks.
            pass
        # Initialize the SBD system if needed.
        if gdbconf.use_sbd:
            self.sbd = SBDFE(self.comm)
        else:
            self.sbd = None
        return True

    def __init__(self):
        """Initialize local state; remote initialization is done separately."""
        GDBMICmd.__init__(self)
        self.quit = False
        self.is_shutdown = False
        # Need to disable readline.
        self.completekey = None
        # Event triggered when remote_init completes in the remote thread.
        self.remote_up = threading.Event()
        # Temporary list for building up aggregated records from OUT
        # messages.
        self.arec_list = []
        # Output history for expanding commands.
        self.output_history = []
        # Get our PID for signals.
        self.my_pid = os.getpid()

    def interrupt_main(self):
        """Interrupt the main thread.

        This works because in Python, the main thread is the one that
        processes signals. If using Python 3, this could be replaced with
        signal.pthread_kill (but this will work in Python 3).
        """
        os.kill(self.my_pid, signal.SIGINT)

    def parse_args(self):
        """Parse the command-line arguments and set appropriate variables.

        Prints a message and exits when an option is missing its value or
        when neither -p/--pid nor -a was given.
        """
        # Optparse unfortunately doesn't work here.
        self.lmon_attach = None
        self.lmon_pid = None
        self.lmon_launcher = None
        self.lmon_launcher_argv = None
        self.lmon_host = None
        self.local_launch = False
        argv = sys.argv
        # An explicit index is needed so that an option's value can be
        # skipped; rebinding the variable of a for/range loop has no effect.
        i = 1
        while i < len(argv):
            arg = argv[i]
            if arg == "-p" or arg == "--pid":
                self.lmon_attach = True
                if len(argv) == i + 1:
                    print("Must provide a PID with {0}.".format(arg))
                    sys.exit(0)
                try:
                    self.lmon_pid = int(argv[i + 1])
                except ValueError:
                    print("Must provide a valid PID.")
                    sys.exit(0)
                i += 1
            elif arg == "--launcher":
                if len(argv) == i + 1:
                    print("Must provide a launcher with --launcher.")
                    sys.exit(0)
                self.lmon_launcher = argv[i + 1]
                i += 1
            elif arg == "--local":
                self.local_launch = True
            elif arg == "-h" or arg == "--host":
                if len(argv) == i + 1:
                    print("Must provide a host with --host.")
                    sys.exit(0)
                self.lmon_host = argv[i + 1]
                i += 1
            elif arg == "-a":
                # The attribute always exists (set to None above), so the
                # default has to be decided on its value.
                if self.lmon_launcher is None:
                    self.lmon_launcher = "srun"
                self.lmon_attach = False
                # Everything after -a belongs to the launcher.
                self.lmon_launcher_argv = argv[i + 1:]
                break
            elif arg == "--sbd":
                # Override the configuration option.
                gdbconf.use_sbd = True
            i += 1
        if self.lmon_attach is None:
            print("Arguments: (one of -p/--pid and -a is required)")
            print("-p, --pid <pid>: attach to mpirun process <pid>")
            print("-a <options>: pass <options> verbatim to the resource manager for launching.")
            print("--launcher <launcher>: use binary <launcher> to launch.")
            print("--local: deploy for debugging just on the local node")
            print("-h/--host: the host the mpirun process is running on")
            print("--sbd: use the Scalable Binary Deployment system")
            sys.exit(0)

    def shutdown(self):
        """Shut down the network if not already shut down."""
        if not self.comm.is_shutdown():
            self.comm.shutdown()

    def __del__(self):
        """Invoke shutdown()."""
        # Need to catch a potential exception when comm does not exist.
        # This occurs if there is an error before comm init.
        try:
            self.shutdown()
        except AttributeError:
            pass

    def die_handler(self, msg):
        """Handle a die message. Presently does nothing."""
        pass

    def quit_handler(self, msg):
        """Handle a quit message. Presently does nothing."""
        pass

    def out_handler(self, msg):
        """Handle an out message by adding the arec to the temporary list."""
        if self.arec_list:
            self.arec_list = combine_aggregated_records(
                self.arec_list + msg.record)
        else:
            self.arec_list = msg.record

    def process_out_messages(self):
        """Go through the temporary arec_list and pretty-print records.

        Each aggregated record is pushed onto the front of the output
        history (trimmed to gdbconf.history_length); the largest record
        class is printed through its lowest rank. The list is emptied
        afterwards.
        """
        for arec in self.arec_list:
            # Add the record to the history.
            self.output_history = [arec] + self.output_history
            if len(self.output_history) > gdbconf.history_length:
                # Remove the last (oldest) element.
                self.output_history.pop()
            record_classes = arec.get_record_classes()
            class_key = max(record_classes,
                            key=lambda x: len(record_classes[x]))
            # Only print the lowest-rank entry in the class.
            ranks = record_classes[class_key]
            record = arec.get_record(ranks.get_smallest())
            # Note that this may not work if things don't support lists of
            # ranks.
            if all(self.record_handler.handle(record, rank=ranks)):
                self.pprinter.pretty_print(record, ranks)
                if len(record_classes) > 1:
                    print("Some results from {0} omitted; use expand to view.".format(
                        arec.get_ranks()))
        self.arec_list = []

    def varprint_res_handler(self, msg):
        """Handle a varprint result by pretty-printing the variable object."""
        if msg.err:
            print("[{0}] {1}".format(msg.rank, msg.msg))
        elif msg.varobj:
            self.varobjs[msg.rank].add_var_obj(msg.varobj)
            # The last character of the pretty-printed text is dropped.
            print(self.pprinter.varobj_pretty_print(msg.varobj,
                                                    tag=msg.rank)[:-1])
        else:
            print("[{0}] Received a bad varobj!".format(msg.rank))

    def load_file_handler(self, msg):
        """Handle a load file message by loading and broadcasting the file."""
        if self.sbd:
            self.sbd.load_file(msg.filename)
        else:
            print("Received SBD LOAD_FILE request when SBD is not enabled.")

    def parse_filter_spec(self, spec):
        """Parse a filter specification into a list of record types.

        Returns the lower-cased, whitespace-separated entries, or None
        (after printing a message) when the specification is empty.
        """
        split = spec.lower().split()
        if len(split) == 0:
            print("Bad filter specification.")
            return None
        return split

    def do_filter(self, cmd, targets=None):
        """Tell the back-end daemons to filter something.

        The input is a list of record types and subtypes. A record
        containing any of these will be filtered.
        """
        record_types = self.parse_filter_spec(cmd)
        if not record_types:
            # A bad specification was already reported by the parser.
            return
        self.comm.send(GDBMessage(FILTER_MSG,
                                  filter_types=set(record_types)),
                       self.comm.broadcast)

    def do_unfilter(self, cmd, targets=None):
        """Tell the back-end daemons to unfilter something."""
        record_types = self.parse_filter_spec(cmd)
        if not record_types:
            # A bad specification was already reported by the parser.
            return
        self.comm.send(GDBMessage(UNFILTER_MSG,
                                  filter_types=set(record_types)),
                       self.comm.broadcast)

    def parse_proc_spec(self, proc_spec):
        """Parse a processor specification.

        Returns -1 for "all" or "-1", an Interval built from the
        comma-separated ranks and rank ranges otherwise, or None (after
        printing a message) when an entry is not an integer.
        """
        targets = []
        # Handle some special cases for sending to all processors.
        if proc_spec.lower() == "all" or proc_spec == "-1":
            return -1
        for group in proc_spec.split(","):
            tup = group.split("-")
            try:
                if len(tup) == 1:
                    targets.append((int(tup[0]), int(tup[0])))
                else:
                    targets.append((int(tup[0]), int(tup[1])))
            except ValueError:
                print("Bad processor specification.")
                return
        return Interval(targets)

    def do_proc(self, cmd, targets=None):
        """Handle the "proc" command.

        Sends a command to a subset of remote nodes based on MPI rank. The
        text before the first space is the processor specification and the
        rest is the command to run.
        """
        if targets:
            print("Recursive proc is not recursive.")
            return
        proc_spec, sep, rest = cmd.partition(" ")
        if not sep or not proc_spec:
            print("Bad processor specification.")
            return
        line = rest.strip()
        targets = self.parse_proc_spec(proc_spec)
        if targets is None:
            # The parser already reported the bad specification.
            return
        if isinstance(targets, int):
            # parse_proc_spec returns -1 for "all processors".
            targets = self.comm.get_mpiranks()
        elif not (targets - self.comm.get_mpiranks()).empty():
            print("Out-of-range processor specification.")
            return
        cmd = self.resolve_gdbmi_command(line, err=False)
        if cmd:
            self.comm.send(GDBMessage(CMD_MSG, command=cmd, ranks=targets),
                           targets)
        else:
            split = line.split()
            if not split:
                # Nothing to run after the processor specification.
                return
            cmd = split[0]
            rest = " ".join(split[1:])
            if hasattr(self, "do_" + cmd):
                func = getattr(self, "do_" + cmd)
                func(rest, targets=targets)

    def do_block(self, cmd, targets=None):
        """Block all output from a subset of nodes."""
        to_block = self.parse_proc_spec(cmd)
        if not to_block:
            return
        all_ranks = self.comm.get_mpiranks()
        if isinstance(to_block, int):
            # parse_proc_spec returns -1 for "all processors".
            to_block = all_ranks
        # This is quite inefficient and will not scale.
        for target in to_block.members():
            if target not in self.blocks and target in all_ranks:
                self.blocks.append(target)

    def do_unblock(self, cmd, targets=None):
        """Unblock output from a subset of nodes."""
        to_unblock = self.parse_proc_spec(cmd)
        if not to_unblock:
            return
        if isinstance(to_unblock, int):
            # parse_proc_spec returns -1 for "all processors".
            del self.blocks[:]
            return
        # Rebuild the list in place; deleting by index while the list
        # shrinks would remove the wrong entries.
        self.blocks[:] = [rank for rank in self.blocks
                          if rank not in to_unblock]

    def do_varprint(self, cmd, targets=None):
        """Run the varprint command."""
        if not targets:
            targets = self.comm.get_mpiranks()
        var = cmd
        # Strip quotes, if present (the length check keeps an empty
        # argument from raising).
        if len(var) >= 2 and var[0] == '"' and var[-1] == '"':
            var = var[1:-1]
        self.comm.send(GDBMessage(VARPRINT_MSG, name=var, ranks=targets),
                       targets)

    def do_varassign(self, cmd, targets=None):
        """Run the varassign command (format: var = val)."""
        if not targets:
            targets = self.comm.get_mpiranks()
        split = cmd.split("=")
        if len(split) != 2:
            print("varassign format is: var = val")
            return
        var = split[0].strip()
        # Strip quotes, if present (the length check keeps an empty name
        # from raising).
        if len(var) >= 2 and var[0] == '"' and var[-1] == '"':
            var = var[1:-1]
        val = split[1].strip()
        for rank in targets.members():
            full_name = self.varobjs[rank].get_full_name(var)
            if not full_name:
                print("Variable not found on rank {0}.".format(rank))
                continue
            self.comm.send(
                GDBMessage(CMD_MSG,
                           command=Command("var-assign",
                                           args=('"' + full_name + '"',
                                                 '"' + val + '"')),
                           ranks=rank),
                rank)

    def do_help(self, cmd, targets=None):
        """Run the help command."""
        if not targets:
            # Because this makes the most sense, unless told otherwise, we
            # run this on one processor.
            targets = 0
        self.comm.send(
            GDBMessage(CMD_MSG,
                       command=Command("interpreter-exec",
                                       args=("console",
                                             '"help ' + cmd + '"')),
                       ranks=targets),
            targets)

    def do_kill(self, cmd, targets=None):
        """Kill all targets being debugged."""
        # This always sends to all targets, for now.
        print("Sending SIGTERM to all inferiors. (May need to step them for them to die.)")
        self.comm.send(GDBMessage(KILL_MSG), self.comm.broadcast)

    def do_quit(self, cmd, targets=None):
        """Gracefully quit PGDB."""
        self.quit = True
        self.comm.send(GDBMessage(CMD_MSG, command=Command("gdb-exit")),
                       self.comm.broadcast)

    def do_expand(self, cmd, targets=None):
        """Expand output.

        Use: [proc <processor-spec>] expand [history-item]
        Expand history-item for the given processors. Item 0, the most
        recent output, is used when no item is given.
        """
        if not targets:
            targets = self.comm.get_mpiranks()
        args = cmd.split()
        history_item = 0
        if args:
            # cmd holds only the arguments, so the item is the first token;
            # a second token is still preferred, as it always was.
            token = args[1] if len(args) > 1 else args[0]
            if not token.isdigit():
                print("Incorrect history specification.")
                return
            history_item = int(token)
        if history_item >= len(self.output_history):
            print("No such history item {0}".format(history_item))
            return
        arec = self.output_history[history_item]
        # We only care about the IDs that are present in both.
        ids = targets.intersect(arec.get_ranks())
        for vid in ids:
            self.pprinter.pretty_print(arec.get_record(vid), Interval(vid))

    def dispatch_gdbmi_command(self, command):
        """Send a GDB command to every rank (use proc to send to subsets).

        Returns False without sending when communication is already shut
        down, otherwise the result of the send.
        """
        if self.comm.is_shutdown():
            return False
        return self.comm.send(GDBMessage(CMD_MSG, command=command),
                              self.comm.broadcast)

    def handle_msg(self, msg):
        """Handle a received message."""
        if msg.msg_type in self.msg_handlers:
            self.msg_handlers[msg.msg_type](msg)
        else:
            print("Got a message {0} with no handler.".format(msg.msg_type))

    def remote_body(self):
        """The main remote body thread.

        This initializes the remote infrastructure, and receives and
        processes data until quit is requested or all nodes have exited.
        Returns False when the remote initialization fails.
        """
        # Must do the init inside of this thread, or else LaunchMON steals
        # stdin.
        if not self.remote_init():
            return False
        # Signal main thread we can use stdin.
        self.remote_up.set()
        print("PGDB deployed to {0} hosts and {1} processors.".format(
            self.comm.get_mrnet_network_size(),
            self.comm.get_proctab_size()))
        while not self.quit and not self.comm.all_nodes_exited():
            # Receive data, if any.
            msg = self.comm.recv(blocking=False)
            if msg is not None:
                # Received data; look for more right away.
                self.handle_msg(msg)
                continue
            # Nothing pending: print what was collected, then keep from
            # beating up the CPU too much.
            self.process_out_messages()
            time.sleep(self.sleep_time)
        self.shutdown()
        print("Remote shut down.")
        self.interrupt_main()

    def local_body(self):
        """The local command input loop."""
        # Wait until we can use stdin.
        try:
            self.remote_up.wait()
            # Restore the stdin that run() moved out of the way.
            os.dup2(self.stdin_copy, 0)
            os.close(self.stdin_copy)
            self.cmdloop()
        except KeyboardInterrupt:
            print("Terminating.")
            sys.exit(0)

    def run(self):
        """Start the remote thread and run the local command input loop."""
        self.parse_args()
        # This is part of a hack to keep LaunchMON from stealing stdin.
        self.stdin_copy = os.dup(0)
        os.close(0)
        self.remote_thread = threading.Thread(target=self.remote_body)
        self.remote_thread.daemon = True
        self.remote_thread.start()
        self.local_body()