def create_branch(s_name):
    """Create and switch to a new branch. This call is blocking.

    s_name: name of the branch to create.

    Reports an error and returns without doing anything when a branch
    named s_name already exists.
    """
    global gn_index_suffix
    if branch_exists(s_name):
        # The branch name must be interpolated into the message.
        fredutil.fred_error("Branch named '%s' already exists." % s_name)
        return
    fredutil.fred_debug("Creating branch '%s'" % s_name)

    # CREATE the branch: Take the branch base checkpoint. When it
    # finishes, record/replay will have already reopened the log
    # files, or created new ones.
    # The index is captured *before* checkpoint() is called so that it
    # identifies the branch base checkpoint in the steps below.
    n_branched_index = gn_index_suffix
    checkpoint()

    # Copy the whole dmtcp_tmpdir to the new branch location and
    # symlink to the new location.
    relocate_dmtcp_tmpdir(s_name)
    load_dmtcp_tmpdir(s_name)

    # SWITCH to the branch: Remove all checkpoint images in the new
    # branch except for the most recently created one (the branch base
    # checkpoint). Rename that base checkpoint to index 0.
    remove_checkpoints_except_index(n_branched_index)
    rename_index_to_base(n_branched_index)

    # Reset checkpoint indexing past the base checkpoint.
    reset_checkpoint_indexing()

    # Restart from the base checkpoint. This is required so the logs
    # will be closed and reopened in the new branch.
    restart(0)
def switch_to_branch(self, s_name):
    """Make the branch called s_name the active one.

    Reports an error and returns early when dmtcpmanager does not know
    a branch with that name.
    """
    if not dmtcpmanager.branch_exists(s_name):
        fredutil.fred_error("Branch '%s' does not exist." % s_name)
        return

    # Scan every known branch; if several share the name, the last one
    # in the list is the one that ends up selected.
    candidates = [branch for branch in self.l_branches
                  if branch.get_name() == s_name]
    if candidates:
        self.branch = candidates[-1]

    dmtcpmanager.switch_branch(s_name)

    # Switching to branches always restarts in ckpt 0:
    base_checkpoint = self.branch.get_checkpoint(0)
    self.branch.set_current_checkpoint(base_checkpoint)
    self.update_state()
    fredutil.fred_info("Switched to branch '%s'." % s_name)
def do_branch(self, s_name):
    """Create a branch called s_name and make it the active branch.

    Reports an error and returns early when dmtcpmanager already knows
    a branch with that name.
    """
    name_taken = dmtcpmanager.branch_exists(s_name)
    if name_taken:
        fredutil.fred_error("Branch '%s' already exists." % s_name)
        return

    # Register the new branch locally before asking dmtcpmanager to
    # create it.
    new_branch = Branch(s_name)
    self.branch = new_branch
    self.l_branches.append(new_branch)
    dmtcpmanager.create_branch(s_name)

    # Creating branches always creates ckpt 0:
    self.branch.add_checkpoint(Checkpoint(0))
    base_checkpoint = self.branch.get_checkpoint(0)
    self.branch.set_current_checkpoint(base_checkpoint)
    self.update_state()
    fredutil.fred_info("Now in new branch '%s'." % s_name)
def is_running():
    """Return True if dmtcp_command reports RUNNING as 'yes'.

    Runs 'dmtcp_command s' and looks for a 'RUNNING=<word>' entry in its
    output. Returns False when the entry is missing or is not 'yes'.
    When the command yields no output object at all (None), an error is
    reported and False is returned.
    """
    output = fredutil.execute_shell_command(["dmtcp_command", "s"])
    if output is None:
        fredutil.fred_error("ERROR: Can't get RUNNING. "
                            "Did the coordinator die?")
        return False
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    m = re.search(r"RUNNING=(\w+)", output, re.MULTILINE)
    if m is None:
        return False
    return m.group(1) == "yes"
def switch_branch(s_name):
    """Switch to the specified branch.

    s_name: name of an existing branch.

    Reports an error and returns without touching the running peers or
    the tmpdir link when no branch named s_name exists.
    """
    if not branch_exists(s_name):
        fredutil.fred_error("Branch '%s' does not exist." % s_name)
        # Bail out: continuing would kill the peers and relink the
        # tmpdir for a branch that is not there.
        return
    # Kill peers: force log flush.
    kill_peers()
    # Symlink dmtcp_tmpdir to branch tmpdir.
    load_dmtcp_tmpdir(s_name)
    reset_checkpoint_indexing()
    # Restart from branch base checkpoint
    restart(0)
def is_running():
    """Return True if dmtcp_command reports RUNNING as 'yes'.

    Runs 'dmtcp_command s' and searches its output for 'RUNNING=<word>'.
    Returns False if that entry is absent or its value is not 'yes'.
    If the command produced no output object (None), an error is
    reported and False is returned.
    """
    cmd = ["dmtcp_command", "s"]
    output = fredutil.execute_shell_command(cmd)
    if output is None:
        fredutil.fred_error("ERROR: Can't get RUNNING. "
                            "Did the coordinator die?")
        return False
    # Raw string keeps '\w' a regex class rather than an invalid
    # string escape.
    m = re.search(r'RUNNING=(\w+)', output, re.MULTILINE)
    return m is not None and m.group(1) == 'yes'
def get_num_peers():
    """Return NUM_PEERS from 'dmtcp_command s' as an integer.

    Returns 0 on every failure path: when the command yields None (an
    error is reported), when its output is the empty string (an error
    is reported), and when the output has no 'NUM_PEERS=<digits>' line.
    """
    output = fredutil.execute_shell_command(["dmtcp_command", "s"])
    if output is None:
        fredutil.fred_error("ERROR: Can't get NUM_PEERS. "
                            "Did the coordinator die?")
        return 0
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    m = re.search(r"^NUM_PEERS=(\d+)", output, re.MULTILINE)
    if m is not None:
        return int(m.group(1))
    if output == "":
        fredutil.fred_error("Output was NULL string")
    # Heuristically guessing 0 peers, could be a problem
    return 0
def get_num_peers():
    """Return NUM_PEERS from 'dmtcp_command s' as an integer.

    Falls back to 0 whenever the value cannot be read: the command
    yields None (reported as an error), the output is empty (reported
    as an error), or no 'NUM_PEERS=<digits>' line is present.
    """
    cmd = ['dmtcp_command', 's']
    output = fredutil.execute_shell_command(cmd)
    if output is None:
        fredutil.fred_error("ERROR: Can't get NUM_PEERS. "
                            "Did the coordinator die?")
        return 0
    # Raw string keeps '\d' a regex class rather than an invalid
    # string escape.
    exp = r'^NUM_PEERS=(\d+)'
    m = re.search(exp, output, re.MULTILINE)
    if m is None:
        if output == "":
            fredutil.fred_error("Output was NULL string")
        # Heuristically guessing 0 peers, could be a problem
        return 0
    return int(m.group(1))
def relocate_dmtcp_tmpdir(s_name):
    """Place the contents of DMTCP_TMPDIR at the path derived from s_name.

    When DMTCP_TMPDIR is a real directory it is renamed to the new
    path. When it is a symlink, the link target is copied to the new
    path; if that path already exists an error is reported and nothing
    is copied.
    """
    tmpdir = os.environ["DMTCP_TMPDIR"]

    if not os.path.islink(tmpdir):
        # When creating the master branch, the tmpdir is a directory not
        # a link. Just move it and return.
        fredutil.fred_debug("DMTCP_TMPDIR is a directory, not a link.")
        os.rename(tmpdir, get_dmtcp_tmpdir_path(s_name))
        return

    source = os.path.normpath(os.readlink(tmpdir))
    if not os.path.isabs(source):
        # Resolve relative to absolute path.
        source = os.path.join(os.path.dirname(tmpdir), source)

    destination = get_dmtcp_tmpdir_path(s_name)
    if os.path.exists(destination):
        fredutil.fred_error("Requested new path '%s' already exists." %
                            destination)
        return

    shutil.copytree(source, destination)
    fredutil.fred_debug("Copied DMTCP_TMPDIR from '%s' to '%s'." %
                        (source, destination))
def do_restart(self, n_index, b_clear_history, reset_fnc):
    """Restart from the specified checkpoint, calling the provided
    reset_fnc before restarting, if provided.

    n_index: checkpoint index to restart from; -1 selects the last
        checkpoint of this branch.
    b_clear_history: when true, clear the history of the checkpoint
        that is current after the restart.
    reset_fnc: callable invoked with no arguments before restarting,
        or None to skip it.

    Reports an error and returns without restarting when there are no
    checkpoints or when n_index is neither -1 nor a valid index.
    """
    if self.get_num_checkpoints() == 0:
        fredutil.fred_error("No checkpoints found.")
        return
    if reset_fnc is not None:
        reset_fnc()

    if n_index == -1:
        target = self.get_last_checkpoint()
        n_restart = target.get_index()
        fredutil.fred_debug("Restarting from checkpoint index %d." %
                            n_restart)
        dmtcpmanager.restart(n_restart)
    else:
        # Reject indices below zero as well as those past the end; -1
        # is the only negative value with a meaning here.
        if n_index < 0 or n_index > self.get_num_checkpoints() - 1:
            fredutil.fred_error("No such checkpoint index %d." % n_index)
            return
        fredutil.fred_debug("Restarting from checkpoint index %d "
                            "in branch %s" %
                            (n_index, self.get_name()))
        dmtcpmanager.restart(n_index)
        target = self.get_checkpoint(n_index)

    # Record the checkpoint that was actually restarted as current.
    self.set_current_checkpoint(target)
    if b_clear_history:
        self.get_current_checkpoint().clear_history()
def relocate_dmtcp_tmpdir(s_name):
    """Move or copy DMTCP_TMPDIR to the location derived from s_name.

    A plain directory is renamed to the new location. A symlink has its
    target copied there instead, unless the new location already
    exists, in which case an error is reported and the function returns.
    """
    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    b_is_link = os.path.islink(s_tmpdir)

    if b_is_link:
        s_target = os.path.normpath(os.readlink(s_tmpdir))
        if not os.path.isabs(s_target):
            # Resolve relative to absolute path.
            s_parent = os.path.dirname(s_tmpdir)
            s_target = os.path.join(s_parent, s_target)

        s_branch_path = get_dmtcp_tmpdir_path(s_name)
        if os.path.exists(s_branch_path):
            fredutil.fred_error("Requested new path '%s' already exists." %
                                s_branch_path)
            return

        shutil.copytree(s_target, s_branch_path)
        fredutil.fred_debug("Copied DMTCP_TMPDIR from '%s' to '%s'." %
                            (s_target, s_branch_path))
    else:
        # When creating the master branch, the tmpdir is a directory not
        # a link. Just move it and return.
        fredutil.fred_debug("DMTCP_TMPDIR is a directory, not a link.")
        s_branch_path = get_dmtcp_tmpdir_path(s_name)
        os.rename(s_tmpdir, s_branch_path)
def do_restart(self, n_index, b_clear_history, reset_fnc):
    """Restart from the specified checkpoint, calling the provided
    reset_fnc before restarting, if provided.

    n_index: checkpoint index to restart from; -1 restarts from the
        branch's current checkpoint.
    b_clear_history: when true, clear the history of the checkpoint
        that is current after the restart.
    reset_fnc: callable invoked with no arguments before restarting,
        or None to skip it.

    Reports an error and returns without restarting when there are no
    checkpoints or when n_index is neither -1 nor a valid index.
    """
    if self.get_num_checkpoints() == 0:
        fredutil.fred_error("No checkpoints found.")
        return
    if reset_fnc is not None:
        reset_fnc()

    if n_index == -1:
        # Restarting the current checkpoint leaves it current, so no
        # set_current_checkpoint() call is needed on this path.
        n_restart = self.get_current_checkpoint().get_index()
        fredutil.fred_debug("Restarting from checkpoint index %d." %
                            n_restart)
        dmtcpmanager.restart(n_restart)
    else:
        # Reject indices below zero as well as those past the end; -1
        # is the only negative value with a meaning here.
        if n_index < 0 or n_index > self.get_num_checkpoints() - 1:
            fredutil.fred_error("No such checkpoint index %d." % n_index)
            return
        fredutil.fred_debug("Restarting from checkpoint index %d "
                            "in branch %s" %
                            (n_index, self.get_name()))
        dmtcpmanager.restart(n_index)
        self.set_current_checkpoint(self.get_checkpoint(n_index))

    if b_clear_history:
        self.get_current_checkpoint().clear_history()