Ejemplo n.º 1
0
    def do_step_no_deadlock(self, n=1):
        """Run a 'step' command with a prompt timeout to detect a deadlock.

        Returns the 2-tuple (b_deadlock, output). b_deadlock is True when
        the command timed out; output is the text returned by the step, or
        the empty string if the step itself did not return.
        """
        def configure(fred_cmd, n_count):
            # Fill in the native form and the repeat count of a command.
            fred_cmd.set_native(self._p.get_native(fred_cmd))
            fred_cmd.set_count_cmd(self._p.b_has_count_commands)
            fred_cmd.set_count(n_count)
            return fred_cmd

        # The inferior has to be interruptible with SIGSTOP:
        self.enable_sigstop()
        s_output = ""
        try:
            logged_cmd = configure(fred_step_cmd(), n)
            # TODO: Special case for gdb so we don't step into libc. Think of
            # more portable way to do this.
            s_output = self._step(n, b_timeout=True)
            if s_output == "DO-NOT-STEP":
                # Record a single 'next' in place of the 'step' so we don't
                # step into libc again.
                logged_cmd = configure(fred_next_cmd(), 1)
            self.log_fred_command(logged_cmd)
            self.update_state()
        except fredutil.PromptTimeoutException:
            fredutil.fred_debug("'step' command timed out (probably a deadlock).")
            b_deadlock = True
        else:
            b_deadlock = False

        return (b_deadlock, s_output)
Ejemplo n.º 2
0
def create_branch(s_name):
    """Create a new branch named s_name and switch to it. This call is
    blocking.

    If a branch with that name already exists, an error is reported and
    nothing else happens.
    """
    global gn_index_suffix

    if branch_exists(s_name):
        # Include the offending name in the message.
        fredutil.fred_error("Branch named '%s' already exists." % s_name)
        return
    fredutil.fred_debug("Creating branch '%s'" % s_name)

    # CREATE the branch: Take the branch base checkpoint. When it
    # finishes, record/replay will have already reopened the log
    # files, or created new ones.
    n_branched_index = gn_index_suffix
    checkpoint()

    # Copy the whole dmtcp_tmpdir to the new branch location and
    # symlink to the new location.
    relocate_dmtcp_tmpdir(s_name)
    load_dmtcp_tmpdir(s_name)

    # SWITCH to the branch: Remove all checkpoint images in the new
    # branch except for the most recently created one (the branch base
    # checkpoint). Rename that base checkpoint to index 0.
    remove_checkpoints_except_index(n_branched_index)
    rename_index_to_base(n_branched_index)

    # Reset checkpoint indexing past the base checkpoint.
    reset_checkpoint_indexing()

    # Restart from the base checkpoint. This is required so the logs
    # will be closed and reopened in the new branch.
    restart(0)
Ejemplo n.º 3
0
def load_dmtcp_tmpdir(s_name):
    """Change the DMTCP_TMPDIR symlink to point at the given tmpdir name.

    Whatever currently sits at the DMTCP_TMPDIR path is removed first, then
    a symlink to get_dmtcp_tmpdir_path(s_name) is created in its place.
    """
    s_link = os.environ["DMTCP_TMPDIR"]
    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, which would then be left in place and make
    # os.symlink() below fail because the name is already taken.
    if os.path.lexists(s_link):
        os.remove(s_link)
    s_path = get_dmtcp_tmpdir_path(s_name)
    os.symlink(s_path, s_link)
    fredutil.fred_debug("Symlinked DMTCP_TMPDIR to: %s" % s_path)
Ejemplo n.º 4
0
def load_dmtcp_tmpdir(s_name):
    """Change the DMTCP_TMPDIR symlink to point at the given tmpdir name.

    Any existing entry at the DMTCP_TMPDIR path is removed before the new
    symlink to get_dmtcp_tmpdir_path(s_name) is created.
    """
    s_link_path = os.environ["DMTCP_TMPDIR"]
    # Use lexists(): a symlink whose target is gone makes exists() return
    # False, so the stale link would survive and os.symlink() would fail.
    if os.path.lexists(s_link_path):
        os.remove(s_link_path)
    s_path = get_dmtcp_tmpdir_path(s_name)
    os.symlink(s_path, s_link_path)
    fredutil.fred_debug("Symlinked DMTCP_TMPDIR to: %s" % s_path)
Ejemplo n.º 5
0
    def do_step_no_deadlock(self, n=1):
        """Issue 'step' with a prompt timeout so that a deadlock is noticed.

        Returns the 2-tuple (b_deadlock, output), where b_deadlock is True
        if the command timed out and output is the text returned by the
        step (empty if the step itself did not return).
        """
        # SIGSTOP must be able to interrupt the inferior:
        self.enable_sigstop()
        s_step_output = ""
        try:
            fred_cmd = fred_step_cmd()
            fred_cmd.set_native(self._p.get_native(fred_cmd))
            fred_cmd.set_count_cmd(self._p.b_has_count_commands)
            fred_cmd.set_count(n)
            # TODO: Special case for gdb so we don't step into libc. Think of
            # more portable way to do this.
            s_step_output = self._step(n, b_timeout=True)
            b_stepped = s_step_output != "DO-NOT-STEP"
            if not b_stepped:
                # Log a single 'next' instead of the 'step' so we don't step
                # into libc again.
                fred_cmd = fred_next_cmd()
                fred_cmd.set_native(self._p.get_native(fred_cmd))
                fred_cmd.set_count_cmd(self._p.b_has_count_commands)
                fred_cmd.set_count(1)
            self.log_fred_command(fred_cmd)
            self.update_state()
        except fredutil.PromptTimeoutException:
            fredutil.fred_debug(
                "'step' command timed out (probably a deadlock).")
            b_timed_out = True
        else:
            b_timed_out = False

        return (b_timed_out, s_step_output)
Ejemplo n.º 6
0
def checkpoint():
    """Request a blocking checkpoint and tag the new image files with an
    index.

    Every newly written checkpoint file gets the current gn_index_suffix
    appended to its name ("<file>.<index>"); gn_index_suffix is then
    incremented.
    """
    global gn_index_suffix

    remove_stale_ptrace_files()

    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    # Already-indexed checkpoint images present before the request.
    s_indexed_ckpt_re = r"ckpt_.+\.dmtcp\..*"
    l_existing = []
    for s_file in os.listdir(s_tmpdir):
        if re.search(s_indexed_ckpt_re, s_file) is not None:
            l_existing.append(os.path.join(s_tmpdir, s_file))

    # Request the checkpoint.
    n_expected = get_num_peers()
    fredutil.execute_shell_command_and_wait(["dmtcp_command", "--quiet", "bc"])
    fredutil.fred_debug("After blocking checkpoint command.")

    # There is what seems to be a DMTCP bug: the blocking checkpoint
    # can actually return before the checkpoints are written. It is
    # rare. So poll until there is one new file per peer.
    l_fresh = []
    while len(l_fresh) < n_expected:
        l_candidates = [os.path.join(s_tmpdir, s_file)
                        for s_file in os.listdir(s_tmpdir)
                        if s_file.startswith("ckpt_")
                        and s_file.endswith("dmtcp")]
        l_fresh = [s_ckpt for s_ckpt in l_candidates
                   if s_ckpt not in l_existing]
        time.sleep(0.001)

    for s_ckpt in l_fresh:
        fredutil.fred_debug("Renaming ckpt file from '%s' to '%s.%d'" %
                            (s_ckpt, s_ckpt, gn_index_suffix))
        os.rename(s_ckpt, "%s.%d" % (s_ckpt, gn_index_suffix))
    gn_index_suffix += 1
Ejemplo n.º 7
0
def create_branch(s_name):
    """Create and switch to a new branch called s_name. This call is
    blocking.

    Reports an error and returns without doing anything if a branch with
    that name already exists.
    """
    global gn_index_suffix

    if branch_exists(s_name):
        # The branch name has to be interpolated into the message.
        fredutil.fred_error("Branch named '%s' already exists." % s_name)
        return
    fredutil.fred_debug("Creating branch '%s'" % s_name)

    # CREATE the branch: Take the branch base checkpoint. When it
    # finishes, record/replay will have already reopened the log
    # files, or created new ones.
    n_branched_index = gn_index_suffix
    checkpoint()

    # Copy the whole dmtcp_tmpdir to the new branch location and
    # symlink to the new location.
    relocate_dmtcp_tmpdir(s_name)
    load_dmtcp_tmpdir(s_name)

    # SWITCH to the branch: Remove all checkpoint images in the new
    # branch except for the most recently created one (the branch base
    # checkpoint). Rename that base checkpoint to index 0.
    remove_checkpoints_except_index(n_branched_index)
    rename_index_to_base(n_branched_index)

    # Reset checkpoint indexing past the base checkpoint.
    reset_checkpoint_indexing()

    # Restart from the base checkpoint. This is required so the logs
    # will be closed and reopened in the new branch.
    restart(0)
Ejemplo n.º 8
0
def checkpoint():
    """Perform a blocking checkpoint request and rename the checkpoint
    files.

    Each new checkpoint file is renamed to "<file>.<gn_index_suffix>", and
    gn_index_suffix is incremented once afterwards.
    """
    global gn_index_suffix

    remove_stale_ptrace_files()

    def in_tmpdir(s_file):
        # Full path of a file living in DMTCP_TMPDIR.
        return os.path.join(os.environ["DMTCP_TMPDIR"], s_file)

    # Indexed checkpoint images that exist before the request is made.
    s_checkpoint_re = r"ckpt_.+\.dmtcp\..*"
    l_before = [in_tmpdir(s_file)
                for s_file in os.listdir(os.environ["DMTCP_TMPDIR"])
                if re.search(s_checkpoint_re, s_file) is not None]

    # Request the checkpoint.
    n_peers = get_num_peers()
    l_request = ["dmtcp_command", "--quiet", "bc"]
    fredutil.execute_shell_command_and_wait(l_request)
    fredutil.fred_debug("After blocking checkpoint command.")

    # There is what seems to be a DMTCP bug: the blocking checkpoint
    # can actually return before the checkpoints are written. It is
    # rare. Keep re-listing the directory until every peer has a file.
    l_new = []
    while len(l_new) < n_peers:
        l_after = [in_tmpdir(s_file)
                   for s_file in os.listdir(os.environ["DMTCP_TMPDIR"])
                   if s_file.startswith("ckpt_") and s_file.endswith("dmtcp")]
        l_new = [s_path for s_path in l_after if s_path not in l_before]
        time.sleep(0.001)

    for s_path in l_new:
        fredutil.fred_debug("Renaming ckpt file from '%s' to '%s.%d'" %
                            (s_path, s_path, gn_index_suffix))
        os.rename(s_path, "%s.%d" % (s_path, gn_index_suffix))
    gn_index_suffix += 1
Ejemplo n.º 9
0
def kill_child():
    """Send SIGKILL to the child process and close its file descriptor.

    Does nothing when no child is recorded (gn_child_pid is -1).
    """
    global gn_child_fd
    if gn_child_pid != -1:
        fredutil.fred_debug("Killing child process pid %d" % gn_child_pid)
        signal_child(signal.SIGKILL)
        os.close(gn_child_fd)
Ejemplo n.º 10
0
def kill_child():
    """Kill the child process with SIGKILL and close gn_child_fd.

    Returns immediately if gn_child_pid is -1 (no child recorded).
    """
    global gn_child_fd
    b_have_child = gn_child_pid != -1
    if b_have_child:
        fredutil.fred_debug("Killing child process pid %d" % gn_child_pid)
        signal_child(signal.SIGKILL)
        os.close(gn_child_fd)
Ejemplo n.º 11
0
def get_current_entry_index():
    """Return the index of the current entry, or None if the "status"
    output does not contain a "Current entry index = N" line."""
    # Raw string: "\d" in a plain literal is an invalid string escape.
    s_index_re = r"Current entry index = (\d+)"
    s_output = _execute_fred_command("status")
    m = re.search(s_index_re, s_output)
    if m is None:
        return None
    n_index = int(m.group(1))
    fredutil.fred_debug("Current entry index is: %d" % n_index)
    return n_index
Ejemplo n.º 12
0
def get_current_thread():
    """Return the clone id of the current entry's thread, or None if the
    "status" output does not contain a "Current clone id = N" line."""
    # Raw string: "\d" in a plain literal is an invalid string escape.
    s_clone_id_re = r"Current clone id = (\d+)"
    s_output = _execute_fred_command("status")
    m = re.search(s_clone_id_re, s_output)
    if m is None:
        return None
    n_clone_id = int(m.group(1))
    fredutil.fred_debug("Current clone id is: %d" % n_clone_id)
    return n_clone_id
Ejemplo n.º 13
0
def get_current_entry_index():
    """Return the index of the current entry.

    The value is parsed from the "Current entry index = N" line of the
    "status" command output; None is returned when that line is absent.
    """
    s_output = _execute_fred_command("status")
    # Raw string so that "\d" reaches the regex engine untouched.
    m = re.search(r"Current entry index = (\d+)", s_output)
    if m is None:
        return None
    n_entry_index = int(m.group(1))
    fredutil.fred_debug("Current entry index is: %d" % n_entry_index)
    return n_entry_index
Ejemplo n.º 14
0
def get_current_thread():
    """Return the clone id of the current entry's thread.

    The value is parsed from the "Current clone id = N" line of the
    "status" command output; None is returned when that line is absent.
    """
    s_output = _execute_fred_command("status")
    # Raw string so that "\d" reaches the regex engine untouched.
    m = re.search(r"Current clone id = (\d+)", s_output)
    if m is None:
        return None
    n_thread = int(m.group(1))
    fredutil.fred_debug("Current clone id is: %d" % n_thread)
    return n_thread
Ejemplo n.º 15
0
def _spawn_child(argv):
    """Spawn a child process on a new pseudo-terminal using the given
    command array.

    The output thread is started first if it is not alive. In the parent,
    the values returned by pty.fork() are stored in gn_child_pid and
    gn_child_fd. The child replaces itself with argv and never returns
    from this function.
    """
    global gn_child_pid, gn_child_fd, gb_output_thread_alive
    if not gb_output_thread_alive:
        _start_output_thread()
    fredutil.fred_debug("Starting child '%s'" % str(argv))
    (gn_child_pid, gn_child_fd) = pty.fork()
    if gn_child_pid == 0:
        sys.stderr = sys.stdout
        try:
            os.execvp(argv[0], argv)
        except Exception as e:
            # exec did not happen; say why on the child's terminal.
            sys.stderr.write("Failed to execute %s: %s\n" % (str(argv), e))
            sys.stderr.flush()
        finally:
            # Only reached when exec failed. Leave immediately so the
            # forked copy never goes on running the parent's code.
            os._exit(127)
Ejemplo n.º 16
0
def get_total_threads():
    """Return the total number of log threads, or None if the "info"
    output does not contain a "Total number of threads = N" line."""
    # Raw string: "\d" in a plain literal is an invalid string escape.
    s_total_threads_re = r"Total number of threads = (\d+)"
    s_output = _execute_fred_command("info")
    m = re.search(s_total_threads_re, s_output)
    if m is None:
        return None
    n_threads = int(m.group(1))
    fredutil.fred_debug("Total threads are: %d" % n_threads)
    return n_threads
Ejemplo n.º 17
0
def get_total_threads():
    """Return the total number of log threads.

    The value is parsed from the "Total number of threads = N" line of the
    "info" command output; None is returned when that line is absent.
    """
    s_output = _execute_fred_command("info")
    # Raw string so that "\d" reaches the regex engine untouched.
    m = re.search(r"Total number of threads = (\d+)", s_output)
    if m is None:
        return None
    n_total = int(m.group(1))
    fredutil.fred_debug("Total threads are: %d" % n_total)
    return n_total
Ejemplo n.º 18
0
def _spawn_child(argv):
    """Spawn a child process using the given command array.

    Starts the output thread if needed, forks via pty.fork() and stores
    the returned pid and descriptor in gn_child_pid / gn_child_fd. The
    child process execs argv and does not return from this function.
    """
    global gn_child_pid, gn_child_fd, gb_output_thread_alive
    if not gb_output_thread_alive:
        _start_output_thread()
    fredutil.fred_debug("Starting child '%s'" % str(argv))
    (gn_child_pid, gn_child_fd) = pty.fork()
    if gn_child_pid == 0:
        sys.stderr = sys.stdout
        try:
            os.execvp(argv[0], argv)
        except Exception as e:
            # Report the failed exec on the child's terminal.
            sys.stderr.write("Failed to execute %s: %s\n" % (str(argv), e))
            sys.stderr.flush()
        finally:
            # Reached only if exec failed: terminate the forked copy right
            # away instead of letting it continue as a second parent.
            os._exit(127)
Ejemplo n.º 19
0
def remove_checkpoints_except_index(n_index):
    """Remove all checkpoint images in the current DMTCP_TMPDIR except
    those of the specified index.

    A file is removed when its name matches the checkpoint pattern
    "ckpt_.+\\.dmtcp.*" and does not end in ".<n_index>".
    """
    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    s_checkpoint_re = r"ckpt_.+\.dmtcp.*"
    s_keep_suffix = ".%d" % n_index
    l_files = [os.path.join(s_tmpdir, x)
               for x in os.listdir(s_tmpdir)
               if re.search(s_checkpoint_re, x) is not None
               and not x.endswith(s_keep_suffix)]
    fredutil.fred_debug("Removing files: %s" % str(l_files))
    # Explicit loop: map() is lazy on Python 3 and would remove nothing.
    for s_file in l_files:
        os.remove(s_file)
Ejemplo n.º 20
0
def remove_checkpoints_except_index(n_index):
    """Remove all checkpoint images in the current DMTCP_TMPDIR except
    the specified index.

    Files whose names match "ckpt_.+\\.dmtcp.*" are removed unless they
    end in ".<n_index>".
    """
    s_dir = os.environ["DMTCP_TMPDIR"]
    ckpt_re = re.compile(r"ckpt_.+\.dmtcp.*")
    s_kept = ".%d" % n_index
    l_files = []
    for s_name in os.listdir(s_dir):
        if ckpt_re.search(s_name) is not None and not s_name.endswith(s_kept):
            l_files.append(os.path.join(s_dir, s_name))
    fredutil.fred_debug("Removing files: %s" % str(l_files))
    # A plain loop performs the removals on Python 2 and 3 alike; map()
    # would only build a lazy iterator on Python 3.
    for s_path in l_files:
        os.remove(s_path)
Ejemplo n.º 21
0
def kill_peers():
    """Send the 'k' command to the coordinator and wait for the
    dmtcp_command process to finish."""
    # One list is both logged and executed, so the debug output shows the
    # command that is really run (including --quiet).
    cmd = ["dmtcp_command", "--quiet", "k"]
    fredutil.fred_debug("Sending command '%s'" % ' '.join(cmd))
    if fredio.GB_FRED_DEMO:
        # Call form prints the same text on Python 2 and Python 3.
        print("===================== KILLING gdb =====================")
    pid = os.fork()
    if pid == 0:
        sys.stderr = sys.stdout
        try:
            os.execvp(cmd[0], cmd)
        except Exception as e:
            sys.stderr.write("Failed to execute %s: %s\n" % (str(cmd), e))
            sys.stderr.flush()
        finally:
            # Reached only if exec failed: the forked copy must not go on
            # running the parent's code.
            os._exit(127)
    else:
        os.waitpid(pid, 0)
Ejemplo n.º 22
0
def rename_index_to_base(n_index):
    """Rename all checkpoint images of the given index to index 0 ("*.0").

    Only files in DMTCP_TMPDIR whose names end in ".<n_index>" and match
    the checkpoint pattern are renamed; other files carrying the same
    numeric suffix are left alone.
    """
    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    s_suffix = ".%d" % n_index
    checkpoint_re = re.compile(r"(ckpt_.+\.dmtcp)\..*")
    for s_file in os.listdir(s_tmpdir):
        if not s_file.endswith(s_suffix):
            continue
        # Match on the bare file name so the directory part of the path
        # cannot influence the match, and skip non-checkpoint files rather
        # than failing on a missing match.
        m = checkpoint_re.search(s_file)
        if m is None:
            continue
        s_old_name = os.path.join(s_tmpdir, s_file)
        s_new_name = os.path.join(s_tmpdir, "%s.0" % m.group(1))
        fredutil.fred_debug("Renaming ckpt %s to base ckpt %s." % (s_old_name, s_new_name))
        os.rename(s_old_name, s_new_name)
Ejemplo n.º 23
0
def kill_inferior():
    """Kill the inferior process, if it exists, and wait for it.

    This is best effort: an OSError raised while signalling or waiting is
    logged at debug level and otherwise ignored.
    """
    n_pid = get_real_inferior_pid()
    if n_pid == -1:
        fredutil.fred_debug("Not sending inferior SIGKILL: pid is -1.")
        return
    try:
        fredutil.fred_debug("Sending inferior %d SIGKILL." % n_pid)
        os.kill(n_pid, signal.SIGKILL)
        os.waitpid(n_pid, 0)
    except OSError as e:
        # Still non-fatal, but leave a trace of what went wrong.
        fredutil.fred_debug("Ignoring error while killing inferior %d: %s" %
                            (n_pid, e))
Ejemplo n.º 24
0
def kill_inferior():
    """Kill the inferior process, if it exists.

    Sends SIGKILL to the real inferior pid and waits for it. An OSError
    from either call is reported through fred_debug and then ignored, so
    this function stays best effort.
    """
    n_pid = get_real_inferior_pid()
    if n_pid == -1:
        fredutil.fred_debug("Not sending inferior SIGKILL: pid is -1.")
        return
    try:
        fredutil.fred_debug("Sending inferior %d SIGKILL." % n_pid)
        os.kill(n_pid, signal.SIGKILL)
        os.waitpid(n_pid, 0)
    except OSError as e:
        # Do not fail the caller, but do not hide the error either.
        fredutil.fred_debug("Ignoring error while killing inferior %d: %s" %
                            (n_pid, e))
Ejemplo n.º 25
0
def rename_index_to_base(n_index):
    """Rename all checkpoint images of the given index to index 0 ("*.0").

    Candidates are the files in DMTCP_TMPDIR ending in ".<n_index>"; those
    that are not checkpoint images (no "ckpt_*.dmtcp" stem) are skipped.
    """
    s_dir = os.environ["DMTCP_TMPDIR"]
    s_index_suffix = ".%d" % n_index
    stem_re = re.compile(r"(ckpt_.+\.dmtcp)\..*")
    for s_name in os.listdir(s_dir):
        if not s_name.endswith(s_index_suffix):
            continue
        # Search the file name only, and tolerate unrelated files that
        # happen to share the numeric suffix instead of raising on them.
        m = stem_re.search(s_name)
        if m is None:
            continue
        s_current = os.path.join(s_dir, s_name)
        s_new_name = os.path.join(s_dir, "%s.0" % m.group(1))
        fredutil.fred_debug("Renaming ckpt %s to base ckpt %s." %
                            (s_current, s_new_name))
        os.rename(s_current, s_new_name)
Ejemplo n.º 26
0
 def replay_history(self, l_history=[], n=-1):
     """Send the commands of a history to the debugger.

     An empty l_history means "use a copy of the current checkpoint's
     history". The history is coalesced first; when n is not -1 only the
     result of first_n_commands(..., n) is replayed. The state is updated
     once, after all commands have been issued.
     """
     if len(l_history) == 0:
         l_history = self.copy_current_checkpoint_history()
     l_to_replay = self._coalesce_history(l_history)
     if n != -1:
         l_to_replay = self.first_n_commands(l_to_replay, n)
     s_message = "Replaying the following history: %s" % str(l_to_replay)
     fredutil.fred_debug(s_message)
     for fred_cmd in l_to_replay:
         self.execute_fred_command(fred_cmd, b_update=False)
     self.update_state()
Ejemplo n.º 27
0
def kill_peers():
    """Send the 'k' command to the coordinator, then block until
    get_num_peers() reports zero peers."""
    # One list is both logged and executed, so the debug output shows the
    # command that is really run (including --quiet).
    cmd = ["dmtcp_command", "--quiet", "k"]
    fredutil.fred_debug("Sending command '%s'" % " ".join(cmd))
    if fredio.GB_FRED_DEMO:
        # Call form prints the same text on Python 2 and Python 3.
        print("===================== KILLING gdb =====================")
    pid = os.fork()
    if pid == 0:
        sys.stderr = sys.stdout
        try:
            os.execvp(cmd[0], cmd)
        except Exception as e:
            sys.stderr.write("Failed to execute %s: %s\n" % (str(cmd), e))
            sys.stderr.flush()
        finally:
            # Reached only if exec failed: the forked copy must not go on
            # running the parent's code.
            os._exit(127)
    else:
        os.waitpid(pid, 0)

    # Poll until the coordinator no longer reports any peer.
    while get_num_peers() != 0:
        time.sleep(0.01)
Ejemplo n.º 28
0
def reset_checkpoint_indexing():
    """Set gn_index_suffix to the appropriate value based on existent
    checkpoint files.

    The new value is one past the highest numeric index found among the
    "ckpt_*.dmtcp.<index>" files in DMTCP_TMPDIR, or 0 if there are none.
    """
    global gn_index_suffix
    # Require a purely numeric trailing index and match on the file name
    # only, so a stray file such as "ckpt_x.dmtcp.3.tmp" is skipped instead
    # of making int() raise ValueError.
    index_re = re.compile(r"ckpt_.+\.dmtcp\.(\d+)$")
    l_indices = []
    for s_file in os.listdir(os.environ["DMTCP_TMPDIR"]):
        m = index_re.search(s_file)
        if m is not None:
            l_indices.append(int(m.group(1)))
    if l_indices:
        gn_index_suffix = max(l_indices) + 1
    else:
        gn_index_suffix = 0
    fredutil.fred_debug("Reset ckpt index to %d" % gn_index_suffix)
Ejemplo n.º 29
0
 def replay_history_helper(self, l_history=[], n=-1):
     """Issue the commands of the given history (or, when l_history is
     empty, of a copy of the current checkpoint's history) to the debugger.

     The history is coalesced before replay. When n is not -1, only the
     result of first_n_commands(..., n) is issued. The state is updated
     once at the end.
     """
     if len(l_history) == 0:
         l_history = self.copy_current_checkpoint_history()
     l_coalesced = self._coalesce_history(l_history)
     if n == -1:
         l_selected = l_coalesced
     else:
         l_selected = self.first_n_commands(l_coalesced, n)
     fredutil.fred_debug(
         "Replaying the following history: %s" % str(l_selected))
     for fred_cmd in l_selected:
         self.execute_fred_command(fred_cmd, b_update=False)
     self.update_state()
Ejemplo n.º 30
0
def kill_peers():
    """Send 'k' command to coordinator and wait until no peers remain
    (get_num_peers() returns 0)."""
    # Log exactly the command that is executed (it includes --quiet).
    cmd = ['dmtcp_command', '--quiet', 'k']
    fredutil.fred_debug("Sending command '%s'" % ' '.join(cmd))
    if fredio.GB_FRED_DEMO:
        # Call form prints the same text on Python 2 and Python 3.
        print("===================== KILLING gdb =====================")
    pid = os.fork()
    if pid == 0:
        sys.stderr = sys.stdout
        try:
            os.execvp(cmd[0], cmd)
        except Exception as e:
            sys.stderr.write("Failed to execute %s: %s\n" % (str(cmd), e))
            sys.stderr.flush()
        finally:
            # exec failed if we get here; never let the forked copy return
            # into the parent's code path.
            os._exit(127)
    else:
        os.waitpid(pid, 0)

    # Poll until the coordinator no longer reports any peer.
    while get_num_peers() != 0:
        time.sleep(0.01)
Ejemplo n.º 31
0
def restart(n_index):
    """Restart from the checkpoint images of the given index.

    Kills the inferior, the DMTCP peers and the child, waits for the peers
    to disappear, symlinks the "*.dmtcp.<n_index>" images to "*.dmtcp"
    names, re-executes dmtcp_restart on those links and blocks until all
    peers are back and running.
    """
    # Kill inferior first because it is being traced, and cannot
    # handle signals or DMTCP KILL messages.
    fredmanager.kill_inferior()
    kill_peers()
    fredio.kill_child()

    # Wait until the peers are really gone
    while get_num_peers() != 0:
        time.sleep(0.01)

    remove_stale_ptrace_files()

    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    s_suffix = ".dmtcp.%d" % n_index
    l_ckpt_files = [
        os.path.join(s_tmpdir, x)
        for x in os.listdir(s_tmpdir)
        if x.endswith(s_suffix)
    ]
    if len(l_ckpt_files) > 2:
        # XXX: I think this is a Python bug.... sometimes even when there are
        # physically only two checkpoint files on disk, l_ckpt_files will
        # contain 4, with the two unique ones being duplicated:
        # (Pdb) p l_ckpt_files
        # ['/tmp/fred.tyler/dmtcp_tmpdir/ckpt_test_list_X-3089-4db5c59e.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_gdb_X-3081-4db5c59c.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_test_list_X-3089-4db5c59e.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_gdb_X-3081-4db5c59c.dmtcp']
        # I have replaced the hostname with X for readability.
        l_ckpt_files = list(set(l_ckpt_files))

    # Due to what is arguably a bug in DMTCP, checkpoint files must
    # end in "*.dmtcp" in order for DMTCP to restart from them. So we
    # symlink to conform to that pattern before restarting.
    link_name_re = re.compile(r"(ckpt_.*\.dmtcp)\..*")
    l_symlinks = []
    for f in l_ckpt_files:
        # Derive the link name from the file name alone.
        s_link_name = link_name_re.search(os.path.basename(f)).group(1)
        s_new_path = os.path.join(s_tmpdir, s_link_name)
        # lexists(): a leftover link whose target is gone must be removed
        # too, otherwise os.symlink() fails because the name is taken.
        if os.path.lexists(s_new_path):
            os.remove(s_new_path)
        os.symlink(f, s_new_path)
        l_symlinks.append(s_new_path)
    fredutil.fred_debug("Restarting checkpoint files: %s" % str(l_symlinks))
    # Build the argument list directly; map() is lazy on Python 3 and
    # would append nothing.
    cmdstr = ["dmtcp_restart"] + l_symlinks
    fredio.reexec(cmdstr)
    # Wait until every peer has finished resuming:
    while get_num_peers() < len(l_symlinks) or not is_running():
        time.sleep(0.01)
Ejemplo n.º 32
0
def restart(n_index):
    """Restart from the given index.

    After killing the inferior, the peers and the child, the checkpoint
    images named "*.dmtcp.<n_index>" are symlinked to "*.dmtcp" names and
    dmtcp_restart is re-executed on those links. The call returns once all
    peers are reported and is_running() is true.
    """
    # Kill inferior first because it is being traced, and cannot
    # handle signals or DMTCP KILL messages.
    fredmanager.kill_inferior()
    kill_peers()
    fredio.kill_child()

    # Wait until the peers are really gone
    while get_num_peers() != 0:
        time.sleep(0.01)

    remove_stale_ptrace_files()

    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    l_ckpt_files = [os.path.join(s_tmpdir, x)
                    for x in os.listdir(s_tmpdir)
                    if x.endswith(".dmtcp.%d" % n_index)]
    if len(l_ckpt_files) > 2:
        # XXX: I think this is a Python bug.... sometimes even when there are
        # physically only two checkpoint files on disk, l_ckpt_files will
        # contain 4, with the two unique ones being duplicated:
        # (Pdb) p l_ckpt_files
        #['/tmp/fred.tyler/dmtcp_tmpdir/ckpt_test_list_X-3089-4db5c59e.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_gdb_X-3081-4db5c59c.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_test_list_X-3089-4db5c59e.dmtcp',
        #'/tmp/fred.tyler/dmtcp_tmpdir/ckpt_gdb_X-3081-4db5c59c.dmtcp']
        # I have replaced the hostname with X for readability.
        l_ckpt_files = list(set(l_ckpt_files))

    # Due to what is arguably a bug in DMTCP, checkpoint files must
    # end in "*.dmtcp" in order for DMTCP to restart from them. So we
    # symlink to conform to that pattern before restarting.
    l_symlinks = []
    for f in l_ckpt_files:
        # The link name comes from the file name only, not the full path.
        s_new_path = os.path.join(
            s_tmpdir,
            re.search(r"(ckpt_.*\.dmtcp)\..*", os.path.basename(f)).group(1))
        # lexists() also sees a dangling link left over from an earlier
        # restart; exists() would miss it and os.symlink() would then fail.
        if os.path.lexists(s_new_path):
            os.remove(s_new_path)
        os.symlink(f, s_new_path)
        l_symlinks.append(s_new_path)
    fredutil.fred_debug("Restarting checkpoint files: %s" % str(l_symlinks))
    cmdstr = ["dmtcp_restart"]
    # extend() instead of map(): map() is lazy on Python 3 and would leave
    # the command without any checkpoint argument.
    cmdstr.extend(l_symlinks)
    fredio.reexec(cmdstr)
    # Wait until every peer has finished resuming:
    while get_num_peers() < len(l_symlinks) or not is_running():
        time.sleep(0.01)
Ejemplo n.º 33
0
def remove_checkpoint_files_of_index(n_index):
    """Remove the checkpoint images in the current DMTCP_TMPDIR with
    the specified index, plus the matching un-indexed symbolic links.

    An image is a file whose name matches "ckpt_.+\\.dmtcp.*" and ends in
    ".<n_index>".
    """
    s_tmpdir = os.environ["DMTCP_TMPDIR"]
    s_checkpoint_re = r"ckpt_.+\.dmtcp.*"
    s_suffix = ".%d" % n_index
    l_files = [os.path.join(s_tmpdir, x)
               for x in os.listdir(s_tmpdir)
               if re.search(s_checkpoint_re, x) is not None
               and x.endswith(s_suffix)]
    fredutil.fred_debug("Removing files: %s" % str(l_files))
    # Explicit loop: map() is lazy on Python 3 and would remove nothing.
    for s_file in l_files:
        os.remove(s_file)
    # Also remove any symbolic links.  They have same filename without ".%d".
    for s_file in l_files:
        s_link = s_file[:-len(s_suffix)]  # strip index number at end
        # lexists() so that links whose target was just removed are found.
        if os.path.lexists(s_link):
            os.remove(s_link)
Ejemplo n.º 34
0
def reset_checkpoint_indexing():
    """Set gn_index_suffix to the appropriate value based on existent
    checkpoint files.

    gn_index_suffix becomes the largest numeric index among the
    "ckpt_*.dmtcp.<index>" files in DMTCP_TMPDIR plus one, or 0 when no
    such file exists.
    """
    global gn_index_suffix
    # Capture digits only and anchor at the end of the file name: anything
    # else after ".dmtcp." (for example "3.tmp") is ignored rather than
    # passed to int(), which would raise ValueError.
    index_re = re.compile(r"ckpt_.+\.dmtcp\.(\d+)$")
    l_matches = [index_re.search(x)
                 for x in os.listdir(os.environ["DMTCP_TMPDIR"])]
    l_indices = [int(m.group(1)) for m in l_matches if m is not None]
    if not l_indices:
        gn_index_suffix = 0
    else:
        gn_index_suffix = max(l_indices) + 1
    fredutil.fred_debug("Reset ckpt index to %d" % gn_index_suffix)
Ejemplo n.º 35
0
 def replay_history(self, l_history=[], n=-1):
     """Replay a history via replay_history_helper(), with a timeout.

     Every attempt is given 30 seconds, enforced with SIGALRM. When an
     attempt times out, do_restart() is called and the replay is tried
     again. On exit the alarm is cancelled and the SIGALRM handler that
     was installed before the call is put back.
     """
     import signal

     class TimeoutException(Exception):
         pass

     def timeout_handler(signum, frame):
         raise TimeoutException()

     n_timeout = 30
     previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
     try:
         while True:
             # Arm the alarm for every attempt; a single alarm set before
             # the loop would leave retries without any timeout.
             signal.alarm(n_timeout)
             try:
                 self.replay_history_helper(l_history, n)
                 signal.alarm(0)
                 return
             except TimeoutException:
                 fredutil.fred_debug("Replay history timed out.")
                 self.do_restart()
     finally:
         # Never leave a pending alarm or our handler behind, including
         # when the helper raises something other than a timeout.
         signal.alarm(0)
         if previous_handler is not None:
             signal.signal(signal.SIGALRM, previous_handler)
Ejemplo n.º 36
0
 def execute_fred_command(self, cmd, b_update=True):
     """Run one FredCommand.

     A command flagged b_ignore is skipped entirely (no state update).
     "log-breakpoint" and "log-continue" commands are dispatched to
     set_log_breakpoint() / do_log_continue(); any other command is sent
     to the debugger as "<native> <args>". When b_update is true,
     update_state() is called afterwards.
     """
     if cmd.b_ignore:
         s_skipped = cmd.s_native + " " + cmd.s_args
         fredutil.fred_debug("Skipping ignore command '%s'" % s_skipped)
         return

     # "log-breakpoint X" and "log-continue" get special handling.
     if cmd.is_log_breakpoint():
         n_log_breakpoint = int(cmd.s_args)
         self.set_log_breakpoint(n_log_breakpoint)
     elif cmd.is_log_continue():
         self.do_log_continue()
     else:
         fredutil.fred_assert(cmd.s_native != "")
         s_command_line = cmd.s_native + " " + cmd.s_args + "\n"
         self._p.execute_command(s_command_line,
                                 b_prompt=cmd.b_wait_for_prompt)

     if b_update:
         self.update_state()
Ejemplo n.º 37
0
 def execute_fred_command(self, cmd, b_update=True):
     """Execute the given FredCommand.

     Ignored commands (cmd.b_ignore) are only logged and nothing else is
     done. Log-breakpoint and log-continue commands go to
     set_log_breakpoint() and do_log_continue(); all others are passed to
     the debugger in their native form. update_state() runs at the end if
     b_update is true.
     """
     if cmd.b_ignore:
         s_description = cmd.s_native + " " + cmd.s_args
         fredutil.fred_debug("Skipping ignore command '%s'" % s_description)
         return

     # Special cases first: "log-breakpoint X" and "log-continue".
     if cmd.is_log_breakpoint():
         self.set_log_breakpoint(int(cmd.s_args))
     elif cmd.is_log_continue():
         self.do_log_continue()
     else:
         fredutil.fred_assert(cmd.s_native != "")
         s_native_line = cmd.s_native + " " + cmd.s_args + "\n"
         self._p.execute_command(s_native_line, cmd.b_wait_for_prompt)

     if b_update:
         self.update_state()
Ejemplo n.º 38
0
def remove_checkpoint_files_of_index(n_index):
    """Remove the checkpoint image in the current DMTCP_TMPDIR with
    the specified index.

    Files matching "ckpt_.+\\.dmtcp.*" that end in ".<n_index>" are
    removed first; afterwards the entries with the same name minus the
    index suffix (the symbolic links) are removed as well.
    """
    s_dir = os.environ["DMTCP_TMPDIR"]
    ckpt_re = re.compile(r"ckpt_.+\.dmtcp.*")
    s_index_suffix = ".%d" % n_index
    l_files = [
        os.path.join(s_dir, x)
        for x in os.listdir(s_dir)
        if ckpt_re.search(x) is not None and x.endswith(s_index_suffix)
    ]
    fredutil.fred_debug("Removing files: %s" % str(l_files))
    # A real loop, not map(): on Python 3 map() is lazy and the files
    # would never be removed.
    for s_path in l_files:
        os.remove(s_path)
    # Also remove any symbolic links.  They have same filename without ".%d".
    for s_path in l_files:
        s_link_path = s_path[:-len(s_index_suffix)]  # strip index at end
        # lexists() so that links whose target was just removed are found.
        if os.path.lexists(s_link_path):
            os.remove(s_link_path)
Ejemplo n.º 39
0
def _execute_fred_command(s_cmd, s_arg=None):
    """Run the fred_command helper with --<s_cmd> and return its output.

    s_cmd must be one of "status", "info", "break" or "continue". The
    optional s_arg is passed after the option, followed by the path
    "<DMTCP_TMPDIR>/fred-shm.<pid>". "break" is started in the background
    (kept in g_child_subprocess) and yields an empty string; every other
    command is run to completion and its output is returned.
    """
    global g_child_subprocess
    fredutil.fred_assert(s_cmd in ["status", "info", "break", "continue"])
    s_binary = "%s/fred_command" % GS_FREDHIJACK_PATH
    fredutil.fred_assert(get_pid() != -1)
    s_shm_path = "%s/fred-shm.%d" % (os.environ["DMTCP_TMPDIR"], get_pid())

    l_argv = [s_binary, "--%s" % s_cmd]
    if s_arg is not None:
        l_argv.append(s_arg)
    l_argv.append(s_shm_path)
    fredutil.fred_debug("Executing fred_command: %s" % l_argv)

    if s_cmd != "break":
        return fredutil.execute_shell_command(l_argv)
    # "break" runs in the background; only one may be outstanding.
    fredutil.fred_assert(g_child_subprocess is None)
    g_child_subprocess = fredutil.execute_background_shell_command(l_argv)
    return ""
Ejemplo n.º 40
0
 def log_command(self, s_command):
     """Turn s_command into a FredCommand and append it to the history of
     the current checkpoint, if there is one.

     Before that, the inferior's real and virtual pids are recorded in
     fredmanager for as long as they are still unknown (-1).
     """
     # XXX: Figure out a more elegant way to do this. We can't set the
     # inferior pids until we know the inferior is alive, so we keep trying
     # to update them with every command issued until it succeeds.
     b_real_pid_unknown = fredmanager.get_real_inferior_pid() == -1
     if b_real_pid_unknown:
         fredmanager.reset_real_inferior_pid(self.get_real_debugger_pid())
     b_virtual_pid_unknown = fredmanager.get_virtual_inferior_pid() == -1
     if b_virtual_pid_unknown:
         s_virt_pid = self.evaluate_expression("getpid()")
         if s_virt_pid == GS_NO_SYMBOL_ERROR:
             fredutil.fred_debug("Can't set virtual pid; no getpid() "
                                 "symbol available.")
         else:
             fredmanager.set_virtual_inferior_pid(int(s_virt_pid))
     if self.current_checkpoint() != None:
         # identify_command() sets native representation
         fred_cmd = self._p.identify_command(s_command)
         self.current_checkpoint().log_command(fred_cmd)
Ejemplo n.º 41
0
 def do_next_no_deadlock(self, n=1):
     """Run a 'next' command with a prompt timeout to detect a deadlock.

     Returns the 2-tuple (b_deadlock, output). b_deadlock is True when the
     command timed out; output is the text returned by the 'next', or the
     empty string if it did not return. The command is logged before it is
     issued.
     """
     # The inferior has to be interruptible with SIGSTOP:
     self.enable_sigstop()
     s_output = ""
     try:
         next_cmd = fred_next_cmd()
         next_cmd.set_native(self._p.get_native(next_cmd))
         next_cmd.set_count_cmd(self._p.b_has_count_commands)
         next_cmd.set_count(n)
         self.log_fred_command(next_cmd)
         s_output = self._next(n, b_timeout=True)
         self.update_state()
     except fredutil.PromptTimeoutException:
         fredutil.fred_debug("'next' command timed out (probably a deadlock).")
         b_deadlock = True
     else:
         b_deadlock = False
     return (b_deadlock, s_output)
Ejemplo n.º 42
0
 def do_next_no_deadlock(self, n=1):
     """Issue 'next' with a prompt timeout so that a deadlock is noticed.

     Returns the 2-tuple (b_deadlock, output), where b_deadlock is True if
     the command timed out and output is the text returned by the 'next'
     (empty if it did not return). The command is logged before it runs.
     """
     def build_next_cmd():
         # A 'next' FredCommand with native form and repeat count set.
         fred_cmd = fred_next_cmd()
         fred_cmd.set_native(self._p.get_native(fred_cmd))
         fred_cmd.set_count_cmd(self._p.b_has_count_commands)
         fred_cmd.set_count(n)
         return fred_cmd

     # SIGSTOP must be able to interrupt the inferior:
     self.enable_sigstop()
     s_next_output = ""
     try:
         self.log_fred_command(build_next_cmd())
         s_next_output = self._next(n, b_timeout=True)
         self.update_state()
     except fredutil.PromptTimeoutException:
         fredutil.fred_debug(
             "'next' command timed out (probably a deadlock).")
         b_timed_out = True
     else:
         b_timed_out = False
     return (b_timed_out, s_next_output)
Ejemplo n.º 43
0
    def replay_history(self, l_history=[], n=-1):
        """Replay a history via replay_history_helper(), with a timeout.

        Each attempt may take at most 30 seconds (enforced with SIGALRM).
        If an attempt times out, do_restart() is called and the replay is
        attempted again. When the method exits, the alarm is cancelled and
        the SIGALRM handler that was active before the call is restored.
        """
        import signal

        class TimeoutException(Exception):
            pass

        def timeout_handler(signum, frame):
            raise TimeoutException()

        n_timeout = 30
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        try:
            while True:
                # Re-arm per attempt: an alarm set only once before the
                # loop leaves every retry without a timeout.
                signal.alarm(n_timeout)
                try:
                    self.replay_history_helper(l_history, n)
                    signal.alarm(0)
                    return
                except TimeoutException:
                    fredutil.fred_debug("Replay history timed out.")
                    self.do_restart()
        finally:
            # Cancel any pending alarm and put the old handler back, also
            # when the helper fails with a different exception.
            signal.alarm(0)
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)
Ejemplo n.º 44
0
def relocate_dmtcp_tmpdir(s_name):
    """Place the current DMTCP_TMPDIR contents at the path for s_name.

    When $DMTCP_TMPDIR is a symlink, the directory it points to is copied
    to get_dmtcp_tmpdir_path(s_name); an error is reported and nothing is
    copied if that path already exists. When it is not a symlink, it is
    renamed to the new path instead."""
    s_tmpdir = os.environ["DMTCP_TMPDIR"]

    if os.path.islink(s_tmpdir):
        s_source = os.path.normpath(os.readlink(s_tmpdir))
        if not os.path.isabs(s_source):
            # A relative link target is relative to the link's directory.
            s_source = os.path.join(os.path.dirname(s_tmpdir), s_source)
        s_target = get_dmtcp_tmpdir_path(s_name)
        if os.path.exists(s_target):
            fredutil.fred_error(
                "Requested new path '%s' already exists." % s_target)
            return
        shutil.copytree(s_source, s_target)
        fredutil.fred_debug(
            "Copied DMTCP_TMPDIR from '%s' to '%s'." % (s_source, s_target))
    else:
        # When creating the master branch the tmpdir is a real directory,
        # not a link, so it is simply moved.
        fredutil.fred_debug("DMTCP_TMPDIR is a directory, not a link.")
        os.rename(s_tmpdir, get_dmtcp_tmpdir_path(s_name))
Ejemplo n.º 45
0
 def report_timing_statistics(self):
     """Report any gathered timing statistics through fredutil.fred_debug.

     Reads the module-level gn_time_* totals and gn_total_* counters. An
     average whose counter is zero is reported as 0.000 s rather than
     raising ZeroDivisionError."""
     def _average(n_total_time, n_count):
         # No events of this kind were recorded: avoid dividing by zero.
         if n_count == 0:
             return 0.0
         return n_total_time / n_count

     fredutil.fred_debug("Timing statistics:")
     l_lines = [
         "Total time checkpointing:   %.3f s" % gn_time_checkpointing,
         "Total time restarting:      %.3f s" % gn_time_restarting,
         "Total time evaluating expr: %.3f s" % gn_time_evaluating,
         "Total checkpoints:          %d" % gn_total_checkpoints,
         "Total restarts:             %d" % gn_total_restarts,
         "Total evaluations of expr:  %d" % gn_total_evaluations,
         "Average checkpoint time:    %.3f s" %
         _average(gn_time_checkpointing, gn_total_checkpoints),
         "Average restart time:       %.3f s" %
         _average(gn_time_restarting, gn_total_restarts),
         "Average evaluation time:    %.3f s" %
         _average(gn_time_evaluating, gn_total_evaluations),
     ]
     # The report starts with a blank line and every entry ends in "\n".
     fredutil.fred_debug("\n" + "\n".join(l_lines) + "\n")
Ejemplo n.º 46
0
 def report_timing_statistics(self):
     """Report any gathered timing statistics through fredutil.fred_debug.

     Reads the module-level gn_time_* totals and gn_total_* counters. If a
     counter is zero, the matching average is printed as 0.000 s; dividing
     by it would raise ZeroDivisionError."""
     def _mean(n_time, n_events):
         # Guard against a counter that never left zero.
         return n_time / n_events if n_events != 0 else 0.0

     n_avg_checkpoint = _mean(gn_time_checkpointing, gn_total_checkpoints)
     n_avg_restart = _mean(gn_time_restarting, gn_total_restarts)
     n_avg_evaluation = _mean(gn_time_evaluating, gn_total_evaluations)

     fredutil.fred_debug("Timing statistics:")
     s = "\n"
     s += "Total time checkpointing:   %.3f s\n" % gn_time_checkpointing
     s += "Total time restarting:      %.3f s\n" % gn_time_restarting
     s += "Total time evaluating expr: %.3f s\n" % gn_time_evaluating
     s += "Total checkpoints:          %d\n" % gn_total_checkpoints
     s += "Total restarts:             %d\n" % gn_total_restarts
     s += "Total evaluations of expr:  %d\n" % gn_total_evaluations
     s += "Average checkpoint time:    %.3f s\n" % n_avg_checkpoint
     s += "Average restart time:       %.3f s\n" % n_avg_restart
     s += "Average evaluation time:    %.3f s\n" % n_avg_evaluation
     fredutil.fred_debug(s)
Ejemplo n.º 47
0
def _execute_fred_command(s_cmd, s_arg=None):
    """Run the fred_command binary with option --<s_cmd>; return its output.

    s_cmd must be one of "status", "info", "break" or "continue", and the
    virtual inferior pid must already be set (both checked with
    fredutil.fred_assert). s_arg, when given, is placed between the option
    and the shared-memory path argument.

    "break" is started with execute_background_shell_command(), its handle
    is stored in g_child_subprocess, and "" is returned. Every other
    command is run with execute_shell_command() and its result returned."""
    global g_child_subprocess
    fredutil.fred_assert(s_cmd in ["status", "info", "break", "continue"])
    fredutil.fred_assert(get_virtual_inferior_pid() != -1)

    s_shm_path = "%s/fred-shm.%d" % (os.environ["DMTCP_TMPDIR"],
                                     get_virtual_inferior_pid())
    l_argv = ["%s/fred_command" % GS_FREDHIJACK_PATH, "--%s" % s_cmd]
    if s_arg is not None:
        l_argv.append(s_arg)
    l_argv.append(s_shm_path)
    fredutil.fred_debug("Executing fred_command: %s" % l_argv)

    if s_cmd != "break":
        return fredutil.execute_shell_command(l_argv)

    # Only one background 'break' command may be outstanding at a time.
    fredutil.fred_assert(g_child_subprocess is None)
    g_child_subprocess = fredutil.execute_background_shell_command(l_argv)
    return ""
Ejemplo n.º 48
0
    def log_command(self, s_command):
        """Turn s_command into a FredCommand and append it to the history
        of the current checkpoint. Nothing is logged when there is no
        current checkpoint.

        Under the gdb personality this also tries to fill in the real and
        virtual inferior pids while they are still -1."""
        if self.personality_name() == "gdb":
            # XXX: Figure out a more elegant way to do this. The inferior
            # pids cannot be set until the inferior is known to be alive,
            # so every issued command retries until it succeeds.
            if fredmanager.get_real_inferior_pid() == -1:
                n_debugger_pid = self.get_real_debugger_pid()
                fredmanager.reset_real_inferior_pid(n_debugger_pid)
            if fredmanager.get_virtual_inferior_pid() == -1:
                s_pid = self.evaluate_expression("getpid()")
                if s_pid == GS_NO_SYMBOL_ERROR:
                    fredutil.fred_debug("Can't set virtual pid; no getpid() "
                                        "symbol available.")
                else:
                    fredmanager.set_virtual_inferior_pid(int(s_pid))

        if self.current_checkpoint() is None:
            return
        # identify_command() sets the native representation.
        fred_cmd = self._p.identify_command(s_command)
        self.current_checkpoint().log_command(fred_cmd)
Ejemplo n.º 49
0
def relocate_dmtcp_tmpdir(s_name):
    """Move or copy the current DMTCP_TMPDIR to the location for s_name.

    A plain directory at $DMTCP_TMPDIR is renamed to
    get_dmtcp_tmpdir_path(s_name). A symlink has its target directory
    copied there instead, unless the destination already exists, in which
    case an error is reported and nothing is copied."""
    s_env_path = os.environ["DMTCP_TMPDIR"]

    if os.path.islink(s_env_path):
        s_link_target = os.path.normpath(os.readlink(s_env_path))
        if os.path.isabs(s_link_target):
            s_old_path = s_link_target
        else:
            # Resolve a relative link target against the link's directory.
            s_old_path = os.path.join(os.path.dirname(s_env_path),
                                      s_link_target)
        s_dest_path = get_dmtcp_tmpdir_path(s_name)
        if os.path.exists(s_dest_path):
            s_error = "Requested new path '%s' already exists." % s_dest_path
            fredutil.fred_error(s_error)
            return
        shutil.copytree(s_old_path, s_dest_path)
        s_message = ("Copied DMTCP_TMPDIR from '%s' to '%s'." %
                     (s_old_path, s_dest_path))
        fredutil.fred_debug(s_message)
    else:
        # The master branch starts with a real directory rather than a
        # link; moving it is all that is needed.
        fredutil.fred_debug("DMTCP_TMPDIR is a directory, not a link.")
        s_dest_path = get_dmtcp_tmpdir_path(s_name)
        os.rename(s_env_path, s_dest_path)
Ejemplo n.º 50
0
 def do_restart(self, n_index, b_clear_history, reset_fnc):
     """Restart from the specified checkpoint, calling the provided
     reset_fnc before restarting, if provided.

     n_index         -- checkpoint index to restart from. -1 restarts from
                        the checkpoint returned by get_last_checkpoint().
                        Any other value must lie in
                        [0, get_num_checkpoints() - 1]; otherwise an error
                        is reported and no restart happens.
     b_clear_history -- when true, clear the history of the current
                        checkpoint after restarting.
     reset_fnc       -- callable invoked with no arguments before the
                        restart, or None.

     Note that reset_fnc runs before n_index is validated."""
     if self.get_num_checkpoints() == 0:
         fredutil.fred_error("No checkpoints found.")
         return
     if reset_fnc is not None:
         reset_fnc()
     if n_index == -1:
         n_last_index = self.get_last_checkpoint().get_index()
         fredutil.fred_debug("Restarting from checkpoint index %d." %
                             n_last_index)
         dmtcpmanager.restart(n_last_index)
         # NOTE(review): this path does not call set_current_checkpoint(),
         # so b_clear_history below clears whichever checkpoint is current,
         # which may not be the last one -- confirm that is intended.
     else:
         # -1 is the only meaningful negative value; reject the rest here
         # so they never reach dmtcpmanager.restart() or get_checkpoint().
         if n_index < 0 or n_index > self.get_num_checkpoints() - 1:
             fredutil.fred_error("No such checkpoint index %d." % n_index)
             return
         fredutil.fred_debug("Restarting from checkpoint index %d "
                             "in branch %s" %
                             (n_index, self.get_name()))
         dmtcpmanager.restart(n_index)
         self.set_current_checkpoint(self.get_checkpoint(n_index))
     if b_clear_history:
         self.get_current_checkpoint().clear_history()
Ejemplo n.º 51
0
 def do_restart(self, n_index, b_clear_history, reset_fnc):
     """Restart from the specified checkpoint, calling the provided
     reset_fnc before restarting, if provided.

     n_index         -- checkpoint index to restart from. -1 restarts from
                        the current checkpoint. Any other value must lie
                        in [0, get_num_checkpoints() - 1]; otherwise an
                        error is reported and no restart happens. A valid
                        index also becomes the current checkpoint.
     b_clear_history -- when true, clear the history of the current
                        checkpoint after restarting.
     reset_fnc       -- callable invoked with no arguments before the
                        restart, or None.

     Note that reset_fnc runs before n_index is validated."""
     if self.get_num_checkpoints() == 0:
         fredutil.fred_error("No checkpoints found.")
         return
     if reset_fnc is not None:
         reset_fnc()
     if n_index == -1:
         n_current_index = self.get_current_checkpoint().get_index()
         fredutil.fred_debug("Restarting from checkpoint index %d." %
                             n_current_index)
         dmtcpmanager.restart(n_current_index)
     else:
         # -1 is the only meaningful negative value; reject the rest here
         # so they never reach dmtcpmanager.restart() or get_checkpoint().
         if n_index < 0 or n_index > self.get_num_checkpoints() - 1:
             fredutil.fred_error("No such checkpoint index %d." % n_index)
             return
         fredutil.fred_debug("Restarting from checkpoint index %d "
                             "in branch %s" %
                             (n_index, self.get_name()))
         dmtcpmanager.restart(n_index)
         self.set_current_checkpoint(self.get_checkpoint(n_index))
     if b_clear_history:
         self.get_current_checkpoint().clear_history()
Ejemplo n.º 52
0
 def do_log_continue(self):
     """Execute fred_command 'cont' to hit a log breakpoint.

     Issues the debugger 'continue' directly through _continue(), logs a
     fred_log_continue_cmd, blocks in fredmanager.wait_on_fred_breakpoint(),
     then stops the inferior and calls fredmanager.send_fred_continue().
     The order of these steps matters; do not reorder them."""
     # XXX: Disable all debugger breakpoints before 'continue'
     # Call _continue() directly here so it does not get logged.
     # Calling debugger _continue() will let the inferior run, but
     # we have set a log breakpoint, so it will only replay to that
     # point.
     self.enable_sigstop()
     fredutil.fred_debug("Sending FReD log-based 'continue'")
     # b_wait_for_prompt=False: presumably returns without waiting for the
     # debugger prompt, since the wait happens on the log breakpoint below.
     self._continue(b_wait_for_prompt=False)
     fredutil.fred_debug("Debugger 'continue' issued -- waiting on bkpt.")
     # The command recorded in the history is the log-based continue, not
     # the debugger 'continue' issued above.
     self.log_fred_command(fred_log_continue_cmd())
     fredmanager.wait_on_fred_breakpoint()
     fredutil.fred_debug("Hit log-based breakpoint.")
     # Interrupt inferior to bring back the debugger prompt.
     self.stop_inferior()
     # Remove the log breakpoint that got us here (only support one
     # log bkpt right now).
     fredmanager.send_fred_continue()
Ejemplo n.º 53
0
 def do_log_continue(self):
     """Execute fred_command 'cont' to hit a log breakpoint.

     Steps, in order: enable SIGSTOP, issue the debugger 'continue' via
     _continue(), log a fred_log_continue_cmd, wait in
     fredmanager.wait_on_fred_breakpoint(), stop the inferior, and finally
     call fredmanager.send_fred_continue()."""
     # XXX: Disable all debugger breakpoints before 'continue'
     # Call _continue() directly here so it does not get logged.
     # Calling debugger _continue() will let the inferior run, but
     # we have set a log breakpoint, so it will only replay to that
     # point.
     self.enable_sigstop()
     fredutil.fred_debug("Sending FReD log-based 'continue'")
     # NOTE(review): b_wait_for_prompt=False looks like it makes this call
     # return before the prompt comes back -- confirm against _continue().
     self._continue(b_wait_for_prompt=False)
     fredutil.fred_debug("Debugger 'continue' issued -- waiting on bkpt.")
     # Log the log-based continue command (the raw _continue() call above
     # is deliberately not logged).
     self.log_fred_command(fred_log_continue_cmd())
     fredmanager.wait_on_fred_breakpoint()
     fredutil.fred_debug("Hit log-based breakpoint.")
     # Interrupt inferior to bring back the debugger prompt.
     self.stop_inferior()
     # Remove the log breakpoint that got us here (only support one
     # log bkpt right now).
     fredmanager.send_fred_continue()
Ejemplo n.º 54
0
def set_virtual_inferior_pid(n_pid):
    """Store n_pid in the module-level gn_virtual_inferior_pid.

    The new value is reported through fredutil.fred_debug before it is
    stored."""
    global gn_virtual_inferior_pid
    s_message = "Setting virtual inferior pid to %d." % n_pid
    fredutil.fred_debug(s_message)
    gn_virtual_inferior_pid = n_pid
Ejemplo n.º 55
0
def reexec(argv):
    """Replace the current child process with the new given one.

    argv is handed unchanged to _spawn_child(). When GB_FRED_DEMO is set,
    a restart banner is printed to stdout first."""
    if GB_FRED_DEMO:
        # A single parenthesized argument prints the same under Python 2's
        # print statement and Python 3's print function; the bare statement
        # form is a SyntaxError on Python 3.
        print("===================== RESTARTING gdb =====================")
    fredutil.fred_debug("Replacing current child with '%s'" % str(argv))
    _spawn_child(argv)
Ejemplo n.º 56
0
def reset_real_inferior_pid(n_gdb_pid):
    """Refresh the module-level gn_real_inferior_pid.

    The new value is whatever _read_real_inferior_pid(n_gdb_pid) returns;
    it is reported through fredutil.fred_debug before being stored."""
    global gn_real_inferior_pid
    n_inferior_pid = _read_real_inferior_pid(n_gdb_pid)
    s_message = "Setting real inferior pid to %d." % n_inferior_pid
    fredutil.fred_debug(s_message)
    gn_real_inferior_pid = n_inferior_pid
Ejemplo n.º 57
0
def reset_real_inferior_pid(n_gdb_pid):
    """Look up the real inferior pid and remember it.

    Calls _read_real_inferior_pid(n_gdb_pid), logs the result with
    fredutil.fred_debug, then assigns it to gn_real_inferior_pid."""
    global gn_real_inferior_pid
    n_real_pid = _read_real_inferior_pid(n_gdb_pid)
    fredutil.fred_debug(
        "Setting real inferior pid to %d." % n_real_pid)
    gn_real_inferior_pid = n_real_pid
Ejemplo n.º 58
0
 def update_state(self):
     """Refresh the DebuggerState from the debugger personality.

     Stores the personality's current backtrace and then its current
     breakpoints on the object returned by self.state()."""
     fredutil.fred_debug("Updating DebuggerState.")
     personality = self._p
     self.state().set_backtrace(personality.get_backtrace())
     self.state().set_breakpoints(personality.get_breakpoints())
Ejemplo n.º 59
0
 def set_scheduler_locking(self, n_on):
     """Forward n_on to the personality's set_scheduler_locking().

     Only valid under the gdb personality (checked with
     fredutil.fred_assert)."""
     b_is_gdb = self.personality_name() == "gdb"
     fredutil.fred_assert(b_is_gdb)
     s_message = "Turning scheduler locking to %s" % str(n_on)
     fredutil.fred_debug(s_message)
     self._p.set_scheduler_locking(n_on)
Ejemplo n.º 60
0
def set_virtual_inferior_pid(n_pid):
    """Remember n_pid as the inferior's virtual pid.

    Logs the value with fredutil.fred_debug and then assigns it to the
    module-level gn_virtual_inferior_pid."""
    global gn_virtual_inferior_pid
    fredutil.fred_debug(
        "Setting virtual inferior pid to %d." % n_pid)
    gn_virtual_inferior_pid = n_pid