Ejemplo n.º 1
0
def checkpoint():
    """Perform a blocking checkpoint request and rename the checkpoint files.

    Steps:
      1. Remove stale ptrace files via remove_stale_ptrace_files().
      2. Record the already-renamed checkpoint files in $DMTCP_TMPDIR
         (names matching ``ckpt_*.dmtcp.<suffix>``).
      3. Run ``dmtcp_command --quiet bc`` and wait for it to return.
      4. Poll $DMTCP_TMPDIR until at least get_num_peers() new
         ``ckpt_*dmtcp`` files are present.
      5. Rename each new file to ``<name>.<gn_index_suffix>`` and increment
         the global gn_index_suffix.

    Raises KeyError if DMTCP_TMPDIR is not set in the environment.
    NOTE(review): the polling loop has no timeout; it spins until the
    expected number of checkpoint files shows up.
    """
    global gn_index_suffix

    remove_stale_ptrace_files()

    # Look the directory up once instead of on every listing.
    s_tmpdir = os.environ["DMTCP_TMPDIR"]

    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence (a warning on recent Python versions).
    s_checkpoint_re = r"ckpt_.+\.dmtcp\..*"
    # A set gives O(1) membership tests inside the polling loop below.
    ckpts_before = set(os.path.join(s_tmpdir, x)
                       for x in os.listdir(s_tmpdir)
                       if re.search(s_checkpoint_re, x) is not None)

    # Request the checkpoint.
    n_peers = get_num_peers()
    cmdstr = ["dmtcp_command", "--quiet", "bc"]
    fredutil.execute_shell_command_and_wait(cmdstr)
    fredutil.fred_debug("After blocking checkpoint command.")

    l_new_ckpts = []
    # There is what seems to be a DMTCP bug: the blocking checkpoint
    # can actually return before the checkpoints are written. It is
    # rare.
    while len(l_new_ckpts) < n_peers:
        l_new_ckpts = []
        for x in os.listdir(s_tmpdir):
            if x.startswith("ckpt_") and x.endswith("dmtcp"):
                s_path = os.path.join(s_tmpdir, x)
                if s_path not in ckpts_before:
                    l_new_ckpts.append(s_path)
        time.sleep(0.001)

    for f in l_new_ckpts:
        s_renamed = "%s.%d" % (f, gn_index_suffix)
        fredutil.fred_debug("Renaming ckpt file from '%s' to '%s'" %
                            (f, s_renamed))
        os.rename(f, s_renamed)
    gn_index_suffix += 1
Ejemplo n.º 2
0
def checkpoint():
    """Perform a blocking checkpoint request and rename the checkpoint files.

    After clearing stale ptrace files, the function notes which renamed
    checkpoint files (``ckpt_*.dmtcp.<suffix>``) already exist in
    $DMTCP_TMPDIR, issues ``dmtcp_command --quiet bc``, and then waits
    until at least get_num_peers() new ``ckpt_*dmtcp`` files are present.
    Each new file gets the current value of the global gn_index_suffix
    appended to its name, after which gn_index_suffix is incremented.

    Raises KeyError if DMTCP_TMPDIR is not set in the environment.
    NOTE(review): there is no upper bound on how long the wait loop runs.
    """
    global gn_index_suffix

    remove_stale_ptrace_files()

    # Read the environment once; the directory is reused for every listing.
    s_ckpt_dir = os.environ["DMTCP_TMPDIR"]

    # Raw string so that "\." reaches the regex engine without relying on
    # Python passing an invalid string escape through unchanged.
    s_checkpoint_re = r"ckpt_.+\.dmtcp\..*"
    # Set rather than list: it is queried on every poll iteration.
    ckpts_before = set(os.path.join(s_ckpt_dir, x)
                       for x in os.listdir(s_ckpt_dir)
                       if re.search(s_checkpoint_re, x) is not None)

    # Request the checkpoint.
    n_peers = get_num_peers()
    cmdstr = ["dmtcp_command", "--quiet", "bc"]
    fredutil.execute_shell_command_and_wait(cmdstr)
    fredutil.fred_debug("After blocking checkpoint command.")

    l_new_ckpts = []
    # There is what seems to be a DMTCP bug: the blocking checkpoint
    # can actually return before the checkpoints are written. It is
    # rare.
    while len(l_new_ckpts) < n_peers:
        l_ckpts_after = [os.path.join(s_ckpt_dir, x)
                         for x in os.listdir(s_ckpt_dir)
                         if x.startswith("ckpt_") and x.endswith("dmtcp")]
        l_new_ckpts = [x for x in l_ckpts_after if x not in ckpts_before]
        time.sleep(0.001)

    for f in l_new_ckpts:
        fredutil.fred_debug("Renaming ckpt file from '%s' to '%s.%d'" %
                            (f, f, gn_index_suffix))
        os.rename(f, "%s.%d" % (f, gn_index_suffix))
    gn_index_suffix += 1
Ejemplo n.º 3
0
def kill_coordinator(n_port):
    """Kill the coordinator on the given port.

    Runs ``dmtcp_command --quiet -p <n_port> q`` and waits for it.
    A subprocess.CalledProcessError from the command is ignored on purpose
    (NOTE(review): presumably this covers the case where no coordinator is
    running on that port -- confirm). Every other exception propagates to
    the caller.
    """
    cmdstr = ["dmtcp_command", "--quiet", "-p", str(n_port), "q"]
    try:
        fredutil.execute_shell_command_and_wait(cmdstr)
    except subprocess.CalledProcessError:
        # Best effort: a failing quit command is not treated as an error.
        pass
Ejemplo n.º 4
0
def kill_coordinator(n_port):
    """Kill the coordinator on the given port.

    Issues the ``q`` command to the coordinator at port ``n_port`` through
    ``dmtcp_command --quiet``. subprocess.CalledProcessError is swallowed
    deliberately; any other exception is left to propagate unchanged.
    """
    try:
        fredutil.execute_shell_command_and_wait(
            ["dmtcp_command", "--quiet", "-p",
             str(n_port), "q"])
    except subprocess.CalledProcessError:
        # Deliberately ignored: a failed quit request is not fatal here.
        # No catch-all handler is needed; other exceptions already
        # propagate on their own.
        pass
Ejemplo n.º 5
0
def start_coordinator(n_port):
    """Launch ``dmtcp_coordinator --daemon`` on port ``n_port``.

    Returns True when fredutil.execute_shell_command_and_wait() returns 0
    for the launch command, and False otherwise.
    """
    launch_cmd = ["dmtcp_coordinator", "--daemon", "-p", str(n_port)]
    result = fredutil.execute_shell_command_and_wait(launch_cmd)
    return result == 0
Ejemplo n.º 6
0
def start_coordinator(n_port):
    """Start a daemonized coordinator on the given port.

    The return value is True only if the value handed back by
    fredutil.execute_shell_command_and_wait() equals 0; any other value
    yields False.
    """
    port_arg = str(n_port)
    return fredutil.execute_shell_command_and_wait(
        ["dmtcp_coordinator", "--daemon", "-p", port_arg]) == 0