Exemple #1
0
def test_at_task_exit(n=2):
    """Test checkpointing at task_exit behavior.

    Launches ``n`` ``slow_double`` calls, waits on each returned object via
    ``.result()``, then reads every pickled record from
    ``checkpoint/tasks.pkl`` under ``dfk.rundir`` and asserts that exactly
    ``n`` records were found.

    Args:
        n: Number of tasks to launch; also the expected number of
            checkpoint records.
    """

    print("Launching: ", n)
    futures = [slow_double(i) for i in range(n)]
    print("Done launching")

    # Wait for every task to finish before looking at the checkpoint file.
    for future in futures:
        future.result()

    tasks = []
    # NOTE(review): time_limited_open is defined elsewhere; `seconds`
    # presumably bounds how long it waits for the file -- confirm.
    with time_limited_open("{}/checkpoint/tasks.pkl".format(dfk.rundir),
                           'rb',
                           seconds=5) as f:
        # Records are read one pickle at a time. The loop only ends when
        # pickle.load raises EOFError, so the condition is an explicit
        # `while True` rather than a (constant-true) test of the file object.
        try:
            while True:
                tasks.append(pickle.load(f))
        except EOFError:
            pass

    assert len(tasks) == n, "Expected {} checkpoint events, got {}".format(
        n, len(tasks))
def test_checkpointing_at_dfk_exit():
    """Ensure failed tasks are not cached with dfk_exit mode. Tests #239

    Calls ``run_checkpointed(mode="dfk_exit")`` to obtain a run directory,
    reads every pickled record from ``checkpoint/tasks.pkl`` inside it, and
    asserts that exactly one record was written.
    """

    rundir = run_checkpointed(mode="dfk_exit")

    tasks = []
    # NOTE(review): time_limited_open is defined elsewhere; `seconds`
    # presumably bounds how long it waits for the file -- confirm.
    with time_limited_open("{}/checkpoint/tasks.pkl".format(rundir), 'rb', seconds=2) as f:
        # Records are read one pickle at a time. The loop only ends when
        # pickle.load raises EOFError, so the condition is an explicit
        # `while True` rather than a (constant-true) test of the file object.
        try:
            while True:
                tasks.append(pickle.load(f))
        except EOFError:
            pass

    print("Tasks from cache : ", tasks)
    assert len(tasks) == 1, "Expected {} checkpoint events, got {}".format(1, len(tasks))
Exemple #3
0
def test_at_task_exit(n=2):
    """Test checkpointing at task_exit behavior.

    Launches ``n`` ``slow_double`` calls, waits on each returned object via
    ``.result()``, then reads every pickled record from
    ``checkpoint/tasks.pkl`` under ``dfk.run_dir`` and asserts that exactly
    ``n`` records were found.

    Args:
        n: Number of tasks to launch; also the expected number of
            checkpoint records.
    """

    print("Launching: ", n)
    futures = [slow_double(i) for i in range(n)]
    print("Done launching")

    for future in futures:
        future.result()

    # There are two potential race conditions here which
    # might be useful to be aware of if debugging this test.

    #  i) .result() returning does not necessarily mean that
    #     a checkpoint has been written: it means that the
    #     AppFuture has had its result written. In the DFK
    #     implementation at time of writing, .result() returning
    #     does not seem to indicate that a checkpoint has been
    #     written.

    # ii) time_limited_open has a specific time limit in it.
    #     While this limit might seem generous at time of writing,
    #     it should be remembered that this is still a race.

    tasks = []
    with time_limited_open("{}/checkpoint/tasks.pkl".format(dfk.run_dir),
                           'rb',
                           seconds=5) as f:
        # Records are read one pickle at a time. The loop only ends when
        # pickle.load raises EOFError, so the condition is an explicit
        # `while True` rather than a (constant-true) test of the file object.
        try:
            while True:
                tasks.append(pickle.load(f))
        except EOFError:
            pass

    assert len(tasks) == n, "Expected {} checkpoint events, got {}".format(
        n, len(tasks))