Beispiel #1
0
def test_read_direct():
    """Check ``sample_direct`` against the states handed back by random_fill.

    A ``ReplayDataset`` backed by ``test.hdf5`` is created with
    ``dset_size=1000`` and ``overwrite=True``, ``random_fill`` is called for
    100 entries, and one batch of 32 is requested through ``sample_direct``
    using buffers allocated here.  The test passes when the first state in
    the batch has zero Euclidean distance to at least one of the first 100
    items returned by ``random_fill``.
    """
    replay = ReplayDataset("test.hdf5", STATE_SHAPE, dset_size=1000,
                           overwrite=True)

    n_batch = 32
    buffer_shape = (n_batch,) + STATE_SHAPE
    state_buf = np.zeros(buffer_shape)
    action_buf = np.zeros(n_batch)
    reward_buf = np.zeros(n_batch)
    next_state_buf = np.zeros(buffer_shape)

    # Presumably random_fill returns the states it stored -- confirm
    # against its definition.
    stored = random_fill(replay, 100)
    # Presumably sample_direct fills the four buffers in place -- confirm.
    replay.sample_direct(state_buf, action_buf, reward_buf, next_state_buf,
                         n_batch)

    first_sampled = state_buf[0]
    distances = []
    for idx in range(100):
        distances.append(np.linalg.norm(first_sampled - stored[idx]))
    assert min(distances) == 0
def test_read_direct():
    """Verify that a directly-sampled state matches a previously filled one.

    Builds a ``ReplayDataset`` on ``test.hdf5`` (``dset_size=1000``,
    ``overwrite=True``), calls ``random_fill`` for 100 entries, then calls
    ``sample_direct`` with pre-allocated zero buffers for a batch of 32.
    Asserts that the smallest Euclidean distance between the first sampled
    state and the first 100 items returned by ``random_fill`` is exactly 0.
    """
    store = ReplayDataset("test.hdf5",
                          STATE_SHAPE,
                          dset_size=1000,
                          overwrite=True)

    batch = 32

    def empty_state_batch():
        # One zeroed slot per batch element, each of shape STATE_SHAPE.
        return np.zeros((batch, ) + STATE_SHAPE)

    cur_states = empty_state_batch()
    actions = np.zeros(batch)
    rewards = np.zeros(batch)
    next_states = empty_state_batch()

    # NOTE(review): assumes random_fill returns an indexable collection with
    # at least 100 states -- confirm against its definition.
    filled = random_fill(store, 100)
    store.sample_direct(cur_states, actions, rewards, next_states, batch)

    closest = min(
        np.linalg.norm(cur_states[0] - filled[k]) for k in range(100))
    assert closest == 0
def test_correctness():
    """Walk a 10-slot ``ReplayDataset`` through sampling and overfilling.

    Steps:
      1. Create the dataset on ``test.hdf5`` with ``overwrite=True`` and
         ``dset_size=10``; print its ``head`` and ``valid`` attributes.
      2. Try to sample while nothing has been added.  A ``ValueError`` is
         reported as the expected outcome.
      3. Add one experience and sample again.  A successful sample is
         reported as the expected outcome.
      4. Add 13 more random experiences and assert that ``head`` equals
         ``(13 + 1) % 10`` and ``valid`` equals 10.
      5. Print the stored ``action``/``reward``/``state`` attributes and a
         sample of 9 transitions.

    Only the assertions in step 4 can fail the test; steps 2 and 3 merely
    print what happened.

    The body sticks to constructs that mean the same thing on Python 2 and
    Python 3 (single-argument ``print(...)``, ``range``, ``str(e)``), and
    the printed text is unchanged from the print-statement version.
    """
    dset = ReplayDataset("test.hdf5",
                         STATE_SHAPE,
                         overwrite=True,
                         dset_size=10)

    print("Initially...")
    print("------------------------")
    # Two spaces after the colon reproduce what `print "Head: ", x` emitted.
    print("Head:  %s" % (dset.head, ))
    print("Valid:  %s" % (dset.valid, ))

    # NOTE(review): if no ValueError is raised here nothing is printed and
    # the test simply continues; it is not treated as a failure.
    try:
        dset.sample(1)
    except ValueError as e:
        # str(e) rather than e.message: BaseException.message was deprecated
        # in Python 2.6 and no longer exists in Python 3.
        print(str(e))
        print("Correctly triggered exception")

    # Add one experience to dataset, then try sampling again
    dset.add_experience(0, 10, np.random.randint(0, 256, size=(4, 128, 128)))
    print("Added one experience")
    try:
        dset.sample(1)
    except ValueError as e:
        print(str(e))
    else:
        print("Correctly avoided exception when sampling")

    for _ in range(13):
        # NOTE(review): the literal shape presumably equals STATE_SHAPE --
        # confirm before replacing it.
        state = np.random.randint(0, 256, size=(4, 128, 128))
        action = random.randrange(10)    # uniform over 0..9
        reward = random.randint(-5, 5)   # uniform over -5..5 inclusive
        dset.add_experience(action, reward, state)

    # 1 + 13 additions into 10 slots.
    assert (dset.head == (13 + 1) % 10)
    assert (dset.valid == 10)

    print("")
    print("Current state of dataset:")
    print("----------------------------")
    print("Action: %s" % (dset.action, ))
    print("Reward: %s" % (dset.reward, ))
    print("State: %s" % (dset.state, ))
    print("State[0,0,0]: %s" % ([dset.state[i, 0, 0, 0]
                                 for i in range(10)], ))
    print("")
    print("Draw full sample:")
    s, a, r, ns = dset.sample(9)
    print("S (0,0,0): %s" % ([s[i, 0, 0, 0] for i in range(9)], ))
    print("S'(0,0,0): %s" % ([ns[i, 0, 0, 0] for i in range(9)], ))
Beispiel #4
0
def test_correctness():
    """Walk a 10-slot ``ReplayDataset`` through sampling and overfilling.

    Steps:
      1. Create the dataset on ``test.hdf5`` with ``overwrite=True`` and
         ``dset_size=10``; print its ``head`` and ``valid`` attributes.
      2. Try to sample while nothing has been added.  A ``ValueError`` is
         reported as the expected outcome.
      3. Add one experience and sample again.  A successful sample is
         reported as the expected outcome.
      4. Add 13 more random experiences and assert that ``head`` equals
         ``(13 + 1) % 10`` and ``valid`` equals 10.
      5. Print the stored ``action``/``reward``/``state`` attributes and a
         sample of 9 transitions.

    Only the assertions in step 4 can fail the test; steps 2 and 3 merely
    print what happened.

    The body sticks to constructs that mean the same thing on Python 2 and
    Python 3 (single-argument ``print(...)``, ``range``, ``str(e)``), and
    the printed text is unchanged from the print-statement version.
    """
    dset = ReplayDataset("test.hdf5", STATE_SHAPE,
                         overwrite=True, dset_size=10)

    print("Initially...")
    print("------------------------")
    # Two spaces after the colon reproduce what `print "Head: ", x` emitted.
    print("Head:  %s" % (dset.head,))
    print("Valid:  %s" % (dset.valid,))

    # NOTE(review): if no ValueError is raised here nothing is printed and
    # the test simply continues; it is not treated as a failure.
    try:
        dset.sample(1)
    except ValueError as e:
        # str(e) rather than e.message: BaseException.message was deprecated
        # in Python 2.6 and no longer exists in Python 3.
        print(str(e))
        print("Correctly triggered exception")

    # Add one experience to dataset, then try sampling again
    dset.add_experience(0, 10, np.random.randint(0, 256, size=(4, 128, 128)))
    print("Added one experience")
    try:
        dset.sample(1)
    except ValueError as e:
        print(str(e))
    else:
        print("Correctly avoided exception when sampling")

    for _ in range(13):
        # NOTE(review): the literal shape presumably equals STATE_SHAPE --
        # confirm before replacing it.
        state = np.random.randint(0, 256, size=(4, 128, 128))
        action = random.randrange(10)    # uniform over 0..9
        reward = random.randint(-5, 5)   # uniform over -5..5 inclusive
        dset.add_experience(action, reward, state)

    # 1 + 13 additions into 10 slots.
    assert(dset.head == (13 + 1) % 10)
    assert(dset.valid == 10)

    print("")
    print("Current state of dataset:")
    print("----------------------------")
    print("Action: %s" % (dset.action,))
    print("Reward: %s" % (dset.reward,))
    print("State: %s" % (dset.state,))
    print("State[0,0,0]: %s" % ([dset.state[i, 0, 0, 0]
                                 for i in range(10)],))
    print("")
    print("Draw full sample:")
    s, a, r, ns = dset.sample(9)
    print("S (0,0,0): %s" % ([s[i, 0, 0, 0] for i in range(9)],))
    print("S'(0,0,0): %s" % ([ns[i, 0, 0, 0] for i in range(9)],))
Beispiel #5
0
def test_timing(dset_size=10000, num_write=20000,
                num_samples=1000, sample_size=32):
    """Speed test for replay dataset storage scheme.

    Removes any existing ``test.hdf5``, creates the ``evaluation`` directory
    if needed, and then:

    * adds ``num_write`` all-ones float32 states to a new ``ReplayDataset``
      (created with ``dset_size=dset_size``), recording the wall-clock time
      after every addition;
    * calls ``dset.sample(sample_size)`` ``num_samples`` times, recording
      each call's duration in milliseconds.

    Outputs:
        evaluation/results.txt -- total write time, mean and standard
            deviation of the sampling time.
        evaluation/write.png   -- cumulative write time per state written.
        evaluation/sample.png  -- histogram of sampling times, binned from
            0 to mean + one standard deviation.

    Args:
        dset_size: forwarded to ``ReplayDataset`` as ``dset_size``.
        num_write: number of ``add_experience`` calls to time (must be at
            least 1, since the last timestamp is read for the report).
        num_samples: number of ``sample`` calls to time.
        sample_size: batch size passed to every ``sample`` call.
    """
    import matplotlib.pyplot as plt
    if os.path.exists("test.hdf5"):
        os.remove("test.hdf5")

    if not os.path.isdir("evaluation"):
        os.makedirs("evaluation")

    # The context manager guarantees the report is flushed and closed, even
    # when one of the benchmark steps below raises.
    with open("evaluation/results.txt", 'w') as results_file:
        # Create and fill dataset
        dset = ReplayDataset("test.hdf5", STATE_SHAPE, dset_size=dset_size)

        # Allocated once, outside the timed loop, so the measurement covers
        # add_experience and not the repeated array allocation.
        state = np.ones((4, 128, 128), dtype=np.float32)

        # Time writing speed
        start = time.time()
        tocs = []
        for _ in range(num_write):
            dset.add_experience(0, 0, state)
            tocs.append(time.time())

        cum_time = np.array(tocs) - start
        plt.figure(1)
        plt.plot(cum_time)
        plt.title("Writing Performance")
        plt.xlabel("number of states written")
        plt.ylabel("cumulative run time (s)")
        plt.savefig("evaluation/write.png")

        results_file.write(
            "Time to write %d samples: %0.2f milliseconds\n"
            % (num_write, 1000 * (tocs[-1] - start)))

        # Time sampling speed
        times = []
        for _ in range(num_samples):
            tic = time.time()
            dset.sample(sample_size)
            toc = time.time()
            times.append(1000 * (toc - tic))

        mean_sample_time = np.mean(times)
        std_sample_time = np.std(times)
        results_file.write(
            "Mean sample time: %0.3f milliseconds\n" % mean_sample_time)
        results_file.write(
            "Std. dev. sample time: %0.3f milliseconds\n" % std_sample_time)

    plt.figure(2)
    # Samples slower than mean + 1 std fall outside the bins and are not
    # drawn.
    plt.hist(times, bins=np.linspace(0, mean_sample_time + std_sample_time),
             alpha=0.5, color="green")

    plt.title("Distribution of Sampling Times (Batch Size = %d)" %
              sample_size)
    plt.xlabel("sampling time (ms)")
    plt.ylabel("frequency")
    plt.savefig("evaluation/sample.png")
def test_timing(dset_size=10000,
                num_write=20000,
                num_samples=1000,
                sample_size=32):
    """Speed test for replay dataset storage scheme.

    Removes any existing ``test.hdf5``, creates the ``evaluation`` directory
    if needed, and then:

    * adds ``num_write`` all-ones float32 states to a new ``ReplayDataset``
      (created with ``dset_size=dset_size``), recording the wall-clock time
      after every addition;
    * calls ``dset.sample(sample_size)`` ``num_samples`` times, recording
      each call's duration in milliseconds.

    Outputs:
        evaluation/results.txt -- total write time, mean and standard
            deviation of the sampling time.
        evaluation/write.png   -- cumulative write time per state written.
        evaluation/sample.png  -- histogram of sampling times, binned from
            0 to mean + one standard deviation.

    Args:
        dset_size: forwarded to ``ReplayDataset`` as ``dset_size``.
        num_write: number of ``add_experience`` calls to time (must be at
            least 1, since the last timestamp is read for the report).
        num_samples: number of ``sample`` calls to time.
        sample_size: batch size passed to every ``sample`` call.
    """
    import matplotlib.pyplot as plt
    if os.path.exists("test.hdf5"):
        os.remove("test.hdf5")

    if not os.path.isdir("evaluation"):
        os.makedirs("evaluation")

    # The context manager guarantees the report is flushed and closed, even
    # when one of the benchmark steps below raises.
    with open("evaluation/results.txt", 'w') as results_file:
        # Create and fill dataset
        dset = ReplayDataset("test.hdf5", STATE_SHAPE, dset_size=dset_size)

        # Allocated once, outside the timed loop, so the measurement covers
        # add_experience and not the repeated array allocation.
        state = np.ones((4, 128, 128), dtype=np.float32)

        # Time writing speed
        start = time.time()
        tocs = []
        for _ in range(num_write):
            dset.add_experience(0, 0, state)
            tocs.append(time.time())

        cum_time = np.array(tocs) - start
        plt.figure(1)
        plt.plot(cum_time)
        plt.title("Writing Performance")
        plt.xlabel("number of states written")
        plt.ylabel("cumulative run time (s)")
        plt.savefig("evaluation/write.png")

        results_file.write(
            "Time to write %d samples: %0.2f milliseconds\n" %
            (num_write, 1000 * (tocs[-1] - start)))

        # Time sampling speed
        times = []
        for _ in range(num_samples):
            tic = time.time()
            dset.sample(sample_size)
            toc = time.time()
            times.append(1000 * (toc - tic))

        mean_sample_time = np.mean(times)
        std_sample_time = np.std(times)
        results_file.write(
            "Mean sample time: %0.3f milliseconds\n" % mean_sample_time)
        results_file.write(
            "Std. dev. sample time: %0.3f milliseconds\n" % std_sample_time)

    plt.figure(2)
    # Samples slower than mean + 1 std fall outside the bins and are not
    # drawn.
    plt.hist(times,
             bins=np.linspace(0, mean_sample_time + std_sample_time),
             alpha=0.5,
             color="green")

    plt.title("Distribution of Sampling Times (Batch Size = %d)" % sample_size)
    plt.xlabel("sampling time (ms)")
    plt.ylabel("frequency")
    plt.savefig("evaluation/sample.png")