Ejemplo n.º 1
0
		R.SaveBuffer(OUT_DIR+BUFFER_FILE)
		print "model saved, replay buffer: ", R.GetOccupency()
		L.Save(OUT_DIR+LOG_FILE)

# Final save step: hand the weights returned by Q.get_weights() to
# Q_target.assign (presumably syncing a target network with Q -- confirm),
# then write the checkpoint, the replay buffer and the log to OUT_DIR.
Ws,bs = Q.get_weights()
Q_target.assign(sess, Ws,bs)
# The counter is incremented before saving, so the checkpoint is tagged with
# the new counter value (passed to saver.save as global_step).
save_counter.increment(sess)
saver.save(sess,OUT_DIR+"model.ckpt", global_step=save_counter.evaluate(sess))
R.SaveBuffer(OUT_DIR+BUFFER_FILE)
print "model saved, replay buffer: ", R.GetOccupency()
L.Save(OUT_DIR+LOG_FILE)

# None of the visible statements below use sess.
sess.close()
# NOTE(review): this unconditional sys.exit() ends the script here (unless
# SystemExit is caught by an enclosing handler not visible in this chunk), so
# the plotting code below never runs. Confirm whether that is intentional.
sys.exit()
# plot statistics
# Fetch the logged series by name: R_P_* come from the 'policy_*' logs and
# R_Q_* from the 'network_*' logs, one pair per puck position
# (left / middle / right).
R_P_l = L.GetLogByName('policy_left')
R_Q_l = L.GetLogByName('network_left')
R_P_m = L.GetLogByName('policy_middle')
R_Q_m = L.GetLogByName('network_middle')
R_P_r = L.GetLogByName('policy_right')
R_Q_r = L.GetLogByName('network_right')
# totalR, error and value are fetched here but not used in the lines visible
# in this chunk.
totalR = L.GetLogByName('total_reward')
error = L.GetLogByName('error')
value = L.GetLogByName('estimated_value')

# x axis: one index per entry of the 'policy_left' log.
# Assumes the other series have the same length -- TODO confirm.
t = np.arange(R_P_l.size)
plt.figure(1)
# 'network_left' in blue ('b'), 'policy_left' in red ('r'), assuming plt is
# matplotlib.pyplot.
plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r')
plt.xlabel('Episodes')
plt.ylabel('reward')
plt.title('Puck on the left')
Ejemplo n.º 2
0
        L.Save(OUT_DIR + LOG_FILE)

# Final save step: hand the weights returned by Q.get_weights() to
# Q_target.assign (presumably syncing a target network with Q -- confirm),
# then write the checkpoint, the replay buffer and the log to OUT_DIR.
Ws, bs = Q.get_weights()
Q_target.assign(sess, Ws, bs)
# The counter is incremented before saving, so the checkpoint is tagged with
# the new counter value (passed to saver.save as global_step).
save_counter.increment(sess)
saver.save(sess,
           OUT_DIR + "model.ckpt",
           global_step=save_counter.evaluate(sess))
R.SaveBuffer(OUT_DIR + BUFFER_FILE)
print "model saved, replay buffer: ", R.GetOccupency()
L.Save(OUT_DIR + LOG_FILE)

# None of the visible statements below use sess.
sess.close()

# plot statistics
# Fetch the logged series by name: R_P_* come from the 'policy_*' logs and
# R_Q_* from the 'network_*' logs, one pair per puck position
# (left / middle / right).
R_P_l = L.GetLogByName('policy_left')
R_Q_l = L.GetLogByName('network_left')
R_P_m = L.GetLogByName('policy_middle')
R_Q_m = L.GetLogByName('network_middle')
R_P_r = L.GetLogByName('policy_right')
R_Q_r = L.GetLogByName('network_right')

# Shared x axis: one index per entry of the 'policy_left' log. The same t is
# reused for all three figures, so the middle/right series are assumed to
# have the same length -- TODO confirm.
t = np.arange(R_P_l.size)
# One figure per position; in each, the 'network_*' series is drawn in blue
# ('b') and the 'policy_*' series in red ('r'), assuming plt is
# matplotlib.pyplot.
plt.figure(1)
plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r')

plt.figure(2)
plt.plot(t, R_Q_m, 'b', t, R_P_m, 'r')

plt.figure(3)
plt.plot(t, R_Q_r, 'b', t, R_P_r, 'r')
Ejemplo n.º 3
0
# Disabled code kept from the original: a conditional copy of Q's weights
# into one entry of Q_targets. It does not run in this version.
#if UPDATE_TARGET:
#	Ws,bs = Q.get_weights()
#	Q_targets[cur_target % len(Q_targets)].assign(sess, Ws,bs)
# Final save step: the counter is incremented before saving, so the
# checkpoint is tagged with the new counter value (passed to saver.save as
# global_step); the replay buffer and the log are then written to OUT_DIR.
save_counter.increment(sess)
saver.save(sess,
           OUT_DIR + "model.ckpt",
           global_step=save_counter.evaluate(sess))
R.SaveBuffer(OUT_DIR + BUFFER_FILE)
print "model saved, replay buffer: ", R.GetOccupency()
L.Save(OUT_DIR + LOG_FILE)

# None of the visible statements below use sess.
sess.close()
# plot statistics
# Plotting is optional and only runs when DISPLAY_STATISTICS is truthy.
if DISPLAY_STATISTICS:
    # Fetch the logged series by name: R_P_* come from the 'policy_*' logs
    # and R_Q_* from the 'network_*' logs, one pair per puck position
    # (left / middle / right).
    R_P_l = L.GetLogByName('policy_left')
    R_Q_l = L.GetLogByName('network_left')
    R_P_m = L.GetLogByName('policy_middle')
    R_Q_m = L.GetLogByName('network_middle')
    R_P_r = L.GetLogByName('policy_right')
    R_Q_r = L.GetLogByName('network_right')
    # currentR, error and value are fetched here; they are not used in the
    # lines visible in this chunk.
    currentR = L.GetLogByName('network_random')
    error = L.GetLogByName('error')
    value = L.GetLogByName('estimated_value')

    # x axis: one index per entry of the 'policy_left' log.
    # Assumes the other series have the same length -- TODO confirm.
    t = np.arange(R_P_l.size)
    plt.figure()

    # First cell of a 2x3 subplot grid (assuming plt is matplotlib.pyplot):
    # 'network_left' in blue ('b'), 'policy_left' in red ('r').
    plt.subplot(231)
    plt.plot(t, R_Q_l, 'b', t, R_P_l, 'r')
    plt.xlabel('Episodes')