Ejemplo n.º 1
0
def td_fa_test():
    """Run ``td_fa`` on a fresh ``BlackjackEnv`` with a default ``Estimator``.

    The TensorFlow session is used as a context manager, so it is closed
    automatically when the ``with`` block exits; no explicit ``close()``
    call is required inside the block.
    """
    env = BlackjackEnv()
    estimator = Estimator()
    with tf.Session() as sess:
        # Run the global variable initializer before handing the session
        # to td_fa.
        sess.run(tf.global_variables_initializer())
        td_fa(env, sess, estimator)
Ejemplo n.º 2
0
def q_network_test():
    """Run ``q_network`` for 10000 episodes and plot what it returns.

    Builds a ``BlackjackEnv`` and an ``Estimator(0.001)``, runs the global
    variable initializer inside a TensorFlow session, and passes the value
    returned by ``q_network`` to ``plotting.plot_value_function`` once the
    session block has exited.
    """
    blackjack = BlackjackEnv()
    q_estimator = Estimator(0.001)
    with tf.Session() as session:
        init_op = tf.global_variables_initializer()
        session.run(init_op)
        value_fn = q_network(
            blackjack, session, q_estimator, episode_num=10000
        )
    plotting.plot_value_function(value_fn, title='Optimal Value Function')
Ejemplo n.º 3
0
def mc_control_with_epsilon_greedy_test():
    """Run ``mc_control_with_epsilon_greedy`` for 10000 episodes and plot it.

    The mapping returned by the algorithm is reduced to one value per
    state by taking ``np.max`` over each state's entry, and the resulting
    table is passed to ``plotting.plot_value_function``.
    """
    blackjack = BlackjackEnv()
    q_table = mc_control_with_epsilon_greedy(blackjack, episode_nums=10000)

    # Keep a defaultdict(float) so lookups of unseen states yield 0.0.
    value_fn = defaultdict(float)
    value_fn.update(
        (state, np.max(action_values))
        for state, action_values in q_table.items()
    )
    plotting.plot_value_function(value_fn, title='Optimal Value Function')
Ejemplo n.º 4
0
def q_learning_test():
    """Run ``q_learning`` for 10000 episodes and plot the per-state maxima.

    Each state's entry in the returned mapping is reduced with ``np.max``;
    the resulting table is handed to ``plotting.plot_value_function``.
    """
    blackjack = BlackjackEnv()
    q_table = q_learning(blackjack, episode_nums=10000)

    # A defaultdict(float) is retained so missing states read as 0.0.
    best_values = {
        state: np.max(action_values)
        for state, action_values in q_table.items()
    }
    value_fn = defaultdict(float, best_values)
    plotting.plot_value_function(value_fn, title='Optimal Value Function')
Ejemplo n.º 5
0
def main():
    """Run ``ac_test4debug`` for 10000 episodes and plot what it returns.

    Creates a ``BlackjackEnv``, an ``Actor`` and an ``Estimator`` (both
    with default arguments), runs the global variable initializer inside
    a TensorFlow session, and plots the returned value after the session
    block has exited.
    """
    blackjack = BlackjackEnv()
    policy_actor = Actor()
    critic = Estimator()
    with tf.Session() as session:
        init_op = tf.global_variables_initializer()
        session.run(init_op)
        value_fn = ac_test4debug(
            session, blackjack, policy_actor, critic, episode_num=10000
        )
    plotting.plot_value_function(value_fn, title='Optimal Value Function')
Ejemplo n.º 6
0
def td_network_test():
    """Run ``td_network`` with an ``Estimator(learning_rate=0.003)`` and plot.

    The global variable initializer is run inside a TensorFlow session
    before ``td_network`` is called; the value it returns is plotted after
    the session block has exited.
    """
    blackjack = BlackjackEnv()
    value_estimator = Estimator(learning_rate=0.003)
    with tf.Session() as session:
        init_op = tf.global_variables_initializer()
        session.run(init_op)
        value_fn = td_network(blackjack, session, value_estimator)
        # Debug aid: print session.run(value_estimator.w) and
        # session.run(value_estimator.b) here to inspect those attributes.
    plotting.plot_value_function(value_fn, title='Optimal Value')
Ejemplo n.º 7
0
def dyna_q_test():
    """Run ``dyna_q`` on Blackjack with fixed settings and plot the result.

    An ``Estimator(0.003)`` and a ``Model(0.003)`` are built (in that
    order), the global variable initializer is run inside a TensorFlow
    session, and the value returned by ``dyna_q`` is plotted after the
    session block has exited.
    """
    blackjack = BlackjackEnv()
    q_estimator = Estimator(0.003)
    world_model = Model(0.003)

    # Keyword settings forwarded unchanged to dyna_q.
    run_settings = {
        'episode_num': 3000,
        'train_model_times': 3000,
        'train_with_model_times': 3,
    }

    with tf.Session() as session:
        init_op = tf.global_variables_initializer()
        session.run(init_op)
        value_fn = dyna_q(
            blackjack, session, q_estimator, world_model, **run_settings
        )
    plotting.plot_value_function(value_fn, title='Optimal Value Function')