Example #1
def train_q_table(env):
    q_table = np.zeros([121, 4])

    reward_list = []
    action_list = []

    for i in range(NUM_EPISODES):

        state_map = env.reset()
        state = util.reshape_state(state_map, ENV_WIDTH, 'val')
        reward_all = 0
        action_all = []
        done = False
        step = 0

        # Q-Table learning algorithm
        while step < 100:
            step += 1
            # Choose the greedy action from the Q-table, with decaying random noise for exploration
            action = np.argmax(q_table[state, :] + np.random.randn(1, 4) *
                               (1. / (i + 1)))

            state_map1, reward, done = env.tick(action)
            state1 = util.reshape_state(state_map1, ENV_WIDTH, 'val')
            # Q-learning update:
            # Q(s,a) += lr * (r + y * max_a' Q(s',a') - Q(s,a))
            q_table[state, action] = q_table[state, action] + \
                    LEARNING_RATE * (reward + FTR_DISCOUNT_Y * np.max(q_table[state1, :]) - q_table[state, action])

            reward_all += reward
            action_all.append(action)
            state = state1

            if done:
                print("Done racing for " + str(i))
                break

            if step == 99:
                print("For " + str(i) + " could not reach goal")

        if i in (0, 54, 499):
            action_list.append(action_all)

        reward_list.append(reward_all)
    return reward_list, action_list, NUM_EPISODES, q_table
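This snippet relies on module-level constants and a TrackEnv-style environment defined elsewhere in the repository. A minimal sketch of how it might be wired up; the values below are illustrative assumptions, not the original configuration:

import numpy as np

# Assumed hyperparameters for the tabular Q-learning example.
NUM_EPISODES = 500      # consistent with the i == 499 check above (assumed)
LEARNING_RATE = 0.8     # step size for the Q-table update (assumed)
FTR_DISCOUNT_Y = 0.95   # discount factor y (assumed)
ENV_WIDTH = 11          # 11x11 grid -> 121 table rows

# env is assumed to expose reset() and tick(action) like TrackEnv below.
# rewards, actions, episodes, q_table = train_q_table(TrackEnv(11, 11, "track1"))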
Example #2
def run_frozen_model(file_path):

    graph = load_graph(file_path)

    for op in graph.get_operations():
        print(op.name)

    x = graph.get_tensor_by_name('prefix/input:0')
    y = graph.get_tensor_by_name('prefix/output:0')

    with tf.Session(graph=graph) as sess:
        step = 0
        action_list = []
        track_env = TrackEnv(11, 11, "track1")
        state_map = track_env.reset()

        while step < 22:

            state = util.reshape_state(state_map, ENV_WIDTH, RESHAPE_TYPE)
            act = sess.run(y, feed_dict={x: state})
            action_list.append(act[0])
            state_map, _, done = track_env.tick(act[0])
            if done:
                break
            step += 1
        print(action_list)
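run_frozen_model assumes a load_graph helper that imports a frozen GraphDef under the "prefix" name scope, which is why the tensors are looked up as 'prefix/input:0' and 'prefix/output:0'. The helper is not shown in the example; a sketch of the usual TF1 pattern it would follow:

def load_graph(file_path):
    # Read the frozen protobuf and import it into a fresh graph.
    with tf.gfile.GFile(file_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        # Imported ops are prefixed with "prefix/", matching the names above.
        tf.import_graph_def(graph_def, name="prefix")
    return graph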
def store_final_nn_actions(sess, ffn):
    test_env = TrackEnv(ENV_WIDTH, ENV_HEIGHT, "track1")
    state_map = test_env.reset()
    state = util.reshape_state(state_map, ENV_WIDTH, RESHAPE_TYPE)
    step = 0
    action_array = []
    while step < 99:
        step += 1
        action = sess.run(ffn.predict, feed_dict={ffn.flat_input: state})
        action_array.append(action)
        state_map, _, done = test_env.tick(action[0])
        state = util.reshape_state(state_map, ENV_WIDTH, RESHAPE_TYPE)
        if done:
            break

    np.savetxt(RESULT_PATH + '/deep_q_nn.txt', action_array, fmt='%d')
def train_model(env):
    tf.reset_default_graph()

    e = START_E
    total_steps = 0

    main_ffn = ConvolutionalNetwork(512, LEARNING_RATE)
    target_ffn = ConvolutionalNetwork(512, LEARNING_RATE)

    train_var = tf.trainable_variables()
    target_ops = update_target_graph(train_var)

    saver = tf.train.Saver()

    train_buffer = ExperienceBuffer(BUFFER_SIZE)

    loss_list = []

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        episode_buffer = ExperienceBuffer(BUFFER_SIZE)

        for i in range(NUM_EP):
            state_map = env.reset()
            state = util.reshape_state(state_map, ENV_WIDTH, RESHAPE_TYPE)

            loss_total = 0.
            step = 0

            while step < 39:
                step += 1

                ep_list, state1, done = generate_episode(
                    e, total_steps, env, sess, main_ffn, state)
                total_steps += 1
                episode_buffer.add(ep_list)

                if total_steps > PRE_TRAIN_STEPS:
                    if e > END_E:
                        e -= RDC_E

                    loss = update_model(train_buffer, sess, main_ffn,
                                        target_ffn)
                    update_target(target_ops, sess)
                    loss_total += loss

                state = state1
                if done:
                    break

            train_buffer.add(episode_buffer.buffer)
            loss_list.append(loss_total)
            if i % 500 == 0:
                print(f"Training step {str(i)} complete!")
        test(sess, main_ffn)
        store_final_nn_actions(sess, main_ffn)
        saver.save(sess,
                   MODEL_PATH + '/deep-qnn-model-' + str(NUM_EP) + '.ckpt')
    return loss_list
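update_target_graph and update_target are not shown in these examples. A sketch of the common pattern they usually follow, blending the main network's weights into the target network; the tau value and the main-first/target-second build order are assumptions:

def update_target_graph(tf_vars, tau=0.001):
    # Assumes the first half of the trainable variables belongs to the main
    # network and the second half to the target network (build order matters).
    total_vars = len(tf_vars)
    op_holder = []
    for idx, var in enumerate(tf_vars[0:total_vars // 2]):
        target_var = tf_vars[idx + total_vars // 2]
        op_holder.append(target_var.assign(
            tau * var.value() + (1.0 - tau) * target_var.value()))
    return op_holder

def update_target(op_holder, sess):
    # Run all assign ops to move the target network toward the main network.
    for op in op_holder:
        sess.run(op)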
def generate_episode(e, total_steps, env, sess, main_ffn, state):
    if np.random.rand(1) < e or total_steps < PRE_TRAIN_STEPS:
        action = [np.random.randint(0, 4)]
    else:
        action = sess.run(main_ffn.predict,
                          feed_dict={main_ffn.flat_input: state})

    state_map1, reward, done = env.tick(action[0])
    state1 = util.reshape_state(state_map1, ENV_WIDTH, RESHAPE_TYPE)

    return np.reshape(np.array([state, action[0], reward, state1, done]),
                      [1, 5]), state1, done
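The ExperienceBuffer used above is also defined elsewhere. A minimal sketch of a replay buffer that fits the add(...) calls and the [state, action, reward, next_state, done] rows produced by generate_episode; the default size and eviction policy are assumptions:

import random
import numpy as np

class ExperienceBuffer:
    def __init__(self, buffer_size=50000):
        self.buffer = []
        self.buffer_size = buffer_size

    def add(self, experience):
        # Evict the oldest entries when the buffer would overflow.
        overflow = len(self.buffer) + len(experience) - self.buffer_size
        if overflow > 0:
            self.buffer[0:overflow] = []
        self.buffer.extend(experience)

    def sample(self, size):
        # Return `size` random transitions as a (size, 5) array of
        # [state, action, reward, next_state, done] rows.
        return np.reshape(np.array(random.sample(self.buffer, size)),
                          [size, 5])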
Example #6
def test(sess, ffn, reshape_type):
    step = 0
    action_list = []
    track_env = TrackEnv(11, 11, "track1")
    state_map = track_env.reset()

    while step < 22:

        state = util.reshape_state(state_map, ENV_WIDTH, reshape_type)
        act, all_q = sess.run([ffn.predict, ffn.q_out],
                              feed_dict={ffn.in_var: state})
        print(all_q)
        action_list.append(act[0])
        state_map, _, done = track_env.tick(act[0])
        if done:
            break
        step += 1
    print(action_list)
Example #7
def run_ckpt(model_dir, meta_graph):
    # Restoring Graph
    # This function returns a Saver
    saver = tf.train.import_meta_graph(model_dir + '/' + meta_graph)

    # We can access the default graph where all our metadata has been loaded
    graph = tf.get_default_graph()
    for op in graph.get_operations():
        print(op.name)

    # Retrieve tensors, operations, collections, etc.
    x = graph.get_tensor_by_name('input:0')
    weights = graph.get_tensor_by_name('w1:0')
    y_weight = graph.get_tensor_by_name('output_w:0')
    y = graph.get_tensor_by_name('output:0')

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(model_dir)
        saver.restore(sess, ckpt.model_checkpoint_path)  #pylint: disable=E1101

        step = 0
        action_list = []
        track_env = TrackEnv(11, 11, "track1")
        state_map = track_env.reset()

        w = sess.run(weights)
        print(w)
        while step < 22:

            state = util.reshape_state(state_map, ENV_WIDTH, RESHAPE_TYPE)
            y_out, y_arr = sess.run([y, y_weight], feed_dict={x: state})
            print(y_arr)
            action_list.append(y_out[0])
            state_map, _, done = track_env.tick(y_out[0])
            if done:
                break
            step += 1

        print(action_list)
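run_ckpt can only retrieve 'input:0', 'w1:0', 'output_w:0' and 'output:0' because the graph that produced the checkpoint gave those tensors explicit names. A sketch of how such names are typically assigned at graph-construction time; the layer sizes, the hidden layer, and the exact roles of w1 and output_w are assumptions inferred from the lookups above:

# Graph construction side (names must match what run_ckpt looks up).
x = tf.placeholder(shape=[1, 121], dtype=tf.float32, name='input')
w1 = tf.Variable(tf.random_uniform([121, 64], 0, 0.01), name='w1')
hidden = tf.nn.relu(tf.matmul(x, w1))
output_w = tf.Variable(tf.random_uniform([64, 4], 0, 0.01), name='output_w')
q_out = tf.matmul(hidden, output_w)
output = tf.argmax(q_out, 1, name='output')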
Example #8
def train_model(env, reshape_type, lr):
    tf.reset_default_graph()
    ffn = FeedForwardNetwork(121, 4, lr)
    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    # Learning parameters
    y = 0.99
    e = 0.1
    num_episodes = 1000

    action_list = []
    reward_list = []
    loss_list = []
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_episodes):

            state_map = env.reset()
            state = util.reshape_state(state_map, ENV_WIDTH, reshape_type)
            reward_all = 0.
            action_all = []
            loss_total = 0.
            done = False
            step = 0

            while step < 99:
                step += 1

                action, all_q = sess.run([ffn.predict, ffn.q_out],
                                         feed_dict={ffn.in_var: state})

                if np.random.rand(1) < e:
                    action[0] = np.random.randint(0, 4)

                state_map1, reward, done = env.tick(action[0])
                state1 = util.reshape_state(state_map1, ENV_WIDTH, reshape_type)
                q1 = sess.run(ffn.q_out, feed_dict={ffn.in_var: state1})

                max_q1 = np.max(q1)
                target_q = all_q
                target_q[0, action[0]] = reward + y * max_q1

                # Train the network using target and predicted Q-values
                loss, _ = sess.run([ffn.loss, ffn.update_model],
                                   feed_dict={ffn.in_var: state,
                                              ffn.q_next: target_q})
                loss_total += loss
                reward_all += reward
                action_all.append(action)
                state = state1

                if done:
                    e = 1./((i/50) + 10)
                    break

            if i % 500 == 0:
                print("Completed " + str(i) + " episodes")

            if i in (0, 54, 499):
                action_list.append(action_all)
            reward_list.append(reward_all)
            loss_list.append(loss_total)
        w = sess.run(ffn.weights)
        print(w)
        saver.save(sess, MODEL_PATH + '/qnn-model-' + str(num_episodes) + '.ckpt')
        test(sess, ffn, reshape_type)

    return action_list, reward_list, loss_list, num_episodes
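The FeedForwardNetwork class is not included in these examples. A minimal single-layer Q-network consistent with the attributes used above (in_var, weights, q_out, predict, q_next, loss, update_model); the initializer range and optimizer choice are assumptions:

class FeedForwardNetwork:
    def __init__(self, num_states, num_actions, lr):
        # Flattened state vector as input, one row per step.
        self.in_var = tf.placeholder(shape=[1, num_states], dtype=tf.float32)
        self.weights = tf.Variable(
            tf.random_uniform([num_states, num_actions], 0, 0.01))
        self.q_out = tf.matmul(self.in_var, self.weights)
        self.predict = tf.argmax(self.q_out, 1)

        # Target Q-values fed in from the training loop.
        self.q_next = tf.placeholder(shape=[1, num_actions], dtype=tf.float32)
        self.loss = tf.reduce_sum(tf.square(self.q_next - self.q_out))
        self.update_model = tf.train.GradientDescentOptimizer(
            learning_rate=lr).minimize(self.loss)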