Beispiel #1
0
    def test_agent(self, agent, input_height, input_width):
        """Run the greedy policy from the current world state and report
        whether it reaches the goal within the known shortest path length.

        Parameters:
            agent: object exposing choose_action(state, epsilon) and get_weights().
            input_height, input_width: int spatial dimensions of the state grid.
        """
        max_test_iter = 12   # hard cap on greedy moves before declaring failure
        shortest_path = 4    # optimal number of moves for this world

        j = 0
        # Snapshot the latest observed state under the shared lock.
        mp_lock.acquire()
        state = self.last_state.get_last_state()
        mp_lock.release()

        # Record every visited state. paths has room for the start state plus
        # exactly max_test_iter moves, so the loop must never exceed that cap.
        paths = np.zeros((max_test_iter + 1, 1, 1, input_height, input_width), dtype='int32')
        paths[j] = state

        rewards = []

        # Begin test phase: act greedily (epsilon = 0) until terminal or cap.
        while True:
            action = agent.choose_action(state, 0)

            self.world.act(action)
            sleep(0.2)  # give the world time to publish the new state

            mp_lock.acquire()
            state_prime = self.last_state.get_last_state()
            mp_lock.release()

            reward, terminal = self.calculate_reward_and_terminal(state_prime)
            state = state_prime

            j += 1
            paths[j] = state
            rewards.append(reward)

            if j == max_test_iter and reward < 10:
                # BUG FIX: original called .format() on print()'s return value
                # (AttributeError on None under Python 3); format the string.
                print('not successful, no reward found after {} moves'.format(max_test_iter))
                break
            elif terminal == 1:
                print('path found.')
                break
            elif j == max_test_iter:
                # BUG FIX: without this guard, reward >= 10 with terminal != 1
                # let the loop continue and index past paths' last slot.
                break

        for i in range(j + 1):
            print(paths[i])

        # Total episode reward (was a manual accumulation loop).
        reward_total = sum(rewards)
        print("Total Reward: {}".format(reward_total))

        if j <= shortest_path + 1 and reward_total >= 10:
            print('success!')
        else:
            print('fail :(')

        # visualize the weights for each of the action nodes
        weights = agent.get_weights()
        plot_weights(weights)
Beispiel #2
0
# Walk the greedy policy (epsilon = 0) from the fixed start state s1 and
# check that it reaches the terminal along the known shortest path.
print('Testing whether optimal path is learned ... '),  # trailing comma: Py2 no-newline
shortest_path = 5
state = s1
terminal = 0

# Accumulate every visited state into one 5x5 occupancy grid.
# NOTE(review): the success check below presumes each state marks exactly one
# new cell, so an optimal run sums to shortest_path — confirm state encoding.
path = np.zeros((5, 5))
path += state[0, 0, :, :]
i = 0
while terminal == 0:

    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime

    path += state[0, 0, :, :]

    i += 1
    if i == 20 or reward == -1:
        print('fail :(')
        # BUG FIX: the original only printed and kept looping; if terminal
        # never became nonzero this was an infinite loop. Stop here.
        break

if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')

print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)
Beispiel #3
0
# Walk the greedy policy (epsilon = 0) from the fixed start state s1 and
# check that it reaches the terminal along the known shortest path.
print('Testing whether optimal path is learned ... '),  # trailing comma: Py2 no-newline
shortest_path = 5
state = s1
terminal = 0

# Accumulate every visited state into one 5x5 occupancy grid.
# NOTE(review): the success check below presumes each state marks exactly one
# new cell, so an optimal run sums to shortest_path — confirm state encoding.
path = np.zeros((5, 5))
path += state[0, 0, :, :]
i = 0
while terminal == 0:

    action = agent.choose_action(state, 0)
    state_prime, reward, terminal = world.act(state, action)
    state = state_prime

    path += state[0, 0, :, :]

    i += 1
    if i == 20 or reward == -1:
        print('fail :(')
        # BUG FIX: the original only printed and kept looping; if terminal
        # never became nonzero this was an infinite loop. Stop here.
        break

if np.sum(path) == shortest_path:
    print('success!')
else:
    print('fail :(')

print('Path: ')
print(path)

# visualize the weights for each of the action nodes
weights = agent.get_weights()
plot_weights(weights)