Example 1
import json

import numpy as np


# dqn, preprocess and PacmanGame are defined elsewhere in the original script.
def test(strategy=dqn, log_file='train_params.json'):
    with open('test_params.json', 'r') as file:
        read_params = json.load(file)

    game_params = read_params['params']
    test_start_states = read_params['states']
    total_history = []
    total_scores = []

    env = PacmanGame(**game_params)
    for start_state in test_start_states:
        preprocess(start_state)
        episode_history = []
        env.reset()
        env.player = start_state['player']
        env.monsters = start_state['monsters']
        env.diamonds = start_state['diamonds']
        env.walls = start_state['walls']
        assert len(env.monsters) == env.nmonsters and len(
            env.diamonds) == env.ndiamonds and len(env.walls) == env.nwalls

        obs = env.get_obs()
        episode_history.append(obs)
        while not obs['end_game']:
            action = strategy(obs)
            obs = env.make_action(action)
            episode_history.append(obs)
        total_history.append(episode_history)
        total_scores.append(obs['total_score'])
    mean_score = np.mean(total_scores)
    with open(log_file, 'w') as file:
        json.dump(total_history, file)
    print(
        "Your average score is {}, saved log to '{}'. Do not forget to upload it for submission!"
        .format(mean_score, log_file))
    return mean_score
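The test() harness above only requires that strategy map an observation dict to an action code. As a purely illustrative baseline (the 1..9 action encoding is an assumption here, not taken from this example), a random policy could be plugged in like this:

import random


def random_strategy(obs, n_actions=9):
    # Baseline policy for the test() harness above: ignore the observation
    # and return a uniformly random action code (assumed to be 1..n_actions;
    # the real encoding depends on the PacmanGame API).
    return random.randint(1, n_actions)


test(strategy=random_strategy, log_file='random_baseline.json')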
Example 2
                          write_graph=False,
                          write_images=False)
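The two keyword arguments above are the tail of a callback configuration whose opening line is not shown. For orientation only, a typical Keras TensorBoard callback that takes these flags (the log directory is a placeholder, not taken from the original) looks like:

from keras.callbacks import TensorBoard

tensorboard_cb = TensorBoard(log_dir='logs/',   # placeholder directory
                             write_graph=False,
                             write_images=False)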

if __name__ == '__main__':
    # counters:
    step = 0  # training step counter (= epoch counter)
    iteration = 0  # frames counter
    episodes = 0  # game episodes counter
    done = True  # indicator that env needs to be reset
    nb_actions = 10

    episode_scores = []  # collect total scores in this list and log it later

    env = PacmanGame(**game_params)

    obs = env.reset()
    while step < n_steps:
        if obs['end_game']:  # game over, restart it
            obs = env.reset()
            score = 0  # reset score for current episode

        state = get_observation(obs)

        iteration += 1

        # Online network evaluates what to do:
        # estimate Q-values for the current state with the online network.
        q_values = online_network.predict(state)[0]
        # select epsilon (which linearly decreases over training steps):
        epsilon = max(eps_min,
                      eps_max - (eps_max - eps_min) * step / eps_decay_steps)
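The snippet stops right after the epsilon schedule. In a standard DQN training loop the next step is epsilon-greedy action selection; a self-contained sketch of such a helper (not part of the original script) is:

import numpy as np


def epsilon_greedy(q_values, epsilon, nb_actions):
    # With probability epsilon explore (random action index), otherwise
    # exploit by picking the action with the highest predicted Q-value.
    # Indices are 0..nb_actions-1; shift if the game expects 1-based codes.
    if np.random.rand() < epsilon:
        return np.random.randint(nb_actions)
    return int(np.argmax(q_values))

Inside the loop this would be used as action = epsilon_greedy(q_values, epsilon, nb_actions) before stepping the environment with env.make_action(), as in the test() harness of Example 1.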
Example 3
        # Q-learning update:
        # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
        max_q = max([
            self.qmap[tuple(new_state) + (a, )] for a in new_possible_actions
        ])
        self.qmap[old_stateaction] = (1 - self.alpha) * self.qmap[
            old_stateaction] + self.alpha * (reward + self.gamma * max_q)
        return

    def best_action(self, state, possible_actions):
        # Get the action with highest Q-Value estimate for specific state
        a, q = max([(a, self.qmap[tuple(state) + (a, )])
                    for a in possible_actions],
                   key=lambda x: x[1])
        return a
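These two methods assume a tabular agent whose self.qmap maps (state..., action) tuples to Q-value estimates and whose self.alpha and self.gamma are the learning rate and discount factor. A minimal constructor sketch consistent with that (class name and default values are assumptions):

from collections import defaultdict


class TabularQAgent:
    def __init__(self, alpha=0.1, gamma=0.9):
        self.alpha = alpha   # learning rate
        self.gamma = gamma   # discount factor
        # Unseen (state..., action) keys default to a Q-value of 0.0,
        # which is what the update and best_action lookups rely on.
        self.qmap = defaultdict(float)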


from keras.models import clone_model
from keras.optimizers import Adam

input_shape = (len(get_state(env.reset())), )
nb_actions = len(action_to_dxdy)
online_network = create_dqn_model(input_shape, nb_actions)
online_network.compile(optimizer=Adam(), loss='mse')
target_network = clone_model(online_network)
target_network.set_weights(online_network.get_weights())

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
SVG(model_to_dot(online_network).create(prog='dot', format='svg'))
from keras.utils import plot_model
plot_model(online_network,
           to_file='online_network.png',
           show_shapes=True,
           show_layer_names=True)
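create_dqn_model() itself is not shown in this example. A plausible definition in the same Keras API, assuming a small fully connected network (the layer sizes are arbitrary), could be:

from keras.models import Sequential
from keras.layers import Dense


def create_dqn_model(input_shape, nb_actions):
    # Simple MLP: flattened state vector in, one Q-value per action out.
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model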