Example #1
import json
import numpy as np

def test(strategy=dqn, log_file='train_params.json'):  # dqn: strategy callable defined elsewhere
    with open('test_params.json', 'r') as file:
        read_params = json.load(file)

    game_params = read_params['params']
    test_start_states = read_params['states']
    total_history = []
    total_scores = []

    env = PacmanGame(**game_params)
    for start_state in test_start_states:
        preprocess(start_state)  # helper defined elsewhere; prepares the saved start state
        episode_history = []
        env.reset()
        env.player = start_state['player']
        env.monsters = start_state['monsters']
        env.diamonds = start_state['diamonds']
        env.walls = start_state['walls']
        assert len(env.monsters) == env.nmonsters
        assert len(env.diamonds) == env.ndiamonds
        assert len(env.walls) == env.nwalls

        obs = env.get_obs()
        episode_history.append(obs)
        while not obs['end_game']:
            action = strategy(obs)
            obs = env.make_action(action)
            episode_history.append(obs)
        total_history.append(episode_history)
        total_scores.append(obs['total_score'])
    mean_score = np.mean(total_scores)
    with open(log_file, 'w') as file:
        json.dump(total_history, file)
    print("Your average score is {}, saved log to '{}'. "
          "Do not forget to upload it for submission!".format(mean_score, log_file))
    return mean_score
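
`test` treats `strategy` as a callable that maps an observation dict to an action, but none of these examples define one. A minimal random baseline, a sketch that assumes the observation exposes a `possible_actions` list (as the commented-out line in Example #2 hints), could look like this:

import random

def random_strategy(obs):
    # Hypothetical helper: pick a uniformly random legal action.
    # Assumes obs['possible_actions'] lists the legal actions.
    return random.choice(obs['possible_actions'])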
Example #2
            score = 0  # reset score for current episode

        state = get_observation(obs)

        iteration += 1

        # Online network evaluates what to do
        q_values = online_network.predict(state)[0]  # Q-values from the online network
        # select epsilon (which linearly decreases over training steps):
        epsilon = max(eps_min,
                      eps_max - (eps_max - eps_min) * step / eps_decay_steps)
        #nb_actions = obs['possible_actions']
        action = epsilon_greedy(q_values, epsilon, nb_actions)
        # Play:
        obs = env.make_action(action)  # make action and get results
        reward = obs['reward']

        if obs['end_game']:
            episode_scores.append(obs['total_score'])

        done = obs['end_game']
        next_state = get_observation(obs)
        # Let's memorize what just happened
        replay_memory.append((state, action, reward, next_state, done))

        if iteration >= warmup and iteration % training_interval == 0:
            # learning branch
            step += 1
            minibatch = random.sample(replay_memory, batch_size)
            replay_state = np.array([x[0][0] for x in minibatch])
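
Example #2 calls `epsilon_greedy` without showing it. The standard implementation is short; this sketch assumes actions are 0-indexed integers and `nb_actions` is the size of the action space:

import random
import numpy as np

def epsilon_greedy(q_values, epsilon, n_outputs):
    # With probability epsilon explore (random action), otherwise exploit (argmax Q).
    if random.random() < epsilon:
        return random.randrange(n_outputs)  # explore
    return int(np.argmax(q_values))         # exploit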
Example #3
import numpy as np

step = 0  # training step counter (= epoch counter)
iteration = 0  # frame counter
episodes = 0  # game episode counter
end_game = True  # indicator that the env needs to be reset

episode_scores = []  # collect total scores in this list and log it later

while step < n_steps:
    if end_game:  # game over, restart it
        obs = env.reset()
        score = 0  # reset score for current episode
        # skip the start of each game (the environment is frozen before play begins)
        for skip in range(skip_start):
            try:
                obs = env.make_action(1)  #env.step(0)
                reward = obs['reward']
                end_game = obs['end_game']
                score += reward
            except AssertionError:
                continue

        state = get_state(obs)
        episodes += 1

    # Online network evaluates what to do
    iteration += 1
    q_values = online_network.predict(np.array([state]))[0]  # Q-values from the online network

    # select epsilon (which linearly decreases over training steps):
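
These loops also assume a pre-filled `replay_memory` and a `target_network` that periodically tracks `online_network`; neither is shown in the snippets. A common setup, with `replay_memory_size` and `copy_steps` as assumed hyperparameters, is a bounded deque plus a weight copy inside the learning branch:

from collections import deque

replay_memory_size = 50000  # assumed hyperparameter
replay_memory = deque(maxlen=replay_memory_size)  # oldest transitions are evicted automatically

# inside the learning branch, every copy_steps training steps:
if step % copy_steps == 0:
    target_network.set_weights(online_network.get_weights())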
Example #4
# from keras.utils import plot_model
# plot_model(online_network, to_file='online_network.png',show_shapes=True,show_layer_names=True)

# training the model
import random  # for replay-memory sampling
import numpy as np

step = 0  # training step counter
iteration = 0  # frame counter
done = True  # force an env reset on the first iteration
while step < n_steps:
    if done:
        obs = env.reset()
    iteration += 1
    q_values = online_network.predict(np.array([get_state(obs)]))[0]
    epsilon = max(eps_min,
                  eps_max - (eps_max - eps_min) * step / eps_decay_steps)
    action = epsilon_greedy(q_values, epsilon, nb_actions)
    next_obs = env.make_action(action + 1)  # shift to the env's apparently 1-based action ids
    reward = next_obs["reward"]
    done = next_obs["end_game"]
    replay_memory.append((obs, action, reward, next_obs, done))
    obs = next_obs

    if iteration >= warmup and iteration % training_interval == 0:
        step += 1
        minibatch = random.sample(replay_memory, batch_size)
        replay_state = np.array([get_state(x[0]) for x in minibatch])
        replay_action = np.array([x[1] for x in minibatch])
        replay_rewards = np.array([x[2] for x in minibatch])
        replay_next_state = np.array([get_state(x[3]) for x in minibatch])
        replay_done = np.array([x[4] for x in minibatch], dtype=int)
        # TD target: r + gamma * max_a' Q_target(s', a'), zeroed for terminal states
        target_for_action = replay_rewards + (1 - replay_done) * gamma * \
            np.amax(target_network.predict(replay_next_state), axis=1)
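
The snippet ends right after computing the TD targets. One common way to finish the update inside the same `if` block, not necessarily what the original code did, is to overwrite the online network's predictions at the taken actions with `target_for_action` and run one gradient step:

        target_q_values = online_network.predict(replay_state)  # current Q estimates
        target_q_values[np.arange(batch_size), replay_action] = target_for_action
        online_network.train_on_batch(replay_state, target_q_values)  # single gradient step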
Example #5
# from mini_pacman import test, random_strategy, naive_strategy
# random_med = test(strategy=random_strategy, log_file='test_pacman_log_random.json')
# naive_med = test(strategy=naive_strategy, log_file='test_pacman_log_naive.json')
# custom_med = test(strategy=custom_strategy, log_file='test_pacman_log_custom.json')

# print(f'Random Median = {random_med} Naive Median = {naive_med} Custom Median = {custom_med}')

####### RENDERING TO SEE PERFORMANCE #######
import time
import numpy as np

total_history = []
total_scores = []

for game in range(10):
    print(f"Game {game}, let's go!")
    episode_history = []  # reset per game; initializing this outside the loop would carry old frames over
    obs = env.reset()
    episode_history.append(obs)
    while not obs['end_game']:
        action = custom_strategy(obs)
        obs = env.make_action(action)
        episode_history.append(obs)
        env.render()
        time.sleep(0.1)

    total_history.append(episode_history)
    total_scores.append(obs['total_score'])
mean_score = np.mean(total_scores)
median_score = np.median(total_scores)

print("Your average score is {}, median is {}. "
      "Do not forget to upload it for submission!".format(
          mean_score, median_score))
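
Note that, unlike Example #1, this rendering loop never writes `total_history` to disk even though the closing message mentions an upload. If a log is needed, a dump in the style of Example #1 would close that gap, reusing the file name from the commented-out block above:

import json
with open('test_pacman_log_custom.json', 'w') as file:
    json.dump(total_history, file)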