def test_paddle_positions():
    """Scatter-plot the best weighted feature score for each paddle x in [0, 100).

    Loads weights through ``file_utils.load_weights()`` and builds a
    ``feature_extractors.MockBoundingBoxExtractor`` from two fixed pairs of
    coordinates (current and previous ball position). For every ``x`` the
    extractor's ``get_features_paddle_x(state, actions, x)`` is queried with
    an empty state and the action list ``[3, 4]``; each returned feature set
    is scored as ``sum(weights[f] * v)`` and the highest score is kept.

    The x values that produced at least one feature set are plotted against
    their best score with ``plt.scatter`` and the figure is shown with
    ``plt.show()``. Nothing is returned.
    """
    # Fixed mock coordinates handed to the extractor.
    # NOTE(review): presumably ((x1, y1), (x2, y2)) bounding-box corners --
    # confirm against MockBoundingBoxExtractor.
    ball_pos = ((67, 17), (72, 22))
    prev_ball_pos = ((68, 18), (73, 23))

    weights = file_utils.load_weights()
    mbb = feature_extractors.MockBoundingBoxExtractor(ball_pos, prev_ball_pos)
    actions = [3, 4]

    domain = []         # x positions that yielded at least one feature set
    range_weights = []  # best score found for the matching entry in `domain`

    for x in range(0, 100):
        # A fresh dict per query, as in the original, in case the extractor
        # mutates the state it is given.
        state = {}
        best_score = None
        features = mbb.get_features_paddle_x(state, actions, x)

        for feature_set in features:
            score = sum(weights[f] * v for f, v in feature_set)
            # Identity test against None: a legitimate score of 0 (or a type
            # with a custom __eq__) must not be mistaken for "no score yet".
            if best_score is None or score > best_score:
                best_score = score

        # Skip x values for which the extractor returned no feature sets.
        if best_score is not None:
            domain.append(x)
            range_weights.append(best_score)

    plt.scatter(domain, range_weights)
    plt.show()
# NOTE(review): the original line structure and indentation of this span are
# lost (everything is collapsed onto one physical line), and the span starts
# inside a definition whose header is not visible here. The code below is
# therefore left untouched; the notes describe only what the visible
# statements do. Print statements use the Python 2 statement form.
#
# Part 1 -- tail of the enclosing training routine (header out of view):
#   * prints a separator line of '#' characters;
#   * when `episode` is non-zero, is a multiple of RECORD_WEIGHTS_PERIOD, and
#     `record_weights` is truthy, saves `rewards` and `agent.weights` through
#     file_utils under the names 'episode-<episode>-<AgentClass>-rewards' and
#     'episode-<episode>-<AgentClass>-weights';
#   * while agent.explorationProb is above MINIMUM_EXPLORATION_EPSILON,
#     lowers it by `reduce_exploration_prob_amount`;
#   * prints the episode number and `total_reward`, then calls
#     ale.reset_game();
#   * returns `rewards`.
#
# Part 2 -- script entry point (`if __name__ == '__main__':`):
#   * builds the ROM path os.path.join('roms', 'asterix.bin');
#   * takes the agent from LEARNING_ALGORITHM;
#   * creates an ALEInterface, loads the ROM, and stores the result of
#     getMinimalActionSet() on `agent.actions` (also printed);
#   * when LOAD_WEIGHTS is truthy, replaces agent.weights with
#     file_utils.load_weights(WEIGHTS_FILENAME);
#   * calls train_agent(...) with the module-level NUM_EPISODES,
#     DISPLAY_SCREEN, RECORD_WEIGHTS, EXPLORATION_REDUCTION_AMOUNT and
#     NUM_FRAMES_TO_SKIP settings and keeps the result in `rewards`.
print '############################\n' if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights: file_utils.save_rewards(rewards, filename='episode-{}-{}-rewards'.format(episode, type(agent).__name__)) file_utils.save_weights(agent.weights, filename='episode-{}-{}-weights'.format(episode, type(agent).__name__)) if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON: agent.explorationProb -= reduce_exploration_prob_amount print('episode: {} ended with score: {}'.format(episode, total_reward)) ale.reset_game() return rewards if __name__ == '__main__': game = 'asterix.bin' gamepath = os.path.join('roms', game) agent = LEARNING_ALGORITHM ale = ALEInterface() ale.loadROM(gamepath) actions = ale.getMinimalActionSet() agent.actions = actions; print actions if LOAD_WEIGHTS: agent.weights = file_utils.load_weights(WEIGHTS_FILENAME) rewards = train_agent(gamepath, agent, n_episodes=NUM_EPISODES, display_screen=DISPLAY_SCREEN, record_weights=RECORD_WEIGHTS, reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT, n_frames_to_skip=NUM_FRAMES_TO_SKIP)
# NOTE(review): this span begins in the middle of a call expression (the
# closing arguments `episode, type(agent).__name__))`) inside a definition
# whose header is not visible here, and its original line breaks and
# indentation are lost. The code is left untouched; the notes describe only
# what the visible statements do. `print actions` is the Python 2 statement
# form.
#
# Part 1 -- tail of the enclosing training routine (header out of view):
#   * while agent.explorationProb is above MINIMUM_EXPLORATION_EPSILON,
#     lowers it by `reduce_exploration_prob_amount`;
#   * prints the episode number and `total_reward`, then calls
#     ale.reset_game();
#   * returns `rewards`.
#
# Part 2 -- script entry point (`if __name__ == '__main__':`):
#   * builds the ROM path os.path.join('roms', 'asterix.bin');
#   * takes the agent from LEARNING_ALGORITHM;
#   * creates an ALEInterface, loads the ROM, and stores the result of
#     getMinimalActionSet() on `agent.actions` (also printed);
#   * when LOAD_WEIGHTS is truthy, replaces agent.weights with
#     file_utils.load_weights(WEIGHTS_FILENAME);
#   * calls train_agent(...) with the module-level NUM_EPISODES,
#     DISPLAY_SCREEN, RECORD_WEIGHTS, EXPLORATION_REDUCTION_AMOUNT and
#     NUM_FRAMES_TO_SKIP settings and keeps the result in `rewards`.
episode, type(agent).__name__)) if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON: agent.explorationProb -= reduce_exploration_prob_amount print('episode: {} ended with score: {}'.format(episode, total_reward)) ale.reset_game() return rewards if __name__ == '__main__': game = 'asterix.bin' gamepath = os.path.join('roms', game) agent = LEARNING_ALGORITHM ale = ALEInterface() ale.loadROM(gamepath) actions = ale.getMinimalActionSet() agent.actions = actions print actions if LOAD_WEIGHTS: agent.weights = file_utils.load_weights(WEIGHTS_FILENAME) rewards = train_agent( gamepath, agent, n_episodes=NUM_EPISODES, display_screen=DISPLAY_SCREEN, record_weights=RECORD_WEIGHTS, reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT, n_frames_to_skip=NUM_FRAMES_TO_SKIP)