def test_paddle_positions():
    """Scatter-plot the best weighted feature score for x in 0..99.

    Loads the saved weights, builds a MockBoundingBoxExtractor from two
    fixed ball bounding boxes (current and previous), and for every x in
    range(0, 100) asks the extractor for the feature sets at that x.
    Each feature set is scored as sum(weights[f] * v) over its
    (feature, value) pairs; the highest score at each x is plotted
    against x.

    Returns nothing; the result is shown in a matplotlib window.
    """
    ball_pos = ((67, 17), (72, 22))
    prev_ball_pos = ((68, 18), (73, 23))
    weights = file_utils.load_weights()
    mbb = feature_extractors.MockBoundingBoxExtractor(ball_pos, prev_ball_pos)
    domain = []
    range_weights = []
    actions = [3, 4]
    for x in range(0, 100):
        # A fresh state dict per x, in case the extractor mutates it.
        state = {}
        best_score = None
        # NOTE(review): presumably one feature set per action -- confirm
        # against get_features_paddle_x.
        features = mbb.get_features_paddle_x(state, actions, x)
        for feature_set in features:
            # Score the complete feature set before comparing. Comparing
            # inside the accumulation loop would let a partial sum win
            # whenever a later term is negative.
            score = sum(weights[f] * v for f, v in feature_set)
            if best_score is None or score > best_score:
                best_score = score
        # Only plot x values for which at least one feature set was scored.
        if best_score is not None:
            domain.append(x)
            range_weights.append(best_score)
    plt.scatter(domain, range_weights)
    plt.show()
예제 #2
0
            print '############################\n'

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights:
            file_utils.save_rewards(rewards, filename='episode-{}-{}-rewards'.format(episode, type(agent).__name__))
            file_utils.save_weights(agent.weights, filename='episode-{}-{}-weights'.format(episode, type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode, total_reward))
        ale.reset_game()
    return rewards

if __name__ == '__main__':
    # Script entry point: load the ROM, hand the emulator's minimal action
    # set to the configured agent, optionally restore saved weights, then
    # run training.
    game = 'asterix.bin'
    gamepath = os.path.join('roms', game)
    agent = LEARNING_ALGORITHM
    # This interface is only used here to query the action set;
    # train_agent is given the ROM path separately.
    ale = ALEInterface()
    ale.loadROM(gamepath)
    actions = ale.getMinimalActionSet()
    agent.actions = actions
    # Call form of print: same output on Python 2, also valid on Python 3.
    print(actions)
    if LOAD_WEIGHTS:
        agent.weights = file_utils.load_weights(WEIGHTS_FILENAME)
    rewards = train_agent(gamepath, agent,
                        n_episodes=NUM_EPISODES,
                        display_screen=DISPLAY_SCREEN,
                        record_weights=RECORD_WEIGHTS,
                        reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT,
                        n_frames_to_skip=NUM_FRAMES_TO_SKIP)
예제 #3
0
                                        episode,
                                        type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode, total_reward))
        ale.reset_game()
    return rewards


if __name__ == '__main__':
    # Script entry point: load the ROM, hand the emulator's minimal action
    # set to the configured agent, optionally restore saved weights, then
    # run training.
    game = 'asterix.bin'
    gamepath = os.path.join('roms', game)
    agent = LEARNING_ALGORITHM
    # This interface is only used here to query the action set;
    # train_agent is given the ROM path separately.
    ale = ALEInterface()
    ale.loadROM(gamepath)
    actions = ale.getMinimalActionSet()
    agent.actions = actions
    # Call form of print: same output on Python 2, also valid on Python 3.
    print(actions)
    if LOAD_WEIGHTS:
        agent.weights = file_utils.load_weights(WEIGHTS_FILENAME)
    rewards = train_agent(
        gamepath,
        agent,
        n_episodes=NUM_EPISODES,
        display_screen=DISPLAY_SCREEN,
        record_weights=RECORD_WEIGHTS,
        reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT,
        n_frames_to_skip=NUM_FRAMES_TO_SKIP)