Example #1
0
def evaluate(gamma=0.4, iters=100, chatter=True):

    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in xrange(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max([highscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)

        if chatter:
            print ii, score, highscore, avgscore

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Let the agent play `iters` games in a row, checkpointing after each one.

    After every game the final score is appended to `hist` (mutated in place),
    a status line is printed, and a dict with the learner's `gamma`, `eta`,
    `epsilon_decay` and the full `hist` is pickled to a file under `results/`.
    The file name carries the integer wall-clock time at which this call
    started, so the same file is rewritten after every game.  pygame is shut
    down once all games are done.
    '''
    out_path = 'results/results_approx_' + str(int(time.time())) + '.p'

    for epoch in range(iters):
        # Fresh game wired to the learner's callbacks.
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Step the game until it reports that it is over.
        while game.game_loop():
            pass

        hist.append(game.score)

        # Status line: running average covers at most the last ten games.
        status = ', '.join([
            'epoch: ' + str(epoch),
            'score: ' + str(game.score),
            'gravity: ' + str(game.gravity),
            'running_avg: ' + str(np.average(hist[-10:])),
        ])
        print(status)

        snapshot = dict(gamma=learner.gamma,
                        eta=learner.eta,
                        epsilon_decay=learner.epsilon_decay,
                        hist=hist)
        with open(out_path, 'wb') as sink:
            pickle.dump(snapshot, sink)

        # Clear the learner's per-game state before the next game.
        learner.reset()

    pg.quit()
    return
Example #3
0
def run_games(learner, hist, iters=100, t_len=1):
    '''
    Play `iters` consecutive games with `learner`, recording each final score.

    Each game is a fresh SwingyMonkey created with `tick_length=t_len` and the
    learner's action/reward callbacks.  The game's final score is appended to
    `hist` (mutated in place) and the learner is reset before the next game.
    '''
    for epoch in range(iters):
        label = "Epoch %d" % (epoch)  # shown on screen
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Keep stepping until the game reports it has ended.
        while game.game_loop():
            pass

        hist.append(game.score)
        learner.reset()

    return
Example #4
0
def run_games(learner, hist, iters=1000, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.

    learner -- agent providing action_callback, reward_callback, reset() and
               a `best_score` attribute, which is raised whenever a game
               beats it.
    hist    -- list that receives the final score of every completed game.
    iters   -- number of games that must complete successfully.
    t_len   -- value passed to the game as `tick_length`.

    A game that raises an ordinary exception is not counted and is retried.
    KeyboardInterrupt and SystemExit are deliberately NOT caught, so the run
    can still be aborted (a bare `except:` would swallow them and keep
    looping).
    '''
    import traceback

    counter = 0
    while counter < iters:
        try:
            # Make a new monkey object.
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text=
                f"Epoch {counter}: {learner.best_score}",  # Display the epoch on screen.
                tick_length=t_len,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback)

            # Loop until you hit something.
            while swing.game_loop():
                pass

            # Save score history.
            if swing.score > learner.best_score:
                learner.best_score = swing.score
            hist.append(swing.score)

            # Reset the state of the learner.
            learner.reset()
            counter += 1
        except Exception:
            # Keep the best-effort retry, but make the failure visible
            # instead of silently spinning on the same error.
            traceback.print_exc()

    return
Example #5
0
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Play `iters` games, logging progress after each, then shut pygame down.

    The final score of every game is appended to `hist` (mutated in place).
    The printed average covers the most recent 100 entries of `hist`, or all
    of them while fewer than 100 exist.
    '''
    for epoch in range(iters):
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length = t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Step the game until it is over.
        while game.game_loop():
            pass

        hist.append(game.score)

        # A tail slice longer than the list simply yields the whole list, so
        # one expression covers both the short and the long history case.
        avgscore = np.mean(hist[-100:])
        print("epoch:", epoch, "highest:", np.max(hist),
              "current score:", game.score, "average:", avgscore)

        learner.reset()

    pg.quit()
    return
Example #6
0
def run_games(learner, iters=100, t_len=100):
    '''
    Play `iters` games and return a per-game log as a pandas DataFrame.

    The returned frame has one row per game with the columns "gravity",
    "score" and "death", read from the finished game object.  Each new game
    object is also stored on `learner.swing` before play starts.
    '''
    log = pd.DataFrame(columns=["gravity", "score", "death"])

    for epoch in range(iters):
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)
        learner.swing = game

        # Step the game until it is over.
        while game.game_loop():
            pass

        # Append this game's outcome as the next row.
        next_row = len(log)
        log.loc[next_row] = [game.gravity, game.score, game.death]

        learner.reset()

    return log
Example #7
0
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Play `iters` games, training the learner after each one.

    After every game the score is appended to `hist` (mutated in place),
    `learner.train(score)` is called and its wall-clock duration printed,
    and finally `learner.reset()` is called.
    '''
    for epoch in range(iters):
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Step the game until it is over.
        while game.game_loop():
            pass

        hist.append(game.score)

        # First half of the status line; the timing below completes it.
        print("[Game #%d / Score: %d / " % (epoch, game.score), end="")

        started = time.time()
        learner.train(game.score)
        finished = time.time()
        print("training time: %3.3f]" % float(finished - started))

        # Clear the learner's per-game state before the next game.
        learner.reset()

    return
Example #8
0
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        print "Epoch: %i |" % ii,
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        if learner.algo == "qlearn":
            q_filled = float(np.count_nonzero(
                learner.Q)) * 100 / learner.Q.size
            print 'score: %d |' % swing.score, 'Q: %s' % str(round(
                q_filled, 3)) + "%"
        else:
            print 'score %d' % swing.score

        # Reset the state of the learner.
        learner.reset()
    return
Example #9
0
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Play `iters` games, reporting best and mean score after each one.

    Every final score is appended to `hist` (mutated in place); the best and
    average values printed are computed over the whole of `hist`, including
    any entries it held on entry.  pygame is shut down afterwards.
    '''
    for epoch in range(iters):
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Step the game until it is over.
        while game.game_loop():
            pass

        hist.append(game.score)

        best = max(hist)
        mean = sum(hist)/len(hist)
        print(f'it: {epoch}')
        print(f'best score: {best}')
        print(f'average score: {mean}')

        learner.reset()

    pg.quit()
    return
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Play `iters` games and print a tab-separated progress table.

    A header row is printed first, then one row per game holding the epoch,
    that game's score, the best score so far and the running mean of this
    call's games.  Each score is also appended to `hist` (mutated in place).
    '''
    print("epoch", "\t", "score", "\t", "high", "\t", "avg")

    best = 0.0
    running_mean = 0.0

    for epoch in range(iters):
        label = "Epoch %d" % (epoch)
        game = SwingyMonkey(sound=False,
                            text=label,
                            tick_length=t_len,
                            action_callback=learner.action_callback,
                            reward_callback=learner.reward_callback)

        # Step the game until it is over.
        while game.game_loop():
            pass

        score = game.score
        hist.append(score)

        best = max(best, score)
        # Incremental mean over the epoch + 1 games played so far.
        running_mean = (epoch * running_mean + score) / (epoch + 1)

        print(epoch, "\t", score, "\t", best, "\t", running_mean)

        learner.reset()

    return
Example #11
0
def run_games(file, learner, hist, iters=100, t_len=10):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.

    file    -- path prefix for the outputs: `file + '.txt'` (text log),
               `file + '.pickle'` (pickled `learner.Q`) and the score array
               written by `np.save(file, ...)`.
    learner -- agent providing the callbacks, reset(), and the `epoch`, `Q`
               and `epsilon` attributes read or updated here.
    hist    -- list that receives the final score of every game.
    iters   -- number of games to play.
    t_len   -- value passed to the game as `tick_length`.

    Checkpoints are written whenever `learner.epoch` is a multiple of 50
    (while it is <= 200) or of 10 (afterwards).  `learner.epoch` is only read
    here; it is presumably advanced by the learner itself -- confirm.
    '''

    # The context manager guarantees the text log is flushed and closed even
    # if a game, a checkpoint write or the final statistics raise.
    with open(file + '.txt', 'w') as f:
        f.write('Training History\n')

        for ii in range(iters):
            # Make a new monkey object.
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text="Epoch %d" % (ii),  # Display the epoch on screen.
                tick_length=t_len,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback)

            # Loop until you hit something.
            while swing.game_loop():
                pass

            # Save score history.
            hist.append(swing.score)

            # Checkpoint interval: sparse early on, denser later.
            mod = 50 if learner.epoch <= 200 else 10

            if learner.epoch % mod == 0:
                curr_best = '\nEpoch {} - Current Best Score: {}\n'.format(
                    learner.epoch, np.max(hist))
                print(curr_best)
                f.write(curr_best)

                with open(file + '.pickle', 'wb') as outputfile:
                    pickle.dump(learner.Q, outputfile)

                np.save(file, np.array(hist))

            if DECREASING_EPSILON:
                learner.epsilon *= 0.99

            # Reset the state of the learner.
            learner.reset()

        best_score = 'Best Score: {}'.format(np.max(hist))
        print(best_score)
        f.write(best_score + '\n')

        avg_score = 'Average Score: {}'.format(np.mean(hist))
        print(avg_score)
        f.write(avg_score + '\n')

    pg.quit()
    return
Example #12
0
def run_games(learner, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    scores = []
    scores1 = []
    scores4 = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)

        scores.append(swing.score)
        if swing.gravity == 1:
            scores1.append(swing.score)
        elif swing.gravity == 4:
            scores4.append(swing.score)
        # Reset the state of the learner.
        learner.reset()

    # plot the game scores over time, and save to a png
    plt.plot(range(iters), scores)
    plt.title('Scores')
    plt.get_current_fig_manager().window.showMaximized()
    plt.savefig('scores.png')
    plt.show()
    plt.close()

    window = 50
    # compute a moving average of the score
    ma = np.convolve(scores, np.ones(window) / window, mode='valid')
    plt.plot(np.arange(len(ma)) + window, ma)
    plt.title('50-Game Moving Average Score')
    plt.get_current_fig_manager().window.showMaximized()
    plt.savefig('scores_ma.png')
    plt.show()
    plt.close()

    print 'When gravity=1: %d games, with an average score of %.3f' % (
        len(scores1), np.mean(scores1))
    print 'When gravity=4: %d games, with an average score of %.3f' % (
        len(scores4), np.mean(scores4))
    print 'For all games: %d games, with an average score of %.3f' % (
        len(scores), np.mean(scores))
    # store the scores in a pickle file
    pickle.dump((scores, scores1, scores4), open('scores.p', 'w'))
    return
    def run_game():
        '''
        Play a single game without video or sound and return the game object.

        The callbacks come from `learner_class`, which is taken from the
        enclosing scope.
        '''
        game = SwingyMonkey(sound=False,       # no audio
                            visual=False,      # no video
                            action_callback=learner_class.action_callback,
                            reward_callback=learner_class.reward_callback)

        # Step the game until it is over.
        while game.game_loop():
            pass

        return game
Example #14
0
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    if iters < 20:
        print "I can't learn that fast! Try more iterations."
    
    # DATA-GATHERING PHASE
    for ii in range(30):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.explore_action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass  
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
    
    # EXPLOITATION PHASE
    for ii in range(iters)[30:]:
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass      
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
        
    return
Example #15
0
def session(learner, options):
    learner_class = init_learner(learner, options.learner_classes)

    # history dictionaries: epoch # -> whatever
    rewards = {}
    scores = {}

    history = History(rewards, scores)

    # save state
    pvideo = options.video

    print "Starting training phase for %s ..." % (learner)
    max_score = 0
    for t in xrange(options.train_iters + options.test_iters):
        prev_score = scores[t - 1] if t > 0 else 0
        # print information about the epoch currently being run
        if t == options.train_iters:
            print("Starting testing phase for %s ..." % (learner))
            options.video = (options.test_tick > 0)
        if t < options.train_iters:
            print("======= Training epoch %d / %d." % (t, options.train_iters))
        else:
            print("======= Test epoch %d / %d." %
                  (t - options.train_iters, options.test_iters))
        print("Max score: %d. Previous epoch score: %d" %
              (max_score, prev_score))

        # Make a new monkey object.
        swing = SwingyMonkey(visual=options.video,
                             sound=False,
                             tick_length=options.train_tick
                             if t < options.train_iters else options.test_tick,
                             action_callback=learner_class.action_callback,
                             reward_callback=learner_class.reward_callback)

        # Loop until you hit something.
        episode_rewards = []
        while swing.game_loop():
            if learner_class.last_reward is not None:
                episode_rewards.append(learner_class.last_reward)

        # collect statistics
        rewards[t] = copy.deepcopy(episode_rewards)
        scores[t] = copy.deepcopy(swing.score)

        max_score = max(max_score, scores[t])

    # reset
    options.video = pvideo

    return history, learner_class
Example #16
0
def testgame(iters=100, show=True):
    '''
    Train a QLearner2 for `iters` games and log per-epoch statistics to CSV.

    iters -- number of games to play.
    show  -- when true, also print the per-epoch statistics.

    test_Q2.csv is truncated and given a header row first; one row is then
    appended after every game (alpha, gamma, epoch, best score, running
    average, score, percentage of non-zero entries in learner.Q).

    Returns (average score, highest score, last score).  With iters == 0 no
    game is played and all three are 0.
    '''

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    # Defined up front so the return below is valid even when no game runs.
    score = 0
    learner.alpha = 0.2
    learner.gamma = 0.6
    alpha = learner.alpha
    gamma = learner.gamma
    with open("test_Q2.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["alpha", "gamma", "epoch", "highest", "average", "score", "q"])

    for ii in range(iters):

        # Exploration rate decays as 1/(epoch+1).
        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)
        # Percentage of Q-table entries that are non-zero.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)
        # Re-open in append mode each epoch so every row is on disk as soon
        # as the game that produced it has finished.
        with open("test_Q2.csv", "a+", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(
                [[alpha, gamma, ii, highestscore, avgscore, score, q]])

        # Reset the state of the learner.
        learner.reset()

    pg.quit()
    return avgscore, highestscore, score
Example #17
0
def sim_games(learner, iters=None, t_len=50):
    '''
    Demonstrate the learner by letting it play games back to back.

    learner -- agent providing action_callback, reward_callback and reset().
    iters   -- number of games to play; None (the default) keeps playing
               with no upper bound.
    t_len   -- value passed to the game as `tick_length`.
    '''
    i = 0
    # demonstrate the learner playing the game; with iters=None this will end
    # only when you close the window manually
    while iters is None or i < iters:
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,
                             tick_length=t_len,  # honour the caller's tick length
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass
        learner.reset()
        i += 1
    return
Example #18
0
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    record = {}
    record['epoch'] = []
    record['highest'] = []
    record['avg'] = []
    record['score'] = []
    record['q'] = []

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show == True:
            print "epoch:", ii, "highest:", highestscore, "current score:", score, "average:", avgscore, "% of Q mx filled:", q

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)
        pickle.dump(record, open("record12.p", "wb"))
        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):

        # make a new monkey object
        swing = SwingyMonkey(
            sound=False,  # don't play sounds
            text="Epoch %d" % (ii),  # display the epoch on screen
            tick_length=t_len,  # make game ticks super fast
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # pass the screen dimensions to the agent
        learner.update_specs(swing.screen_height, swing.screen_width)

        # loop until you hit something
        while swing.game_loop():
            pass

        # update transition to terminal state
        learner.update_terminal_transition()

        # save score history
        hist.append(swing.score)
        print 'Epoch %i: current score %i; best score %i' % (ii, swing.score,
                                                             np.max(hist))

        # reset the state of the learner
        learner.reset()

    # display score history and stats
    print '----------'
    print 'Parameters: %0.2f alpha; %0.2f gamma; %0.2f epsilon' % (
        learner.alpha, learner.gamma, learner.epsilon)
    print 'Score history:', hist
    print 'Best score:', np.max(hist)
    print 'Average score:', np.mean(hist)
    print '----------'

    return np.max(hist)
Example #20
0
    # Sweep over every entry of `gammas`; for each one, train `trial`
    # independent learners for `iters` games and store every game's score.
    for k in xrange(len(gammas)):
        for j in xrange(trial):
            learner = Learner(10, 10, 3)
            # NOTE(review): `gammas` is indexed with `i`, not with the loop
            # variable `k`.  `i` is not defined in the visible code
            # (presumably an enclosing loop index, as it is also the first
            # index of `record` below) -- confirm whether `gammas[k]` was meant.
            learner.gamma = gammas[i]
            learner.epsilon = 0.0
            score = 0
            cur_iter = 0
            scores = []

            for ii in xrange(iters):

                # Make a new monkey object.
                swing = SwingyMonkey(
                    sound=False,  # Don't play sounds.
                    text="Epoch %d" % (ii),  # Display the epoch on screen.
                    tick_length=1,  # Make game ticks super fast.
                    render=False,
                    action_callback=learner.action_callback,
                    reward_callback=learner.reward_callback)

                # Seed the learner's counter from the value saved after the
                # previous game, plus one.
                learner.iter = cur_iter + 1

                # Loop until you hit something.
                while swing.game_loop():
                    pass

                # Read the counter back so it carries over into the next game
                # (presumably the learner advances it during play -- confirm).
                cur_iter = learner.iter
                record[i][k][j][ii] = learner.cur_score

                # Track the best score and the full score list for this learner.
                score = max(score, learner.cur_score)
                scores.append(learner.cur_score)
Example #21
0
        '''This gets called so you can see what reward you get.'''

        self.last_reward = reward


# Top-level training script: play `iters` games with a single Learner and
# keep a copy of its Q table whenever a game sets a new best score.
iters = 10000
learner = Learner()

for ii in xrange(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        tick_length=1,  # Make game ticks super fast.
        # Display the epoch on screen and % of Q matrix filled
        # (share of non-zero entries in learner.Q, rounded to 3 decimals).
        text="Epoch %d " % (ii) +
        str(round(
            float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)) +
        "%",
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Keep track of the score for that epoch.
    # The score is read from the learner's last recorded state.
    # NOTE(review): no learner.reset() call is visible in this loop -- confirm
    # whether the learner clears its per-game state elsewhere.
    learner.scores.append(learner.last_state['score'])
    if learner.last_state['score'] > learner.best_score:
        print 'New best Q'
        learner.best_score = learner.last_state['score']
        # Copy the table so later updates to learner.Q do not alter the snapshot.
        learner.bestQ = learner.Q.copy()
Example #22
0
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.

    After every game an ExtraTreesRegressor is refitted on all states,
    actions and rewards collected so far (across games) and installed as
    `learner.model`.  The first fit uses the raw rewards as targets; later
    fits use a Q-learning style target built from the current model's
    predictions.  Each final score is appended to `hist`.
    '''
    # Experience accumulated over all games played so far.
    net_states = []
    net_rewards = []
    net_actions = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        # Per-game buffers; one entry is recorded per game tick.
        iter_states = []
        iter_rewards = []
        iter_actions = []
        iter_count = 0
        while swing.game_loop():
            state = swing.get_state()
            # Feature vector: tree values, monkey values, then gravity.
            # NOTE(review): `.values() + ...` is list concatenation, which
            # presumably relies on Python 2 dicts returning lists -- confirm.
            iter_states.append(np.array(state['tree'].values()+\
                    state['monkey'].values()+[learner.gravity]))
            iter_rewards.append(learner.last_reward)
            iter_actions.append(int(learner.last_action))
            iter_count += 1
            # Until the learner reports that it knows gravity, give it the
            # states/actions seen so far; once it does, overwrite the gravity
            # slot (last element) of every state recorded in this game.
            if iter_count > 1 and learner.know_gravity == False:
                learner.learn_gravity(iter_states, iter_actions)
                if learner.know_gravity == True:
                    for num in range(len(iter_states)):
                        iter_states[num][-1] = learner.gravity
        # Record the state once more after the loop has exited.
        # NOTE(review): this entry is a plain list, whereas the entries added
        # inside the loop are np.array objects -- confirm this is intended.
        state = swing.get_state()
        iter_states.append(state['tree'].values()+\
                    state['monkey'].values()+[learner.gravity])
        iter_rewards.append(learner.last_reward)
        iter_actions.append(int(learner.last_action))

        #Adding to the net training set
        net_states += iter_states
        net_rewards += iter_rewards
        net_actions += iter_actions

        if ii == 0:
            # First game: no model exists yet, so regress directly on rewards.
            xtrain = build_training_set(net_states, net_actions)
            ytrain = np.array(net_rewards)
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        else:
            # The final sample is dropped because it has no successor state.
            xtrain = build_training_set(net_states[:-1], net_actions[:-1])
            #Building the q_state update.
            # Target for sample k:
            #   Q(s_k, a_k) + alpha * (r_k + gamma * max_a Q(s_{k+1}, a) - Q(s_k, a_k))
            # with every Q value taken from the current learner.model.
            # NOTE(review): k runs over the concatenation of all games, so
            # s_{k+1} can be the first state of the following game -- confirm.
            ytrain = np.array([learner.model.predict(np.append(net_states[k], net_actions[k])) + \
                    learner.alpha*(net_rewards[k] + learner.gamma* np.max([learner.model.predict(np.append(net_states[k+1], int(action)))\
                            for action in learner.actions]) - \
                    learner.model.predict(np.append(net_states[k], net_actions[k]))) for k in range(len(net_states)-1)])
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        # Install the freshly fitted regressor as the learner's model.
        learner.model = RF
        learner.model_trained = True

        # Lower epsilon by 0.05 on every tenth game, starting with game 0.
        # NOTE(review): no lower bound is applied here -- confirm epsilon
        # cannot go negative for the iteration counts in use.
        if ii % 10 == 0:
            learner.epsilon -= 0.05

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return