def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    print("epoch", "\t", "score", "\t", "high", "\t", "avg")
    highscore, avgscore = 0.0, 0.0
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)

        score = swing.score
        highscore = max([highscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)

        print(ii, "\t", score, "\t", highscore, "\t", avgscore)

        # Reset the state of the learner.
        learner.reset()

    return
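The avgscore update above maintains an exact running mean without storing every score. A quick self-contained check (illustrative values only):

# Sanity check: the incremental update (ii*avg + score)/(ii + 1)
# reproduces the batch mean after any number of games.
scores = [3, 0, 7, 2, 5]
avg = 0.0
for ii, score in enumerate(scores):
    avg = (ii * avg + score) / (ii + 1)
assert abs(avg - sum(scores) / len(scores)) < 1e-12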
Example 2
def run_games(learner, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    # initialize results DataFrame (assumes pandas imported as pd)
    df = pd.DataFrame(columns=["gravity", "score", "death"])

    # run iters games
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)
        learner.swing = swing

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        df.loc[len(df)] = [swing.gravity, swing.score, swing.death]

        # Reset the state of the learner.
        learner.reset()

    return df
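Growing a DataFrame with df.loc[len(df)] reallocates on every row, which gets slow over long runs. A minimal alternative sketch (same three fields, stand-in values) collects plain dicts and builds the frame once:

import pandas as pd

# Collect one dict per game, then build the DataFrame in a single pass.
rows = []
for gravity, score, death in [(1, 12, True), (4, 3, True)]:  # stand-in results
    rows.append({"gravity": gravity, "score": score, "death": death})
df = pd.DataFrame(rows, columns=["gravity", "score", "death"])
print(df)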
Example 3
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():

            # This is where we build sarsa arrays utilizing learner.method()
            # You can get the action via learner.last_action (False=0/glide, True=1/jump)
            # You can get the state via learner.last_state
            # You can get the reward via learner.last_reward (0,+1 if pass, -5 if hit, -10 if fall off screen)
            # Can infer gravity by checking monkey velocity from time step to time step if action is false
                # Gravity is an integer 1, 2, 3, or 4

            pass
        
        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()
        
    return
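The comment block inside the loop sketches how gravity can be inferred: while gliding, the monkey's velocity drops by the gravity constant each tick. A minimal sketch, assuming the state dict exposes state['monkey']['vel'] as in SwingyMonkey's get_state():

def infer_gravity(prev_state, curr_state, last_action):
    """Return the inferred gravity constant, or None if it cannot be
    inferred from this pair of ticks."""
    if last_action:
        # A jump resets the velocity, so consecutive ticks tell us nothing.
        return None
    # While gliding, velocity decreases by exactly the gravity constant.
    return prev_state['monkey']['vel'] - curr_state['monkey']['vel']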
Example 4
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print("[Game #%d / Score: %d / " % (ii, swing.score), end="")
        # Train learner on the last game, timing how long training takes.
        tic = time.time()
        learner.train(swing.score)
        toc = time.time()
        print("training time: %3.3f]" % float(toc - tic))

        # Reset last_state, last_action, last_reward, and game memory of the learner (learned parameters are retained).
        learner.reset()

    return
Example 5
def run_games(learner, hist, iters=100, t_len=1):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            # Display the epoch on screen.
            text="Epoch %d" % (ii),
            # Make game ticks super fast.
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
Example 6
def evaluate(gamma=0.4, iters=100, chatter=True):

    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max([highscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)

        if chatter:
            print(ii, score, highscore, avgscore)

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
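Returning -avgscore suggests evaluate() is meant to be handed to a minimizer, so that minimizing it maximizes the average score. A minimal usage sketch, assuming scipy is available (each call plays iters full games, so keep iters small):

from scipy.optimize import minimize_scalar

result = minimize_scalar(lambda g: evaluate(gamma=g, iters=20, chatter=False),
                         bounds=(0.0, 1.0), method='bounded')
print('best gamma:', result.x, 'best average score:', -result.fun)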
Example 7
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            learner.last_state = swing.get_state()

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
Example 8
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        # mean over the most recent 100 games (the slice also handles shorter histories)
        avgscore = np.mean(hist[-100:])
        print("epoch:",ii, "highest:", np.max(hist),
            "current score:", swing.score, "average:", avgscore)
        # Reset the state of the learner.
        learner.reset()
    pg.quit()
    return
Example 9
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        print "Epoch: %i |" % ii,
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        if learner.algo == "qlearn":
            q_filled = float(np.count_nonzero(
                learner.Q)) * 100 / learner.Q.size
            print 'score: %d |' % swing.score, 'Q: %s' % str(round(
                q_filled, 3)) + "%"
        else:
            print 'score %d' % swing.score

        # Reset the state of the learner.
        learner.reset()
    return
Example 10
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        #print('Epoch Gravity:', swing.gravity)
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print(f'it: {ii}')
        print(f'best score: {max(hist)}')
        print(f'average score: {sum(hist)/len(hist)}')
        # Reset the state of the learner.
        learner.reset()
    # print(f'best score: {max(hist)}')
    # print(f'average score: {mean(hist)}')
    pg.quit()
    return
Example 11
def run_games(learner, hist, iters=1000, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    counter = 0
    while counter < iters:
        try:
            # Make a new monkey object.
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text=f"Epoch {counter}: {learner.best_score}",  # Display epoch and best score.
                tick_length=t_len,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback)

            # Loop until you hit something.
            while swing.game_loop():
                pass

            # Save score history.
            if (swing.score > learner.best_score):
                learner.best_score = swing.score
            hist.append(swing.score)

            # Reset the state of the learner.
            learner.reset()
            counter += 1
        except Exception:
            # If a game crashes mid-episode, discard it and retry;
            # the counter only advances on a completed game.
            pass

    return
Example 12
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    timestamp = int(time.time())
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print('epoch: ' + str(ii) + ', score: ' + str(swing.score) + ', gravity: ' + str(swing.gravity) + ', running_avg: ' + str(np.average(hist[-10:])))

        results = {
            'gamma': learner.gamma,
            'eta': learner.eta,
            'epsilon_decay': learner.epsilon_decay,
            'hist': hist
        }

        with open('results/results_approx_' + str(timestamp) + '.p', 'wb') as f:
            pickle.dump(results, f)

        # Reset the state of the learner.
        learner.reset()
    pg.quit()
    return
Example 13
def evaluate(gamma=0.4, iters=100, chatter=True):

    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):

        learner.epsilon = 1/(ii+1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max([highscore, score])
        avgscore = (ii*avgscore+score)/(ii+1)

        if chatter:
            print(ii, score, highscore, avgscore)

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
Example 14
def run_games(learner, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    scores = []
    scores1 = []
    scores4 = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        scores.append(swing.score)
        if swing.gravity == 1:
            scores1.append(swing.score)
        elif swing.gravity == 4:
            scores4.append(swing.score)
        # Reset the state of the learner.
        learner.reset()

    # plot the game scores over time, and save to a png
    plt.plot(range(iters), scores)
    plt.title('Scores')
    plt.get_current_fig_manager().window.showMaximized()
    plt.savefig('scores.png')
    plt.show()
    plt.close()

    window = 50
    # compute a moving average of the score
    ma = np.convolve(scores, np.ones(window) / window, mode='valid')
    plt.plot(np.arange(len(ma)) + window, ma)
    plt.title('50-Game Moving Average Score')
    plt.get_current_fig_manager().window.showMaximized()
    plt.savefig('scores_ma.png')
    plt.show()
    plt.close()

    print('When gravity=1: %d games, with an average score of %.3f' % (
        len(scores1), np.mean(scores1)))
    print('When gravity=4: %d games, with an average score of %.3f' % (
        len(scores4), np.mean(scores4)))
    print('For all games: %d games, with an average score of %.3f' % (
        len(scores), np.mean(scores)))
    # store the scores in a pickle file (binary mode is required by pickle)
    with open('scores.p', 'wb') as f:
        pickle.dump((scores, scores1, scores4), f)
    return
Example 15
def run_games(file, learner, hist, iters=100, t_len=10):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    f = open(file + '.txt', 'w')
    f.write('Training History\n')

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)

        # save modulo
        if learner.epoch <= 200:
            mod = 50
        else:
            mod = 10

        if learner.epoch % mod == 0:
            curr_best = '\nEpoch {} - Current Best Score: {}\n'.format(
                learner.epoch, np.max(hist))
            print(curr_best)
            f.write(curr_best)

            with open(file + '.pickle', 'wb') as outputfile:
                pickle.dump(learner.Q, outputfile)

            np.save(file, np.array(hist))

        if DECREASING_EPSILON:
            learner.epsilon *= 0.99

        # Reset the state of the learner.
        learner.reset()

    best_score = 'Best Score: {}'.format(np.max(hist))
    print(best_score)
    f.write(best_score + '\n')

    avg_score = 'Average Score: {}'.format(np.mean(hist))
    print(avg_score)
    f.write(avg_score + '\n')

    f.close()

    pg.quit()
    return
Example 16
    def run_game():
        # Make a new monkey object.
        swing = SwingyMonkey(visual=False,      # no video
                             sound=False,       # no audio        
                             action_callback=learner_class.action_callback,
                             reward_callback=learner_class.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        return swing
Example 17
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    if iters < 20:
        print("I can't learn that fast! Try more iterations.")
    
    # DATA-GATHERING PHASE
    for ii in range(30):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.explore_action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass  
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
    
    # EXPLOITATION PHASE
    for ii in range(30, iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass      
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
        
    return
Example 18
def session(learner, options):
    learner_class = init_learner(learner, options.learner_classes)

    # history dictionaries: epoch # -> whatever
    rewards = {}
    scores = {}

    history = History(rewards, scores)

    # save state
    pvideo = options.video

    print "Starting training phase for %s ..." % (learner)
    max_score = 0
    for t in range(options.train_iters + options.test_iters):
        prev_score = scores[t - 1] if t > 0 else 0
        # print information about the epoch currently being run
        if t == options.train_iters:
            print("Starting testing phase for %s ..." % (learner))
            options.video = (options.test_tick > 0)
        if t < options.train_iters:
            print("======= Training epoch %d / %d." % (t, options.train_iters))
        else:
            print("======= Test epoch %d / %d." %
                  (t - options.train_iters, options.test_iters))
        print("Max score: %d. Previous epoch score: %d" %
              (max_score, prev_score))

        # Make a new monkey object.
        swing = SwingyMonkey(visual=options.video,
                             sound=False,
                             tick_length=options.train_tick
                             if t < options.train_iters else options.test_tick,
                             action_callback=learner_class.action_callback,
                             reward_callback=learner_class.reward_callback)

        # Loop until you hit something.
        episode_rewards = []
        while swing.game_loop():
            if learner_class.last_reward is not None:
                episode_rewards.append(learner_class.last_reward)

        # collect statistics
        rewards[t] = copy.deepcopy(episode_rewards)
        scores[t] = swing.score  # an int; no copy needed

        max_score = max(max_score, scores[t])

    # reset
    options.video = pvideo

    return history, learner_class
Example 19
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    learner.alpha = 0.2
    learner.gamma = 0.6
    alpha = learner.alpha
    gamma = learner.gamma
    with open("test_Q2.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["alpha", "gamma", "epoch", "highest", "average", "score", "q"])

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)
        with open("test_Q2.csv", "a+", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(
                [[alpha, gamma, ii, highestscore, avgscore, score, q]])

        # Reset the state of the learner.
        learner.reset()

    pg.quit()
    return avgscore, highestscore, score
Example 20
def sim_games(learner, iters=None, t_len=50):
    i = 0
    # Demonstrate the learner playing the game; with iters=None this ends
    # only when you close the window manually.
    while iters is None or i < iters:
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,
                             tick_length=t_len,
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass
        learner.reset()
        i += 1
    return
Example 21
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    record = {}
    record['epoch'] = []
    record['highest'] = []
    record['avg'] = []
    record['score'] = []
    record['q'] = []

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)
        with open("record12.p", "wb") as f:
            pickle.dump(record, f)
        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
Example 22
def testgame(iters=100,show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    record = {}
    record['epoch'] = []
    record['highest'] = []
    record['avg'] = []
    record['score'] = []
    record['q'] = []

    for ii in range(iters):

        learner.epsilon = 1/(ii+1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)
            
        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)
        with open("record12.p", "wb") as f:
            pickle.dump(record, f)
        # Reset the state of the learner.
        learner.reset()
    

    return avgscore, highestscore, score
Example 23
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):

        # make a new monkey object
        swing = SwingyMonkey(
            sound=False,  # don't play sounds
            text="Epoch %d" % (ii),  # display the epoch on screen
            tick_length=t_len,  # make game ticks super fast
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # pass the screen dimensions to the agent
        learner.update_specs(swing.screen_height, swing.screen_width)

        # loop until you hit something
        while swing.game_loop():
            pass

        # update transition to terminal state
        learner.update_terminal_transition()

        # save score history
        hist.append(swing.score)
        print('Epoch %i: current score %i; best score %i' % (ii, swing.score,
                                                             np.max(hist)))

        # reset the state of the learner
        learner.reset()

    # display score history and stats
    print('----------')
    print('Parameters: %0.2f alpha; %0.2f gamma; %0.2f epsilon' % (
        learner.alpha, learner.gamma, learner.epsilon))
    print('Score history:', hist)
    print('Best score:', np.max(hist))
    print('Average score:', np.mean(hist))
    print('----------')

    return np.max(hist)
Example 24
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey()

        # Initialize history dictionaries for iteration ii
        hist['state'][ii] = []
        hist['action'][ii] = []
        hist['reward'][ii] = []

        # Loop until you hit something.
        while swing.game_loop():

            # This is where we build sarsa arrays utilizing learner.method()
            # You can get the action via learner.last_action (False=0/glide, True=1/jump)
            # You can get the state via learner.last_state
            # You can get the reward via learner.last_reward (0,+1 if pass, -5 if hit, -10 if fall off screen)
            # Can infer gravity by checking monkey velocity from time step to time step if action is false
                # Gravity is an integer 1, 2, 3, or 4

            # import pdb
            # pdb.set_trace()

            hist['state'][ii].append(learner.last_state)
            hist['action'][ii].append(learner.last_action)
            hist['reward'][ii].append(learner.last_reward)

        else: # Get final action,reward and state just to see how the monkey failed.
            hist['state'][ii].append(learner.last_state)
            hist['action'][ii].append(learner.last_action)
            hist['reward'][ii].append(learner.last_reward)
        
        # Save score history.
        hist['score'].append(swing.score)

        # Reset the state of the learner.
        learner.reset()
        
    return
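The while/else above leans on a Python detail worth spelling out: the else suite runs whenever the loop condition becomes false without a break, so here it always fires once game_loop() returns False, capturing the final action, reward, and state of the fatal transition. A tiny demonstration:

count = 3
while count:
    count -= 1
else:
    # Reached because the loop ended normally (there is no break above).
    print("loop finished; final bookkeeping goes here")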
Example 25
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):

        # make a new monkey object
        swing = SwingyMonkey(sound=False,                  # don't play sounds
                             text="Epoch %d" % (ii),       # display the epoch on screen
                             tick_length = t_len,          # make game ticks super fast
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # pass the screen dimensions to the agent
        learner.update_specs(swing.screen_height, swing.screen_width)

        # loop until you hit something
        while swing.game_loop():
            pass

        # update transition to terminal state
        learner.update_terminal_transition()

        # save score history
        hist.append(swing.score)
        print('Epoch %i: current score %i; best score %i' % (ii, swing.score, np.max(hist)))

        # reset the state of the learner
        learner.reset()

    # display score history and stats
    print('----------')
    print('Parameters: %0.2f alpha; %0.2f gamma; %0.2f epsilon' % (learner.alpha, learner.gamma, learner.epsilon))
    print('Score history:', hist)
    print('Best score:', np.max(hist))
    print('Average score:', np.mean(hist))
    print('----------')

    return np.max(hist)
Example 26
def run_games(learner, hist, iters = 100, t_len = 100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    high = 0
    avg = 0
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        new_score = swing.score

        # Save score history.
        if new_score > high:
            high = new_score

        avg = (new_score + ii * avg) / (ii + 1.0)
        print("%i\t%i\t%i\t%s:\t%s" % (ii, new_score, high, avg, np.mean(learner.Q)))
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
        
    print(learner.Q)
    print(learner.state_counts)
    return
Example 27
for ii in range(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        tick_length=1,  # Make game ticks super fast.
        # Display the epoch on screen and % of Q matrix filled
        text="Epoch %d " % (ii) +
        str(round(
            float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)) +
        "%",
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Keep track of the score for that epoch.
    learner.scores.append(learner.last_state['score'])
    if learner.last_state['score'] > learner.best_score:
        print('New best Q')
        learner.best_score = learner.last_state['score']
        learner.bestQ = learner.Q.copy()

    print('score %d' % learner.last_state['score'], str(
        round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size,
              3)) + "%")

    # Reset the state of the learner.
    learner.reset()
Example 28
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    net_states = []
    net_rewards = []
    net_actions = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        iter_states = []
        iter_rewards = []
        iter_actions = []
        iter_count = 0
        while swing.game_loop():
            state = swing.get_state()
            iter_states.append(np.array(list(state['tree'].values()) +
                    list(state['monkey'].values()) + [learner.gravity]))
            iter_rewards.append(learner.last_reward)
            iter_actions.append(int(learner.last_action))
            iter_count += 1
            if iter_count > 1 and not learner.know_gravity:
                learner.learn_gravity(iter_states, iter_actions)
                if learner.know_gravity:
                    for num in range(len(iter_states)):
                        iter_states[num][-1] = learner.gravity
        # Record the terminal state reached when the game ended.
        state = swing.get_state()
        iter_states.append(list(state['tree'].values()) +
                    list(state['monkey'].values()) + [learner.gravity])
        iter_rewards.append(learner.last_reward)
        iter_actions.append(int(learner.last_action))

        #Adding to the net training set
        net_states += iter_states
        net_rewards += iter_rewards
        net_actions += iter_actions

        if ii == 0:
            xtrain = build_training_set(net_states, net_actions)
            ytrain = np.array(net_rewards)
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        else:
            xtrain = build_training_set(net_states[:-1], net_actions[:-1])
            #Building the q_state update.
            ytrain = np.array([learner.model.predict(np.append(net_states[k], net_actions[k])) + \
                    learner.alpha*(net_rewards[k] + learner.gamma* np.max([learner.model.predict(np.append(net_states[k+1], int(action)))\
                            for action in learner.actions]) - \
                    learner.model.predict(np.append(net_states[k], net_actions[k]))) for k in range(len(net_states)-1)])
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        learner.model = RF
        learner.model_trained = True

        if ii % 10 == 0:
            learner.epsilon -= 0.05

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
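The ytrain comprehension above packs an entire fitted Q-iteration (Ernst-style) target into one line. The same update unrolled for readability, as a sketch assuming a scikit-learn-style regressor and the attribute names used above:

import numpy as np

def q_target(model, s, a, r, s_next, actions, alpha, gamma):
    """One regression target: Q(s,a) + alpha*(r + gamma*max_a' Q(s',a') - Q(s,a))."""
    q_sa = model.predict(np.append(s, a).reshape(1, -1))[0]
    q_next = max(model.predict(np.append(s_next, int(a2)).reshape(1, -1))[0]
                 for a2 in actions)
    return q_sa + alpha * (r + gamma * q_next - q_sa)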
Example 29
def run_games(learner, hist, eps=0.5, gam=0.5, alph=0.75, iters = 20, t_len = 100, test=False):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    # Place alpha and epsilon values into learner
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2

    # Initialize estimator for Q-function

    total_states = []
    total_actions = []
    total_rewards = []
    total_scores = []

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Initialize history dictionaries for iteration ii
        states = []
        actions = []
        rewards = []
        loop_counter = 0

        # Loop until you hit something.
        while swing.game_loop():

            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action==True))
            rewards.append(learner.last_reward)

            if learner.learn_g and (loop_counter > 1):
                learner.infer_g(states,actions)
                for pp in range(len(states)):
                    states[pp][-1] = learner.gravity

            loop_counter += 1

        else: # Get final action,reward and state just to see how the monkey failed.
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action==True))
            rewards.append(learner.last_reward)
        
        # Append histories from most recent epoch, create training arrays
        total_scores.append(swing.score)
        total_states += states
        total_actions += actions
        total_rewards += rewards
        
        if not test:

            # Iteratively refine the optimal policy after each epoch
            if ii == 0:
                X_train = np.array([np.append(total_states[kk],total_actions[kk]) for kk in range(len(total_states))])
                y_train = np.array(total_rewards)

                #Build tree using first stage Q-learning
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train, y_train)

            # Refit random forest estimator based on composite epochs
            
            else:
                # Generate new X(state,action) and y(reward) lists from the newly run batch, based on the Q-estimator and prior rewards, a la Ernst '06
                X_train = np.array([np.append(total_states[kk], total_actions[kk]) for kk in range(len(total_rewards)-1)])
                # Construct Bellman's equations to get expected rewards based on the next proposed state
                y_train = np.array([learner.estimator.predict(np.append(total_states[kk], total_actions[kk])) \
                    + learner.alph*(total_rewards[kk] + (learner.gam * np.max([learner.estimator.predict(np.append(total_states[kk+1]\
                    , act)) for act in range(learner.num_actions)])) - learner.estimator.predict(np.append(total_states[kk], total_actions[kk]))) \
                    for kk in range(len(total_states)-1)])
                
                # Re-fit regression to refine optimal policy according to expected reward.
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train,y_train)

            # As we refine the policy, we should reduce the amount we explore.    
            if ii % 10 == 0:
                learner.eps += 0.05

            learner.estimator = extraTrees
            learner.fitted = True

        else:

            learner.fitted = True        

        # Reset the state of the learner.
        learner.reset()

    # Place state, action, reward and score histories to be saved by wrapper.    
    hist['state_history'] = total_states
    hist['action_history'] = total_actions
    hist['reward_history'] = total_rewards
    hist['score_history'] = total_scores 
    return
Example 30
iters = 10000
learner = Learner()

for ii in range(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         tick_length=1,          # Make game ticks super fast.
                         # Display the epoch on screen and % of Q matrix filled
                         text="Epoch %d " % (ii) + str(round(float(np.count_nonzero(learner.Q))*100/learner.Q.size,3)) + "%", 
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Keep track of the score for that epoch.
    learner.scores.append(learner.last_state['score'])
    if learner.last_state['score'] > learner.best_score:
        print('New best Q')
        learner.best_score = learner.last_state['score']
        learner.bestQ = learner.Q.copy()

    print('score %d' % learner.last_state['score'], str(round(float(np.count_nonzero(learner.Q))*100/learner.Q.size, 3)) + "%")

    # Reset the state of the learner.
    learner.reset()

print(np.mean(learner.scores))
Example 31
def run_games(learner, hist, policy="random", eps=0.9, gam=0.5, alph=0.75, iters=20, t_len=100):
    """
    Driver function to simulate learning by having the agent play a sequence of games.
    """
    # Place alpha and epsilon values into learner
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2

    # Initialize estimator for Q-function

    total_states = []
    total_actions = []
    total_rewards = []
    total_scores = []

    for ii in range(iters):
        # Make a new monkey object.

        if policy == "random":
            swing = SwingyMonkey(
                sound=False,
                text="Random Epoch %d" % (ii),
                tick_length=t_len,
                action_callback=learner.random_actions,
                reward_callback=learner.reward_callback,
            )

        else:
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text="Learned Epoch %d" % (ii),  # Display the epoch on screen.
                tick_length=t_len,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback,
            )

            learner.fitted = True

        # Initialize history dictionaries for iteration ii
        states = []
        actions = []
        rewards = []
        loop_counter = 0

        # Loop until you hit something.
        while swing.game_loop():

            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action == True))
            rewards.append(learner.last_reward)

            if learner.learn_g and (loop_counter > 1):
                learner.infer_g(states, actions)
                for pp in range(len(states)):
                    states[pp][-1] = learner.gravity

            loop_counter += 1

        else:  # Get final action,reward and state just to see how the monkey failed.
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action == True))
            rewards.append(learner.last_reward)

        # Append histories from most recent epoch, create training arrays
        total_scores.append(swing.score)
        total_states += states
        total_actions += actions
        total_rewards += rewards

        # Reset the state of the learner.
        learner.reset()

    hist["state_history"] = hist["state_history"] + total_states
    hist["action_history"] += total_actions
    hist["reward_history"] += total_rewards
    hist["score_history"] += total_scores

    return
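A plausible way to drive this function, given the policy switch, is a random data-gathering pass followed by a learned pass. A hypothetical sketch (the empty history keys mirror the appends at the end of run_games; learner is assumed to be constructed elsewhere):

hist = {"state_history": [], "action_history": [],
        "reward_history": [], "score_history": []}
run_games(learner, hist, policy="random", iters=20)    # explore with random actions
run_games(learner, hist, policy="learned", iters=80)   # then act on the fitted policy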