Example #1
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        if len(hist) < 100:
            avgscore = np.mean(hist)
        else:
            avgscore = np.mean(hist[-100:])
        print("epoch:",ii, "highest:", np.max(hist),
            "current score:", swing.score, "average:", avgscore)
        # Reset the state of the learner.
        learner.reset()
    pg.quit()
    return
Example #2
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            learner.last_state = swing.get_state()

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
Example #3
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        #print('Epoch Gravity:', swing.gravity)
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print(f'it: {ii}')
        print(f'best score: {max(hist)}')
        print(f'average score: {sum(hist)/len(hist)}')
        # Reset the state of the learner.
        learner.reset()
    # print(f'best score: {max(hist)}')
    # print(f'average score: {mean(hist)}')
    pg.quit()
    return
Example #4
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    timestamp = int(time.time())
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print('epoch: ' + str(ii) + ', score: ' + str(swing.score) + ', gravity: ' + str(swing.gravity) + ', running_avg: ' + str(np.average(hist[-10:])))

        results = {
            'gamma': learner.gamma,
            'eta': learner.eta,
            'epsilon_decay': learner.epsilon_decay,
            'hist': hist
        }

        with open('results/results_approx_' + str(timestamp) + '.p', 'wb') as f:
            pickle.dump(results, f)

        # Reset the state of the learner.
        learner.reset()
    pg.quit()
    return
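Example #4 re-pickles the results dict after every epoch, so an interrupted run loses at most one game. A minimal loader sketch for those files, assuming the same results/ directory and the timestamp chosen at launch (both are this example's conventions, not a shared API):

def load_results(timestamp):
    import pickle
    # Hypothetical: substitute the timestamp of the run you want to inspect.
    with open('results/results_approx_%d.p' % timestamp, 'rb') as f:
        results = pickle.load(f)
    return results

results = load_results(1234567890)  # placeholder timestamp
print(results['gamma'], results['eta'], results['epsilon_decay'], len(results['hist']))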
Example #5
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    print("epoch", "\t", "score", "\t", "high", "\t", "avg")
    highscore, avgscore = 0.0, 0.0
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)

        score = swing.score
        highscore = max([highscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)

        print(ii, "\t", score, "\t", highscore, "\t", avgscore)

        # Reset the state of the learner.
        learner.reset()

    return
Example #6
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        print("[Game #%d / Score: %d / " % (ii, swing.score), end="")
        # Train learner on the last game
        tic = time.time()
        learner.train(swing.score)
        toc = time.time()
        print("training time: %3.3f]" % float(toc - tic))

        # Reset last_state, last_action, last_reward, and game memory of the learner (learned parameters are retained).
        learner.reset()

    return
Example #7
def evaluate(gamma=0.4, iters=100, chatter=True):

    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):

        learner.epsilon = 1/(ii+1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max([highscore, score])
        avgscore = (ii*avgscore+score)/(ii+1)

        if chatter:
            print(ii, score, highscore, avgscore)

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
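Two details of Example #7 recur across these snippets. The schedule epsilon = 1/(ii+1) anneals exploration away after the first few dozen epochs, and the update avgscore = (ii*avgscore + score)/(ii+1) is the incremental form of the mean, so no score history needs to be stored. Returning -avgscore also makes evaluate directly usable as an objective for a minimizer. A quick standalone check of the incremental mean:

scores = [3, 7, 2, 9]
avg = 0.0
for ii, score in enumerate(scores):
    avg = (ii * avg + score) / (ii + 1)  # same update as in evaluate()
assert avg == sum(scores) / len(scores)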
Example #8
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):
        print "Epoch: %i |" % ii,
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        # Save score history.
        hist.append(swing.score)
        if learner.algo == "qlearn":
            q_filled = float(np.count_nonzero(
                learner.Q)) * 100 / learner.Q.size
            print('score: %d |' % swing.score, 'Q: %s' % str(round(
                q_filled, 3)) + "%")
        else:
            print('score %d' % swing.score)

        # Reset the state of the learner.
        learner.reset()
    return
Example #9
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():

            # This is where we build sarsa arrays utilizing learner.method()
            # You can get the action via learner.last_action (False=0/glide, True=1/jump)
            # You can get the state via learner.last_state
            # You can get the reward via learner.last_reward (0,+1 if pass, -5 if hit, -10 if fall off screen)
            # Can infer gravity by checking the monkey's velocity from time step to time step when the action is False
            #   (gravity is an integer 1, 2, 3, or 4); a sketch of this inference follows this example.

            pass
        
        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()
        
    return
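The comment block in Example #9 suggests inferring gravity from how the monkey's velocity changes between consecutive time steps when the agent glides. A minimal sketch of that idea, assuming states shaped like SwingyMonkey's get_state() dict; the sign of the velocity change depends on the game's coordinate convention, so the absolute value is taken here:

def infer_gravity(prev_state, curr_state, last_action):
    """Return the inferred gravity (1-4), or None when it cannot be inferred."""
    if last_action:  # a jump resets velocity, so only glide steps are informative
        return None
    dv = abs(curr_state['monkey']['vel'] - prev_state['monkey']['vel'])
    return dv if dv in (1, 2, 3, 4) else None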
Example #10
def run_game():
    # Make a new monkey object.
    swing = SwingyMonkey(visual=False,      # no video
                         sound=False,       # no audio
                         action_callback=learner_class.action_callback,
                         reward_callback=learner_class.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    return swing
Example #11
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    learner.alpha = 0.2
    learner.gamma = 0.6
    alpha = learner.alpha
    gamma = learner.gamma
    with open("test_Q2.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["alpha", "gamma", "epoch", "highest", "average", "score", "q"])

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)
        with open("test_Q2.csv", "a+", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(
                [[alpha, gamma, ii, highestscore, avgscore, score, q]])

        # Reset the state of the learner.
        learner.reset()

    pg.quit()
    return avgscore, highestscore, score
Example #12
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    record = {}
    record['epoch'] = []
    record['highest'] = []
    record['avg'] = []
    record['score'] = []
    record['q'] = []

    for ii in range(iters):

        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)
        pickle.dump(record, open("record12.p", "wb"))
        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
Example #13
def testgame(iters=100, show=True):

    learner = QLearner2()

    highestscore = 0
    avgscore = 0
    record = {}
    record['epoch'] = []
    record['highest'] = []
    record['avg'] = []
    record['score'] = []
    record['q'] = []

    for ii in range(iters):

        learner.epsilon = 1/(ii+1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max([highestscore, score])
        avgscore = (ii * avgscore + score) / (ii + 1)
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore, "current score:",
                  score, "average:", avgscore, "% of Q mx filled:", q)

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)
        pickle.dump(record, open("record12.p", "wb"))
        # Reset the state of the learner.
        learner.reset()
    

    return avgscore, highestscore, score
Example #14
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey()

        # Initialize history dictionaries for iteration ii
        hist['state'][ii] = []
        hist['action'][ii] = []
        hist['reward'][ii] = []

        # Loop until you hit something.
        while swing.game_loop():

            # This is where we build sarsa arrays utilizing learner.method()
            # You can get the action via learner.last_action (False=0/glide, True=1/jump)
            # You can get the state via learner.last_state
            # You can get the reward via learner.last_reward (0,+1 if pass, -5 if hit, -10 if fall off screen)
            # Can infer gravity by checking the monkey's velocity from time step to time step when the action is False
            #   (gravity is an integer 1, 2, 3, or 4)

            # import pdb
            # pdb.set_trace()

            hist['state'][ii].append(learner.last_state)
            hist['action'][ii].append(learner.last_action)
            hist['reward'][ii].append(learner.last_reward)

        else: # Get final action,reward and state just to see how the monkey failed.
            hist['state'][ii].append(learner.last_state)
            hist['action'][ii].append(learner.last_action)
            hist['reward'][ii].append(learner.last_reward)
        
        # Save score history.
        hist['score'].append(swing.score)

        # Reset the state of the learner.
        learner.reset()
        
    return
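Unlike the earlier examples, Example #14 treats hist as a dictionary of per-epoch histories rather than a flat score list, so the caller has to seed it with the right keys. A minimal initialization, inferred from the accesses above, with learner being whatever agent class this snippet pairs with:

hist = {'state': {}, 'action': {}, 'reward': {}, 'score': []}
run_games(learner, hist, iters=100, t_len=100)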
Example #15
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    if iters < 20:
        print "I can't learn that fast! Try more iterations."
    
    # DATA-GATHERING PHASE
    for ii in range(30):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.explore_action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass  
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
    
    # EXPLOITATION PHASE
    for ii in range(30, iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)
        # Loop until you hit something.
        while swing.game_loop():
            pass      
        # Save score history.
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
        
    return
Example #16
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    for ii in range(iters):

        # make a new monkey object
        swing = SwingyMonkey(sound=False,                  # don't play sounds
                             text="Epoch %d" % (ii),       # display the epoch on screen
                             tick_length = t_len,          # make game ticks super fast
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # pass the screen dimensions to the agent
        learner.update_specs(swing.screen_height, swing.screen_width)

        # loop until you hit something
        while swing.game_loop():
            pass

        # update transition to terminal state
        learner.update_terminal_transition()

        # save score history
        hist.append(swing.score)
        print('Epoch %i: current score %i; best score %i' % (ii, swing.score, np.max(hist)))

        # reset the state of the learner
        learner.reset()

    # display score history and stats
    print('----------')
    print('Parameters: %0.2f alpha; %0.2f gamma; %0.2f epsilon' % (learner.alpha, learner.gamma, learner.epsilon))
    print('Score history:', hist)
    print('Best score:', np.max(hist))
    print('Average score:', np.mean(hist))
    print('----------')

    return np.max(hist)
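Because this variant returns np.max(hist), it can serve as the objective in a crude hyperparameter sweep. A hedged sketch, where the Learner constructor taking alpha, gamma, and epsilon is an assumption about the accompanying agent class, not something shown in these snippets:

best = None
for alpha in (0.1, 0.2, 0.4):
    for gamma in (0.6, 0.9):
        learner = Learner(alpha=alpha, gamma=gamma, epsilon=0.1)  # hypothetical signature
        top_score = run_games(learner, [], iters=50, t_len=1)
        if best is None or top_score > best[0]:
            best = (top_score, alpha, gamma)
print('best (score, alpha, gamma):', best)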
Example #17
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
        
    
    high = 0
    avg = 0
    for ii in range(iters):

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        new_score = swing.score

        # Save score history.
        if new_score > high:
            high = new_score

        avg = (new_score + ii * avg) / (ii + 1.0)
        print("%i\t%i\t%i\t%s:\t%s" % (ii, new_score, high, avg, np.mean(learner.Q)))
        hist.append(swing.score)
        # Reset the state of the learner.
        learner.reset()
        
    print(learner.Q)
    print(learner.state_counts)
    return
Example #18
        return self.last_action

    def reward_callback(self, reward):
        self.last_reward = reward

iters = 100
nvars = 3
nstates = 10
alpha = 0.2
gamma = 0.9
epsil = 0.1
learner = Learner(nvars, nstates, alpha, gamma, epsil)

for ii in range(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=1,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        print(swing.get_state())

    # Reset the state of the learner.
    learner.reset()
    
Example #19
def run_games(learner, hist, policy="random", eps=0.9, gam=0.5, alph=0.75, iters=20, t_len=100):
    """
    Driver function to simulate learning by having the agent play a sequence of games.
    """
    # Place alpha and epsilon values into learner
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2

    # Initialize estimator for Q-function

    total_states = []
    total_actions = []
    total_rewards = []
    total_scores = []

    for ii in range(iters):
        # Make a new monkey object.

        if policy == "random":
            swing = SwingyMonkey(
                sound=False,
                text="Random Epoch %d" % (ii),
                tick_length=t_len,
                action_callback=learner.random_actions,
                reward_callback=learner.reward_callback,
            )

        else:
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text="Learned Epoch %d" % (ii),  # Display the epoch on screen.
                tick_length=t_len,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback,
            )

            learner.fitted = True

        # Initialize history lists for iteration ii
        states = []
        actions = []
        rewards = []
        loop_counter = 0

        # Loop until you hit something.
        while swing.game_loop():

            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action))
            rewards.append(learner.last_reward)

            if learner.learn_g and loop_counter > 1:
                learner.infer_g(states, actions)
                for pp in range(len(states)):
                    states[pp][-1] = learner.gravity

            loop_counter += 1

        else:  # Get final action,reward and state just to see how the monkey failed.
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action))
            rewards.append(learner.last_reward)

        # Append histories from most recent epoch, create training arrays
        total_scores.append(swing.score)
        total_states += states
        total_actions += actions
        total_rewards += rewards

        # Reset the state of the learner.
        learner.reset()

    hist["state_history"] = hist["state_history"] + total_states
    hist["action_history"] += total_actions
    hist["reward_history"] += total_rewards
    hist["score_history"] += total_scores

    return
Example #20
        return new_action

    def reward_callback(self, reward):
        '''This gets called so you can see what reward you get.'''

        self.last_reward = reward

iters = 10000
learner = Learner()

for ii in range(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         tick_length=1,          # Make game ticks super fast.
                         # Display the epoch on screen and % of Q matrix filled
                         text="Epoch %d " % (ii) + str(round(float(np.count_nonzero(learner.Q))*100/learner.Q.size,3)) + "%", 
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Keep track of the score for that epoch.
    learner.scores.append(learner.last_state['score'])
    if learner.last_state['score'] > learner.best_score:
        print('New best Q')
        learner.best_score = learner.last_state['score']
        learner.bestQ = learner.Q.copy()

    print('score %d' % learner.last_state['score'], str(round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)) + "%")
Example #21
    def reward_callback(self, reward):
        '''This gets called so you can see what reward you get.'''

        self.last_reward = reward

iters = 150
learner = Learner()
scores = []

for ii in range(iters):

    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=1,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    scores.append(swing.get_score())

    # Reset the state of the learner.
    learner.reset()

domain = np.arange(1, iters + 1, 1)
plt.plot(domain, scores)
plt.title("Scores over each Epoch (discount = " + str(learner.discount) + ")")
Example #22
def run_games(learner, hist, eps=0.5, gam=0.5, alph=0.75, iters=20, t_len=100, test=False):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''

    # Place alpha and epsilon values into learner
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2

    # Initialize estimator for Q-function

    total_states = []
    total_actions = []
    total_rewards = []
    total_scores = []

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,                  # Don't play sounds.
                             text="Epoch %d" % (ii),       # Display the epoch on screen.
                             tick_length = t_len,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Initialize history dictionaries for iteration ii
        states = []
        actions = []
        rewards = []
        loop_counter = 0

        # Loop until you hit something.
        while swing.game_loop():

            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action))
            rewards.append(learner.last_reward)

            if learner.learn_g and loop_counter > 1:
                learner.infer_g(states, actions)
                for pp in range(len(states)):
                    states[pp][-1] = learner.gravity

            loop_counter += 1

        else: # Get final action,reward and state just to see how the monkey failed.
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action))
            rewards.append(learner.last_reward)
        
        # Append histories from most recent epoch, create training arrays
        total_scores.append(swing.score)
        total_states += states
        total_actions += actions
        total_rewards += rewards
        
        if not test:

            # Iteratively refine the optimal policy after each epoch
            if ii == 0:
                X_train = np.array([np.append(total_states[kk],total_actions[kk]) for kk in range(len(total_states))])
                y_train = np.array(total_rewards)

                #Build tree using first stage Q-learning
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train, y_train)

            # Refit random forest estimator based on composite epochs
            
            else:
                # Generate new X (state, action) and y (reward) arrays from the newly run batch,
                # based on the Q-estimator and prior rewards, a la Ernst '06.
                X_train = np.array([np.append(total_states[kk], total_actions[kk]) for kk in range(len(total_rewards) - 1)])
                # Bellman backup: expected reward of each (state, action) given the next state.
                y_train = np.array([
                    learner.estimator.predict(np.append(total_states[kk], total_actions[kk]))
                    + learner.alph * (total_rewards[kk]
                                      + learner.gam * np.max([learner.estimator.predict(np.append(total_states[kk + 1], act))
                                                              for act in range(learner.num_actions)])
                                      - learner.estimator.predict(np.append(total_states[kk], total_actions[kk])))
                    for kk in range(len(total_states) - 1)])
                
                # Re-fit regression to refine optimal policy according to expected reward.
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train,y_train)

            # As we refine the policy, we should reduce the amount we explore.    
            if ii % 10 == 0:
                learner.eps += 0.05

            learner.estimator = extraTrees
            learner.fitted = True

        else:

            learner.fitted = True        

        # Reset the state of the learner.
        learner.reset()

    # Place state, action, reward and score histories to be saved by wrapper.    
    hist['state_history'] = total_states
    hist['action_history'] = total_actions
    hist['reward_history'] = total_rewards
    hist['score_history'] = total_scores 
    return
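The list comprehension above implements one sweep of fitted Q-iteration: each target is Q(s,a) + alph*(r + gam*max_a' Q(s',a') - Q(s,a)) under the previous tree ensemble. Note that recent scikit-learn releases require 2-D input to predict, so the 1-D np.append(...) calls would need reshaping; a sketch of computing a single target under that assumption:

import numpy as np

def bellman_target(estimator, s, a, r, s_next, num_actions, alph, gam):
    """One fitted-Q target: Q(s,a) + alph * (r + gam * max_a' Q(s',a') - Q(s,a))."""
    q_sa = estimator.predict(np.append(s, a).reshape(1, -1))[0]
    q_next = max(estimator.predict(np.append(s_next, act).reshape(1, -1))[0]
                 for act in range(num_actions))
    return q_sa + alph * (r + gam * q_next - q_sa)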
Example #23
        '''This gets called so you can see what reward you get.'''

        self.last_reward = reward


iters = 10000
learner = Learner()
scorelist = []

for ii in range(iters):
    learning_rate = (learning_rate_start + .5) / (iters / 100)

    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=1,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Store all values for min and max calcs -- only need to run once to get values for the find_state_bounds function, which saves these values.
    scorelist.append(swing.get_state()['score'])

    #print swing.get_state()
    # Reset the state of the learner.
    learner.reset()

# Calculate avg score for this approach
Example #24
score_cur = 0
ii = 0

# for ii in xrange(iters):

# learner.Q = np.load("Qmat_manual.npy")
# learner.learnTime = np.load("Lmat_manual.npy")

# while score_cur < 5000:
while ii < 1e5:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        text="Epoch %d" % (ii),  # Display the epoch on screen.
        tick_length=0,  # Make game ticks super fast.
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )

    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)
    score.append(score_cur)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)
Example #25
# formal learning step
iters = 10000
learner = Learner()
reward = []
score = []
score_cur = 0
ii = 0

#for ii in xrange(iters):

while score_cur < 100:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=0,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    score.append(swing.get_state()["score"])

    print "################### Score = " + \
          str(swing.get_state()["score"]) + " ########################"
    # Reset the state of the learner.
    learner.reset()
Example #26
def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.
    '''
    net_states = []
    net_rewards = []
    net_actions = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        iter_states = []
        iter_rewards = []
        iter_actions = []
        iter_count = 0
        while swing.game_loop():
            state = swing.get_state()
            iter_states.append(np.array(list(state['tree'].values()) +
                                        list(state['monkey'].values()) + [learner.gravity]))
            iter_rewards.append(learner.last_reward)
            iter_actions.append(int(learner.last_action))
            iter_count += 1
            if iter_count > 1 and not learner.know_gravity:
                learner.learn_gravity(iter_states, iter_actions)
                if learner.know_gravity:
                    for num in range(len(iter_states)):
                        iter_states[num][-1] = learner.gravity
        # Get the final state after the game ends.
        state = swing.get_state()
        iter_states.append(list(state['tree'].values()) +
                           list(state['monkey'].values()) + [learner.gravity])
        iter_rewards.append(learner.last_reward)
        iter_actions.append(int(learner.last_action))

        #Adding to the net training set
        net_states += iter_states
        net_rewards += iter_rewards
        net_actions += iter_actions

        if ii == 0:
            xtrain = build_training_set(net_states, net_actions)
            ytrain = np.array(net_rewards)
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        else:
            xtrain = build_training_set(net_states[:-1], net_actions[:-1])
            # Build the Q-value targets via a Bellman backup on the previous model.
            ytrain = np.array([
                learner.model.predict(np.append(net_states[k], net_actions[k]))
                + learner.alpha * (net_rewards[k]
                                   + learner.gamma * np.max([learner.model.predict(np.append(net_states[k + 1], int(action)))
                                                             for action in learner.actions])
                                   - learner.model.predict(np.append(net_states[k], net_actions[k])))
                for k in range(len(net_states) - 1)])
            RF = ExtraTreesRegressor(n_estimators=50)
            RF.fit(xtrain, ytrain)

        learner.model = RF
        learner.model_trained = True

        if ii % 10 == 0:
            learner.epsilon -= 0.05

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
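Example #26 calls a build_training_set helper that is not shown. Given how xtrain is consumed alongside np.append(state, action) at prediction time, it presumably stacks each state vector with its scalar action into one feature row; a minimal sketch under that assumption:

import numpy as np

def build_training_set(states, actions):
    # One row per time step: the state vector with its action appended,
    # matching the np.append(state, action) layout used when predicting.
    return np.array([np.append(s, a) for s, a in zip(states, actions)])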