def run_games(learner, hist, iters=100, t_len=100):
    """
    Let ``learner`` play ``iters`` consecutive games of SwingyMonkey.

    For every game a new SwingyMonkey instance is created with the
    learner's action/reward callbacks and ``t_len`` as its tick length.
    After each game tick the current game state is stored on
    ``learner.last_state``.  When a game ends its score is appended to
    ``hist`` and the learner is reset.  Nothing is returned.
    """
    for epoch in range(iters):
        # Fresh game for this epoch, driven entirely by the learner.
        game = SwingyMonkey(
            sound=False,                  # no audio
            text="Epoch %d" % (epoch),    # epoch counter shown on screen
            tick_length=t_len,            # tick length chosen by the caller
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Step the game until it reports that it is over, mirroring the
        # latest state onto the learner after every tick.
        while game.game_loop():
            learner.last_state = game.get_state()

        # Record the result, then clear the learner's per-game state.
        hist.append(game.score)
        learner.reset()
def evaluate(gamma=0.4, iters=100, chatter=True):
    """Train a fresh TDValueLearner for ``iters`` games and score the run.

    Args:
        gamma: Value assigned to ``learner.gamma`` before the first game.
        iters: Number of games to play.
        chatter: When true, print ``epoch score highscore avgscore`` after
            every game.

    Returns:
        The negated running-average score over all games (presumably
        negated so the function can be fed to a minimiser -- confirm with
        the caller).
    """
    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0
    for ii in range(iters):
        # Exploration rate decays as 1/(epoch + 1).  The float literal is
        # required: with Python 2 integer division this evaluated to 1 for
        # the first game and 0 for every later one.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=1,                # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max(highscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)

        if chatter:
            # Single formatted string: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s %s" % (ii, score, highscore, avgscore))

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
def evaluate(gamma=0.4, iters=100, chatter=True):
    """Play ``iters`` games with a new TDValueLearner and report the result.

    Args:
        gamma: Assigned to ``learner.gamma`` before training starts.
        iters: How many games to play.
        chatter: If true, print one progress line per game containing the
            epoch index, that game's score, the best score so far and the
            running average.

    Returns:
        Minus the running-average score.  NOTE(review): the sign flip looks
        intended for use as a minimisation objective -- confirm.
    """
    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):
        games_played = ii + 1

        # Use true division here.  Under Python 2, ``1 / (ii + 1)`` is
        # integer division, which switched exploration off (epsilon == 0)
        # from the second game onwards.
        learner.epsilon = 1.0 / games_played

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=1,                # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        if score > highscore:
            highscore = score
        # Running mean updated in place; avgscore is a float from the start.
        avgscore = (ii * avgscore + score) / games_played

        if chatter:
            # One pre-formatted string keeps the output identical whether
            # print is a statement (Python 2) or a function (Python 3).
            print("%s %s %s %s" % (ii, score, highscore, avgscore))

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
def testgame(iters=100, show=True):
    """Train a QLearner2 for ``iters`` games, logging progress to CSV.

    The learner is created with ``alpha = 0.2`` and ``gamma = 0.6``.
    ``test_Q2.csv`` is truncated and given a header row, then one row is
    appended after every game so the log survives an interrupted run.
    ``pg.quit()`` is called once all games are done (``pg`` is presumably
    pygame -- confirm against the file's imports).

    Args:
        iters: Number of games to play.
        show: When truthy, print a progress line after every game.

    Returns:
        Tuple ``(avgscore, highestscore, score)``: the running-average
        score, the best score and the score of the last game.  All three
        are 0 when no game was played.
    """
    learner = QLearner2()
    learner.alpha = 0.2
    learner.gamma = 0.6
    alpha = learner.alpha
    gamma = learner.gamma

    highestscore = 0
    avgscore = 0
    # Bound up front so that iters <= 0 returns cleanly instead of raising
    # UnboundLocalError at the final return.
    score = 0

    # Start a fresh log containing only the header.
    with open("test_Q2.csv", "w", newline='') as csvfile:
        csv.writer(csvfile).writerow(
            ["alpha", "gamma", "epoch", "highest", "average", "score", "q"])

    for ii in range(iters):
        # Exploration rate decays as 1/(epoch + 1).
        learner.epsilon = 1 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=1,                # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)
        # Percentage of Q entries that are non-zero, rounded to 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            print("epoch:", ii, "highest:", highestscore,
                  "current score:", score, "average:", avgscore,
                  "% of Q mx filled:", q)

        # Append (and close) once per game so each row reaches disk
        # even if a later game crashes.
        with open("test_Q2.csv", "a", newline='') as csvfile:
            csv.writer(csvfile).writerow(
                [alpha, gamma, ii, highestscore, avgscore, score, q])

        # Reset the state of the learner.
        learner.reset()

    pg.quit()
    return avgscore, highestscore, score
def testgame(iters=100, show=True):
    """Train a QLearner2 for ``iters`` games and checkpoint the statistics.

    After every game the per-epoch history (epoch index, best score,
    running average, game score and Q-table fill percentage) is pickled to
    ``record12.p``, overwriting the previous checkpoint.

    Args:
        iters: Number of games to play.
        show: When truthy, print a progress line after every game.

    Returns:
        Tuple ``(avgscore, highestscore, score)``: the running-average
        score, the best score and the score of the last game.  With
        ``iters <= 0`` no game is played and ``(0.0, 0, 0)`` is returned.
    """
    learner = QLearner2()
    highestscore = 0
    # Float so the running mean is not truncated by Python 2 integer
    # division when the scores are ints.
    avgscore = 0.0
    # Bound up front so the final return cannot raise UnboundLocalError.
    score = 0
    record = {'epoch': [], 'highest': [], 'avg': [], 'score': [], 'q': []}

    for ii in range(iters):
        # Exploration decays as 1/(epoch + 1).  The float literal keeps this
        # from collapsing to 0 after the first game under Python 2.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=1,                # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        avgscore = (ii * avgscore + score) / (ii + 1)
        # Percentage of Q entries that are non-zero, rounded to 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            # Pre-formatted so the line reads the same under the Python 2
            # print statement and the Python 3 print function.
            print("epoch: %s highest: %s current score: %s average: %s "
                  "%% of Q mx filled: %s"
                  % (ii, highestscore, score, avgscore, q))

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)

        # Context manager closes the checkpoint file; the original passed an
        # anonymous open() handle to pickle.dump and never closed it.
        with open("record12.p", "wb") as checkpoint:
            pickle.dump(record, checkpoint)

        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
def testgame(iters=100, show=True):
    """Run ``iters`` training games with a new QLearner2.

    A history dict with the keys ``epoch``, ``highest``, ``avg``, ``score``
    and ``q`` gains one entry per game and is re-pickled to ``record12.p``
    after every game, so an interrupted run still leaves a checkpoint.

    Args:
        iters: Number of games to play.
        show: When truthy, print a one-line summary after each game.

    Returns:
        ``(avgscore, highestscore, score)`` -- running mean of the scores,
        best score seen, and the last game's score.  ``(0.0, 0, 0)`` if no
        game was played.
    """
    learner = QLearner2()
    highestscore = 0
    avgscore = 0.0   # float: avoids a truncated mean under Python 2
    score = 0        # defined even when the loop body never runs
    record = {key: [] for key in ('epoch', 'highest', 'avg', 'score', 'q')}

    for ii in range(iters):
        games_played = ii + 1

        # True division is needed here: the integer form ``1 / (ii + 1)``
        # is 0 for every game after the first under Python 2, which
        # disables exploration.
        learner.epsilon = 1.0 / games_played

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=1,                # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        avgscore = (ii * avgscore + score) / games_played
        # Share of non-zero Q entries, in percent, rounded to 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            # Joined into one string so Python 2 and Python 3 print the
            # same space-separated line.
            print(" ".join(str(part) for part in (
                "epoch:", ii, "highest:", highestscore,
                "current score:", score, "average:", avgscore,
                "% of Q mx filled:", q)))

        for key, value in (('epoch', ii), ('highest', highestscore),
                           ('avg', avgscore), ('score', score), ('q', q)):
            record[key].append(value)

        # Open through a context manager so the handle is closed after each
        # checkpoint rather than leaked once per game.
        with open("record12.p", "wb") as checkpoint:
            pickle.dump(record, checkpoint)

        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
learning_rate = (learning_rate_start + .5) / (iters / 100) # Make a new monkey object. swing = SwingyMonkey( sound=False, # Don't play sounds. text="Epoch %d" % (ii), # Display the epoch on screen. tick_length=1, # Make game ticks super fast. action_callback=learner.action_callback, reward_callback=learner.reward_callback) # Loop until you hit something. while swing.game_loop(): pass #store all values for mins and max calcs -- only need to run once to get values for the find_state_bounds function which saves these values scorelist.append(swing.get_state()['score']) #print swing.get_state() # Reset the state of the learner. learner.reset() #calculate avg score for this approach print numpy.average(scorelist) # the function below finds the min/max values for each variable to deterime bin ranges #def find_state_bounds(): # print scorelist # seq_tree = [x['tree'] for x in scorelist] # seq_tree_min= min(seq_tree) # seq_tree_max= max(seq_tree)
# Play `iters` games, collecting the final score of each one.
for ii in xrange(iters):
    learning_rate = (learning_rate_start + .5) / (iters / 100)

    # One new game per epoch, wired to the learner's callbacks.
    swing = SwingyMonkey(
        sound=False,                  # no audio
        text="Epoch %d" % (ii),       # epoch counter shown on screen
        tick_length=1,                # shortest tick, for fast training
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Run the game to completion.
    while swing.game_loop():
        pass

    # Keep every final score.  The full states only had to be captured once,
    # to derive the min/max values used by find_state_bounds.
    scorelist.append(swing.get_state()['score'])
    # print swing.get_state()

    # Clear the learner's per-game state before the next epoch.
    learner.reset()

# Average score achieved by this approach.
print(numpy.average(scorelist))

# The (disabled) function below finds the min/max values for each variable
# to determine bin ranges.
# def find_state_bounds():
#     print scorelist
#     seq_tree = [x['tree'] for x in scorelist]
iters = 10000
learner = Learner()

reward = []      # learner.last_reward at the end of each game
score = []       # final score of each game
score_cur = 0
ii = 0

# Open-ended training: keep starting new games until one of them finishes
# with a score of at least 100 (the fixed-count loop is disabled).
# for ii in xrange(iters):
while score_cur < 100:
    ii += 1

    # One new game per epoch, wired to the learner's callbacks.
    swing = SwingyMonkey(
        sound=False,                  # no audio
        text="Epoch %d" % (ii),       # epoch counter shown on screen
        tick_length=0,                # zero tick length, for fast training
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Run the game to completion.
    while swing.game_loop():
        pass

    reward.append(learner.last_reward)

    # The final score drives the stopping condition, the history and the
    # progress banner.
    score_cur = swing.get_state()["score"]
    score.append(score_cur)
    print("################### Score = " +
          str(score_cur) +
          " ########################")

    # Clear the learner's per-game state before the next epoch.
    learner.reset()
def _state_vector(state, gravity):
    """Flatten a game state into [tree values..., monkey values..., gravity]."""
    # list(...) is required: on Python 3 dict.values() returns views, which
    # do not support ``+``.
    return np.array(list(state['tree'].values()) +
                    list(state['monkey'].values()) + [gravity])


def _predict_one(model, state, action):
    """Return the model's prediction (shape (1,)) for one state/action pair."""
    # Estimators expect a 2-D (n_samples, n_features) input, so the single
    # sample is presented as one row.
    return model.predict(np.append(state, action).reshape(1, -1))


def _q_targets(learner, states, actions, rewards):
    """Build Q-learning regression targets for every sample but the last."""
    targets = []
    for k in range(len(states) - 1):
        current = _predict_one(learner.model, states[k], actions[k])
        best_next = np.max([_predict_one(learner.model, states[k + 1], int(action))
                            for action in learner.actions])
        # Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        targets.append(current + learner.alpha *
                       (rewards[k] + learner.gamma * best_next - current))
    return np.array(targets)


def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a
    sequence of games.

    Every tick's state vector, reward and action are accumulated across all
    games.  After each game a new ExtraTreesRegressor is fitted on the whole
    accumulated set and installed as ``learner.model``: on raw rewards after
    the first game, on Q-learning targets computed with the previous model
    afterwards.  ``learner.epsilon`` is lowered by 0.05 on every tenth epoch
    (including epoch 0).

    learner -- agent providing the callbacks plus ``gravity``,
               ``know_gravity``, ``learn_gravity``, ``alpha``, ``gamma``,
               ``actions`` and ``reset``.
    hist    -- list that receives the final score of each game.
    iters   -- number of games to play.
    t_len   -- tick length handed to SwingyMonkey.

    Returns None.
    '''
    net_states = []
    net_rewards = []
    net_actions = []

    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,                  # Don't play sounds.
            text="Epoch %d" % (ii),       # Display the epoch on screen.
            tick_length=t_len,            # Tick length chosen by the caller.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something, logging one sample per tick.
        iter_states = []
        iter_rewards = []
        iter_actions = []
        iter_count = 0
        while swing.game_loop():
            iter_states.append(_state_vector(swing.get_state(), learner.gravity))
            iter_rewards.append(learner.last_reward)
            iter_actions.append(int(learner.last_action))
            iter_count += 1

            # Gravity can only be inferred once at least two ticks are seen.
            if iter_count > 1 and not learner.know_gravity:
                learner.learn_gravity(iter_states, iter_actions)
            # Overwrite the gravity placeholder stored in earlier samples of
            # this game with the learned value.
            if learner.know_gravity:
                for vector in iter_states:
                    vector[-1] = learner.gravity

        # Also record the state the game ended in.  It is stored as an array
        # like every other sample (the original appended a plain list here).
        iter_states.append(_state_vector(swing.get_state(), learner.gravity))
        iter_rewards.append(learner.last_reward)
        iter_actions.append(int(learner.last_action))

        # Adding to the net training set.
        net_states += iter_states
        net_rewards += iter_rewards
        net_actions += iter_actions

        if ii == 0:
            # No model exists yet: regress directly on the observed rewards.
            xtrain = build_training_set(net_states, net_actions)
            ytrain = np.array(net_rewards)
        else:
            # The last sample has no successor, so it is left out.
            xtrain = build_training_set(net_states[:-1], net_actions[:-1])
            ytrain = _q_targets(learner, net_states, net_actions, net_rewards)

        RF = ExtraTreesRegressor(n_estimators=50)
        RF.fit(xtrain, ytrain)
        learner.model = RF
        learner.model_trained = True

        if ii % 10 == 0:
            learner.epsilon -= 0.05

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()

    return
# Main training loop: one game per pass until the epoch counter reaches 1e5.
while ii < 1e5:
    ii += 1

    # One new game per epoch, wired to the learner's callbacks.
    swing = SwingyMonkey(
        sound=False,                  # no audio
        text="Epoch %d" % (ii),       # epoch counter shown on screen
        tick_length=0,                # zero tick length, for fast training
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )

    # Run the game to completion.
    while swing.game_loop():
        pass

    # Collect the end-of-game figures first ...
    score_cur = swing.get_state()["score"]
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)

    # ... then extend the per-epoch histories.
    reward.append(learner.last_reward)
    score.append(score_cur)
    result.append(result_cur)
    Qnorm.append(qnorm)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)

    # Q-table occupancy: entries that are non-zero, and entries that are
    # greater than -inf.
    State = np.sum(learner.Q != 0)
    totalState = np.sum(learner.Q > -np.inf)

    # Back the Q table up to disk on every 50th epoch.
    if ii > 0 and ii % 50 == 0:
        np.save(data_dir + "Qmat_backup.npy", learner.Q)
# Alternative stopping rule (disabled): while score_cur < 5000:
# Active rule: play one game per pass until the epoch counter reaches 1e5.
while ii < 1e5:
    ii += 1

    # Build this epoch's game; the learner supplies actions and gets rewards.
    swing = SwingyMonkey(
        sound=False,                  # no audio
        text="Epoch %d" % (ii),       # epoch counter shown on screen
        tick_length=0,                # zero tick length, for fast training
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Play until the game reports that it is over.
    while swing.game_loop():
        pass

    # End-of-game measurements.
    score_cur = swing.get_state()["score"]
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)

    # Per-epoch histories.
    reward.append(learner.last_reward)
    score.append(score_cur)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)
    result.append(result_cur)
    Qnorm.append(qnorm)

    # Number of non-zero Q entries, and number of Q entries above -inf.
    State = np.sum(learner.Q != 0)
    totalState = np.sum(learner.Q > -np.inf)

    # Every 50 epochs, save a backup copy of the Q table.
    if ii > 0 and ii % 50 == 0:
        np.save(data_dir + "Qmat_backup.npy", learner.Q)
return self.last_action def reward_callback(self, reward): self.last_reward = reward iters = 100 nvars = 3 nstates = 10 alpha = 0.2 gamma = 0.9 epsil = 0.1 learner = Learner(nvars, nstates, alpha, gamma, epsil) for ii in xrange(iters): # Make a new monkey object. swing = SwingyMonkey(sound=False, # Don't play sounds. text="Epoch %d" % (ii), # Display the epoch on screen. tick_length=1, # Make game ticks super fast. action_callback=learner.action_callback, reward_callback=learner.reward_callback) # Loop until you hit something. while swing.game_loop(): print swing.get_state() pass # Reset the state of the learner. learner.reset()