def run_epoch(self, epoch, num_steps, prefix, collect_reward=False): """ Run one 'epoch' of training or testing, where an epoch is defined by the number of steps executed. Prints a progress report after every trial Arguments: num_steps - steps per epoch prefix - string to print ('training' or 'testing') """ steps_left = num_steps if prefix == "training" or not collect_reward: while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left sys.stdout.flush() terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps() elif prefix == "testing": total_reward = 0 episode_counter = 0 terminal = False while steps_left > 0: if terminal: print prefix + " epoch: ", epoch, "steps_left: ", steps_left sys.stdout.flush() roat = RLGlue.RL_step() reward = roat.r terminal = roat.terminal total_reward += reward episode_counter += terminal steps_left -= 1 return total_reward, episode_counter
def runEpisode(step_limit):
    """Run one game episode and accumulate win/draw/lose statistics.

    Every 100 episodes the aggregated results are printed, appended to
    result.txt, and folded into the module-level percentage lists.

    Arguments:
    step_limit -- max steps passed to RL_episode (0 means no limit)
    """
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose
    which_episode += 1

    # Play one game to completion (or until the step limit).
    terminal = RLGlue.RL_episode(step_limit)

    # Steps taken and total reward for this game.
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Map the terminal reward onto a win / draw / lose outcome.
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0
    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Aggregate the results every 100 episodes.
    record_interval = 100
    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the aggregate line to the result file.
        with open('result.txt', 'a') as f:
            f.writelines(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        # BUG FIX: this previously appended the WIN percentage again,
        # so the lose curve silently tracked the win curve.
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0
def runEpisode(is_learning_episode): global whichEpisode, learningEpisode RLGlue.RL_episode(10000) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() whichEpisode += 1 if is_learning_episode: learningEpisode += 1 #print "Episode " + str(learningEpisode) + "/" + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " print "Episode %d/%d\t %d steps \t %.1f total reward\t" % ( learningEpisode, whichEpisode, totalSteps, totalReward) else: #print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps, totalReward) with open('eval_dump.json', 'a') as f: json.dump( { "Steps": totalSteps, "Episode": whichEpisode, "Reward": totalReward }, f) f.write('\n') return totalSteps
def run_episode(self):
    """Run a single episode and fold its return and step count into the
    per-episode running averages across instances."""
    # (A disabled three-phase epsilon schedule used to live here.)
    terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    ep = self.episode_number
    prev_count = self.instance - 1
    # Incremental mean over instances: new = (x + old * (k-1)) / k
    self.returns[ep] = (reward + self.returns[ep] * prev_count) / self.instance
    self.steps[ep] = (steps + self.steps[ep] * prev_count) / self.instance
    self.episode_number += 1
def run_epoch(epoch, num_steps, prefix): steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def runEpisode(stepLimit, trial): global whichEpisode terminal=RLGlue.RL_episode(stepLimit) totalSteps=RLGlue.RL_num_steps() totalReward=RLGlue.RL_return() print "Experiment "+str(trial + 1)+"\t Episode "+str(whichEpisode)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end" whichEpisode=whichEpisode+1
def main():
    """Run a long stream of capped episodes, then ask the agent to save."""
    episode_cap = 50000        # max steps per episode
    episode_count = 10000000
    RLGlue.RL_init()
    for _ in range(episode_count):
        RLGlue.RL_episode(episode_cap)
    # Persist whatever the agent has learned.
    RLGlue.RL_agent_message("save_data data.pkl")
def run_experiment(maxsteps=100, numeps=1): taskSpec = RLGlue.RL_init() for ep in range(numeps): terminal = RLGlue.RL_episode(maxsteps) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Episode " + str(ep) + "\t " + str( totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + str( terminal) + " natural end" RLGlue.RL_cleanup()
def runEpisode(stepLimit): # stepLimit of 0 implies no limit global whichEpisode terminal = RLGlue.RL_episode(stepLimit) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Episode " + str(whichEpisode) + "\t " + str( totalSteps) + " steps \t" + str(totalReward) + " total reward\t " whichEpisode = whichEpisode + 1
def offlineDemo():
    """Alternate 25-episode training blocks with evaluations; save to CSV."""
    baseline = evaluateAgent()
    printScore(0, baseline)
    statistics = [baseline]
    for block in range(20):
        for _ in range(25):
            RLGlue.RL_episode(0)
        score = evaluateAgent()
        printScore((block + 1) * 25, score)
        statistics.append(score)
    saveResultToCSV(statistics, "results.csv")
def runEpisode(stepLimit): global whichEpisode RLGlue.RL_agent_message('reset') terminal = RLGlue.RL_episode(stepLimit) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print("Episode " + str(whichEpisode)), print("\t " + str(totalSteps)), print(" steps \t" + str(totalReward)), print " total reward\t " + str(terminal) + " natural end" RLGlue.RL_agent_message('episode_end') whichEpisode = whichEpisode + 1
def runEpisode(is_learning_episode):
    """Run one unlimited episode and report it as learning or evaluation."""
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)
    step_count = RLGlue.RL_num_steps()
    episode_return = RLGlue.RL_return()
    whichEpisode += 1
    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(step_count) +
              " steps \t" + str(episode_return) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(step_count) + " steps \t" +
              str(episode_return) + " total reward\t ")
def demo(): statistics = [] episodeLength = 100 #this_score = evaluateAgent() #printScore(0, this_score) #statistics.append(this_score) for i in range(1, 1000): RLGlue.RL_env_message("set-start-state " + S) RLGlue.RL_start() RLGlue.RL_episode(episodeLength) this_return = RLGlue.RL_return() print "%d\t\t%.2f" % (i, this_return) statistics.append(this_return) saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
def run_episode(self):
    """Run one episode (capped at 10 steps) and update the averaged
    cumulative-reward curve across instances."""
    terminal = RLGlue.RL_episode(10)
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    self.total_reward += reward
    # Mean cumulative reward so far for this instance...
    mean_so_far = self.total_reward / (self.episode_number + 1)
    idx = self.episode_number
    # ...folded incrementally into the cross-instance average.
    self.results[idx] += (mean_so_far - self.results[idx]) / self.instance
    self.episode_number += 1
def run_epoch(epoch, num_steps, prefix): """ Run one 'epoch' of training or testing, where an epoch is defined by the number of steps executed. Prints a progress report after every trial Arguments: num_steps - steps per epoch prefix - string to print ('training' or 'testing') """ steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def evaluateAgent():
    """Evaluate the frozen agent over n capped episodes.

    Freezes learning, runs n episodes of at most episodeLength steps each,
    then unfreezes and returns (mean, sample standard deviation) of the
    episode returns.
    """
    return_sum = 0       # renamed from 'sum' to stop shadowing the builtin
    sum_of_squares = 0
    n = 10
    episodeLength = 100
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # FIX: the cap was a hard-coded 100, silently ignoring
        # episodeLength; use the named constant.
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        return_sum += this_return
        sum_of_squares += this_return**2
    mean = return_sum / n
    # Sample variance via the sum-of-squares identity.
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def runEpisode(is_learning_episode): global whichEpisode, learningEpisode RLGlue.RL_episode(0) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() whichEpisode += 1 if is_learning_episode: learningEpisode += 1 logger.info("{},{},{},{}".format( dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode, totalSteps, totalReward)) print "Episode " + str(learningEpisode) + "\t " + str( totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + dt.now().strftime( "%Y%m%d_%H%M%S") else: print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str( totalReward) + " total reward\t "
def offlineDemo():
    # Evaluate the untrained agent once as a baseline score.
    this_score = evaluateAgent()
    printScore(0, this_score)
    # Truncate results.csv and drop any previous archive.
    theFile = open("results.csv", "w")
    theFile.close()
    if os.path.isfile("Archive.csv"):
        os.remove('Archive.csv')
    # 200 blocks of 50 training episodes, evaluating after each block.
    for i in range(0, 200):
        for j in range(0, 50):
            RLGlue.RL_episode(0)
            RLGlue.RL_env_message("stop print")
            # Every 20 episodes (after the first block), re-enable env
            # printing and re-show the most recent score.
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
                printScore((i + 1) * 50, this_score)
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        # NOTE(review): the CSV row is labelled (i) * 50 while printScore
        # above uses (i + 1) * 50 — confirm which episode count is intended.
        theFile = open("results.csv", "a")
        theFile.write("%d\t%.2f\t%.2f\n" %
                      ((i) * 50, this_score[0], this_score[1]))
        theFile.close()
    # Keep the finished run under a stable archive name.
    os.rename('results.csv', 'Archive.csv')
def runEpisode(is_learning_episode):
    # Run one unlimited episode and report it as learning or evaluation;
    # every episode's reward is also appended to reward.csv.
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1
    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    # write reward in csv file
    # NOTE(review): rows are keyed by learningEpisode even for evaluation
    # episodes, so eval rows reuse the last learning index — confirm intended.
    list_csv = [str(learningEpisode), str(totalReward)]
    f_csv = open('reward.csv', 'a')
    writer_r = csv.writer(f_csv, lineterminator = '\n')
    writer_r.writerow(list_csv)
    f_csv.close()
def offlineDemo(): statistics = [] this_score = evaluateAgent() printScore(0, this_score) statistics.append(this_score) for i in range(0, 20): for j in range(0, 25): RLGlue.RL_episode(0) this_score = evaluateAgent() printScore((i + 1) * 25, this_score) statistics.append(this_score) print "Printing out sample trajectory" trajectory = recordTrajectory() for roat in trajectory: print "State:", roat.o.intArray[0], "Action:", roat.a.intArray[ 0], "Reward:", roat.r print "Done" saveResultToCSV(statistics, "results.csv")
def main(): whichTrainingMDP = 1 # Uncomment ONE of the following lines to choose your experiment #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19] #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9] #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5] loadMario(True, True, 121, 0, 99, whichTrainingMDP) # and then, # just run the experiment: RLGlue.RL_init() episodesToRun = 10 totalSteps = 0 for i in range(episodesToRun): RLGlue.RL_episode(20000) thisSteps = RLGlue.RL_num_steps() print "Total steps in episode %d is %d" % (i, thisSteps) totalSteps += thisSteps print "Total steps : %d\n" % (totalSteps) RLGlue.RL_cleanup()
def evaluateAgent():
    """Evaluate the frozen agent over n cut-off episodes.

    Freezes learning, runs n episodes capped at 5000 steps, then unfreezes
    and returns (mean, sample standard deviation) of the episode returns.
    """
    return_total = 0     # renamed from 'sum' to stop shadowing the builtin
    sum_of_squares = 0
    n = 10
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad and will never
        # end an episode.
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        return_total += this_return
        sum_of_squares += this_return**2
    mean = return_total / n
    # Sample variance via the sum-of-squares identity.
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def evaluateAgent():
    """Evaluate the frozen agent over n cut-off episodes.

    Freezes learning, runs n episodes capped at 5000 steps, then unfreezes
    and returns (mean, sample standard deviation) of the episode returns.
    """
    # Cleanup: dropped the C-style trailing semicolons and renamed 'sum'
    # (which shadowed the builtin); behavior is unchanged.
    return_total = 0
    sum_of_squares = 0
    n = 10
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad and will never
        # end an episode.
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        return_total += this_return
        sum_of_squares += this_return**2
    mean = return_total / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def run_episode(training=True):
    # Run one unlimited episode, time it, and (when training) record the
    # per-episode stats and any new high score into the CSV buffers.
    # Returns (num_steps, total_reward).
    global total_episode, learned_episode, total_time, learned_steps, csv_episode, highscore, num_finished_eval_episode, evaluation_scores
    start_time = time.time()
    RLGlue.RL_episode(0)
    num_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()
    total_episode += 1
    elapsed_time = time.time() - start_time
    total_time += elapsed_time
    # NOTE: epoch is computed from learned_steps BEFORE this episode's steps
    # are added below, so the episode is attributed to the epoch it started in.
    epoch = int(learned_steps / time_steps_per_epoch)
    if training:
        learned_steps += num_steps
        learned_episode += 1
        sec = int(elapsed_time)
        total_minutes = int(total_time / 60)
        csv_episode.append([learned_episode, total_reward, num_steps, sec, total_minutes, epoch, learned_steps])
        # Track the best training score seen so far.
        if total_reward > highscore:
            highscore = total_reward
            csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch])
        print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec", "total_time:", total_minutes, "min"
    return num_steps, total_reward
# limitations under the License. # # $Revision$ # $Date$ # $Author$ # $HeadURL$ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_rl_episode") task_spec = RLGlue.RL_init() isTerminal = RLGlue.RL_episode(0) tester.check_fail(isTerminal != 1) tester.check_fail(RLGlue.RL_num_steps() != 5) isTerminal = RLGlue.RL_episode(1) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 1) isTerminal = RLGlue.RL_episode(2) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 2) isTerminal = RLGlue.RL_episode(4) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 4)
for j in range(50): RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_b.append(reward/50) avg_steps_b.append(num_of_steps/50) ''' avg_steps_c = [] avg_reward_c = [] RLGlue.RL_env_message("set-start-state 2") for i in range(100): num_of_steps = 0 reward = 0 for j in range(50): RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_c.append(reward / 50) avg_steps_c.append(num_of_steps / 50) ''' plt.plot(avg_steps_a,'r') plt.ylabel('Average_Steps') plt.xlabel('Number of 50 episode runs') plt.title('Average_steps of A') plt.show() plt.plot(avg_reward_a,'r') plt.ylabel('Average_Reward') plt.xlabel('Number of 50 episode runs')
''' avg_steps_c = [] avg_reward_c = [] RLGlue.RL_env_message("set-start-state 2") for i in range(50): num_of_steps = 0 reward = 0 j = 0 jobs = [] while j < 20: #p=multiprocessing.Process(target=RLGlue.RL_episode,args=(100000,)) #jobs.append(p) #p.start RLGlue.RL_episode(100000) j = j + 1 if RLGlue.RL_return() == 0: j = j - 1 else: num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() print RLGlue.RL_return() avg_reward_c.append(reward / 50) avg_steps_c.append(num_of_steps / 50) file = open("values.txt", 'w') file.write(str(avg_steps_c)) file.write(" ") file.write(str(avg_reward_c))
#
# $Revision: 617 $
# $Date: 2009-02-05 02:24:12 -0700 (Thu, 05 Feb 2009) $
# $Author: gabalz $
# $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_rl_episode_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test

# Exercise RL_episode with and without a step cap against the test pair.
tester = glue_test("test_rl_episode")
task_spec = RLGlue.RL_init()

# With no step limit the episode ends naturally after 5 steps.
isTerminal = RLGlue.RL_episode(0)
tester.check_fail(isTerminal != 1)
tester.check_fail(RLGlue.RL_num_steps() != 5)

# With a cap below 5 the episode is cut off at exactly the cap.
for cap in (1, 2, 4):
    isTerminal = RLGlue.RL_episode(cap)
    tester.check_fail(isTerminal != 0)
    tester.check_fail(RLGlue.RL_num_steps() != cap)