def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
    """
    Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       epoch          - epoch index (used only in the progress print)
       num_steps      - steps per epoch
       prefix         - string to print ('training' or 'testing')
       collect_reward - when True and prefix == 'testing', drive the
                        environment one RL_step at a time and return
                        (total_reward, episode_counter); otherwise run
                        whole episodes and return None
    """
    steps_left = num_steps
    if prefix == "training" or not collect_reward:
        # Episode-at-a-time mode: let RL-Glue run full episodes until
        # the step budget is exhausted.
        while steps_left > 0:
            print prefix + " epoch: ", epoch, "steps_left: ", steps_left
            sys.stdout.flush()
            terminal = RLGlue.RL_episode(steps_left)
            if not terminal:
                # Episode was truncated by the step budget; tell the
                # agent so it can finish its per-episode bookkeeping.
                RLGlue.RL_agent_message("episode_end")
            steps_left -= RLGlue.RL_num_steps()
    elif prefix == "testing":
        # Step-at-a-time mode: accumulate reward and count finished
        # episodes ourselves.
        total_reward = 0
        episode_counter = 0
        terminal = False
        while steps_left > 0:
            if terminal:
                # Previous step ended an episode; report progress.
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
            roat = RLGlue.RL_step()
            reward = roat.r
            terminal = roat.terminal
            total_reward += reward
            episode_counter += terminal  # terminal is 0/1
            steps_left -= 1
        return total_reward, episode_counter
def runEpisode(is_learning_episode): global whichEpisode, learningEpisode RLGlue.RL_episode(10000) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() whichEpisode += 1 if is_learning_episode: learningEpisode += 1 #print "Episode " + str(learningEpisode) + "/" + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " print "Episode %d/%d\t %d steps \t %.1f total reward\t" % ( learningEpisode, whichEpisode, totalSteps, totalReward) else: #print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps, totalReward) with open('eval_dump.json', 'a') as f: json.dump( { "Steps": totalSteps, "Episode": whichEpisode, "Reward": totalReward }, f) f.write('\n') return totalSteps
def run_episode(self):
    """Run a single episode to termination and fold its return and step
    count into the per-episode running averages for this instance."""
    # (A three-phase epsilon-annealing schedule used to live here; it
    # was disabled and has been removed.)
    terminal = RLGlue.RL_episode(0)  # 0 = no step limit; run to terminal
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    ep = self.episode_number
    n = self.instance
    # Incremental mean over instances: new = (x + old * (n - 1)) / n
    self.returns[ep] = (reward + self.returns[ep] * (n - 1)) / n
    self.steps[ep] = (steps + self.steps[ep] * (n - 1)) / n
    self.episode_number += 1
def runEpisode(step_limit):
    """Run one game episode, tally win/draw/lose from the final reward,
    and every 100 episodes print and record the aggregated results.

    Arguments:
        step_limit - maximum steps for RL_episode (0 means no limit)
    """
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Play one game to completion (or to the step limit).
    terminal = RLGlue.RL_episode(step_limit)

    # Steps taken and reward accumulated until the game was decided.
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Classify the outcome by the terminal reward value.
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0
    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    # Show this episode's result.
    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Aggregate the win/lose record every 100 episodes.
    record_interval = 100
    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the aggregated summary to the result file.
        with open('result.txt', 'a') as f:
            f.writelines(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        # BUG FIX: this series previously appended the *win* percentage
        # (float(total_win)/...), so the lose curve duplicated the win
        # curve.  It now records the lose percentage.
        pcts_lose.append(float(total_lose) / record_interval * 100)

        # Reset the counters for the next interval.
        total_win = 0
        total_draw = 0
        total_lose = 0
def run_epoch(epoch, num_steps, prefix): steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def runEpisode(stepLimit, trial): global whichEpisode terminal=RLGlue.RL_episode(stepLimit) totalSteps=RLGlue.RL_num_steps() totalReward=RLGlue.RL_return() print "Experiment "+str(trial + 1)+"\t Episode "+str(whichEpisode)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end" whichEpisode=whichEpisode+1
def run_experiment(maxsteps=100, numeps=1): taskSpec = RLGlue.RL_init() for ep in range(numeps): terminal = RLGlue.RL_episode(maxsteps) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Episode " + str(ep) + "\t " + str( totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + str( terminal) + " natural end" RLGlue.RL_cleanup()
def runEpisode(stepLimit): # stepLimit of 0 implies no limit global whichEpisode terminal = RLGlue.RL_episode(stepLimit) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Episode " + str(whichEpisode) + "\t " + str( totalSteps) + " steps \t" + str(totalReward) + " total reward\t " whichEpisode = whichEpisode + 1
def runEpisode(stepLimit):
    # Run one episode bracketed by explicit 'reset' / 'episode_end'
    # agent messages, and print a one-line summary.
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    # Python 2 print statements with trailing commas: the four prints
    # below build up a single output line (softspace supplies the
    # separators), so do not merge or reorder them.
    print("Episode " + str(whichEpisode)),
    print("\t " + str(totalSteps)),
    print(" steps \t" + str(totalReward)),
    print " total reward\t " + str(terminal) + " natural end"
    RLGlue.RL_agent_message('episode_end')
    whichEpisode = whichEpisode + 1
def run_episode(self):
    """Run one episode (capped at 10 steps) and fold its reward into
    the averaged per-episode results for this instance."""
    terminal = RLGlue.RL_episode(10)
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    self.total_reward += reward

    # Average total reward per episode so far, blended across
    # instances with an incremental-mean update.
    ep = self.episode_number
    mean_so_far = self.total_reward / (ep + 1)
    self.results[ep] += (mean_so_far - self.results[ep]) / self.instance
    self.episode_number += 1
def runEpisode(is_learning_episode):
    """Run one unbounded episode and print a summary line, labelled as
    either a training episode or an evaluation run."""
    global whichEpisode, learningEpisode
    RLGlue.RL_episode(0)  # 0 = run until the episode ends naturally
    n_steps = RLGlue.RL_num_steps()
    ep_return = RLGlue.RL_return()
    whichEpisode += 1
    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(n_steps)
              + " steps \t" + str(ep_return) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(n_steps) + " steps \t"
              + str(ep_return) + " total reward\t ")
def run_epoch(epoch, num_steps, prefix): """ Run one 'epoch' of training or testing, where an epoch is defined by the number of steps executed. Prints a progress report after every trial Arguments: num_steps - steps per epoch prefix - string to print ('training' or 'testing') """ steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def runEpisode(is_learning_episode): global whichEpisode, learningEpisode RLGlue.RL_episode(0) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() whichEpisode += 1 if is_learning_episode: learningEpisode += 1 print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " else: print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t " # write reward in csv file list_csv = [str(learningEpisode), str(totalReward)] f_csv = open('reward.csv', 'a') writer_r = csv.writer(f_csv, lineterminator = '\n') writer_r.writerow(list_csv) f_csv.close()
def runEpisode(is_learning_episode): global whichEpisode, learningEpisode RLGlue.RL_episode(0) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() whichEpisode += 1 if is_learning_episode: learningEpisode += 1 logger.info("{},{},{},{}".format( dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode, totalSteps, totalReward)) print "Episode " + str(learningEpisode) + "\t " + str( totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + dt.now().strftime( "%Y%m%d_%H%M%S") else: print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str( totalReward) + " total reward\t "
def main(): whichTrainingMDP = 1 # Uncomment ONE of the following lines to choose your experiment #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19] #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9] #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5] loadMario(True, True, 121, 0, 99, whichTrainingMDP) # and then, # just run the experiment: RLGlue.RL_init() episodesToRun = 10 totalSteps = 0 for i in range(episodesToRun): RLGlue.RL_episode(20000) thisSteps = RLGlue.RL_num_steps() print "Total steps in episode %d is %d" % (i, thisSteps) totalSteps += thisSteps print "Total steps : %d\n" % (totalSteps) RLGlue.RL_cleanup()
def run_episode(training=True): global total_episode, learned_episode, total_time, learned_steps, csv_episode, highscore, num_finished_eval_episode, evaluation_scores start_time = time.time() RLGlue.RL_episode(0) num_steps = RLGlue.RL_num_steps() total_reward = RLGlue.RL_return() total_episode += 1 elapsed_time = time.time() - start_time total_time += elapsed_time epoch = int(learned_steps / time_steps_per_epoch) if training: learned_steps += num_steps learned_episode += 1 sec = int(elapsed_time) total_minutes = int(total_time / 60) csv_episode.append([learned_episode, total_reward, num_steps, sec, total_minutes, epoch, learned_steps]) if total_reward > highscore: highscore = total_reward csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch]) print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec", "total_time:", total_minutes, "min" return num_steps, total_reward
# $Revision$ # $Date$ # $Author$ # $HeadURL$ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_rl_episode") task_spec = RLGlue.RL_init() isTerminal = RLGlue.RL_episode(0) tester.check_fail(isTerminal != 1) tester.check_fail(RLGlue.RL_num_steps() != 5) isTerminal = RLGlue.RL_episode(1) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 1) isTerminal = RLGlue.RL_episode(2) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 2) isTerminal = RLGlue.RL_episode(4) tester.check_fail(isTerminal != 0) tester.check_fail(RLGlue.RL_num_steps() != 4) isTerminal = RLGlue.RL_episode(5) tester.check_fail(isTerminal != 0)
""" Manual experiment for testing the environment """ import sys import os import time from rlglue import RLGlue # Initialize RL Glue RLGlue.RL_init() RLGlue.RL_env_message('debug=True') RLGlue.RL_start() running = True reward = 0 while running: result = RLGlue.RL_step() running = not result.terminal steps = RLGlue.RL_num_steps() R = RLGlue.RL_return() print 'Experiment ended after %d steps with a return of %d' % (steps, R) RLGlue.RL_cleanup()
def runEpisode(stepLimit): global whichEpisode terminal = RLGlue.RL_episode(stepLimit) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Experiment " + str(args.expid) + "\t Episode " + str( whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + str(terminal) + " natural end" whichEpisode = whichEpisode + 1 #Main Program starts here # Remember that stepLimit of 0 means there is no limit at all!*/ for t in range(args.numtrials): print 'trial: ' + str(t) whichEpisode = 0 taskSpec = RLGlue.RL_init() steps = np.zeros(args.numeps) rews = np.zeros(args.numeps) for ep in range(args.numeps): runEpisode(args.maxsteps) steps[ep] = RLGlue.RL_num_steps() rews[ep] = RLGlue.RL_return() print 'trial finished, final reward: ' + str(rews[-1]) #with open(args.path+'/'+args.logname+'_'+str(args.expid)+'_'+str(t)+'.pkl','w') as f: # pickle.dump((steps,rews),f,-1) RLGlue.RL_cleanup()
print("\n\n----------Stepping through an episode----------") #We could also start over and do another experiment */ taskSpec = RLGlue.RL_init() #We could run one step at a time instead of one episode at a time */ #Start the episode */ startResponse = RLGlue.RL_start() firstObservation = startResponse.o.intArray[0] firstAction = startResponse.a.intArray[0] print("First observation and action were: " + str(firstObservation) + " and: " + str(firstAction)) #Run one step */ stepResponse = RLGlue.RL_step() #Run until the episode ends*/ while (stepResponse.terminal != 1): stepResponse = RLGlue.RL_step() #if (stepResponse.terminal != 1) #Could optionally print state,action pairs */ #printf("(%d,%d) ",stepResponse.o.intArray[0],stepResponse.a.intArray[0])*/ print("\n\n----------Summary----------") totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print("It ran for " + str(totalSteps) + " steps, total reward was: " + str(totalReward)) RLGlue.RL_cleanup()
RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_b.append(reward/50) avg_steps_b.append(num_of_steps/50) ''' avg_steps_c = [] avg_reward_c = [] RLGlue.RL_env_message("set-start-state 2") for i in range(100): num_of_steps = 0 reward = 0 for j in range(50): RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_c.append(reward / 50) avg_steps_c.append(num_of_steps / 50) ''' plt.plot(avg_steps_a,'r') plt.ylabel('Average_Steps') plt.xlabel('Number of 50 episode runs') plt.title('Average_steps of A') plt.show() plt.plot(avg_reward_a,'r') plt.ylabel('Average_Reward') plt.xlabel('Number of 50 episode runs') plt.title('Average_Reward of A')