def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Play one game
    terminal = RLGlue.RL_episode(step_limit)

    # Get the number of steps and the reward accumulated until the game was decided
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Show this episode's result
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0
    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1
    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally wins and losses every 100 episodes
    record_interval = 100
    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print('---------------------------------------------------------------')
        print(line)
        print('---------------------------------------------------------------')

        # Append the tally to a results file
        with open('result.txt', 'a') as f:
            f.write(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0

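# A minimal driver sketch for runEpisode above, assuming the RL-Glue Python
# codec is imported as RLGlue and that the module-level counters
# (which_episode, total_win, ...) have been initialized elsewhere. The
# episode count is illustrative only.
def run_all_episodes(num_episodes=1000):
    RLGlue.RL_init()
    for _ in range(num_episodes):
        runEpisode(0)  # a step limit of 0 means "run until the episode ends"
    RLGlue.RL_cleanup()
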
def main(): """ Run the desired number of training epochs, a testing epoch is conducted after each training epoch. """ parser = argparse.ArgumentParser(description='Neural rl experiment.') parser.add_argument('--num_epochs', type=int, default=100, help='Number of training epochs') parser.add_argument('--epoch_length', type=int, default=50000, help='Number of steps per epoch') parser.add_argument('--test_length', type=int, default=10000, help='Number of steps per test') args = parser.parse_args() RLGlue.RL_init() for epoch in range(1, args.num_epochs + 1): RLGlue.RL_agent_message("training") run_epoch(epoch, args.epoch_length, "training") RLGlue.RL_agent_message("start_testing") run_epoch(epoch, args.test_length, "testing") RLGlue.RL_agent_message("finish_testing " + str(epoch))
def run_episode(self):
    """ Run a single episode """

    # Update epsilon
    '''
    phase_len = self.episodes / 3
    if self.episode_number == phase_len * 2:
        # Start low phase
        RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_low))
    elif self.episode_number >= phase_len and self.episode_number < phase_len * 2:
        # In decr phase
        epsilon = float(RLGlue.RL_agent_message('get epsilon'))
        epsilon += (self.epsilon_decr - self.epsilon_high) / phase_len
        RLGlue.RL_agent_message('set epsilon %f' % (epsilon))
    elif self.episode_number == 0:
        # Start high phase
        RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_high))
    '''

    terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()
    #print "\nEpisode %d\t %d steps\t reward: %d" % (self.episode_number, steps, reward)

    # Update the running averages across experiment instances
    self.returns[self.episode_number] = (
        reward + self.returns[self.episode_number] * (self.instance - 1)) / self.instance
    self.steps[self.episode_number] = (
        steps + self.steps[self.episode_number] * (self.instance - 1)) / self.instance
    self.episode_number += 1

def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')
    """
    steps_left = num_steps
    if prefix == "training" or not collect_reward:
        while steps_left > 0:
            print prefix + " epoch: ", epoch, "steps_left: ", steps_left
            sys.stdout.flush()
            terminal = RLGlue.RL_episode(steps_left)
            if not terminal:
                RLGlue.RL_agent_message("episode_end")
            steps_left -= RLGlue.RL_num_steps()
    elif prefix == "testing":
        total_reward = 0
        episode_counter = 0
        terminal = False
        while steps_left > 0:
            if terminal:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
            roat = RLGlue.RL_step()
            reward = roat.r
            terminal = roat.terminal
            total_reward += reward
            episode_counter += terminal
            steps_left -= 1
        return total_reward, episode_counter

def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(10000)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode %d/%d\t %d steps \t %.1f total reward\t" % (
            learningEpisode, whichEpisode, totalSteps, totalReward)
    else:
        print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps,
                                                                 totalReward)
        with open('eval_dump.json', 'a') as f:
            json.dump(
                {
                    "Steps": totalSteps,
                    "Episode": whichEpisode,
                    "Reward": totalReward
                }, f)
            f.write('\n')

    return totalSteps

def run_epoch(epoch, num_steps, prefix):
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()

def runEpisode(stepLimit, trial):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Experiment " + str(trial + 1) + "\t Episode " + str(whichEpisode) + \
        "\t " + str(totalSteps) + " steps \t" + str(totalReward) + \
        " total reward\t " + str(terminal) + " natural end"
    whichEpisode += 1

def recordTrajectory():
    RLGlue.RL_start()
    trajectory = []
    while True:
        roat = RLGlue.RL_step()
        trajectory.append(roat)
        if roat.terminal:
            break
    return trajectory

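# Illustrative helper built on recordTrajectory above (a sketch, not part of
# the original experiment): each recorded element is the
# reward/observation/action/terminal struct returned by RL_step, so the
# undiscounted return of the episode is just the sum of the rewards.
def trajectoryReturn():
    trajectory = recordTrajectory()
    return sum(step.r for step in trajectory)
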
def main():
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    for episode in range(0, num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print "Episode finished.", time.time()
        #print "Score: ", RLGlue.RL_return()
        RLGlue.RL_agent_message("save_data data.pkl")

def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t " + str(terminal) + " natural end"
    RLGlue.RL_cleanup()

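# Example invocation of run_experiment above; the step and episode counts
# are illustrative only.
if __name__ == "__main__":
    run_experiment(maxsteps=500, numeps=10)
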
def main():
    RLGlue.RL_init()
    is_testing = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in xrange(NUM_EPOCHS):
        if is_testing:
            RLGlue.RL_agent_message("start_testing")
            run_epoch(epoch, TEST_LENGTH, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
        else:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

def runEpisode(stepLimit):
    # A stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Episode " + str(whichEpisode) + "\t " + str(totalSteps) + \
        " steps \t" + str(totalReward) + " total reward\t "
    whichEpisode += 1

def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Episode " + str(whichEpisode) + "\t " + str(totalSteps) + \
        " steps \t" + str(totalReward) + " total reward\t " + \
        str(terminal) + " natural end"
    RLGlue.RL_agent_message('episode_end')
    whichEpisode += 1

def run(self): """ Run the desired number of training epochs, a testing epoch is conducted after each training epoch. """ RLGlue.RL_init() for epoch in range(1, self.num_epochs + 1): self.run_epoch(epoch, self.epoch_length, "training") RLGlue.RL_agent_message("finish_epoch " + str(epoch)) if self.test_length > 0: RLGlue.RL_agent_message("start_testing") self.run_epoch(epoch, self.test_length, "testing") RLGlue.RL_agent_message("finish_testing " + str(epoch))
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(totalSteps) +
              " steps \t" + str(totalReward) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(totalSteps) + " steps \t" +
              str(totalReward) + " total reward\t ")

def demo():
    statistics = []
    episodeLength = 100
    #this_score = evaluateAgent()
    #printScore(0, this_score)
    #statistics.append(this_score)
    for i in range(1, 1000):
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        print "%d\t\t%.2f" % (i, this_return)
        statistics.append(this_return)
    saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")

def run_episode(self):
    """ Run a single episode """
    terminal = RLGlue.RL_episode(10)
    steps = RLGlue.RL_num_steps()
    reward = RLGlue.RL_return()

    self.total_reward += reward

    # Update the running average across experiment instances
    x = self.total_reward / (self.episode_number + 1)
    self.results[self.episode_number] += (
        x - self.results[self.episode_number]) / self.instance
    self.episode_number += 1

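# Side note on the update in run_episode above: "avg += (x - avg) / k" is the
# incremental form of the arithmetic mean. A tiny self-contained check with
# made-up numbers (illustrative only):
def _check_incremental_mean():
    xs = [3.0, 1.0, 4.0, 1.0, 5.0]
    avg = 0.0
    for k, x in enumerate(xs, start=1):
        avg += (x - avg) / k
    assert abs(avg - sum(xs) / len(xs)) < 1e-12
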
def agent_step(self, reward, observation):
    action = None
    self.window.erase()
    self.window.addstr('STATE: %s\n' % (observation.intArray))
    self.window.addstr('REWARD: %s\n' % (reward))
    self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
    self.window.refresh()
    try:
        c = self.window.getch()
        if c == curses.KEY_UP:
            action = 'N'
        elif c == curses.KEY_DOWN:
            action = 'S'
        elif c == curses.KEY_LEFT:
            action = 'W'
        elif c == curses.KEY_RIGHT:
            action = 'E'
        self.window.refresh()
    except KeyboardInterrupt:
        RLGlue.RL_cleanup()
    a = Action()
    if action:
        a.charArray = [action]
    return a

def __init__(self, episodes=100):
    """ Initialize experiment """
    self.episodes = episodes
    self.results = [0] * episodes
    RLGlue.RL_init()
    self.has_inited = True

def run_epoch(epoch, num_steps, prefix):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')
    """
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()

def start(self):
    print "\nExperiment starting!"
    taskSpec = RLGlue.RL_init()
    print taskSpec

    exp_params_for_agent = {}
    self.agent_params = self.message_agent(MessageType.exchange_params,
                                           exp_params_for_agent)

    # Keep overhead a bit lower by having functions inline
    def should_report():
        return self.step % args.report_freq == 0

    def should_evaluate():
        return (self.step % args.eval_freq == 0
                and self.step > self.agent_params['learn_start'])

    def should_save():
        return self.step % args.save_freq == 0

    observ_action = RLGlue.RL_start()
    while self.step <= self.steps:
        observ_action_term = RLGlue.RL_step()
        self.step += 1

        # If the game ends, start another
        if observ_action_term.terminal:
            # Not sure if we need to clean up after every episode; don't think so
            RLGlue.RL_start()
            self.n_train_episodes += 1

        if should_report():
            # TODO assert agent steps is equal
            print 'Steps: {}'.format(self.step)
            self.message_agent(MessageType.report)
        if should_evaluate():
            pass
        if should_save():
            pass

    print "A job well done."
    RLGlue.RL_cleanup()

def run(self): """ Run the experiment """ if self.has_inited: RLGlue.RL_cleanup() self.instance += 1 self.total_reward = 0 self.episode_number = 0 for i in xrange(self.episodes): self.run_episode()
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    n = 10
    episodeLength = 100

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2
    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")

    return mean, standard_dev

def __init__(self, **kwargs):
    """ Initialize experiment """
    for k, v in kwargs.items():
        setattr(self, k, v)
    self.returns = [0] * self.episodes
    self.steps = [0] * self.episodes
    self.po = ProgressOutput()
    RLGlue.RL_init()
    self.has_inited = True

def offlineDemo():
    this_score = evaluateAgent()
    printScore(0, this_score)

    # Truncate the results file and clear any previous archive
    theFile = open("results.csv", "w")
    theFile.close()
    if os.path.isfile("Archive.csv"):
        os.remove('Archive.csv')

    for i in range(0, 200):
        for j in range(0, 50):
            RLGlue.RL_episode(0)
            RLGlue.RL_env_message("stop print")
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
                printScore((i + 1) * 50, this_score)
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        theFile = open("results.csv", "a")
        theFile.write("%d\t%.2f\t%.2f\n" % ((i) * 50, this_score[0],
                                            this_score[1]))
        theFile.close()
    os.rename('results.csv', 'Archive.csv')

def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + \
            " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t "

    # Append the episode's reward to a CSV file
    list_csv = [str(learningEpisode), str(totalReward)]
    f_csv = open('reward.csv', 'a')
    writer_r = csv.writer(f_csv, lineterminator='\n')
    writer_r.writerow(list_csv)
    f_csv.close()

def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        logger.info("{},{},{},{}".format(
            dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode,
            totalSteps, totalReward))
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + \
            " steps \t" + str(totalReward) + " total reward\t " + \
            dt.now().strftime("%Y%m%d_%H%M%S")
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + \
            str(totalReward) + " total reward\t "

def main():
    whichTrainingMDP = 1

    # Uncomment ONE of the following lines to choose your experiment
    #loadTetris(whichTrainingMDP)      # desired parameter set; MDP is in [0,19]
    #loadHelicopter(whichTrainingMDP)  # desired parameter set; MDP is in [0,9]
    #loadAcrobot(whichTrainingMDP)     # desired parameter set; MDP is in [1,49]; 0 is standard acrobot
    #loadPolyathlon(whichTrainingMDP)  # desired parameter set; MDP is in [0,5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # ... and then just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps
    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()

def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad
        # and would never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2
    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")

    return mean, standard_dev

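# The running-sums expression in evaluateAgent above is the textbook shortcut
# for the sample variance: (sum_of_squares - n * mean^2) / (n - 1). A small
# self-contained check against the definitional form, using made-up returns:
def _check_variance_shortcut():
    returns = [1.0, -0.5, 1.0, 1.0, -1.0]
    n = len(returns)
    mean = sum(returns) / float(n)
    shortcut = (sum(r * r for r in returns) - n * mean * mean) / (n - 1.0)
    definition = sum((r - mean) ** 2 for r in returns) / (n - 1.0)
    assert abs(shortcut - definition) < 1e-12
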
def offlineDemo():
    statistics = []
    this_score = evaluateAgent()
    printScore(0, this_score)
    statistics.append(this_score)
    for i in range(0, 20):
        for j in range(0, 25):
            RLGlue.RL_episode(0)
        this_score = evaluateAgent()
        printScore((i + 1) * 25, this_score)
        statistics.append(this_score)
    saveResultToCSV(statistics, "results.csv")

def rlterminate():
    RLGlue.doCallWithNoParams(RLGlue.Network.kRLTerm)