def agent_step(self, reward, observation):
    """Show the current state and reward in the curses window, then read
    one keypress and turn an arrow key into a compass-direction action.

    Returns an Action whose charArray is ['N'|'S'|'W'|'E'] when an arrow
    key was pressed; otherwise the Action is returned empty.
    """
    direction = None

    self.window.erase()
    self.window.addstr('STATE: %s\n' % (observation.intArray))
    self.window.addstr('REWARD: %s\n' % (reward))
    self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
    self.window.refresh()

    try:
        key = self.window.getch()
        # Dispatch table: curses arrow key -> compass direction.
        arrow_to_dir = {
            curses.KEY_UP: 'N',
            curses.KEY_DOWN: 'S',
            curses.KEY_LEFT: 'W',
            curses.KEY_RIGHT: 'E',
        }
        direction = arrow_to_dir.get(key)
        self.window.refresh()
    except KeyboardInterrupt:
        # User interrupted: shut the RL-Glue session down cleanly.
        RLGlue.RL_cleanup()

    act = Action()
    if direction:
        act.charArray = [direction]
    return act
def run_experiment(maxsteps=100, numeps=1):
    """Run `numeps` RL-Glue episodes of at most `maxsteps` steps each,
    printing per-episode statistics, then clean the session up."""
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        natural_end = RLGlue.RL_episode(maxsteps)
        n_steps = RLGlue.RL_num_steps()
        ep_return = RLGlue.RL_return()
        print ("Episode " + str(ep) + "\t " + str(n_steps)
               + " steps \t" + str(ep_return)
               + " total reward\t " + str(natural_end) + " natural end")
    RLGlue.RL_cleanup()
def run(self):
    """ Run the experiment """
    # A previous run leaves an open RL-Glue session; close it first.
    if self.has_inited:
        RLGlue.RL_cleanup()
    self.instance += 1
    self.total_reward = 0
    self.episode_number = 0
    # Loop index is irrelevant; only the episode count matters.
    for _ in xrange(self.episodes):
        self.run_episode()
def start(self): print "\nExperiment starting!" taskSpec = RLGlue.RL_init() print taskSpec exp_params_for_agent = {} self.agent_params = self.message_agent(MessageType.exchange_params, exp_params_for_agent) # Keep overhead a bit lower by having functions inline def should_report(): self.step % args.report_freq == 0 def should_evaluate(): step % args.eval_freq == 0 and step > self.agent_params[ 'learn_start'] def should_save(): step % args.save_freq == 0 observ_action = RLGlue.RL_start() while self.step <= self.steps: observ_action_term = RLGlue.RL_step() # If game ends, start another if observ_action_term.terminal: # Not sure if we need to clean up after every episode, don't think so RLGlue.RL_start() self.n_train_episodes += 1 if should_report(): # TODO assert agent steps is equal print 'Steps: {}'.format(step) self.message_agent(MessageType.report) if should_evaluate(): pass if should_save(): pass print "A job well done." RLGlue.RL_cleanup()
def main(): whichTrainingMDP = 1 # Uncomment ONE of the following lines to choose your experiment #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19] #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9] #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5] loadMario(True, True, 121, 0, 99, whichTrainingMDP) # and then, # just run the experiment: RLGlue.RL_init() episodesToRun = 10 totalSteps = 0 for i in range(episodesToRun): RLGlue.RL_episode(20000) thisSteps = RLGlue.RL_num_steps() print "Total steps in episode %d is %d" % (i, thisSteps) totalSteps += thisSteps print "Total steps : %d\n" % (totalSteps) RLGlue.RL_cleanup()
def run(self):
    """ Run the experiment

    Cleans up any previous RL-Glue session, resets counters and the
    progress output, runs every episode, and prints the final summary.

    Fix relative to the original: the locals `pad` and `fmt` were
    computed but never used anywhere in this method (dead code) and
    have been removed.
    """
    if self.has_inited:
        RLGlue.RL_cleanup()
    self.instance += 1
    self.total_reward = 0
    self.episode_number = 0
    self.po.reset()

    for i in xrange(self.episodes):
        self.run_episode()

    # Emit the experiment summary through the progress-output helper,
    # then terminate its line.
    self.po.out(str(self))
    print
print str(experiment) # Store data to file env_name = RLGlue.RL_env_message('name') data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat') data_path = os.path.join(settings.results_dir, data_file) print print "Storing results into %s..." % (data_path), """ Save result data to file """ f = open(data_path, 'w') f.write("# Settings:\n") for k in dir(settings): if k.startswith('__'): continue f.write("# %s = %s\n" % (k, getattr(settings, k))) data = experiment.get_result_data() f.write(data) f.close() #experiment.save_result(data_path) print "Done!" RLGlue.RL_cleanup()
def runEpisode(step_limit):
    """Run one RL-Glue episode, tally its win/draw/lose outcome, and
    periodically aggregate statistics and write out the model.

    Fix relative to the original: `pcts_lose` was appended with
    `total_win`, so the lose-percentage curve silently duplicated the
    win-percentage curve; it now uses `total_lose`.
    """
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Start one game and play it to its end (or to step_limit).
    terminal = RLGlue.RL_episode(step_limit)

    # Steps taken and reward accumulated until the game was decided.
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Classify this episode's outcome by its terminal reward.
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0
    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Aggregate the win/draw/lose record every 100 episodes.
    record_interval = 100
    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the aggregate line to the results file.
        with open('result.txt', 'a') as f:
            f.writelines(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        # BUG FIX: original used total_win here.
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0

    # Write the model out every `model_write_interval` episodes.
    # NOTE(review): an older comment said "every 1000", but the actual
    # interval below is 200.
    model_write_interval = 200
    if which_episode % model_write_interval == 0:
        # Trigger the intermediate-result write-out.
        RLGlue.RL_cleanup()