def main():
    """
    Run the desired number of training epochs, a testing epoch is conducted
    after each training epoch.
    """
    parser = argparse.ArgumentParser(description='Neural rl experiment.')
    # All three options are plain integer flags; build them from a table.
    for flag, default, text in (
            ('--num_epochs', 100, 'Number of training epochs'),
            ('--epoch_length', 50000, 'Number of steps per epoch'),
            ('--test_length', 10000, 'Number of steps per test')):
        parser.add_argument(flag, type=int, default=default, help=text)
    args = parser.parse_args()

    RLGlue.RL_init()
    epoch = 1
    while epoch <= args.num_epochs:
        # Train, then immediately evaluate the frozen policy.
        RLGlue.RL_agent_message("training")
        run_epoch(epoch, args.epoch_length, "training")
        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, args.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
        epoch += 1
def main():
    """Run NUM_EPOCHS epochs; evaluation mode when 'test' is the first CLI arg."""
    RLGlue.RL_init()
    # Passing 'test' as the first command-line argument selects testing mode.
    test_mode = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in xrange(NUM_EPOCHS):
        if not test_mode:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))
            continue
        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, TEST_LENGTH, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
def runEpisode(stepLimit): global whichEpisode RLGlue.RL_agent_message('reset') terminal = RLGlue.RL_episode(stepLimit) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print("Episode " + str(whichEpisode)), print("\t " + str(totalSteps)), print(" steps \t" + str(totalReward)), print " total reward\t " + str(terminal) + " natural end" RLGlue.RL_agent_message('episode_end') whichEpisode = whichEpisode + 1
def run(self):
    """
    Alternate training and (optional) testing epochs.

    Performs self.num_epochs training epochs; after each one a testing
    epoch runs whenever self.test_length is positive.
    """
    RLGlue.RL_init()
    epoch = 0
    while epoch < self.num_epochs:
        epoch += 1
        self.run_epoch(epoch, self.epoch_length, "training")
        RLGlue.RL_agent_message("finish_epoch " + str(epoch))
        if self.test_length <= 0:
            continue
        RLGlue.RL_agent_message("start_testing")
        self.run_epoch(epoch, self.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
    """
    Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')
       collect_reward - when True and prefix is 'testing', step the
                        environment manually and tally rewards/episodes

    Returns:
       (total_reward, episode_counter) only on the collecting 'testing'
       path; the training path returns None.
    """
    steps_left = num_steps
    if prefix == "training" or not collect_reward:
        # Episode-at-a-time path: RL_episode consumes up to steps_left steps.
        while steps_left > 0:
            print prefix + " epoch: ", epoch, "steps_left: ", steps_left
            sys.stdout.flush()
            terminal = RLGlue.RL_episode(steps_left)
            # Falsy terminal means the step budget expired mid-episode;
            # tell the agent the episode was cut short.
            if not terminal:
                RLGlue.RL_agent_message("episode_end")
            steps_left -= RLGlue.RL_num_steps()
    elif prefix == "testing":
        total_reward = 0
        episode_counter = 0
        terminal = False
        # Step-at-a-time path so per-step rewards can be accumulated.
        # NOTE(review): RL_step is called without a visible RL_start here --
        # presumably an episode is already in progress or the glue restarts
        # one; confirm against the RL-Glue codec in use.
        while steps_left > 0:
            if terminal:
                # Progress line only at episode boundaries.
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
            roat = RLGlue.RL_step()
            reward = roat.r
            terminal = roat.terminal
            total_reward += reward
            # Relies on terminal being an int flag (0/1) to count episodes.
            episode_counter += terminal
            steps_left -= 1
        return total_reward, episode_counter
def run_epoch(epoch, num_steps, prefix): steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def main():
    # Run a very large number of bounded-length episodes back to back.
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    for episode in range(0,num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print "Episode finished.", time.time()
        #print "Score: ", RLGlue.RL_return()
        # NOTE(review): asks the agent to checkpoint after every episode;
        # the original (collapsed) formatting makes it ambiguous whether this
        # was meant to sit inside or after the loop -- confirm intent.
        RLGlue.RL_agent_message("save_data data.pkl");
def evaluateAgent():
    """Evaluate the current policy with learning frozen.

    Runs n episodes capped at episodeLength steps each and returns the
    (mean, standard_dev) of the per-episode returns.
    """
    # 'return_sum' instead of 'sum' so the builtin is not shadowed.
    return_sum = 0
    sum_of_squares = 0
    n = 10
    episodeLength = 100
    RLGlue.RL_agent_message("freeze learning")
    #print "FREEZE LEARNING"
    for i in range(0, n):
        # Use the named cap (previously a duplicated magic literal 100).
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        return_sum += this_return
        sum_of_squares += this_return**2
    # float(n) avoids Python 2 integer truncation when returns are ints.
    mean = return_sum / float(n)
    # Sample variance via the sum-of-squares identity (Bessel-corrected).
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    #print "UNFREEZE LEARNING"
    return mean, standard_dev
def evaluateAgent():
    """Freeze learning, sample n episode returns, unfreeze.

    Returns:
        (mean, standard_dev) of the sampled episode returns.
    """
    # Renamed from 'sum' to avoid shadowing the builtin; the dead
    # pre-initializations of this_return/mean/variance were dropped.
    return_sum = 0
    sum_of_squares = 0
    n = 10
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad and will
        # never end an episode on its own.
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        return_sum += this_return
        sum_of_squares += this_return**2
    # float(n) guards against Python 2 integer division on integer returns.
    mean = return_sum / float(n)
    # Sample variance via the sum-of-squares identity (Bessel-corrected).
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def evaluateAgent():
    """Evaluate the current (frozen) policy.

    Runs n capped episodes with learning frozen and returns the
    (mean, standard_dev) of their returns.
    """
    # Cleaned up: semicolon-terminated one-liners removed, 'sum' renamed so
    # the builtin is not shadowed, dead pre-initializations dropped.
    return_sum = 0
    sum_of_squares = 0
    n = 10
    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy is bad and will
        # never end an episode on its own.
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        return_sum += this_return
        sum_of_squares += this_return ** 2
    # float(n) prevents Python 2 integer truncation of the mean.
    mean = return_sum / float(n)
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)
    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
def run_epoch(epoch, num_steps, prefix): """ Run one 'epoch' of training or testing, where an epoch is defined by the number of steps executed. Prints a progress report after every trial Arguments: num_steps - steps per epoch prefix - string to print ('training' or 'testing') """ steps_left = num_steps while steps_left > 0: print prefix + " epoch: ", epoch, "steps_left: ", steps_left terminal = RLGlue.RL_episode(steps_left) if not terminal: RLGlue.RL_agent_message("episode_end") steps_left -= RLGlue.RL_num_steps()
def run(self, logtofile=False):
    """
    Run the desired number of training epochs; a testing epoch is
    conducted after each training epoch.

    Arguments:
        logtofile - when True, redirect stdout into a per-process log
                    file inside the experiment directory.
    """
    args = self.args
    # Unique per-run output directory: <dir>/<agent_prefix>_MM-DD-HH-MM_<agent_suffix>
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    experiment_dir = os.path.join(args.dir,
                                  args.agent_prefix + time_str + args.agent_suffix)
    # Create the directory only when missing.  The original wrapped os.stat
    # in a bare `except:`, which silently swallowed unrelated OS errors.
    if not os.path.isdir(experiment_dir):
        os.makedirs(experiment_dir)
    if logtofile:
        outfile = os.path.join(experiment_dir,
                               "experiment_" + str(os.getpid()) + ".out")
        sys.stdout = open(outfile, "w")
    results_file = self.open_results_file(experiment_dir)
    RLGlue.RL_init()
    RLGlue.RL_agent_message("set_dir " + experiment_dir)
    for epoch in range(1, args.num_epochs + 1):
        RLGlue.RL_agent_message("start_epoch " + str(epoch))
        self.run_epoch(epoch, args.epoch_length, "training")
        # The agent reports divergence as the literal string "True".
        diverging = RLGlue.RL_agent_message("query_divergence")
        if diverging == "True":
            print("The agent has diverged. Quitting.")
            rlterminate()
        RLGlue.RL_agent_message("finish_epoch " + str(epoch))
        if args.test_length > 0:
            RLGlue.RL_agent_message("start_testing")
            if args.collect_rewards:
                total_reward, num_episodes = self.run_epoch(
                    epoch, args.test_length, "testing", True)
                self.update_results_file(epoch, total_reward,
                                         num_episodes, results_file)
            else:
                self.run_epoch(epoch, args.test_length, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
    rlterminate()
# $Author$ # $HeadURL$ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_message") task_spec = RLGlue.RL_init() tester.check_fail("empty" != RLGlue.RL_env_message(None)) tester.check_fail("empty" != RLGlue.RL_env_message("")) tester.check_fail("empty" != RLGlue.RL_agent_message(None)) tester.check_fail("empty" != RLGlue.RL_agent_message("")) tester.check_fail("" != RLGlue.RL_env_message("empty")) tester.check_fail("" != RLGlue.RL_agent_message("empty")) theResponse = RLGlue.RL_env_message("null") tester.check_fail(not (theResponse != None or "" != theResponse)) theResponse = RLGlue.RL_agent_message("null") tester.check_fail(not (theResponse != None or "" != theResponse)) tester.check_fail("1" != RLGlue.RL_env_message("1")) tester.check_fail("1" != RLGlue.RL_agent_message("1"))
# $Author: gabalz $ # $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_1") task_spec = RLGlue.RL_init() RLGlue.RL_start() roat = RLGlue.RL_step() tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one")) tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one")) tester.check_fail(roat.terminal != 0) tester.check_fail(roat.r != 1.0) tester.check_fail(len(roat.o.intArray) != 1) tester.check_fail(len(roat.o.doubleArray) != 0) tester.check_fail(len(roat.o.charArray) != 0) tester.check_fail(roat.o.intArray[0] != 0) roat = RLGlue.RL_step() tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two")) tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two")) tester.check_fail(roat.terminal != 0) tester.check_fail(roat.r != 1.0) tester.check_fail(len(roat.o.intArray) != 1) tester.check_fail(len(roat.o.doubleArray) != 0) tester.check_fail(len(roat.o.charArray) != 0)
# Run experiment if __name__ == "__main__": import settings # Create a new experiment experiment = Experiment(settings.episodes) # Set up agent print "Agent settings:" keys = ['marble_count', 'marble_inc', 'marble_win_reward', 'marble_win_inc', 'marble_remove', 'save_to', 'load_from'] for k in keys: msg = '%s=%s' % (k, getattr(settings, k)) print "\t", msg RLGlue.RL_agent_message(msg) # Run experiments for i in xrange(settings.instances): print "Running experiment #%d with %d episodes..." % (i + 1, settings.episodes), sys.stdout.flush() experiment.run() # Experiment completed, show summary print "Done!" print str(experiment) # Store data to file env_name = RLGlue.RL_env_message('name') data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat')
experiment = Experiment(**settings.experiment) # Set up environment print print "Environment settings:" for k, v in settings.environment.items(): msg = 'set %s %s' % (k, v) print " ", msg RLGlue.RL_env_message(msg) # Set up agent print "Agent settings:" for k, v in settings.agent.items(): msg = 'set %s %s' % (k, v) print " ", msg RLGlue.RL_agent_message(msg) print # Run experiments for i in xrange(settings.experiment['instances']): experiment.run() #print str(experiment) #print str(experiment.returns) #print str(experiment.steps) print # Store data to file returns = experiment.returns
# NOTE(review): this chunk begins mid-statement -- the string below is the
# tail of a print(...) call from a runEpisode definition whose opening lines
# are outside this view.
" steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end")
whichEpisode = whichEpisode + 1

#Main Program starts here
print("\n\nExperiment starting up!")
taskSpec = RLGlue.RL_init()
print("RL_init called, the environment sent task spec: " + taskSpec)

print("\n\n----------Sending some sample messages----------")

#Talk to the agent and environment a bit...*/
responseMessage = RLGlue.RL_agent_message("what is your name?")
print("Agent responded to \"what is your name?\" with: " + responseMessage)
responseMessage = RLGlue.RL_agent_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Agent responded to \"If at first you don't succeed; call it version 1.0 \" with: "
    + responseMessage + "\n")
responseMessage = RLGlue.RL_env_message("what is your name?")
print("Environment responded to \"what is your name?\" with: " + responseMessage)
# NOTE(review): chunk is truncated here -- the final print(...) call is
# completed outside this view.
responseMessage = RLGlue.RL_env_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Environment responded to \"If at first you don't succeed; call it version 1.0 \" with: "
# # Just do a single evaluateAgent and print it # def single_evaluation(): this_score = evaluateAgent() printScore(0, this_score) print "Starting offline demo\n----------------------------\nWill alternate learning for 25 episodes, then freeze policy and evaluate for 10 episodes.\n" print "After Episode\tMean Return\tStandard Deviation\n-------------------------------------------------------------------------" RLGlue.RL_init() offlineDemo() print "\nNow we will save the agent's learned value function to a file...." RLGlue.RL_agent_message("save_policy results.dat") print "\nCalling RL_cleanup and RL_init to clear the agent's memory..." RLGlue.RL_cleanup() RLGlue.RL_init() print "Evaluating the agent's default policy:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------" single_evaluation() print "\nLoading up the value function we saved earlier." RLGlue.RL_agent_message("load_policy results.dat") print "Evaluating the agent after loading the value function:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------" single_evaluation()
def message_agent(self, msg, data=None):
    """Serialize (msg, data) into a Message and forward it to the agent.

    Returns the agent's raw string response.
    """
    payload = Message(msg, data).dumps()
    return RLGlue.RL_agent_message(payload)
# NOTE(review): this chunk begins mid-function -- the three indented lines
# below are the tail of a run_episode definition whose start is outside
# this view, and the while-loop body is truncated at the end of the chunk.
    csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch])
    print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec", "total_time:", total_minutes, "min"
    return num_steps, total_reward

RLGlue.RL_init()
while learned_episode < max_episode:
    epoch = int(learned_steps / time_steps_per_epoch)
    total_minutes = int(total_time / 60)
    # Periodic evaluation: every num_episode_between_evaluations episodes,
    # freeze the policy and run num_episode_per_evaluation greedy episodes.
    # NOTE(review): the guard tests total_episode != 0 while the modulus is
    # on learned_episode -- possibly intended to be the same counter; confirm.
    if learned_episode % num_episode_between_evaluations == 0 and total_episode != 0:
        if is_evaluation_phase is False:
            print "Freezing the policy for evaluation."
            RLGlue.RL_agent_message("freeze_policy")
            num_finished_eval_episode = 0
            is_evaluation_phase = True
        num_steps, total_reward = run_episode(training=False)
        evaluation_scores[num_finished_eval_episode] = total_reward
        num_finished_eval_episode += 1
        print "Evaluation (", num_finished_eval_episode, "/" , num_episode_per_evaluation, ") ::", "num_steps:", num_steps, "total_reward:", total_reward
        if num_finished_eval_episode == num_episode_per_evaluation:
            # Evaluation batch complete: log aggregates and resume learning.
            is_evaluation_phase = False
            csv_evaluation.append([learned_episode, np.mean(evaluation_scores), np.median(evaluation_scores), total_minutes, epoch])
            RLGlue.RL_agent_message("unfreeze_policy")
        else:
            # Stay in evaluation mode; skip the training work below.
            continue
    # Periodic checkpoint (body truncated past this view).
    if learned_episode % saving_freq == 0 and learned_episode != 0:
        print "Saving the model."
# NOTE(review): this chunk begins mid-function -- the four indented lines
# below are the tail of a definition (csv logging of list_csv) whose start
# is outside this view.
    f_csv = open('reward.csv', 'a')
    writer_r = csv.writer(f_csv, lineterminator = '\n')
    writer_r.writerow(list_csv)
    f_csv.close()

# Main Program starts here
print "\n\nDDQN-ALE Experiment starting up!"
RLGlue.RL_init()

while learningEpisode < max_learningEpisode:
    # Evaluate model every 10 episodes
    if np.mod(whichEpisode, 10) == 0:
        print "Freeze learning for Evaluation"
        RLGlue.RL_agent_message("freeze learning")
        runEpisode(is_learning_episode=False)
    else:
        print "DDQN is Learning"
        RLGlue.RL_agent_message("unfreeze learning")
        runEpisode(is_learning_episode=True)
    # Save model every 100 learning episodes
    if np.mod(learningEpisode, 100) == 0 and learningEpisode != 0:
        print "SAVE CURRENT MODEL"
        RLGlue.RL_agent_message("save model")

RLGlue.RL_cleanup()
print "Experiment COMPLETED @ Episode ", whichEpisode