def __init__(self, episodes=100):
    """Set up the experiment: allocate one result slot per episode and
    bring up the RL-Glue connection."""
    self.episodes = episodes
    # Per-episode results, all starting at zero.
    self.results = [0 for _ in range(episodes)]
    RLGlue.RL_init()
    self.has_inited = True
def main():
    """Parse command-line options, then alternate training and testing
    epochs for the requested number of epochs."""
    parser = argparse.ArgumentParser(description='Neural rl experiment.')
    parser.add_argument('--num_epochs', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--epoch_length', type=int, default=50000,
                        help='Number of steps per epoch')
    parser.add_argument('--test_length', type=int, default=10000,
                        help='Number of steps per test')
    args = parser.parse_args()

    RLGlue.RL_init()
    epoch = 1
    while epoch <= args.num_epochs:
        # Tell the agent which phase it is in, then run that phase.
        RLGlue.RL_agent_message("training")
        run_epoch(epoch, args.epoch_length, "training")
        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, args.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
        epoch += 1
def main():
    """Run a fixed number of capped-length episodes, then ask the agent
    to persist its learned data."""
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    episode = 0
    while episode < num_episodes:
        RLGlue.RL_episode(max_steps_per_episode)
        episode += 1
    RLGlue.RL_agent_message("save_data data.pkl")
def main():
    """Run NUM_EPOCHS epochs of training, or of testing when the first
    command-line argument is 'test'."""
    RLGlue.RL_init()
    testing = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in xrange(NUM_EPOCHS):
        if not testing:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))
        else:
            RLGlue.RL_agent_message("start_testing")
            run_epoch(epoch, TEST_LENGTH, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
def run_experiment(maxsteps=100, numeps=1): taskSpec = RLGlue.RL_init() for ep in range(numeps): terminal = RLGlue.RL_episode(maxsteps) totalSteps = RLGlue.RL_num_steps() totalReward = RLGlue.RL_return() print "Episode " + str(ep) + "\t " + str( totalSteps) + " steps \t" + str( totalReward) + " total reward\t " + str( terminal) + " natural end" RLGlue.RL_cleanup()
def __init__(self, **kwargs):
    """Adopt every keyword argument as an attribute, allocate per-episode
    bookkeeping, and bring up RL-Glue.

    Expects an 'episodes' keyword; it sizes the returns/steps lists.
    """
    for name in kwargs:
        setattr(self, name, kwargs[name])
    episode_count = self.episodes
    self.returns = [0] * episode_count
    self.steps = [0] * episode_count
    self.po = ProgressOutput()
    RLGlue.RL_init()
    self.has_inited = True
def run(self):
    """Run self.num_epochs training epochs; after each one, run a testing
    epoch when self.test_length is positive."""
    RLGlue.RL_init()
    epoch = 0
    while epoch < self.num_epochs:
        epoch += 1
        self.run_epoch(epoch, self.epoch_length, "training")
        RLGlue.RL_agent_message("finish_epoch " + str(epoch))
        if self.test_length > 0:
            RLGlue.RL_agent_message("start_testing")
            self.run_epoch(epoch, self.test_length, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
def run(self, logtofile=False):
    """
    Run the desired number of training epochs; a testing epoch is conducted
    after each training epoch when args.test_length > 0.

    logtofile -- when True, redirect sys.stdout into a per-process log file
                 inside the experiment directory.

    Creates a timestamped experiment directory, checkpoints agent state
    there (via "set_dir"), and aborts early if the agent reports divergence.
    """
    args = self.args
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    experiment_dir = os.path.join(
        args.dir, args.agent_prefix + time_str + args.agent_suffix)
    # BUG FIX: was a bare `except:`, which also swallows KeyboardInterrupt
    # and SystemExit; only a missing directory (OSError) should trigger
    # creation.
    try:
        os.stat(experiment_dir)
    except OSError:
        os.makedirs(experiment_dir)

    if logtofile:
        outfile = os.path.join(experiment_dir,
                               "experiment_" + str(os.getpid()) + ".out")
        sys.stdout = open(outfile, "w")

    results_file = self.open_results_file(experiment_dir)
    RLGlue.RL_init()
    # Tell the agent where to write its own artifacts.
    RLGlue.RL_agent_message("set_dir " + experiment_dir)
    for epoch in range(1, args.num_epochs + 1):
        RLGlue.RL_agent_message("start_epoch " + str(epoch))
        self.run_epoch(epoch, args.epoch_length, "training")
        # Stop the whole experiment if the agent reports divergence.
        diverging = RLGlue.RL_agent_message("query_divergence")
        if diverging == "True":
            print("The agent has diverged. Quitting.")
            rlterminate()
        RLGlue.RL_agent_message("finish_epoch " + str(epoch))
        if args.test_length > 0:
            RLGlue.RL_agent_message("start_testing")
            if args.collect_rewards:
                total_reward, num_episodes = self.run_epoch(
                    epoch, args.test_length, "testing", True)
                self.update_results_file(epoch, total_reward, num_episodes,
                                         results_file)
            else:
                self.run_epoch(epoch, args.test_length, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
    rlterminate()
def start(self): print "\nExperiment starting!" taskSpec = RLGlue.RL_init() print taskSpec exp_params_for_agent = {} self.agent_params = self.message_agent(MessageType.exchange_params, exp_params_for_agent) # Keep overhead a bit lower by having functions inline def should_report(): self.step % args.report_freq == 0 def should_evaluate(): step % args.eval_freq == 0 and step > self.agent_params[ 'learn_start'] def should_save(): step % args.save_freq == 0 observ_action = RLGlue.RL_start() while self.step <= self.steps: observ_action_term = RLGlue.RL_step() # If game ends, start another if observ_action_term.terminal: # Not sure if we need to clean up after every episode, don't think so RLGlue.RL_start() self.n_train_episodes += 1 if should_report(): # TODO assert agent steps is equal print 'Steps: {}'.format(step) self.message_agent(MessageType.report) if should_evaluate(): pass if should_save(): pass print "A job well done." RLGlue.RL_cleanup()
def main(): whichTrainingMDP = 1 # Uncomment ONE of the following lines to choose your experiment #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19] #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9] #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5] loadMario(True, True, 121, 0, 99, whichTrainingMDP) # and then, # just run the experiment: RLGlue.RL_init() episodesToRun = 10 totalSteps = 0 for i in range(episodesToRun): RLGlue.RL_episode(20000) thisSteps = RLGlue.RL_num_steps() print "Total steps in episode %d is %d" % (i, thisSteps) totalSteps += thisSteps print "Total steps : %d\n" % (totalSteps) RLGlue.RL_cleanup()
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # $Revision$ # $Date$ # $Author$ # $HeadURL$ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_message") task_spec = RLGlue.RL_init() tester.check_fail("empty" != RLGlue.RL_env_message(None)) tester.check_fail("empty" != RLGlue.RL_env_message("")) tester.check_fail("empty" != RLGlue.RL_agent_message(None)) tester.check_fail("empty" != RLGlue.RL_agent_message("")) tester.check_fail("" != RLGlue.RL_env_message("empty")) tester.check_fail("" != RLGlue.RL_agent_message("empty")) theResponse = RLGlue.RL_env_message("null") tester.check_fail(not (theResponse != None or "" != theResponse))
#http://rl-glue-ext.googlecode.com/ # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # $Revision: 617 $ # $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $ # $Author: gabalz $ # $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_sanity_experiment.py $ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_sanity") taskSpec = RLGlue.RL_init() tester.check_fail(taskSpec != "sample task spec") print tester.get_summary() sys.exit(tester.getFailCount())
#To compute the average number of steps as well as the average reward import rlglue.RLGlue as RLGlue import sys import matplotlib.pyplot as plt #def q_experiment(): # for i in RLGlue.RL_init() '''for i in range(100): RLGlue.RL_episode(0) print RLGlue.RL_return() ''' #q_experiment() ''' avg_steps_a = [] avg_reward_a = [] RLGlue.RL_env_message("set-start-state 0"); for i in range (100): num_of_steps = 0 reward = 0 for j in range(50): RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_a.append(reward/50) avg_steps_a.append(num_of_steps/50) ''' '''