def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
    """
    Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.

    Two modes are supported:

    * prefix == "training", or collect_reward is false: whole episodes
      are run through RLGlue.RL_episode() until the step budget is used
      up.  A progress line is printed before every episode.
    * prefix == "testing" with collect_reward true: the glue is stepped
      one RL_step() at a time, and rewards and terminal flags are
      tallied.  A progress line is printed whenever the previous step
      was terminal.

    Arguments:
       epoch - epoch number; only used in the progress report
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')
       collect_reward - if true (and prefix is 'testing'), tally the
                        reward and the number of terminal steps

    Returns:
       (total_reward, episode_counter) in the reward-collecting testing
       mode; None in every other case (including an unrecognised prefix
       combined with collect_reward=True, where nothing is run).
    """

    def report_progress(remaining):
        # A single-argument print(...) parses on both Python 2 and
        # Python 3 and writes the same text the original
        # `print a, b, c, d` statement did (items joined by one space).
        print(" ".join([prefix + " epoch: ", str(epoch),
                        "steps_left: ", str(remaining)]))
        sys.stdout.flush()

    steps_left = num_steps

    if prefix == "training" or not collect_reward:
        while steps_left > 0:
            report_progress(steps_left)
            terminal = RLGlue.RL_episode(steps_left)
            if not terminal:
                # The step budget ran out before the episode finished,
                # so tell the agent explicitly that the episode is over.
                RLGlue.RL_agent_message("episode_end")
            steps_left -= RLGlue.RL_num_steps()
        return None

    if prefix != "testing":
        return None

    # NOTE(review): no RL_start() is issued in this function, neither
    # before the first RL_step() nor after a terminal one -- confirm
    # that the caller / environment takes care of starting episodes.
    total_reward = 0
    episode_counter = 0
    terminal = False
    while steps_left > 0:
        if terminal:
            report_progress(steps_left)
        roat = RLGlue.RL_step()
        terminal = roat.terminal
        total_reward += roat.r
        episode_counter += terminal
        steps_left -= 1
    return total_reward, episode_counter
def recordTrajectory():
    """Run one episode and return every step result, in order.

    The episode is started with RLGlue.RL_start() (whose own return value
    is not recorded), then RLGlue.RL_step() is called repeatedly.  Each
    step result is stored, including the final one whose ``terminal``
    attribute is truthy.
    """
    RLGlue.RL_start()

    steps = []
    episode_over = False
    while not episode_over:
        step_result = RLGlue.RL_step()
        steps.append(step_result)
        if step_result.terminal:
            episode_over = True
    return steps
def start(self):
    """Run the experiment loop against RL-Glue.

    Initialises the glue, exchanges parameters with the agent (the reply
    is stored in ``self.agent_params``), then steps the glue while
    ``self.step <= self.steps``.  A new episode is started whenever a
    step comes back terminal, and ``self.n_train_episodes`` is bumped.
    Every ``args.report_freq`` steps the agent is sent a report message.
    The evaluation and save hooks are still placeholders.  The glue is
    cleaned up at the end.

    ``args`` is read from the enclosing (module) scope.
    """
    print("\nExperiment starting!")
    taskSpec = RLGlue.RL_init()
    print(taskSpec)

    exp_params_for_agent = {}
    self.agent_params = self.message_agent(MessageType.exchange_params,
                                           exp_params_for_agent)

    # Keep overhead a bit lower by having functions inline.
    # Each predicate must *return* its result (the original bodies were
    # bare expressions, so they always yielded None), and must read the
    # counter from self.step (a bare `step` is not defined here).
    def should_report():
        return self.step % args.report_freq == 0

    def should_evaluate():
        return (self.step % args.eval_freq == 0
                and self.step > self.agent_params['learn_start'])

    def should_save():
        return self.step % args.save_freq == 0

    RLGlue.RL_start()
    while self.step <= self.steps:
        observ_action_term = RLGlue.RL_step()
        # Advance the loop counter; nothing else in this loop changes
        # self.step, so without this the loop condition never changes.
        # NOTE(review): assumes self.step is a plain integer attribute
        # owned by this class -- confirm nothing else increments it.
        self.step += 1

        # If game ends, start another
        if observ_action_term.terminal:
            # Not sure if we need to clean up after every episode,
            # don't think so
            RLGlue.RL_start()
            self.n_train_episodes += 1

        if should_report():
            # TODO assert agent steps is equal
            print('Steps: {}'.format(self.step))
            self.message_agent(MessageType.report)

        if should_evaluate():
            pass  # TODO: evaluation not implemented yet

        if should_save():
            pass  # TODO: saving not implemented yet

    print("A job well done.")
    RLGlue.RL_cleanup()
# $Revision: 617 $
# $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $
# $Author: gabalz $
# $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test

tester = glue_test("test_1")

task_spec = RLGlue.RL_init()
RLGlue.RL_start()

# ---- first step: messages, then reward/terminal, then observation ----
roat = RLGlue.RL_step()

env_reply = RLGlue.RL_env_message("one")
tester.check_fail("one|1.|one" != env_reply)
agent_reply = RLGlue.RL_agent_message("one")
tester.check_fail("one|1.|one" != agent_reply)

tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)

observation = roat.o
tester.check_fail(len(observation.intArray) != 1)
tester.check_fail(len(observation.doubleArray) != 0)
tester.check_fail(len(observation.charArray) != 0)
tester.check_fail(observation.intArray[0] != 0)

# ---- second step ----
roat = RLGlue.RL_step()

env_reply = RLGlue.RL_env_message("two")
tester.check_fail("two|2.2.|two" != env_reply)
agent_reply = RLGlue.RL_agent_message("two")
tester.check_fail("two|2.2.|two" != agent_reply)

tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(startTuple.a.charArray) != 0) tester.check_fail(len(startTuple.o.intArray) != 0) tester.check_fail(len(startTuple.o.doubleArray) != 0) tester.check_fail(len(startTuple.o.charArray) != 0) else: tester.check_fail(len(startTuple.a.intArray) != 7) tester.check_fail(len(startTuple.a.doubleArray) != 3) tester.check_fail(len(startTuple.a.charArray) != 1) tester.check_fail(len(startTuple.o.intArray) != 2) tester.check_fail(len(startTuple.o.doubleArray) != 4) tester.check_fail(len(startTuple.o.charArray) != 5) for whichStep in range(0, 5): stepTuple = RLGlue.RL_step() tester.check_fail(stepTuple.terminal != 0) tester.check_fail(stepTuple.r != 0) if (whichEpisode % 2 == 0): tester.check_fail(len(stepTuple.a.intArray) != 0) tester.check_fail(len(stepTuple.a.doubleArray) != 0) tester.check_fail(len(stepTuple.a.charArray) != 0) tester.check_fail(len(stepTuple.o.intArray) != 0) tester.check_fail(len(stepTuple.o.doubleArray) != 0) tester.check_fail(len(stepTuple.o.charArray) != 0) else: tester.check_fail(len(stepTuple.a.intArray) != 7) tester.check_fail(len(stepTuple.a.doubleArray) != 3) tester.check_fail(len(stepTuple.a.charArray) != 1)
print("\n\n----------Stepping through an episode----------")

# The glue can be re-initialised to start over with another experiment.
taskSpec = RLGlue.RL_init()

# Instead of running a whole episode in one call, drive it step by step.
# Start the episode and look at the first observation/action pair.
startResponse = RLGlue.RL_start()
firstObservation = startResponse.o.intArray[0]
firstAction = startResponse.a.intArray[0]
print("First observation and action were: " + str(firstObservation) +
      " and: " + str(firstAction))

# Take at least one step, then keep stepping until the step result is
# flagged terminal.  (State/action pairs of the non-terminal steps could
# optionally be printed inside this loop.)
while True:
    stepResponse = RLGlue.RL_step()
    if stepResponse.terminal == 1:
        break

print("\n\n----------Summary----------")

totalSteps = RLGlue.RL_num_steps()
totalReward = RLGlue.RL_return()
print("It ran for " + str(totalSteps) +
      " steps, total reward was: " + str(totalReward))

RLGlue.RL_cleanup()
# $Revision: 617 $
# $Date: 2009-02-05 04:24:12 -0500 (Thu, 05 Feb 2009) $
# $Author: gabalz $
# $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test

tester = glue_test("test_1")

task_spec = RLGlue.RL_init()
RLGlue.RL_start()

# Step 1: both the environment and the agent must answer the "one" message
# with the same string; the step is non-terminal with reward 1.0 and a
# single-int observation holding 0.
roat = RLGlue.RL_step()
expected_reply = "one|1.|one"
tester.check_fail(expected_reply != RLGlue.RL_env_message("one"))
tester.check_fail(expected_reply != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
obs = roat.o
tester.check_fail(len(obs.intArray) != 1)
tester.check_fail(len(obs.doubleArray) != 0)
tester.check_fail(len(obs.charArray) != 0)
tester.check_fail(obs.intArray[0] != 0)

# Step 2: same pattern with the "two" message.
roat = RLGlue.RL_step()
expected_reply = "two|2.2.|two"
tester.check_fail(expected_reply != RLGlue.RL_env_message("two"))
tester.check_fail(expected_reply != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
"""
Manual experiment for testing the environment.

Initialises RL-Glue, switches the environment into debug mode via an
environment message, runs a single episode step by step until a terminal
step is seen, then prints the step count and the return.
"""

import sys
import os
import time
from rlglue import RLGlue

# Initialize RL Glue
RLGlue.RL_init()
RLGlue.RL_env_message('debug=True')

RLGlue.RL_start()

# Step until the glue reports a terminal step.
running = True
while running:
    result = RLGlue.RL_step()
    running = not result.terminal

steps = RLGlue.RL_num_steps()
R = RLGlue.RL_return()
# Parenthesised single-argument print works on both Python 2 and 3 and
# writes the same text as the original print statement.
# NOTE(review): %d truncates a fractional return -- confirm the rewards
# of this environment are integral.
print('Experiment ended after %d steps with a return of %d' % (steps, R))

RLGlue.RL_cleanup()