def recordTrajectory():
    """Run one RL-Glue episode and collect the result of every step.

    Starts an episode with ``RLGlue.RL_start()`` (its return value is
    discarded), then calls ``RLGlue.RL_step()`` repeatedly, keeping each
    returned object, until one has a truthy ``terminal`` attribute.

    Returns:
        list: The objects returned by ``RL_step()``, in call order. The
        list always holds at least one element, and its last element is
        the step whose ``terminal`` attribute was truthy.
    """
    RLGlue.RL_start()
    # The first step is always taken; keep stepping while the most recent
    # step has not reported termination.
    steps = [RLGlue.RL_step()]
    while not steps[-1].terminal:
        steps.append(RLGlue.RL_step())
    return steps
def start(self):
    """Run the training loop of the experiment through RL-Glue.

    Initialises RL-Glue, exchanges parameters with the agent (the reply is
    stored in ``self.agent_params``), then steps the environment while
    ``self.step <= self.steps``. Whenever a step is terminal a new episode
    is started and ``self.n_train_episodes`` is incremented. Every
    ``args.report_freq`` steps the agent is sent a ``MessageType.report``
    message. Evaluation and saving are scheduled but currently no-ops.
    RL-Glue is cleaned up once the loop finishes.

    Reads the module-level ``args`` (``report_freq``, ``eval_freq``,
    ``save_freq``) and expects ``self.step``, ``self.steps`` and
    ``self.n_train_episodes`` to be initialised before the call.
    """
    print("\nExperiment starting!")
    taskSpec = RLGlue.RL_init()
    print(taskSpec)

    exp_params_for_agent = {}
    self.agent_params = self.message_agent(MessageType.exchange_params,
                                           exp_params_for_agent)

    # Keep overhead a bit lower by having functions inline.
    # Each predicate must *return* its result; without the return they
    # always evaluate to None and the corresponding branch never runs.
    def should_report():
        return self.step % args.report_freq == 0

    def should_evaluate():
        return (self.step % args.eval_freq == 0
                and self.step > self.agent_params['learn_start'])

    def should_save():
        return self.step % args.save_freq == 0

    # The start tuple is not needed here, so it is not kept.
    RLGlue.RL_start()

    while self.step <= self.steps:
        observ_action_term = RLGlue.RL_step()
        # Advance the step counter; nothing else in this loop does, and
        # without it the loop condition can never become false.
        # NOTE(review): assumes self.step is a plain counter that is not
        # updated elsewhere during RL_step() -- confirm against the class.
        self.step += 1

        # If game ends, start another
        if observ_action_term.terminal:
            # Not sure if we need to clean up after every episode,
            # don't think so
            RLGlue.RL_start()
            self.n_train_episodes += 1

        if should_report():
            # TODO assert agent steps is equal
            print('Steps: {}'.format(self.step))
            self.message_agent(MessageType.report)

        if should_evaluate():
            pass  # evaluation not implemented yet

        if should_save():
            pass  # saving not implemented yet

    print("A job well done.")
    RLGlue.RL_cleanup()
def demo():
    """Run a fixed batch of episodes and save their returns to a CSV file.

    For each episode number from 1 to 999 the environment is sent a
    "set-start-state" message built from the module-level ``S`` (defined
    outside this function), an episode is run with a step limit of 100,
    and the value of ``RLGlue.RL_return()`` is printed next to the episode
    number and recorded. The recorded returns are finally passed to
    ``saveResultToCSV``.

    An initial baseline evaluation (evaluateAgent / printScore) used to be
    run before the loop; it is currently disabled.

    NOTE(review): ``RL_start()`` is called immediately before
    ``RL_episode()``; confirm against the RL-Glue documentation whether
    ``RL_episode()`` begins an episode on its own, which would make the
    explicit start redundant.
    NOTE(review): ``range(1, 1000)`` yields 999 episodes while the output
    file name mentions 1000 -- confirm which is intended.
    """
    step_limit = 100
    episode_returns = []

    for episode in range(1, 1000):
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(step_limit)

        episode_return = RLGlue.RL_return()
        print("%d\t\t%.2f" % (episode, episode_return))
        episode_returns.append(episode_return)

    saveResultToCSV(episode_returns, "MyResults_sarsa1000_ver2.csv")
# limitations under the License. # # $Revision: 617 $ # $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $ # $Author: gabalz $ # $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_1") task_spec = RLGlue.RL_init() RLGlue.RL_start() roat = RLGlue.RL_step() tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one")) tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one")) tester.check_fail(roat.terminal != 0) tester.check_fail(roat.r != 1.0) tester.check_fail(len(roat.o.intArray) != 1) tester.check_fail(len(roat.o.doubleArray) != 0) tester.check_fail(len(roat.o.charArray) != 0) tester.check_fail(roat.o.intArray[0] != 0) roat = RLGlue.RL_step() tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two")) tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two")) tester.check_fail(roat.terminal != 0)
# # $Revision: 617 $ # $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $ # $Author: gabalz $ # $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_empty_experiment.py $ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_empty") task_spec = RLGlue.RL_init() for whichEpisode in range(1, 5): startTuple = RLGlue.RL_start() if (whichEpisode % 2 == 0): tester.check_fail(len(startTuple.a.intArray) != 0) tester.check_fail(len(startTuple.a.doubleArray) != 0) tester.check_fail(len(startTuple.a.charArray) != 0) tester.check_fail(len(startTuple.o.intArray) != 0) tester.check_fail(len(startTuple.o.doubleArray) != 0) tester.check_fail(len(startTuple.o.charArray) != 0) else: tester.check_fail(len(startTuple.a.intArray) != 7) tester.check_fail(len(startTuple.a.doubleArray) != 3) tester.check_fail(len(startTuple.a.charArray) != 1) tester.check_fail(len(startTuple.o.intArray) != 2)
# Run a series of episodes. runEpisode is defined outside this excerpt;
# per the note below, its argument is a step limit.
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(1)
# Remember that a stepLimit of 0 means there is no limit at all!
runEpisode(0)
RLGlue.RL_cleanup()

print("\n\n----------Stepping through an episode----------")
# We could also start over and do another experiment.
taskSpec = RLGlue.RL_init()

# We could run one step at a time instead of one episode at a time.
# Start the episode and read the first int of the initial observation and
# of the initial action.
startResponse = RLGlue.RL_start()

firstObservation = startResponse.o.intArray[0]
firstAction = startResponse.a.intArray[0]
print("First observation and action were: " + str(firstObservation) + " and: " + str(firstAction))

# Run one step.
stepResponse = RLGlue.RL_step()

# Run until the episode ends (terminal == 1).
while (stepResponse.terminal != 1):
    stepResponse = RLGlue.RL_step()
    # Disabled: could optionally print (state, action) pairs for every
    # non-terminal step.
    #if (stepResponse.terminal != 1)
    #printf("(%d,%d) ",stepResponse.o.intArray[0],stepResponse.a.intArray[0])