def offlineDemo():
    """Run 200 blocks of 50 learning episodes, evaluating and logging each block.

    After each block of 50 episodes the current policy is evaluated via the
    module-level evaluateAgent() helper and the score is both printed
    (printScore) and appended to results.csv as tab-separated values.  When
    the whole run finishes, results.csv is renamed to Archive.csv.

    Relies on an initialized RLGlue session and module-level os import.
    """
    this_score = evaluateAgent()
    printScore(0, this_score)
    # Truncate any stale results file, then drop the previous archive so the
    # final os.rename() cannot collide with it.
    with open("results.csv", "w"):
        pass
    if os.path.isfile("Archive.csv"):
        os.remove('Archive.csv')
    for i in range(0, 200):
        for j in range(0, 50):
            RLGlue.RL_episode(0)
            RLGlue.RL_env_message("stop print")
            # Periodically re-enable environment printing and echo progress
            # (skipped on the very first outer iteration).
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
                printScore((i + 1) * 50, this_score)
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        # NOTE(review): the console reports (i + 1) * 50 episodes but the CSV
        # logs i * 50 — kept as-is; confirm which count is intended.
        with open("results.csv", "a") as theFile:
            theFile.write("%d\t%.2f\t%.2f\n" % ((i) * 50, this_score[0], this_score[1]))
    os.rename('results.csv', 'Archive.csv')
def demo(): statistics = [] episodeLength = 100 #this_score = evaluateAgent() #printScore(0, this_score) #statistics.append(this_score) for i in range(1, 1000): RLGlue.RL_env_message("set-start-state " + S) RLGlue.RL_start() RLGlue.RL_episode(episodeLength) this_return = RLGlue.RL_return() print "%d\t\t%.2f" % (i, this_return) statistics.append(this_return) saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
def preload(envNameString):
    """Ask the environment shell for its list of loadable environments and
    return the ParameterHolder matching *envNameString*.

    The shell's response payload follows the last '=' and is a ':'-separated
    list in which names and parameter specs alternate (names at odd indices).

    Raises ValueError when the name is not listed.  (The original code left
    indexOfMyEnv unbound in that case, raising an opaque NameError, and could
    IndexError on a payload with an odd number of items.)
    """
    theRequest = "TO=%d FROM=%d CMD=%d VALTYPE=%d VALS=NULL" % (
        ENVSHELL, BENCHMARK, LISTQUERY, NOVALUE)
    theResponse = RLGlue.RL_env_message(theRequest)
    # Payload is everything after the last '='.
    lastEqualsPos = theResponse.rfind("=")
    thePayLoad = theResponse[lastEqualsPos + 1:]
    # Strip a trailing ':' so split() does not yield an empty tail item.
    if thePayLoad and thePayLoad[-1] == ':':
        thePayLoad = thePayLoad[:-1]
    items = thePayLoad.split(':')
    # Names sit at odd indices; the following item is that env's param spec.
    for i in range(1, len(items) - 1, 2):
        if items[i] == envNameString:
            return ParameterHolder(items[i + 1])
    raise ValueError("Unknown environment: %s" % envNameString)
# limitations under the License. # # $Revision$ # $Date$ # $Author$ # $HeadURL$ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_message") task_spec = RLGlue.RL_init() tester.check_fail("empty" != RLGlue.RL_env_message(None)) tester.check_fail("empty" != RLGlue.RL_env_message("")) tester.check_fail("empty" != RLGlue.RL_agent_message(None)) tester.check_fail("empty" != RLGlue.RL_agent_message("")) tester.check_fail("" != RLGlue.RL_env_message("empty")) tester.check_fail("" != RLGlue.RL_agent_message("empty")) theResponse = RLGlue.RL_env_message("null") tester.check_fail(not (theResponse != None or "" != theResponse)) theResponse = RLGlue.RL_agent_message("null")
# $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $ # $Author: gabalz $ # $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $ import sys import rlglue.RLGlue as RLGlue from glue_test import glue_test tester = glue_test("test_1") task_spec = RLGlue.RL_init() RLGlue.RL_start() roat = RLGlue.RL_step() tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one")) tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one")) tester.check_fail(roat.terminal != 0) tester.check_fail(roat.r != 1.0) tester.check_fail(len(roat.o.intArray) != 1) tester.check_fail(len(roat.o.doubleArray) != 0) tester.check_fail(len(roat.o.charArray) != 0) tester.check_fail(roat.o.intArray[0] != 0) roat = RLGlue.RL_step() tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two")) tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two")) tester.check_fail(roat.terminal != 0) tester.check_fail(roat.r != 1.0) tester.check_fail(len(roat.o.intArray) != 1) tester.check_fail(len(roat.o.doubleArray) != 0)
# NOTE(review): the next two statements reference `msg`, presumably set by an
# agent-settings loop that starts before this chunk — indentation reconstructed.
print "\t", msg
RLGlue.RL_agent_message(msg)

# Run experiments
for i in xrange(settings.instances):
    print "Running experiment #%d with %d episodes..." % (i + 1, settings.episodes),
    sys.stdout.flush()
    experiment.run()
    # Experiment completed, show summary
    print "Done!"
    print str(experiment)

# Store data to file; the filename embeds the env name and a timestamp.
env_name = RLGlue.RL_env_message('name')
data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat')
data_path = os.path.join(settings.results_dir, data_file)
print
print "Storing results into %s..." % (data_path),

""" Save result data to file """
# Dump every non-dunder attribute of the settings module as '#'-prefixed
# header comments in the data file.
f = open(data_path, 'w')
f.write("# Settings:\n")
for k in dir(settings):
    if k.startswith('__'):
        continue
    f.write("# %s = %s\n" % (k, getattr(settings, k)))
# Run experiment if __name__ == "__main__": import settings # Create a new experiment experiment = Experiment(**settings.experiment) # Set up environment print print "Environment settings:" for k, v in settings.environment.items(): msg = 'set %s %s' % (k, v) print " ", msg RLGlue.RL_env_message(msg) # Set up agent print "Agent settings:" for k, v in settings.agent.items(): msg = 'set %s %s' % (k, v) print " ", msg RLGlue.RL_agent_message(msg) print # Run experiments for i in xrange(settings.experiment['instances']): experiment.run() #print str(experiment)
# Initialize the glue connection; RL_init returns the environment's task spec.
taskSpec = RLGlue.RL_init()
print("RL_init called, the environment sent task spec: " + taskSpec)

print("\n\n----------Sending some sample messages----------")

#Talk to the agent and environment a bit...*/
responseMessage = RLGlue.RL_agent_message("what is your name?")
print("Agent responded to \"what is your name?\" with: " + responseMessage)

responseMessage = RLGlue.RL_agent_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Agent responded to \"If at first you don't succeed; call it version 1.0 \" with: "
    + responseMessage + "\n")

responseMessage = RLGlue.RL_env_message("what is your name?")
print("Environment responded to \"what is your name?\" with: " + responseMessage)
responseMessage = RLGlue.RL_env_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Environment responded to \"If at first you don't succeed; call it version 1.0 \" with: "
    + responseMessage)

print("\n\n----------Running a few episodes----------")
# Five capped episodes (100 steps each), then a single 1-step episode.
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(100)
runEpisode(1)
#To compute the average number of steps as well as the average reward import rlglue.RLGlue as RLGlue import sys import matplotlib.pyplot as plt #def q_experiment(): # for i in RLGlue.RL_init() '''for i in range(100): RLGlue.RL_episode(0) print RLGlue.RL_return() ''' #q_experiment() ''' avg_steps_a = [] avg_reward_a = [] RLGlue.RL_env_message("set-start-state 0"); for i in range (100): num_of_steps = 0 reward = 0 for j in range(50): RLGlue.RL_episode(0) num_of_steps = num_of_steps + RLGlue.RL_num_steps() reward = reward + RLGlue.RL_return() avg_reward_a.append(reward/50) avg_steps_a.append(num_of_steps/50) ''' '''
def load(envNameString, theParams):
    """Tell the environment shell to load the named environment, serializing
    *theParams* into the request payload."""
    serialized = theParams.stringSerialize()
    payload = envNameString + ":" + serialized
    message = "TO=%d FROM=%d CMD=%d VALTYPE=%d VALS=%s" % (
        ENVSHELL, BENCHMARK, LOADQUERY, STRINGLIST, payload)
    RLGlue.RL_env_message(message)
# Ask the agent to persist its current policy, then reset and compare.
RLGlue.RL_agent_message("save_policy results.dat")

print "\nCalling RL_cleanup and RL_init to clear the agent's memory..."
RLGlue.RL_cleanup()
RLGlue.RL_init()

# Fresh (untrained) agent baseline.
print "Evaluating the agent's default policy:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()

print "\nLoading up the value function we saved earlier."
RLGlue.RL_agent_message("load_policy results.dat")
print "Evaluating the agent after loading the value function:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()

print "Telling the environment to use fixed start state of 2,3."
RLGlue.RL_env_message("set-start-state 2 3")
RLGlue.RL_start()
print "Telling the environment to print the current state to the screen."
RLGlue.RL_env_message("print-state")
print "Evaluating the agent a few times from a fixed start state of 2,3:\n\t\tMean Return\tStandardDeviation\n-------------------------------------------"
single_evaluation()

print "Evaluating the agent again with the random start state:\n\t\tMean Return\tStandardDeviation\n-----------------------------------------------------"
RLGlue.RL_env_message("set-random-start-state")
single_evaluation()

RLGlue.RL_cleanup()
print "\nProgram Complete."
def runEpisode(stepLimit):
    """Run one episode with the given step cap, print its step count and
    total reward, and advance the module-level episode counter."""
    # stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()
    print "Episode " + str(whichEpisode) + "\t " + str(
        totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    whichEpisode = whichEpisode + 1

RLGlue.RL_init()
#RLGlue.RL_env_message("dumptmatrix tmatrixperfect.dat")
RLGlue.RL_env_message("printabstractstates")
# Run NO_EPISODES uncapped episodes.
for i in xrange(NO_EPISODES):
    runEpisode(0)

# NOTE(review): the triple-quoted block below is commented-out code that
# continues past this chunk (the string is not closed here).
'''
returnVsEpisode = np.zeros(NO_EPISODES)
timeVsEpisode = np.zeros(NO_EPISODES)

def calculateCoords(state):
    return [state%12,state/12]

policy = [12*[4*[0]] for i in xrange(12)]

for run in xrange(NO_RUNS):
    print "Run: "+str(run+1)
    RLGlue.RL_init()
import sys
import matplotlib.pyplot as plt

#def q_experiment():
#    for i in RLGlue.RL_init()
'''for i in range(100):
    RLGlue.RL_episode(0)
    print RLGlue.RL_return()
'''
#q_experiment()

# For 100 blocks of 50 episodes from start state 0, record the average
# number of steps and average return per block.
avg_steps_a = []
avg_reward_a = []
RLGlue.RL_env_message("set-start-state 0")
for i in range(100):
    num_of_steps = 0
    reward = 0
    for j in range(50):
        RLGlue.RL_episode(0)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_a.append(reward / 50)
    avg_steps_a.append(num_of_steps / 50)

# NOTE(review): the triple-quoted block below is commented-out code that
# continues past this chunk (the string is not closed here).
'''
avg_steps_b = []
avg_reward_b = []
RLGlue.RL_env_message("set-start-state 1");
for i in range (100):
# # Just do a single evaluateAgent and print it # #def single_evaluation(): # this_score = evaluateAgent() # printScore(0, this_score) RLGlue.RL_init() print "Telling the environment to use fixed start state." nbrReaches = 7 habitatSize = 4 S = array([random.randint(1, 3) for i in xrange(nbrReaches * habitatSize)]) #S=array([1,1,2, 1, 3, 3, 1]) #S=[1,2,3,3,2,1,3,2,2,3,2,1,2,2,3,2,2,1,3,1,1,2,2,3,3,2,1,1] S = ",".join(map(str, S)) # just a way to display a list in python print S RLGlue.RL_env_message("set-start-state " + S) RLGlue.RL_start() print "Starting offline demo\n----------------------------\nWill alternate learning for 10 episodes, then freeze policy and evaluate for 10 episodes.\n" print "After Episode\tMean Return\tStandard Deviation\n-------------------------------------------------------------------------" demo() print "Evaluating the agent again with the random start state:\n\t\tMean Return\tStandardDeviation\n-----------------------------------------------------" RLGlue.RL_env_message("set-random-start-state") #single_evaluation() RLGlue.RL_cleanup() print "\nProgram Complete."
""" Manual experiment for testing the environment """ import sys import os import time from rlglue import RLGlue # Initialize RL Glue RLGlue.RL_init() RLGlue.RL_env_message('debug=True') RLGlue.RL_start() running = True reward = 0 while running: result = RLGlue.RL_step() running = not result.terminal steps = RLGlue.RL_num_steps() R = RLGlue.RL_return() print 'Experiment ended after %d steps with a return of %d' % (steps, R) RLGlue.RL_cleanup()