Beispiel #1
0
def recordTrajectory():
    """Run one episode and return every step result, in order.

    The episode is opened with ``RLGlue.RL_start()`` (whose return value is
    not recorded) and then advanced with ``RLGlue.RL_step()`` until a step
    reports ``terminal``. The terminal step is the last list element.
    """
    RLGlue.RL_start()

    # Prime the loop with the first step so the list is never empty.
    latest = RLGlue.RL_step()
    steps = [latest]
    while not latest.terminal:
        latest = RLGlue.RL_step()
        steps.append(latest)
    return steps
Beispiel #2
0
    def start(self):
        """Run the training loop of the experiment through RL-Glue.

        Initialises RL-Glue, exchanges parameters with the agent (the reply
        is stored in ``self.agent_params``), then steps the environment while
        ``self.step <= self.steps``. A new episode is started whenever a step
        is terminal, and ``self.n_train_episodes`` is incremented. Every
        ``args.report_freq`` steps a report message is sent to the agent.
        Evaluation and saving hooks are evaluated but currently do nothing.

        Reads the module-level ``args`` (``report_freq``, ``eval_freq``,
        ``save_freq``) and expects ``self.agent_params`` to contain the key
        ``'learn_start'``.
        """
        print("\nExperiment starting!")
        taskSpec = RLGlue.RL_init()
        print(taskSpec)

        exp_params_for_agent = {}
        self.agent_params = self.message_agent(MessageType.exchange_params,
                                               exp_params_for_agent)

        # Keep overhead a bit lower by having functions inline.
        # Each predicate must *return* its result; without the return they
        # evaluate to None and the corresponding branch never runs.
        def should_report():
            return self.step % args.report_freq == 0

        def should_evaluate():
            return (self.step % args.eval_freq == 0
                    and self.step > self.agent_params['learn_start'])

        def should_save():
            return self.step % args.save_freq == 0

        RLGlue.RL_start()

        while self.step <= self.steps:
            observ_action_term = RLGlue.RL_step()
            # Advance the counter here: nothing else in this loop changes
            # self.step, so without this the loop condition never changes.
            # NOTE(review): assumes no other component advances self.step.
            self.step += 1

            # If game ends, start another
            if observ_action_term.terminal:
                # Not sure if we need to clean up after every episode, don't think so
                RLGlue.RL_start()
                self.n_train_episodes += 1

            if should_report():
                # TODO assert agent steps is equal
                print('Steps: {}'.format(self.step))
                self.message_agent(MessageType.report)

            if should_evaluate():
                pass

            if should_save():
                pass

        print("A job well done.")
        RLGlue.RL_cleanup()
Beispiel #3
0
def demo():
    """Run 999 episodes and log each episode's return.

    For every episode the environment is sent a ``set-start-state`` message
    built from the module-level ``S``, the episode is started and run with
    ``RLGlue.RL_episode(100)``, and the value of ``RLGlue.RL_return()`` is
    printed and appended to the running list. The complete list so far is
    rewritten to ``MyResults_sarsa1000_ver2.csv`` after every episode.
    """
    max_steps = 100
    returns = []

    # The optional baseline evaluation before training is disabled.
    for episode_no in range(1, 1000):
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(max_steps)

        episode_return = RLGlue.RL_return()
        print("%d\t\t%.2f" % (episode_no, episode_return))

        returns.append(episode_return)
        # Saved inside the loop so partial results survive an interruption.
        saveResultToCSV(returns, "MyResults_sarsa1000_ver2.csv")
Beispiel #4
0
# limitations under the License.
#
#  $Revision: 617 $
#  $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
# Experiment-side script for the "test_1" codec test: it drives two steps
# through RL-Glue and checks the messages and step results that come back.
# NOTE(review): check_fail presumably records a failure when its argument is
# true -- confirm against glue_test.
tester = glue_test("test_1")

task_spec = RLGlue.RL_init()

RLGlue.RL_start()

# First step: environment and agent must both answer "one" with the same
# string, and the step must be non-terminal with reward 1.0 and a single
# int observation equal to 0.
roat = RLGlue.RL_step()
tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one"))
tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
tester.check_fail(roat.o.intArray[0] != 0)

# Second step: the expected message reply changes and the step must still
# be non-terminal.
roat = RLGlue.RL_step()
tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two"))
tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
#
#  $Revision: 617 $
#  $Date: 2009-02-05 11:24:12 +0200 (Thu, 05 Feb 2009) $
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_empty_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
# Experiment-side script for the "test_empty" codec test: it starts four
# episodes and checks the sizes of the action and observation arrays
# returned by RL_start.
# NOTE(review): check_fail presumably records a failure when its argument is
# true -- confirm against glue_test.
tester = glue_test("test_empty")

task_spec = RLGlue.RL_init()

for whichEpisode in range(1, 5):
    startTuple = RLGlue.RL_start()

    if (whichEpisode % 2 == 0):
        # Even-numbered episodes: every action and observation array is
        # expected to be empty.
        tester.check_fail(len(startTuple.a.intArray) != 0)
        tester.check_fail(len(startTuple.a.doubleArray) != 0)
        tester.check_fail(len(startTuple.a.charArray) != 0)

        tester.check_fail(len(startTuple.o.intArray) != 0)
        tester.check_fail(len(startTuple.o.doubleArray) != 0)
        tester.check_fail(len(startTuple.o.charArray) != 0)
    else:
        # Odd-numbered episodes: the action is expected to carry 7 ints,
        # 3 doubles and 1 char; the observation is expected to carry 2 ints.
        tester.check_fail(len(startTuple.a.intArray) != 7)
        tester.check_fail(len(startTuple.a.doubleArray) != 3)
        tester.check_fail(len(startTuple.a.charArray) != 1)

        tester.check_fail(len(startTuple.o.intArray) != 2)
Beispiel #6
0
# Four episodes with an argument of 100, then one with an argument of 1.
for _ in range(4):
    runEpisode(100)
runEpisode(1)
# Remember that a stepLimit of 0 means there is no limit at all!
runEpisode(0)
RLGlue.RL_cleanup()

print("\n\n----------Stepping through an episode----------")
# We could also start over and do another experiment.
taskSpec = RLGlue.RL_init()

# Instead of running a whole episode at once, advance this one a single
# step at a time, beginning with the start of the episode.
startResponse = RLGlue.RL_start()

firstObservation = startResponse.o.intArray[0]
firstAction = startResponse.a.intArray[0]
print("First observation and action were: " + str(firstObservation) +
      " and: " + str(firstAction))

# Take one step, then keep stepping until the episode ends.
stepResponse = RLGlue.RL_step()
while stepResponse.terminal != 1:
    stepResponse = RLGlue.RL_step()
    # While the step is not terminal, the state/action pair
    # (stepResponse.o.intArray[0], stepResponse.a.intArray[0])
    # could optionally be printed here.