Example #1
	def __init__(self, episodes=100):
		""" Initialize experiment """
		self.episodes = episodes
		self.results = [0] * episodes

		RLGlue.RL_init()
		self.has_inited = True
Example #2

import argparse

import rlglue.RLGlue as RLGlue


def main():
    """
    Run the desired number of training epochs; a testing epoch
    is conducted after each training epoch.
    """

    parser = argparse.ArgumentParser(description='Neural rl experiment.')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=100,
                        help='Number of training epochs')
    parser.add_argument('--epoch_length',
                        type=int,
                        default=50000,
                        help='Number of steps per epoch')
    parser.add_argument('--test_length',
                        type=int,
                        default=10000,
                        help='Number of steps per test')
    args = parser.parse_args()

    RLGlue.RL_init()

    for epoch in range(1, args.num_epochs + 1):
        RLGlue.RL_agent_message("training")
        run_epoch(epoch, args.epoch_length, "training")

        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, args.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example #3

import rlglue.RLGlue as RLGlue


def main():
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    for episode in range(num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print("Episode finished.", time.time())
        #print("Score: ", RLGlue.RL_return())

    RLGlue.RL_agent_message("save_data data.pkl")
Example #4
import sys

import rlglue.RLGlue as RLGlue


def main():
    # NUM_EPOCHS, EPOCH_LENGTH and TEST_LENGTH are module-level constants
    # defined elsewhere in the original file.
    RLGlue.RL_init()
    is_testing = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in range(NUM_EPOCHS):
        if is_testing:
            RLGlue.RL_agent_message("start_testing")
            run_epoch(epoch, TEST_LENGTH, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
        else:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))
Example #5
import rlglue.RLGlue as RLGlue


def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + str(
                    terminal) + " natural end"
    RLGlue.RL_cleanup()
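Assuming an agent and an environment are already connected to the rl_glue server, the helper above can be driven directly:

if __name__ == "__main__":
    run_experiment(maxsteps=100, numeps=10)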
Example #6

    def __init__(self, **kwargs):
        """ Initialize experiment """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.returns = [0] * self.episodes
        self.steps = [0] * self.episodes

        self.po = ProgressOutput()

        RLGlue.RL_init()
        self.has_inited = True
Example #7

    def run(self):
        """
        Run the desired number of training epochs; a testing epoch
        is conducted after each training epoch.
        """
        RLGlue.RL_init()

        for epoch in range(1, self.num_epochs + 1):
            self.run_epoch(epoch, self.epoch_length, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

            if self.test_length > 0:
                RLGlue.RL_agent_message("start_testing")
                self.run_epoch(epoch, self.test_length, "testing")
                RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example #8
    def run(self, logtofile=False):
        """
        Run the desired number of training epochs; a testing epoch
        is conducted after each training epoch.
        """
        args = self.args

        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        experiment_dir = os.path.join(args.dir, args.agent_prefix + time_str + args.agent_suffix)

        # Create the experiment directory if it does not exist yet.
        os.makedirs(experiment_dir, exist_ok=True)

        if logtofile:
            outfile = os.path.join(experiment_dir, "experiment_" + str(os.getpid()) + ".out")
            sys.stdout = open(outfile, "w")

        results_file = self.open_results_file(experiment_dir)
        RLGlue.RL_init()
        RLGlue.RL_agent_message("set_dir " + experiment_dir)

        for epoch in range(1, args.num_epochs + 1):
            RLGlue.RL_agent_message("start_epoch " + str(epoch))
            # curtime = time.time()
            self.run_epoch(epoch, args.epoch_length, "training")
            # duration = time.time() - curtime
            # print "training epoch " + str(epoch) + " " + str(duration)
            diverging = RLGlue.RL_agent_message("query_divergence")
            if diverging == "True":
                print("The agent has diverged. Quiting.")
                rlterminate()
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

            if args.test_length > 0:
                RLGlue.RL_agent_message("start_testing")
                # curtime = time.time()
                if args.collect_rewards:
                    total_reward, num_episodes = self.run_epoch(epoch, args.test_length, "testing", True)
                    self.update_results_file(epoch, total_reward, num_episodes, results_file)
                else:
                    self.run_epoch(epoch, args.test_length, "testing")
                # duration = time.time() - curtime
                # print "testing epoch " + str(epoch) + " " + str(duration)
                RLGlue.RL_agent_message("finish_testing " + str(epoch))
        rlterminate()
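The rlterminate helper called above is not shown on this page. A plausible minimal version, assuming it only has to shut the experiment down cleanly (and that sys is imported):

def rlterminate():
    # Inferred from the call sites above: close the RL-Glue
    # connection and end the process.
    RLGlue.RL_cleanup()
    sys.exit(0)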
Example #9
    def start(self):
        print("\nExperiment starting!")
        taskSpec = RLGlue.RL_init()
        print(taskSpec)

        exp_params_for_agent = {}
        self.agent_params = self.message_agent(MessageType.exchange_params,
                                               exp_params_for_agent)

        # Keep overhead a bit lower by having functions inline.
        # Fixed to return their checks; self.args is assumed to hold the
        # parsed command-line options (the original referenced a bare
        # `args`/`step` that are undefined in this scope).
        def should_report():
            return self.step % self.args.report_freq == 0

        def should_evaluate():
            return (self.step % self.args.eval_freq == 0
                    and self.step > self.agent_params['learn_start'])

        def should_save():
            return self.step % self.args.save_freq == 0

        observ_action = RLGlue.RL_start()

        while self.step <= self.steps:
            observ_action_term = RLGlue.RL_step()
            self.step += 1  # advance the counter so the loop can terminate

            # If game ends, start another
            if observ_action_term.terminal:
                # Not sure if we need to clean up after every episode, don't think so
                RLGlue.RL_start()
                self.n_train_episodes += 1

            if should_report():
                # TODO assert agent steps is equal
                print('Steps: {}'.format(self.step))
                self.message_agent(MessageType.report)

            if should_evaluate():
                pass

            if should_save():
                pass

        print "A job well done."
        RLGlue.RL_cleanup()
Example #10
def main():
    # loadMario and the other load* helpers below come from the
    # competition-specific helper module in the original file.
    whichTrainingMDP = 1
    # Uncomment ONE of the following lines to choose your experiment
    #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19]
    #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9]
    #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot
    #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # and then, just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps

    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()
Example #11

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_message")

task_spec = RLGlue.RL_init()

tester.check_fail("empty" != RLGlue.RL_env_message(None))

tester.check_fail("empty" != RLGlue.RL_env_message(""))

tester.check_fail("empty" != RLGlue.RL_agent_message(None))

tester.check_fail("empty" != RLGlue.RL_agent_message(""))

tester.check_fail("" != RLGlue.RL_env_message("empty"))

tester.check_fail("" != RLGlue.RL_agent_message("empty"))

theResponse = RLGlue.RL_env_message("null")
tester.check_fail(not (theResponse != None or "" != theResponse))
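The glue_test helper imported by these test scripts is part of the RL-Glue Python codec's test suite and is not shown here. A minimal sketch consistent with how it is used (the real implementation may differ):

class glue_test:
    """Tiny test harness; interface inferred from the usage above."""

    def __init__(self, name):
        self.name = name
        self.checks = 0
        self.failures = 0

    def check_fail(self, condition):
        # Record a failure whenever the condition is true.
        self.checks += 1
        if condition:
            self.failures += 1

    def get_summary(self):
        return "%s: %d of %d checks failed" % (self.name, self.failures, self.checks)

    def getFailCount(self):
        return self.failures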
Example #12

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_sanity")

taskSpec = RLGlue.RL_init()
tester.check_fail(taskSpec != "sample task spec")

print(tester.get_summary())
sys.exit(tester.getFailCount())
Example #13
# To compute the average number of steps as well as the average reward

import rlglue.RLGlue as RLGlue
import sys
import matplotlib.pyplot as plt

#def q_experiment():
#	for i in

RLGlue.RL_init()
'''for i in range(100):
	RLGlue.RL_episode(0)
	print(RLGlue.RL_return())
'''
#q_experiment()
'''

avg_steps_a = []
avg_reward_a = []
RLGlue.RL_env_message("set-start-state 0")
for i in range(100):
	num_of_steps = 0
	reward = 0
	for j in range(50):
		RLGlue.RL_episode(0)
		num_of_steps = num_of_steps + RLGlue.RL_num_steps()
		reward = reward + RLGlue.RL_return()
	avg_reward_a.append(reward/50)
	avg_steps_a.append(num_of_steps/50)
'''
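The commented-out block above sketches the intended experiment: average the number of steps and the return over batches of 50 episodes. A runnable version of that idea, assuming an environment that understands the set-start-state message (float division keeps the averages from being truncated):

avg_steps_a = []
avg_reward_a = []
RLGlue.RL_env_message("set-start-state 0")
for i in range(100):
    num_of_steps = 0
    reward = 0.0
    for j in range(50):
        RLGlue.RL_episode(0)                 # 0 = no step limit
        num_of_steps += RLGlue.RL_num_steps()
        reward += RLGlue.RL_return()
    avg_reward_a.append(reward / 50.0)
    avg_steps_a.append(num_of_steps / 50.0)

# Plot the two learning curves.
plt.plot(avg_steps_a, label="average steps")
plt.plot(avg_reward_a, label="average reward")
plt.legend()
plt.show()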