Example #1
def main():
    """
    Run the desired number of training epochs; a testing epoch
    is conducted after each training epoch.
    """

    parser = argparse.ArgumentParser(description='Neural rl experiment.')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=100,
                        help='Number of training epochs')
    parser.add_argument('--epoch_length',
                        type=int,
                        default=50000,
                        help='Number of steps per epoch')
    parser.add_argument('--test_length',
                        type=int,
                        default=10000,
                        help='Number of steps per test')
    args = parser.parse_args()

    RLGlue.RL_init()

    for epoch in range(1, args.num_epochs + 1):
        RLGlue.RL_agent_message("training")
        run_epoch(epoch, args.epoch_length, "training")

        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, args.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example #2
File: experiment.py  Project: afcarl/atari
def main():
    RLGlue.RL_init()
    is_testing = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in xrange(NUM_EPOCHS):
        if is_testing:
            RLGlue.RL_agent_message("start_testing")
            run_epoch(epoch, TEST_LENGTH, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
        else:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))
Example #3
def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print("Episode " + str(whichEpisode)),
    print("\t " + str(totalSteps)),
    print(" steps \t" + str(totalReward)),
    print " total reward\t " + str(terminal) + " natural end"

    RLGlue.RL_agent_message('episode_end')

    whichEpisode = whichEpisode + 1
Example #4
    def run(self):
        """
        Run the desired number of training epochs; a testing epoch
        is conducted after each training epoch.
        """
        RLGlue.RL_init()

        for epoch in range(1, self.num_epochs + 1):
            self.run_epoch(epoch, self.epoch_length, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

            if self.test_length > 0:
                RLGlue.RL_agent_message("start_testing")
                self.run_epoch(epoch, self.test_length, "testing")
                RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example #5
    def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
        """ Run one 'epoch' of training or testing, where an epoch is defined
        by the number of steps executed.  Prints a progress report after
        every trial

        Arguments:
           num_steps - steps per epoch
           prefix - string to print ('training' or 'testing')

        """
        steps_left = num_steps
        if prefix == "training" or not collect_reward:
            while steps_left > 0:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
                terminal = RLGlue.RL_episode(steps_left)
                if not terminal:
                    RLGlue.RL_agent_message("episode_end")
                steps_left -= RLGlue.RL_num_steps()
        elif prefix == "testing":
            total_reward = 0
            episode_counter = 0
            terminal = False
            while steps_left > 0:
                if terminal:
                    print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                    sys.stdout.flush()
                roat = RLGlue.RL_step()
                reward = roat.r
                terminal = roat.terminal
                total_reward += reward
                episode_counter += terminal  # terminal is 0/1, so this counts finished episodes
                steps_left -= 1
            return total_reward, episode_counter
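
The roat value returned by RLGlue.RL_step() above is a reward/observation/action/terminal container. A short sketch of reading its fields, using only the attribute names that actually appear in this example and in Example #14:

roat = RLGlue.RL_step()
reward = roat.r                   # scalar reward for this step
done = roat.terminal              # nonzero once the episode has ended
int_obs = roat.o.intArray         # integer components of the observation
double_obs = roat.o.doubleArray   # floating-point components
char_obs = roat.o.charArray       # character components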
Example #6
File: experiment.py  Project: afcarl/atari
def run_epoch(epoch, num_steps, prefix):
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example #7
def main():
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    for episode in range(0, num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print "Episode finished.", time.time()
        #print "Score: ", RLGlue.RL_return()

    RLGlue.RL_agent_message("save_data data.pkl")
Example #8
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    n = 10
    episodeLength = 100
    RLGlue.RL_agent_message("freeze learning")
    #print "FREEZE LEARNING"
    for i in range(0, n):
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    #print "UNFREEZE LEARNING"
    return mean, standard_dev
Example #9
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the
        # policy is bad and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
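
The closing arithmetic in Examples #8-#10 is the shortcut form of the unbiased sample variance, s^2 = (sum_of_squares - n * mean^2) / (n - 1). A quick sanity check of that identity against the usual two-pass formula (the returns list is hypothetical):

returns = [1.0, 3.0, 2.0, 5.0]  # hypothetical episode returns
n = len(returns)
mean = sum(returns) / n
shortcut = (sum(r ** 2 for r in returns) - n * mean * mean) / (n - 1.0)
two_pass = sum((r - mean) ** 2 for r in returns) / (n - 1.0)
assert abs(shortcut - two_pass) < 1e-12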
Example #10
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the
        # policy is bad and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
Example #11
def run_epoch(epoch, num_steps, prefix):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')

    """
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example #12
    def run(self, logtofile=False):
        """
        Run the desired number of training epochs; a testing epoch
        is conducted after each training epoch.
        """
        args = self.args

        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        experiment_dir = os.path.join(args.dir, args.agent_prefix + time_str + args.agent_suffix)

        # Create the experiment directory if it does not already exist.
        try:
            os.stat(experiment_dir)
        except OSError:
            os.makedirs(experiment_dir)

        if logtofile:
            outfile = os.path.join(experiment_dir, "experiment_" + str(os.getpid()) + ".out")
            sys.stdout = open(outfile, "w")

        results_file = self.open_results_file(experiment_dir)
        RLGlue.RL_init()
        RLGlue.RL_agent_message("set_dir " + experiment_dir)

        for epoch in range(1, args.num_epochs + 1):
            RLGlue.RL_agent_message("start_epoch " + str(epoch))
            # curtime = time.time()
            self.run_epoch(epoch, args.epoch_length, "training")
            # duration = time.time() - curtime
            # print "training epoch " + str(epoch) + " " + str(duration)
            diverging = RLGlue.RL_agent_message("query_divergence")
            if diverging == "True":
                print("The agent has diverged. Quiting.")
                rlterminate()
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

            if args.test_length > 0:
                RLGlue.RL_agent_message("start_testing")
                # curtime = time.time()
                if args.collect_rewards:
                    total_reward, num_episodes = self.run_epoch(epoch, args.test_length, "testing", True)
                    self.update_results_file(epoch, total_reward, num_episodes, results_file)
                else:
                    self.run_epoch(epoch, args.test_length, "testing")
                # duration = time.time() - curtime
                # print "testing epoch " + str(epoch) + " " + str(duration)
                RLGlue.RL_agent_message("finish_testing " + str(epoch))
        rlterminate()
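
On the agent side of RL-Glue, the strings sent with RL_agent_message arrive in the agent's agent_message callback. A hedged sketch of a dispatcher for a few of the message names used in these examples, assuming the rlglue.agent.Agent interface from the Python codec; the handler bodies are placeholders, not code from any quoted project, and the other Agent callbacks are omitted:

from rlglue.agent.Agent import Agent

class MessageHandlingAgent(Agent):
    def agent_message(self, message):
        # Route the experiment's control strings to agent-side behavior.
        if message == "start_testing":
            self.testing = True
            return ""
        if message.startswith("finish_testing"):
            self.testing = False
            return ""
        if message == "query_divergence":
            # Example #12 compares the reply against the string "True".
            return "True" if getattr(self, "diverged", False) else "False"
        return "I don't know how to respond to your message"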
Example #13
#  $Author$
#  $HeadURL$

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_message")

task_spec = RLGlue.RL_init()

tester.check_fail("empty" != RLGlue.RL_env_message(None))

tester.check_fail("empty" != RLGlue.RL_env_message(""))

tester.check_fail("empty" != RLGlue.RL_agent_message(None))

tester.check_fail("empty" != RLGlue.RL_agent_message(""))

tester.check_fail("" != RLGlue.RL_env_message("empty"))

tester.check_fail("" != RLGlue.RL_agent_message("empty"))

theResponse = RLGlue.RL_env_message("null")
tester.check_fail(not (theResponse != None or "" != theResponse))

theResponse = RLGlue.RL_agent_message("null")
tester.check_fail(not (theResponse != None or "" != theResponse))

tester.check_fail("1" != RLGlue.RL_env_message("1"))
tester.check_fail("1" != RLGlue.RL_agent_message("1"))
Example #14
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_1_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_1")

task_spec = RLGlue.RL_init()

RLGlue.RL_start()

roat = RLGlue.RL_step()
tester.check_fail("one|1.|one" != RLGlue.RL_env_message("one"))
tester.check_fail("one|1.|one" != RLGlue.RL_agent_message("one"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
tester.check_fail(roat.o.intArray[0] != 0)

roat = RLGlue.RL_step()
tester.check_fail("two|2.2.|two" != RLGlue.RL_env_message("two"))
tester.check_fail("two|2.2.|two" != RLGlue.RL_agent_message("two"))
tester.check_fail(roat.terminal != 0)
tester.check_fail(roat.r != 1.0)
tester.check_fail(len(roat.o.intArray) != 1)
tester.check_fail(len(roat.o.doubleArray) != 0)
tester.check_fail(len(roat.o.charArray) != 0)
Example #15
# Run experiment
if __name__ == "__main__":
	
	import settings
	
	# Create a new experiment
	experiment = Experiment(settings.episodes)
	
	# Set up agent
	print "Agent settings:"
	keys = ['marble_count', 'marble_inc', 'marble_win_reward', 
		    'marble_win_inc', 'marble_remove', 'save_to', 'load_from']
	for k in keys:
		msg = '%s=%s' % (k, getattr(settings, k))
		print "\t", msg 
		RLGlue.RL_agent_message(msg)
	
	# Run experiments
	for i in xrange(settings.instances):
		print "Running experiment #%d with %d episodes..." % (i + 1, settings.episodes),
		sys.stdout.flush()
		experiment.run()
		
		# Experiment completed, show summary
		print "Done!"
		print str(experiment)
		
	
	# Store data to file
	env_name = RLGlue.RL_env_message('name')
	data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat')
Example #16
    experiment = Experiment(**settings.experiment)

    # Set up environment
    print
    print "Environment settings:"
    for k, v in settings.environment.items():
        msg = 'set %s %s' % (k, v)
        print "  ", msg
        RLGlue.RL_env_message(msg)

    # Set up agent
    print "Agent settings:"
    for k, v in settings.agent.items():
        msg = 'set %s %s' % (k, v)
        print "  ", msg
        RLGlue.RL_agent_message(msg)

    print

    # Run experiments
    for i in xrange(settings.experiment['instances']):
        experiment.run()

        #print str(experiment)
        #print str(experiment.returns)
        #print str(experiment.steps)

    print

    # Store data to file
    returns = experiment.returns
Example #17
          " steps \t" + str(totalReward) + " total reward\t " + str(terminal) +
          " natural end")

    whichEpisode = whichEpisode + 1


# Main Program starts here

print("\n\nExperiment starting up!")
taskSpec = RLGlue.RL_init()
print("RL_init called, the environment sent task spec: " + taskSpec)

print("\n\n----------Sending some sample messages----------")

# Talk to the agent and environment a bit...
responseMessage = RLGlue.RL_agent_message("what is your name?")
print("Agent responded to \"what is your name?\" with: " + responseMessage)

responseMessage = RLGlue.RL_agent_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Agent responded to \"If at first you don't succeed; call it version 1.0  \" with: "
    + responseMessage + "\n")

responseMessage = RLGlue.RL_env_message("what is your name?")
print("Environment responded to \"what is your name?\" with: " +
      responseMessage)
responseMessage = RLGlue.RL_env_message(
    "If at first you don't succeed; call it version 1.0")
print(
    "Environment responded to \"If at first you don't succeed; call it version 1.0  \" with: "
Example #18
#
# Just do a single evaluateAgent and print it
#
def single_evaluation():
    this_score = evaluateAgent()
    printScore(0, this_score)


print "Starting offline demo\n----------------------------\nWill alternate learning for 25 episodes, then freeze policy and evaluate for 10 episodes.\n"
print "After Episode\tMean Return\tStandard Deviation\n-------------------------------------------------------------------------"
RLGlue.RL_init()
offlineDemo()

print "\nNow we will save the agent's learned value function to a file...."

RLGlue.RL_agent_message("save_policy results.dat")

print "\nCalling RL_cleanup and RL_init to clear the agent's memory..."

RLGlue.RL_cleanup()
RLGlue.RL_init()

print "Evaluating the agent's default policy:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()

print "\nLoading up the value function we saved earlier."
RLGlue.RL_agent_message("load_policy results.dat")

print "Evaluating the agent after loading the value function:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------"
single_evaluation()
Example #19
    def message_agent(self, msg, data=None):
        return RLGlue.RL_agent_message(Message(msg, data).dumps())
Example #20
			csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch])
		print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec",  "total_time:", total_minutes, "min"

	return num_steps, total_reward


RLGlue.RL_init()

while learned_episode < max_episode:
	epoch = int(learned_steps / time_steps_per_epoch)
	total_minutes = int(total_time / 60)

	if learned_episode % num_episode_between_evaluations == 0 and total_episode != 0:
		if is_evaluation_phase is False:
			print "Freezing the policy for evaluation."
			RLGlue.RL_agent_message("freeze_policy")
			num_finished_eval_episode = 0
			is_evaluation_phase = True
		num_steps, total_reward = run_episode(training=False)
		evaluation_scores[num_finished_eval_episode] = total_reward
		num_finished_eval_episode += 1
		print "Evaluation (", num_finished_eval_episode, "/" , num_episode_per_evaluation, ") ::", "num_steps:", num_steps, "total_reward:", total_reward
		if num_finished_eval_episode == num_episode_per_evaluation:
			is_evaluation_phase = False
			csv_evaluation.append([learned_episode, np.mean(evaluation_scores), np.median(evaluation_scores), total_minutes, epoch])
			RLGlue.RL_agent_message("unfreeze_policy")
		else:
			continue

	if learned_episode % saving_freq == 0 and learned_episode != 0:
		print "Saving the model."
Example #21
        f_csv = open('reward.csv', 'a')
        writer_r = csv.writer(f_csv, lineterminator = '\n')
        writer_r.writerow(list_csv)
        f_csv.close()



# Main Program starts here
print "\n\nDDQN-ALE Experiment starting up!"
RLGlue.RL_init()

while learningEpisode < max_learningEpisode:
    # Evaluate model every 10 episodes
    if np.mod(whichEpisode, 10) == 0:
        print "Freeze learning for Evaluation"
        RLGlue.RL_agent_message("freeze learning")
        runEpisode(is_learning_episode=False)
    else:
        print "DDQN is Learning"
        RLGlue.RL_agent_message("unfreeze learning")
        runEpisode(is_learning_episode=True)

    # Save model every 100 learning episodes
    if np.mod(learningEpisode, 100) == 0 and learningEpisode != 0:
        print "SAVE CURRENT MODEL"
        RLGlue.RL_agent_message("save model")

RLGlue.RL_cleanup()

print "Experiment COMPLETED @ Episode ", whichEpisode