Code Example #1
    def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
        """ Run one 'epoch' of training or testing, where an epoch is defined
        by the number of steps executed.  Prints a progress report after
        every trial.

        Arguments:
           epoch - index of the current epoch, used in the progress report
           num_steps - steps per epoch
           prefix - string to print ('training' or 'testing')
           collect_reward - if True while testing, step the agent manually and
                            return (total_reward, episode_counter)

        """
        steps_left = num_steps
        if prefix == "training" or not collect_reward:
            while steps_left > 0:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
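                # RL_episode runs one episode, cut off after at most steps_left
                # steps; it returns true only if a terminal state was reached.
                # "episode_end" below is a project-specific agent message sent
                # when the episode was truncated by the cutoff.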
                terminal = RLGlue.RL_episode(steps_left)
                if not terminal:
                    RLGlue.RL_agent_message("episode_end")
                steps_left -= RLGlue.RL_num_steps()
        elif prefix == "testing":
            total_reward = 0
            episode_counter = 0
            terminal = False
            while steps_left > 0:
                if terminal:
                    print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                    sys.stdout.flush()
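                # RL_step returns a reward/observation/action/terminal (roat) struct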
                roat = RLGlue.RL_step()
                reward = roat.r
                terminal = roat.terminal
                total_reward += reward
                episode_counter += terminal
                steps_left -= 1
            return total_reward, episode_counter
Code Example #2
def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Start one game (episode)
    terminal = RLGlue.RL_episode(step_limit)

    # Get the number of steps until the game ended and the total reward
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Classify this game's result from its total reward
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0

    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally wins/draws/losses every 100 episodes
    record_interval = 100

    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the tallied results to a file
        with open('result.txt', 'a') as f:
            f.write(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0
Code Example #3
File: exp_vrep.py Project: yeonjisong/dqn-vrep
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(10000)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        #print "Episode " + str(learningEpisode) + "/" + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Episode %d/%d\t %d steps \t %.1f total reward\t" % (
            learningEpisode, whichEpisode, totalSteps, totalReward)
    else:
        #print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps,
                                                                 totalReward)
        with open('eval_dump.json', 'a') as f:
            json.dump(
                {
                    "Steps": totalSteps,
                    "Episode": whichEpisode,
                    "Reward": totalReward
                }, f)
            f.write('\n')
        return totalSteps
Code Example #4
File: experiment.py Project: Jeevantk/rl_experiments
    def run_episode(self):
        """ Run a single episode """
        # Update epsilon
        '''
        phase_len = self.episodes / 3
        if self.episode_number == phase_len * 2:
            # Start low phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_low))
        elif self.episode_number >= phase_len and self.episode_number < phase_len * 2:
            # In decr phase
            epsilon = float(RLGlue.RL_agent_message('get epsilon'))
            epsilon += (self.epsilon_decr - self.epsilon_high) / phase_len
            RLGlue.RL_agent_message('set epsilon %f' % (epsilon))
        elif self.episode_number == 0:
            # Start high phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_high))
        '''
        terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
        steps = RLGlue.RL_num_steps()
        reward = RLGlue.RL_return()

        #print "\nEpisode %d\t %d steps\t reward: %d" % (self.episode_number, steps, reward)
        #print "Episode "+str(episode_number)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"

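        # Running average over independent runs (instances):
        # new_mean = (x + old_mean * (instance - 1)) / instance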
        self.returns[self.episode_number] = (
            reward + self.returns[self.episode_number] *
            (self.instance - 1)) / self.instance
        self.steps[self.episode_number] = (steps +
                                           self.steps[self.episode_number] *
                                           (self.instance - 1)) / self.instance

        self.episode_number += 1
Code Example #5
File: experiment.py Project: afcarl/atari
def run_epoch(epoch, num_steps, prefix):
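    # Run whole episodes until the epoch's step budget is exhausted; if the
    # last episode is cut off, notify the agent via a custom message.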
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Code Example #6
def runEpisode(stepLimit, trial):
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Experiment %d\t Episode %d\t %d steps \t%s total reward\t %s natural end" % (
        trial + 1, whichEpisode, totalSteps, totalReward, terminal)

    whichEpisode += 1
Code Example #7
def main():
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
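    # Run the episodes back to back; each is capped at max_steps_per_episode steps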
    for episode in range(num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print "Episode finished.", time.time()
        #print "Score: ", RLGlue.RL_return()
    
    RLGlue.RL_agent_message("save_data data.pkl");
Code Example #8
def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + str(
                    terminal) + " natural end"
    RLGlue.RL_cleanup()
Code Example #9
def runEpisode(stepLimit):
    # stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Episode " + str(whichEpisode) + "\t " + str(
        totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

    whichEpisode = whichEpisode + 1
Code Example #10
def offlineDemo():
    statistics = []
    this_score = evaluateAgent()
    printScore(0, this_score)
    statistics.append(this_score)

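    # Train in 20 blocks of 25 episodes, re-evaluating the agent after each block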
    for i in range(0, 20):
        for j in range(0, 25):
            RLGlue.RL_episode(0)
        this_score = evaluateAgent()
        printScore((i + 1) * 25, this_score)
        statistics.append(this_score)

    saveResultToCSV(statistics, "results.csv")
Code Example #11
def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print("Episode " + str(whichEpisode)),
    print("\t " + str(totalSteps)),
    print(" steps \t" + str(totalReward)),
    print " total reward\t " + str(terminal) + " natural end"

    RLGlue.RL_agent_message('episode_end')

    whichEpisode = whichEpisode + 1
Code Example #12
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(totalSteps) +
              " steps \t" + str(totalReward) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(totalSteps) + " steps \t" +
              str(totalReward) + " total reward\t ")
Code Example #13
def demo():
    statistics = []
    episodeLength = 100
    #this_score = evaluateAgent()
    #printScore(0, this_score)
    #statistics.append(this_score)

    for i in range(1, 1000):

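        # S is the start-state string, defined elsewhere in this file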
        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        print "%d\t\t%.2f" % (i, this_return)
        statistics.append(this_return)
        saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
Code Example #14
	def run_episode(self):
		""" Run a single episode """
		terminal = RLGlue.RL_episode(10)
		steps = RLGlue.RL_num_steps()
		reward = RLGlue.RL_return()
		
		#print "Episode %d\t %d steps\t reward: %d" % (episode_number, steps, reward)
		#print "Episode "+str(episode_number)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"
	
		self.total_reward += reward
		
		# Update average
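		# Incremental mean across instances: new = old + (x - old) / instance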
		x = self.total_reward / (self.episode_number + 1)
		self.results[self.episode_number] += (x - self.results[self.episode_number]) / self.instance
		
		self.episode_number += 1
Code Example #15
def run_epoch(epoch, num_steps, prefix):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial.

    Arguments:
       epoch - index of the current epoch, used in the progress report
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')

    """
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Code Example #16
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    n = 10
    episodeLength = 100
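    # Pause the agent's learning during evaluation (RL-Glue sample-experiment convention)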
    RLGlue.RL_agent_message("freeze learning")
    #print "FREEZE LEARNING"
    for i in range(0, n):
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    #print "UNFREEZE LEARNING"
    return mean, standard_dev
Code Example #17
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        logger.info("{},{},{},{}".format(
            dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode,
            totalSteps, totalReward))
        print "Episode " + str(learningEpisode) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + dt.now().strftime(
                    "%Y%m%d_%H%M%S")
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(
            totalReward) + " total reward\t "
Code Example #18
def offlineDemo():
    this_score = evaluateAgent()
    printScore(0, this_score)
    theFile = open("results.csv", "w")
    theFile.close()
    if os.path.isfile("Archive.csv"):
        os.remove('Archive.csv')
    for i in range(0, 200):
        for j in range(0, 50):
            RLGlue.RL_episode(0)
            RLGlue.RL_env_message("stop print")
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
            printScore((i + 1) * 50, this_score)
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        theFile = open("results.csv", "a")
        theFile.write("%d\t%.2f\t%.2f\n" %
                      ((i) * 50, this_score[0], this_score[1]))
        theFile.close()
    os.rename('results.csv', 'Archive.csv')
Code Example #19
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

        # Write the evaluation reward to a CSV file
        list_csv = [str(learningEpisode), str(totalReward)]
        with open('reward.csv', 'a') as f_csv:
            writer_r = csv.writer(f_csv, lineterminator='\n')
            writer_r.writerow(list_csv)
Code Example #20
def offlineDemo():
    statistics = []
    this_score = evaluateAgent()
    printScore(0, this_score)
    statistics.append(this_score)

    for i in range(0, 20):
        for j in range(0, 25):
            RLGlue.RL_episode(0)
        this_score = evaluateAgent()
        printScore((i + 1) * 25, this_score)
        statistics.append(this_score)

    print "Printing out sample trajectory"
    trajectory = recordTrajectory()
    for roat in trajectory:
        print "State:", roat.o.intArray[0], "Action:", roat.a.intArray[
            0], "Reward:", roat.r
    print "Done"

    saveResultToCSV(statistics, "results.csv")
Code Example #21
def main():
    whichTrainingMDP = 1
    # Uncomment ONE of the following lines to choose your experiment
    #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19]
    #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9]
    #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot
    #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # ...and then just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps

    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()
Code Example #22
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy
        # is bad and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

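    # Unbiased sample variance from the running sums: (sum_of_squares - n*mean**2) / (n - 1)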
    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
Code Example #23
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the policy
        # is bad and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
Code Example #24
File: experiment.py Project: gandalfvn/double-dqn
def run_episode(training=True):
	global total_episode, learned_episode, total_time, learned_steps, csv_episode, highscore, num_finished_eval_episode, evaluation_scores
	start_time = time.time()
	RLGlue.RL_episode(0)
	num_steps = RLGlue.RL_num_steps()
	total_reward = RLGlue.RL_return()
	total_episode += 1
	elapsed_time = time.time() - start_time
	total_time += elapsed_time
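	# One epoch is a fixed budget of learned steps; time_steps_per_epoch is defined elsewhere in the file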
	epoch = int(learned_steps / time_steps_per_epoch)

	if training:
		learned_steps += num_steps
		learned_episode += 1
		sec = int(elapsed_time)
		total_minutes = int(total_time / 60)
		csv_episode.append([learned_episode, total_reward, num_steps, sec, total_minutes, epoch, learned_steps])
		if total_reward > highscore:
			highscore = total_reward
			csv_training_highscore.append([learned_episode, highscore, total_minutes, epoch])
		print "Episode:", learned_episode, "epoch:", epoch, "num_steps:", num_steps, "total_reward:", total_reward, "time:", sec, "sec",  "total_time:", total_minutes, "min"

	return num_steps, total_reward
Code Example #25
# limitations under the License.
#
#  $Revision$
#  $Date$
#  $Author$
#  $HeadURL$

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_rl_episode")

task_spec = RLGlue.RL_init()

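# The test environment used by this harness terminates naturally after exactly
# 5 steps, so RL_episode(0) (no cutoff) ends naturally (isTerminal == 1),
# while cutoffs of 1, 2, and 4 steps truncate the episode (isTerminal == 0).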
isTerminal = RLGlue.RL_episode(0)
tester.check_fail(isTerminal != 1)
tester.check_fail(RLGlue.RL_num_steps() != 5)

isTerminal = RLGlue.RL_episode(1)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 1)

isTerminal = RLGlue.RL_episode(2)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 2)

isTerminal = RLGlue.RL_episode(4)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 4)
Code Example #26
File: Qexperiment.py Project: nivedn3/RL_Assignments
	for j  in range(50):
		RLGlue.RL_episode(0)
		num_of_steps = num_of_steps + RLGlue.RL_num_steps()
		reward = reward + RLGlue.RL_return()
	avg_reward_b.append(reward/50)
	avg_steps_b.append(num_of_steps/50)
'''

avg_steps_c = []
avg_reward_c = []
RLGlue.RL_env_message("set-start-state 2")
for i in range(100):
    num_of_steps = 0
    reward = 0
    for j in range(50):
        RLGlue.RL_episode(0)
        num_of_steps = num_of_steps + RLGlue.RL_num_steps()
        reward = reward + RLGlue.RL_return()
    avg_reward_c.append(reward / 50)
    avg_steps_c.append(num_of_steps / 50)
'''

plt.plot(avg_steps_a,'r')
plt.ylabel('Average_Steps')
plt.xlabel('Number of 50 episode runs')
plt.title('Average_steps of A')	
plt.show()

plt.plot(avg_reward_a,'r')
plt.ylabel('Average_Reward')
plt.xlabel('Number of 50 episode runs')
Code Example #27
'''

avg_steps_c = []
avg_reward_c = []
RLGlue.RL_env_message("set-start-state 2")
for i in range(50):
    num_of_steps = 0
    reward = 0
    j = 0
    jobs = []
    while j < 20:

        #p=multiprocessing.Process(target=RLGlue.RL_episode,args=(100000,))
        #jobs.append(p)
        #p.start
        RLGlue.RL_episode(100000)
        j = j + 1
        if RLGlue.RL_return() == 0:
            j = j - 1
        else:
            num_of_steps = num_of_steps + RLGlue.RL_num_steps()
            reward = reward + RLGlue.RL_return()

        print RLGlue.RL_return()
    # Average over the 20 episodes actually counted (the j-loop runs to 20)
    avg_reward_c.append(reward / 20)
    avg_steps_c.append(num_of_steps / 20)

file = open("values.txt", 'w')
file.write(str(avg_steps_c))
file.write("                                    ")
file.write(str(avg_reward_c))
Code Example #28
#
#  $Revision: 617 $
#  $Date: 2009-02-05 02:24:12 -0700 (Thu, 05 Feb 2009) $
#  $Author: gabalz $
#  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Python/src/tests/test_rl_episode_experiment.py $

import sys

import rlglue.RLGlue as RLGlue
from glue_test import glue_test
tester = glue_test("test_rl_episode")

task_spec = RLGlue.RL_init()

isTerminal = RLGlue.RL_episode(0)
tester.check_fail(isTerminal != 1)
tester.check_fail(RLGlue.RL_num_steps() != 5)

isTerminal = RLGlue.RL_episode(1)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 1)

isTerminal = RLGlue.RL_episode(2)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 2)

isTerminal = RLGlue.RL_episode(4)
tester.check_fail(isTerminal != 0)
tester.check_fail(RLGlue.RL_num_steps() != 4)