def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Start one game (one episode)
    terminal = RLGlue.RL_episode(step_limit)

    # Get the number of steps until the game was decided and the total reward
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Tally and display this episode's result
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0

    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally wins and losses every 100 episodes
    record_interval = 100

    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the tallied results to a file
        with open('result.txt', 'a') as f:
            f.write(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0
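The function above only handles per-episode bookkeeping; the surrounding experiment script still has to initialize RL-Glue and call it in a loop. Below is a minimal, hypothetical driver sketch (not part of the original source): the global counters, the episode count, and the step limit are placeholder assumptions.

# Hypothetical driver for runEpisode() above -- assumed globals and counts,
# not taken from the original script.
which_episode = 0
total_win = total_draw = total_lose = 0
ns_epoch, pcts_win, pcts_win_or_draw, pcts_lose = [], [], [], []

if __name__ == "__main__":
    RLGlue.RL_init()
    num_episodes = 1000        # placeholder: number of games to play
    step_limit = 0             # 0 means no step limit in RL-Glue
    for _ in range(num_episodes):
        runEpisode(step_limit)
    RLGlue.RL_cleanup()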
Example No. 2
def main():
    """
    Run the desired number of training epochs; a testing epoch
    is conducted after each training epoch.
    """

    parser = argparse.ArgumentParser(description='Neural rl experiment.')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=100,
                        help='Number of training epochs')
    parser.add_argument('--epoch_length',
                        type=int,
                        default=50000,
                        help='Number of steps per epoch')
    parser.add_argument('--test_length',
                        type=int,
                        default=10000,
                        help='Number of steps per test')
    args = parser.parse_args()

    RLGlue.RL_init()

    for epoch in range(1, args.num_epochs + 1):
        RLGlue.RL_agent_message("training")
        run_epoch(epoch, args.epoch_length, "training")

        RLGlue.RL_agent_message("start_testing")
        run_epoch(epoch, args.test_length, "testing")
        RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example No. 3
    def run_episode(self):
        """ Run a single episode """
        # Update epsilon
        '''
        phase_len = self.episodes / 3
        if self.episode_number == phase_len * 2:
            # Start low phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_low))
        elif self.episode_number >= phase_len and self.episode_number < phase_len * 2:
            # In decr phase
            epsilon = float(RLGlue.RL_agent_message('get epsilon'))
            epsilon += (self.epsilon_decr - self.epsilon_high) / phase_len
            RLGlue.RL_agent_message('set epsilon %f' % (epsilon))
        elif self.episode_number == 0:
            # Start high phase
            RLGlue.RL_agent_message('set epsilon %f' % (self.epsilon_high))
        '''
        terminal = RLGlue.RL_episode(0)  # 0 - run until terminal
        steps = RLGlue.RL_num_steps()
        reward = RLGlue.RL_return()

        #print "\nEpisode %d\t %d steps\t reward: %d" % (self.episode_number, steps, reward)
        #print "Episode "+str(episode_number)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"

        self.returns[self.episode_number] = (
            reward + self.returns[self.episode_number] *
            (self.instance - 1)) / self.instance
        self.steps[self.episode_number] = (steps +
                                           self.steps[self.episode_number] *
                                           (self.instance - 1)) / self.instance

        self.episode_number += 1
Example No. 4
    def run_epoch(self, epoch, num_steps, prefix, collect_reward=False):
        """ Run one 'epoch' of training or testing, where an epoch is defined
        by the number of steps executed.  Prints a progress report after
        every trial

        Arguments:
           num_steps - steps per epoch
           prefix - string to print ('training' or 'testing')

        """
        steps_left = num_steps
        if prefix == "training" or not collect_reward:
            while steps_left > 0:
                print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                sys.stdout.flush()
                terminal = RLGlue.RL_episode(steps_left)
                if not terminal:
                    RLGlue.RL_agent_message("episode_end")
                steps_left -= RLGlue.RL_num_steps()
        elif prefix == "testing":
            total_reward = 0
            episode_counter = 0
            terminal = False
            while steps_left > 0:
                if terminal:
                    print prefix + " epoch: ", epoch, "steps_left: ", steps_left
                    sys.stdout.flush()
                roat = RLGlue.RL_step()
                reward = roat.r
                terminal = roat.terminal
                total_reward += reward
                episode_counter += terminal
                steps_left -= 1
            return total_reward, episode_counter
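The run_epoch above only returns a value in the testing branch (prefix == "testing" with collect_reward=True). A hypothetical helper, not part of the original class, showing how that return value could be turned into an average reward per completed test episode:

    # Hypothetical helper method (illustration only); epoch and test_length are
    # whatever the caller already uses for its testing epochs.
    def run_test_epoch(self, epoch, test_length):
        total_reward, n_episodes = self.run_epoch(
            epoch, test_length, "testing", collect_reward=True)
        avg = total_reward / float(n_episodes) if n_episodes else 0.0
        print "testing epoch:", epoch, "avg reward per episode:", avg
        return avg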
Example No. 5
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(10000)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        #print "Episode " + str(learningEpisode) + "/" + str(whichEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Episode %d/%d\t %d steps \t %.1f total reward\t" % (
            learningEpisode, whichEpisode, totalSteps, totalReward)
    else:
        #print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
        print "Evaluation ::\t %d steps \t %.1f total reward" % (totalSteps,
                                                                 totalReward)
        with open('eval_dump.json', 'a') as f:
            json.dump(
                {
                    "Steps": totalSteps,
                    "Episode": whichEpisode,
                    "Reward": totalReward
                }, f)
            f.write('\n')
        return totalSteps
Example No. 6
def run_epoch(epoch, num_steps, prefix):
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example No. 7
def runEpisode(stepLimit, trial):
	global whichEpisode
	terminal=RLGlue.RL_episode(stepLimit)
	totalSteps=RLGlue.RL_num_steps()
	totalReward=RLGlue.RL_return()
	
	print "Experiment "+str(trial + 1)+"\t Episode "+str(whichEpisode)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"
	
	whichEpisode=whichEpisode+1
Example No. 8
def recordTrajectory():
    RLGlue.RL_start()
    trajectory = []
    while True:
        roat = RLGlue.RL_step()
        trajectory.append(roat)
        if roat.terminal:
            break
    return trajectory
def main():
    num_episodes = 10000000
    max_steps_per_episode = 50000
    RLGlue.RL_init()
    for episode in range(0, num_episodes):
        RLGlue.RL_episode(max_steps_per_episode)
        #print "Episode finished.", time.time()
        #print "Score: ", RLGlue.RL_return()
    
    RLGlue.RL_agent_message("save_data data.pkl");
Example No. 10
def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode " + str(ep) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + str(
                    terminal) + " natural end"
    RLGlue.RL_cleanup()
Example No. 11
def main():
    RLGlue.RL_init()
    is_testing = len(sys.argv) > 1 and sys.argv[1] == 'test'
    for epoch in xrange(NUM_EPOCHS):
        if is_testing:
            RLGlue.RL_agent_message("start_testing")
            run_epoch(epoch, TEST_LENGTH, "testing")
            RLGlue.RL_agent_message("finish_testing " + str(epoch))
        else:
            run_epoch(epoch, EPOCH_LENGTH, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))
Example No. 12
def runEpisode(stepLimit):
    # stepLimit of 0 implies no limit
    global whichEpisode
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print "Episode " + str(whichEpisode) + "\t " + str(
        totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

    whichEpisode = whichEpisode + 1
Example No. 13
def runEpisode(stepLimit):
    global whichEpisode
    RLGlue.RL_agent_message('reset')
    terminal = RLGlue.RL_episode(stepLimit)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    print("Episode " + str(whichEpisode)),
    print("\t " + str(totalSteps)),
    print(" steps \t" + str(totalReward)),
    print " total reward\t " + str(terminal) + " natural end"

    RLGlue.RL_agent_message('episode_end')

    whichEpisode = whichEpisode + 1
Example No. 14
    def run(self):
        """
        Run the desired number of training epochs; a testing epoch
        is conducted after each training epoch.
        """
        RLGlue.RL_init()

        for epoch in range(1, self.num_epochs + 1):
            self.run_epoch(epoch, self.epoch_length, "training")
            RLGlue.RL_agent_message("finish_epoch " + str(epoch))

            if self.test_length > 0:
                RLGlue.RL_agent_message("start_testing")
                self.run_epoch(epoch, self.test_length, "testing")
                RLGlue.RL_agent_message("finish_testing " + str(epoch))
Example No. 15
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print("Episode " + str(learningEpisode) + "\t " + str(totalSteps) +
              " steps \t" + str(totalReward) + " total reward\t ")
    else:
        print("Evaluation ::\t " + str(totalSteps) + " steps \t" +
              str(totalReward) + " total reward\t ")
Example No. 16
def demo():
    statistics = []
    episodeLength = 100
    #this_score = evaluateAgent()
    #printScore(0, this_score)
    #statistics.append(this_score)

    for i in range(1, 1000):

        RLGlue.RL_env_message("set-start-state " + S)
        RLGlue.RL_start()
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        print "%d\t\t%.2f" % (i, this_return)
        statistics.append(this_return)
        saveResultToCSV(statistics, "MyResults_sarsa1000_ver2.csv")
Example No. 17
	def run_episode(self):
		""" Run a single episode """
		terminal = RLGlue.RL_episode(10)
		steps = RLGlue.RL_num_steps()
		reward = RLGlue.RL_return()
		
		#print "Episode %d\t %d steps\t reward: %d" % (episode_number, steps, reward)
		#print "Episode "+str(episode_number)+"\t "+str(totalSteps)+ " steps \t" + str(totalReward) + " total reward\t " + str(terminal) + " natural end"
	
		self.total_reward += reward
		
		# Update average
		x = self.total_reward / (self.episode_number + 1)
		self.results[self.episode_number] += (x - self.results[self.episode_number]) / self.instance
		
		self.episode_number += 1
Example No. 18
    def agent_step(self, reward, observation):
        action = None

        self.window.erase()
        self.window.addstr('STATE: %s\n' % (observation.intArray))
        self.window.addstr('REWARD: %s\n' % (reward))
        self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
        self.window.refresh()

        try:
            c = self.window.getch()
            if c == curses.KEY_UP:
                action = 'N'
            elif c == curses.KEY_DOWN:
                action = 'S'
            elif c == curses.KEY_LEFT:
                action = 'W'
            elif c == curses.KEY_RIGHT:
                action = 'E'

            self.window.refresh()

        except KeyboardInterrupt:
            RLGlue.RL_cleanup()

        a = Action()

        if action:
            a.charArray = [action]

        return a
Example No. 19
	def __init__(self, episodes = 100):
		""" Initialize experiment """
		self.episodes = episodes
		self.results = [0] * episodes
		
		RLGlue.RL_init()
		self.has_inited = True
Example No. 20
def run_epoch(epoch, num_steps, prefix):
    """ Run one 'epoch' of training or testing, where an epoch is defined
    by the number of steps executed.  Prints a progress report after
    every trial

    Arguments:
       num_steps - steps per epoch
       prefix - string to print ('training' or 'testing')

    """
    steps_left = num_steps
    while steps_left > 0:
        print prefix + " epoch: ", epoch, "steps_left: ", steps_left
        terminal = RLGlue.RL_episode(steps_left)
        if not terminal:
            RLGlue.RL_agent_message("episode_end")
        steps_left -= RLGlue.RL_num_steps()
Example No. 21
    def start(self):
        print "\nExperiment starting!"
        taskSpec = RLGlue.RL_init()
        print taskSpec

        exp_params_for_agent = {}
        self.agent_params = self.message_agent(MessageType.exchange_params,
                                               exp_params_for_agent)

        # Keep overhead a bit lower by having functions inline
        def should_report():
            return self.step % args.report_freq == 0

        def should_evaluate():
            return (self.step % args.eval_freq == 0
                    and self.step > self.agent_params['learn_start'])

        def should_save():
            return self.step % args.save_freq == 0

        observ_action = RLGlue.RL_start()

        while self.step <= self.steps:
            observ_action_term = RLGlue.RL_step()
            self.step += 1  # advance the experiment's step counter

            # If game ends, start another
            if observ_action_term.terminal:
                # Not sure if we need to clean up after every episode, don't think so
                RLGlue.RL_start()
                self.n_train_episodes += 1

            if should_report():
                # TODO assert agent steps is equal
                print 'Steps: {}'.format(self.step)
                self.message_agent(MessageType.report)

            if should_evaluate():
                pass

            if should_save():
                pass

        print "A job well done."
        RLGlue.RL_cleanup()
Example No. 22
	def run(self):
		""" Run the experiment """
		if self.has_inited:
			RLGlue.RL_cleanup()
		
		self.instance += 1
		self.total_reward = 0
		self.episode_number = 0
		
		for i in xrange(self.episodes):
			self.run_episode()
Example No. 23
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    n = 10
    episodeLength = 100
    RLGlue.RL_agent_message("freeze learning")
    #print "FREEZE LEARNING"
    for i in range(0, n):
        RLGlue.RL_episode(episodeLength)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    #print "UNFREEZE LEARNING"
    return mean, standard_dev
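The statistics above are computed from two running sums rather than a stored list of returns. A small standalone check with made-up numbers (not from the original) showing that the running-sum expression is just the usual unbiased sample variance:

# Standalone illustration with made-up returns; only the algebra is the point.
import math

returns = [10.0, 12.0, 9.0, 11.0]      # made-up evaluation returns
n = len(returns)
s = sum(returns)
ss = sum(r * r for r in returns)
mean = s / n
variance = (ss - n * mean * mean) / (n - 1.0)                # running-sum form
direct = sum((r - mean) ** 2 for r in returns) / (n - 1.0)   # textbook form
assert abs(variance - direct) < 1e-9
print mean, math.sqrt(variance)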
Example No. 24
    def __init__(self, **kwargs):
        """ Initialize experiment """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.returns = [0] * self.episodes
        self.steps = [0] * self.episodes

        self.po = ProgressOutput()

        RLGlue.RL_init()
        self.has_inited = True
Example No. 25
def offlineDemo():
    this_score = evaluateAgent()
    printScore(0, this_score)
    theFile = open("results.csv", "w")
    theFile.close()
    if os.path.isfile("Archive.csv"):
        os.remove('Archive.csv')
    for i in range(0, 200):
        for j in range(0, 50):
            RLGlue.RL_episode(0)
            RLGlue.RL_env_message("stop print")
            if j % 20 == 0 and i > 0:
                RLGlue.RL_env_message("print")
            printScore((i + 1) * 50, this_score)
        this_score = evaluateAgent()
        printScore((i + 1) * 50, this_score)
        theFile = open("results.csv", "a")
        theFile.write("%d\t%.2f\t%.2f\n" %
                      ((i) * 50, this_score[0], this_score[1]))
        theFile.close()
    os.rename('results.csv', 'Archive.csv')
Example No. 26
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        print "Episode " + str(learningEpisode) + "\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(totalReward) + " total reward\t "

        # Write this episode's reward to a CSV file
        list_csv = [str(learningEpisode), str(totalReward)]
        f_csv = open('reward.csv', 'a')
        writer_r = csv.writer(f_csv, lineterminator='\n')
        writer_r.writerow(list_csv)
        f_csv.close()
Example No. 27
def runEpisode(is_learning_episode):
    global whichEpisode, learningEpisode

    RLGlue.RL_episode(0)
    totalSteps = RLGlue.RL_num_steps()
    totalReward = RLGlue.RL_return()

    whichEpisode += 1

    if is_learning_episode:
        learningEpisode += 1
        logger.info("{},{},{},{}".format(
            dt.now().strftime("%Y-%m-%d_%H:%M:%S"), learningEpisode,
            totalSteps, totalReward))
        print "Episode " + str(learningEpisode) + "\t " + str(
            totalSteps) + " steps \t" + str(
                totalReward) + " total reward\t " + dt.now().strftime(
                    "%Y%m%d_%H%M%S")
    else:
        print "Evaluation ::\t " + str(totalSteps) + " steps \t" + str(
            totalReward) + " total reward\t "
Example No. 28
def main():
    whichTrainingMDP = 1
    # Uncomment ONE of the following lines to choose your experiment
    #loadTetris(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,19]
    #loadHelicopter(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,9]
    #loadAcrobot(whichTrainingMDP); #put the desired parameter set in where MDP is in [1,49] #0 is standard acrobot
    #loadPolyathlon(whichTrainingMDP); #put the desired parameter set in where MDP is in [0,5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # and then,
    #		just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps

    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()
Example No. 29
def evaluateAgent():
    sum = 0
    sum_of_squares = 0
    this_return = 0
    mean = 0
    variance = 0
    n = 10

    RLGlue.RL_agent_message("freeze learning")
    for i in range(0, n):
        # We use a cutoff here in case the
        #policy is bad and will never end an episode
        RLGlue.RL_episode(5000)
        this_return = RLGlue.RL_return()
        sum += this_return
        sum_of_squares += this_return**2

    mean = sum / n
    variance = (sum_of_squares - n * mean * mean) / (n - 1.0)
    standard_dev = math.sqrt(variance)

    RLGlue.RL_agent_message("unfreeze learning")
    return mean, standard_dev
Example No. 30
def offlineDemo():
    statistics = []
    this_score = evaluateAgent()
    printScore(0, this_score)
    statistics.append(this_score)

    for i in range(0, 20):
        for j in range(0, 25):
            RLGlue.RL_episode(0)
        this_score = evaluateAgent()
        printScore((i + 1) * 25, this_score)
        statistics.append(this_score)

    saveResultToCSV(statistics, "results.csv")
Example No. 31
def rlterminate():
    RLGlue.doCallWithNoParams(RLGlue.Network.kRLTerm)