Example #1
    def agent_step(self, reward, observation):
        action = None

        self.window.erase()
        self.window.addstr('STATE: %s\n' % (observation.intArray))
        self.window.addstr('REWARD: %s\n' % (reward))
        self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
        self.window.refresh()

        try:
            c = self.window.getch()
            if c == curses.KEY_UP:
                action = 'N'
            elif c == curses.KEY_DOWN:
                action = 'S'
            elif c == curses.KEY_LEFT:
                action = 'W'
            elif c == curses.KEY_RIGHT:
                action = 'E'

            self.window.refresh()

        except KeyboardInterrupt:
            RLGlue.RL_cleanup()

        a = Action()

        if action:
            a.charArray = [action]

        return a
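
The agent above assumes a curses window already exists on self.window with keypad mode enabled; otherwise getch() never returns the curses.KEY_* arrow codes it tests for. A minimal sketch of that setup, assuming it lives in agent_init (the class name and placement are hypothetical, not part of the snippet):

import curses

class KeyboardAgent(object):  # hypothetical host class for agent_step above
    def agent_init(self, task_spec):
        # Create the curses screen that agent_step relies on.
        self.window = curses.initscr()
        curses.cbreak()        # deliver key presses immediately, no Enter needed
        self.window.keypad(1)  # translate arrow keys into curses.KEY_* codes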
Example #2
def run_experiment(maxsteps=100, numeps=1):
    taskSpec = RLGlue.RL_init()
    for ep in range(numeps):
        terminal = RLGlue.RL_episode(maxsteps)
        totalSteps = RLGlue.RL_num_steps()
        totalReward = RLGlue.RL_return()
        print "Episode %d\t %d steps \t%s total reward\t %d natural end" % (
            ep, totalSteps, totalReward, terminal)
    RLGlue.RL_cleanup()
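
A minimal driver for run_experiment, assuming the RL-Glue agent and environment are already connected; the episode and step counts here are arbitrary, not from the original snippet:

if __name__ == '__main__':
    # Sketch: ten episodes, each capped at 500 steps.
    run_experiment(maxsteps=500, numeps=10)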
Example #3
	def run(self):
		""" Run the experiment """
		if self.has_inited:
			RLGlue.RL_cleanup()
		
		self.instance += 1
		self.total_reward = 0
		self.episode_number = 0
		
		for i in xrange(self.episodes):
			self.run_episode()
Example #4
    def start(self):
        print "\nExperiment starting!"
        taskSpec = RLGlue.RL_init()
        print taskSpec

        exp_params_for_agent = {}
        self.agent_params = self.message_agent(MessageType.exchange_params,
                                               exp_params_for_agent)

        # Keep overhead a bit lower by having the predicates inline
        def should_report():
            return self.step % args.report_freq == 0

        def should_evaluate():
            return (self.step % args.eval_freq == 0
                    and self.step > self.agent_params['learn_start'])

        def should_save():
            return self.step % args.save_freq == 0

        observ_action = RLGlue.RL_start()

        while self.step <= self.steps:
            observ_action_term = RLGlue.RL_step()
            self.step += 1  # advance the counter; RL-Glue does not track it for us

            # If the game ended, start a new episode; RL-Glue needs no
            # cleanup between episodes, only a fresh RL_start()
            if observ_action_term.terminal:
                RLGlue.RL_start()
                self.n_train_episodes += 1

            if should_report():
                # TODO assert agent steps is equal
                print 'Steps: {}'.format(self.step)
                self.message_agent(MessageType.report)

            if should_evaluate():
                pass

            if should_save():
                pass

        print "A job well done."
        RLGlue.RL_cleanup()
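
The should_* predicates read report_freq, eval_freq and save_freq from a module-level args object that this excerpt never defines; presumably it is an argparse namespace. A sketch of the assumed definition (flag names and defaults are guesses):

import argparse

# Hypothetical: the snippet above assumes these flags exist on a global `args`.
parser = argparse.ArgumentParser()
parser.add_argument('--report_freq', type=int, default=1000)
parser.add_argument('--eval_freq', type=int, default=10000)
parser.add_argument('--save_freq', type=int, default=50000)
args = parser.parse_args()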
Example #5
def main():
    whichTrainingMDP = 1
    # Uncomment ONE of the following lines to choose your experiment;
    # the index selects the parameter set for that MDP.
    # loadTetris(whichTrainingMDP)      # MDP in [0, 19]
    # loadHelicopter(whichTrainingMDP)  # MDP in [0, 9]
    # loadAcrobot(whichTrainingMDP)     # MDP in [1, 49]; 0 is the standard acrobot
    # loadPolyathlon(whichTrainingMDP)  # MDP in [0, 5]
    loadMario(True, True, 121, 0, 99, whichTrainingMDP)

    # ...and then just run the experiment:
    RLGlue.RL_init()
    episodesToRun = 10
    totalSteps = 0
    for i in range(episodesToRun):
        RLGlue.RL_episode(20000)
        thisSteps = RLGlue.RL_num_steps()
        print "Total steps in episode %d is %d" % (i, thisSteps)
        totalSteps += thisSteps

    print "Total steps : %d\n" % (totalSteps)
    RLGlue.RL_cleanup()
Example #6
    def run(self):
        """ Run the experiment """
        if self.has_inited:
            RLGlue.RL_cleanup()

        self.instance += 1
        self.total_reward = 0
        self.episode_number = 0

        self.po.reset()

        # Progress output:
        #if not self.quiet:
        #    print "Running experiment #%d with %d episodes..." % (self.instance, self.episodes),

        pad = len('%d' % self.episodes)
        fmt = '%' + str(pad) + 'd/%d'

        for i in xrange(self.episodes):
            self.run_episode()
            self.po.out(str(self))

        print
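
run_episode() itself is not shown in either excerpt. A minimal sketch of what it presumably does, consistent with the counters that run() resets (the unlimited step budget is an assumption):

    def run_episode(self):
        # Hypothetical: run one RL-Glue episode and update the experiment's counters.
        RLGlue.RL_episode(0)  # 0 = no step limit in RL-Glue
        self.total_reward += RLGlue.RL_return()
        self.episode_number += 1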
Example #7
		print str(experiment)
		
	
	# Store data to file
	env_name = RLGlue.RL_env_message('name')
	data_file = env_name + '_' + time.strftime('%Y-%m-%d_%H:%M:%S.dat')
	data_path = os.path.join(settings.results_dir, data_file)
	
	print
	print "Storing results into %s..." % (data_path),
	
	# Save result data to file
	with open(data_path, 'w') as f:
		f.write("# Settings:\n")
		for k in dir(settings):
			if k.startswith('__'):
				continue
			f.write("#   %s = %s\n" % (k, getattr(settings, k)))
		f.write(experiment.get_result_data())
	
	
	#experiment.save_result(data_path)
	print "Done!"
	
	RLGlue.RL_cleanup()

Example #8
def runEpisode(step_limit):
    global which_episode
    global total_win
    global total_draw
    global total_lose
    global ns_epoch
    global pcts_win
    global pcts_win_or_draw
    global pcts_lose

    which_episode += 1

    # Run one episode
    terminal = RLGlue.RL_episode(step_limit)

    # Get the number of steps taken and the total reward for the episode
    total_steps = RLGlue.RL_num_steps()
    total_reward = RLGlue.RL_return()

    # Reward values that encode a win, a draw, and a loss
    r_win = 1.0
    r_draw = -0.5
    r_lose = -1.0

    if total_reward == r_win:
        total_win += 1
    elif total_reward == r_draw:
        total_draw += 1
    elif total_reward == r_lose:
        total_lose += 1

    print("Episode " + str(which_episode) + "\t " + str(total_steps) +
          " steps \t" + str(total_reward) + " total reward\t " +
          str(terminal) + " natural end")

    # Tally wins and losses every 100 episodes
    record_interval = 100

    if which_episode % record_interval == 0:
        line = 'Episode: {}, {} wins, {} draws, {} loses'.format(
            which_episode, total_win, total_draw, total_lose)
        print(
            '---------------------------------------------------------------')
        print(line)
        print(
            '---------------------------------------------------------------')

        # Append the tallied results to a file
        with open('result.txt', 'a') as f:
            f.write(line + '\n')

        ns_epoch.append(which_episode)
        pcts_win.append(float(total_win) / record_interval * 100)
        pcts_win_or_draw.append(
            float(total_win + total_draw) / record_interval * 100)
        pcts_lose.append(float(total_lose) / record_interval * 100)

        total_win = 0
        total_draw = 0
        total_lose = 0

    # Write out the model every 200 episodes
    model_write_interval = 200

    if which_episode % model_write_interval == 0:
        # Trigger the write-out of intermediate results
        RLGlue.RL_cleanup()
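
ns_epoch and the pcts_* lists are filled but never read in this excerpt; presumably they feed a plot once all episodes finish. A sketch under that assumption, using matplotlib (the helper name and output file are hypothetical):

import matplotlib.pyplot as plt

def plot_results():
    # Hypothetical helper: plot the rates tallied every record_interval
    # episodes by runEpisode() above.
    plt.plot(ns_epoch, pcts_win, label='win %')
    plt.plot(ns_epoch, pcts_win_or_draw, label='win or draw %')
    plt.plot(ns_epoch, pcts_lose, label='lose %')
    plt.xlabel('episode')
    plt.ylabel('percentage over the last 100 episodes')
    plt.legend()
    plt.savefig('result.png')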