def main():
    # Make a pendulum environment.
    env = PendulumEZ()

    # Make a uniformly tiled agent, i.e. an agent that maps the continuous
    # pendulum state onto binary tile features and does linear updating.
    agent = UniformTiledAgent(actions=env.actions,
                              num_tilings=8,
                              num_features=1024,
                              tile_width=2*pi/16)

    # set up the reinforcement-learning interface with agent and env
    rli = LoggingRLI(name="Pendulum Demo", rename_old_data=False)
    rli.add_step_variable('last value', agent_last_value)
    rli.init(agent, env)

    # Run the rli GUI with a PendulumGUI widget, a widget that plots the
    # length of each episode, and a widget that plots the agent's last Q value.
    rli.gui(PendulumGUI,
            lambda root, rli: VarPlotter(root, rli=rli, var='length',
                                         source='episode_data',
                                         title='Steps to Reach Goal'),
            lambda root, rli: VarPlotter(root, rli=rli, var='last value',
                                         source='step_data',
                                         title='Q Value'))
    #rli.episodes(2,100000)
    return rli
def main():
    # Make a grid environment with the given grid.
    env = GridWorld()

    # Make a tabular agent.
    agent = TabularTDAgent(actions=env.actions)

    # set up the reinforcement-learning interface with agent and env
    rli = LoggingRLI(name="Gridworld Demo", rename_old_data=False)

    # add a step variable to record the value estimate of the last step
    rli.add_step_variable('last_value', dtype=float, fn=agent_last_value)

    # init the RLI with the agent and environment
    rli.init(agent, env)

    # Run the rli GUI with a GridWorldDisplay widget, a widget that
    # plots the length of each episode, and a widget that plots the
    # agent's estimate of the Q-value of the last state.
    rli.gui(
        GridWorldDisplay,
        lambda root, r: VarPlotter(root, var='length',
                                   title='Steps to Reach Goal', rli=r),
        lambda root, r: VarPlotter(root, var='last_value',
                                   title='Q Value', rli=r,
                                   source='step_data')
    )
    #rli.episodes(100,10000)
    return rli
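# A minimal sketch of how this demo might be launched; the __main__ guard is
# an assumption and is not part of the original snippet.  main() returns the
# LoggingRLI instance, so the handle is kept for interactive inspection.
if __name__ == '__main__':
    demo_rli = main()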
def proc(self, N, init_name, init_fn, grid_name, grid, num_episodes):
    # setup the (optimistic/pessimistic) parameters
    init_fn()

    # Make a grid environment with the given grid.
    env = GridWorld(grid=grid)

    # Make a tabular agent.
    agent = TabularTDAgent(actions=env.actions,
                           num_features=env.num_states)

    # set up the reinforcement-learning interface with agent and env
    self.rli = rli = LoggingRLI(name=self.filestem(N),
                                rename_old_data=False)
    rli.init(agent, env)

    # Run the rli GUI with a GridWorldDisplay widget and a widget that
    # plots the length of each episode.
    if self.gui:
        rli.gui(GridWorldDisplay, EpisodeVarPlotter('length'))
    else:
        rli.episodes(num_episodes, 10000)

    # save the data
    pkl.save(rli.episode_data, self.filestem(N) + '.pkl.gz')
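# A hypothetical follow-up step, sketched to show how the saved episode data
# could be read back for analysis.  pkl.load is the same call used in the
# test snippet below; the filename variable is an illustration standing in
# for self.filestem(N) + '.pkl.gz', not a real file.
filename = 'some_run.pkl.gz'
data = pkl.load(filename)
print 'mean episode length:', sum(data['length']) / float(len(data['length']))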
def run(executor, follower, POMDPs, Robot, Recognizer, Instructions,
        pomdp_name='Grid'):
    ActionQ = Queue.Queue(1)
    ObservationQ = Queue.Queue(1)
    follower.NLUQueue = NLTKFrame.NLUQueue = executor.NLUQueue = Queue.Queue(25)
    instructionQ = InstructionIDEntry.instructionQ = Queue.Queue(20)

    pomdpProxy = QueuePOMDPProxy(ActionQ, ObservationQ, str2meaning, False)
    pomdp = POMDPs[pomdp_name]
    pomdpProxy.setPOMDP(pomdp)
    executor.setRobot(Robot(pomdpProxy, pomdp_name, Recognizer))
    executor.robot.NLUQueue = executor.NLUQueue

    rli = LoggingRLI(name='InstructionFollower',
                     rename_old_data=False)  #, gui_button_orient='vertical'
    rli.init(FollowerAgentProxy(ActionQ, ObservationQ),
             MarkovLocPOMDPWorld(pomdp=pomdp, crumbs=True))

    Instructions = [i for i in Instructions
                    if '_%s%d_' % (pomdp_name, pomdp.PosSet) in i]
    CorpusDirectors = {}
    for ri in Instructions:
        CorpusDirectors[ri.split('-')[1].split('_')[0]] = True
    CorpusDirectors = CorpusDirectors.keys()
    CorpusDirectors.sort()
    DirectorRow.button_texts = CorpusDirectors + ['INPUT']
    EnvRow.button_texts = ['%s%d' % (pomdp_name, pomdp.PosSet)]

    thread.start_new_thread(follower.run, (Instructions, instructionQ))
    rli.gui(
        InstructionIDInfo,
        NewColumn,
        ImageView,
        FaceGridWorldDisplay,
        NewColumn,
        NLTKFrame,
        NewColumn,
        ViewDescriptionList,
        SensationList,
        ActionConditionList,
        ActionList,
        NewColumn,
    )
    return 0
def run(Executor, Robot, Routes, Pomdp, map_dir='Maps', Instructions=False):
    ActionQ = Queue.Queue(1)
    ObservationQ = Queue.Queue(1)
    if Instructions:
        instructionQ = InstructionIDEntry.instructionQ = Queue.Queue(20)
        controls = (
            InstructionIDInfo,
            NewColumn,
        )
    else:
        Executor.instructionQ = InstructionNav.instructionQ = \
            RouteIDEntry.instructionQ = Queue.Queue(20)
        controls = (
            InstructionNav,
            RouteIDInfo,
            NewColumn,
        )

    pomdpProxy = QueuePOMDPProxy(ActionQ, ObservationQ, str2meaning, False)
    Pomdp.map_dir = map_dir
    pomdpProxy.setPOMDP(Pomdp)
    Executor.setRobot(Robot(pomdpProxy, Pomdp.name))

    rli = LoggingRLI(name='WheelchairAgent', rename_old_data=False)
    rli.init(
        AgentProxy(ActionQ, ObservationQ),
        MarkovLocPOMDPWorld(pomdp=Pomdp, crumbs=True,
                            recolor_crumbs_on_pose_set=True))

    # I couldn't find where these values were set in Matt's code, so they are
    # set here.  This is not ideal, because they eventually get overwritten
    # somewhere else.
    rli.agent.new_id = True
    rli.agent.StartPlace = Executor.Start
    rli.agent.DestPlace = Executor.Target

    thread.start_new_thread(Executor.run, (Routes,))
    guiColumns = controls + (
        ImageView,
        FaceGridWorldDisplay,
        NewColumn,
        SensationList,
        ActionList,
        NewColumn,
    )
    rli.gui(*guiColumns)
    return 0
class TestTDGridWorld(unittest.TestCase):
    '''
    An abstract test class for RL agent classes; assumes subclasses
    will define self.agent to be various kinds of agents.
    '''
    grid = ['S..',
            '...',
            '..G']

    correct_reward = -3
    correct_length = 4
    num_episodes = 300

    def setUp(self):
        ParameterizedObject.print_level = WARNING
        TDAgent.action_selection = 'epsilon_greedy'
        TDAgent.initial_epsilon = 1.0
        TDAgent.min_epsilon = 0.0

        self.env = gridworld.GridWorld(grid=self.grid)
        self.setUp_agent()
        self.rli = LoggingRLI(name=self.__class__.__name__)
        self.rli.init(self.agent, self.env)
        self.rli.episodes(self.num_episodes, 100)

    def testMain(self):
        self.myTestAgent()
        self.assertEqual(self.rli.episode_data['length'][-1],
                         self.correct_length)
        self.assertEqual(self.rli.episode_data['reward'][-1],
                         self.correct_reward)

        # Check that an agent restored from a pickle reaches the goal in the
        # same number of steps as before.
        steps = self.rli.episode_data['length'][-1]
        self.agent.sim = None
        name = tmpnam()
        pkl.save(self.agent, name)
        new_agent = pkl.load(name)
        new_agent.sim = self.rli
        self.rli.agent = new_agent
        self.rli.episodes(2, 100)
        self.assertEqual(steps, self.rli.episode_data['length'][-1])

    def myTestAgent(self):
        pass
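# A hypothetical concrete subclass, sketched to show how the abstract test
# above is meant to be specialized: setUp_agent must supply self.agent before
# setUp runs the episodes.  TabularTDAgent and its constructor arguments are
# assumptions borrowed from the other snippets here, not part of the original
# test file.
class TestTabularTDGridWorld(TestTDGridWorld):
    def setUp_agent(self):
        self.agent = TabularTDAgent(actions=self.env.actions,
                                    num_features=self.env.num_states)

if __name__ == '__main__':
    unittest.main()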
################################################
# Run an interaction between the agent and environment.
#

# Instantiate an agent and an environment.
agent = SOMTestAgent()
env = SOMTestEnvironment()

# Instantiate a Reinforcement Learning Interface.  An RLI controls the
# interaction between agent and environment, passing sensation and
# reward from the environment to the agent, and actions from the agent
# to the environment.  In this experiment, the actions and reward are
# meaningless, and only the sensations, 2D vectors, are important.
#
# The LoggingRLI class includes variable logging and GUI capabilities.
rli = LoggingRLI(name='GNGvSOM_experiment')

# Init the RLI.
rli.init(agent, env)

# Run the RLI gui with two components, a SOM display and a GNG
# display.  The RLI gui takes a list of functions that take two
# parameters, the RLI's GUI frame (root) and the rli object (rli), and
# return instances of Tkinter.Frame that can be packed into the RLI's
# GUI frame.
rli.gui(lambda root, rli: SOM2DDisplay(root, rli.agent.som),
        lambda root, rli: GNG2DDisplay(root, gng=rli.agent.gng))
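# A hypothetical extra display component, sketched only to illustrate the
# (root, rli) -> Tkinter.Frame contract described in the comment above; the
# class name and label text are illustrations, not part of the original
# experiment.
import Tkinter

class AgentInfoDisplay(Tkinter.Frame):
    def __init__(self, root, rli):
        Tkinter.Frame.__init__(self, root)
        # Show which agent class this RLI is driving.
        Tkinter.Label(self, text='Agent: ' + rli.agent.__class__.__name__).pack()

# It could be added as a third component in the rli.gui(...) call above:
# rli.gui(..., lambda root, rli: AgentInfoDisplay(root, rli))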