Example no. 1
    def setUp(self):
        ParameterizedObject.print_level = WARNING

        TDAgent.action_selection = 'epsilon_greedy'
        TDAgent.initial_epsilon = 1.0
        TDAgent.min_epsilon = 0.0

        self.env = gridworld.GridWorld(grid=self.grid)
        self.setUp_agent()
        self.rli = LoggingRLI(name=self.__class__.__name__)
        self.rli.init(self.agent, self.env)
        self.rli.episodes(self.num_episodes, 100)
Example no. 2
def main():
    # Make a pendulum environment.
    env = PendulumEZ()


    # Make a tile-coding agent: the continuous pendulum state is mapped onto
    # uniform tilings, and the agent does linear updating over the tile features.
    agent = UniformTiledAgent(actions=env.actions,
                              num_tilings=8,
                              num_features=1024,
                              tile_width=2*pi/16)

    # set up the reinforcement-learning interface with agent and env
    rli = LoggingRLI(name="Pendulum Demo",
                     rename_old_data=False)

    rli.add_step_variable('last value', agent_last_value)
    rli.init(agent, env)


    # Run the rli GUI with a PendulumGUI widget, a widget that plots the
    # length of each episode, and a widget that plots the agent's last Q value.
    rli.gui(PendulumGUI,
            lambda root,rli: VarPlotter(root,rli=rli,
                                        var='length',
                                        source='episode_data',
                                        title='Steps to Reach Goal'),
            lambda root,rli: VarPlotter(root,rli=rli,
                                        var='last value',
                                        source='step_data',
                                        title='Q Value'))
    #rli.episodes(2,100000)

    return rli
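
Both this example and the next register agent_last_value as a step variable without showing its definition. A minimal sketch of such a callback, assuming the step-variable function receives the RLI object and that the agent exposes its most recent value estimate as an attribute (both the signature and the attribute name are assumptions, not something shown in these examples):

def agent_last_value(rli):
    # Hypothetical step-variable callback: return the agent's most recent
    # value estimate so LoggingRLI can record it at each step.  The
    # .last_value attribute name is an assumption; adapt it to whatever the
    # agent class in use actually stores.
    return rli.agent.last_value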
Example no. 3
def main():
    # Make a grid environment with the given grid.
    env = GridWorld()

    # Make a tabular agent.
    agent = TabularTDAgent(actions=env.actions)

    # set up the reinforcement-learning interface with agent and env
    rli = LoggingRLI(name="Gridworld Demo", rename_old_data=False)

    # add a step variable to record the value estimate of the last step
    rli.add_step_variable('last_value', dtype=float, fn=agent_last_value)

    # init the RLI with the agent and environment
    rli.init(agent, env)

    # Run the rli GUI with a GridWorldDisplay widget, a widget that
    # plots the length of each episode, and a widget that plots the
    # agent's estimate of the Q-value of the last state.
    rli.gui(
        GridWorldDisplay, lambda root, r: VarPlotter(
            root, var='length', title='Steps to Reach Goal', rli=r),
        lambda root, r: VarPlotter(
            root, var='last_value', title='Q Value', rli=r, source='step_data')
    )

    #rli.episodes(100,10000)

    return rli
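
The commented-out rli.episodes(100,10000) call hints at a headless alternative to the GUI. A sketch of such a batch variant, assuming (as in the test class of Example no. 7) that the two arguments are the number of episodes and a per-episode step cap:

def main_batch():
    # Same setup as main() above, but run episodes without the GUI and save
    # the logged episode data, as Example no. 4 does with pkl.save.
    env = GridWorld()
    agent = TabularTDAgent(actions=env.actions)
    rli = LoggingRLI(name="Gridworld Batch", rename_old_data=False)
    rli.add_step_variable('last_value', dtype=float, fn=agent_last_value)
    rli.init(agent, env)
    rli.episodes(100, 10000)   # assumed: 100 episodes, capped at 10000 steps
    pkl.save(rli.episode_data, 'gridworld_batch.pkl.gz')
    return rli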
Example no. 4
    def proc(self, N, init_name, init_fn, grid_name, grid, num_episodes):

        # set up the (optimistic/pessimistic) agent parameters
        init_fn()

        # Make a grid environment with the given grid.
        env = GridWorld(grid=grid)

        # Make a tabular agent.
        agent = TabularTDAgent(actions=env.actions,
                               num_features=env.num_states)

        # set up the reinforcement-learning interface with agent and env
        self.rli = rli = LoggingRLI(name=self.filestem(N),
                                    rename_old_data=False)
        rli.init(agent, env)

        # Run the rli GUI with a GridWorldDisplay widget and a widget that
        # plots the length of each episode.
        if self.gui:
            rli.gui(GridWorldDisplay, EpisodeVarPlotter('length'))
        else:
            rli.episodes(num_episodes, 10000)

        # save the data
        pkl.save(rli.episode_data, self.filestem(N) + '.pkl.gz')
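
proc() receives an init_fn that configures the optimistic or pessimistic variant before the agent is built. The functions below are a purely hypothetical sketch of that pattern, mirroring how Example no. 1 sets TDAgent class attributes; the initial_q attribute name is an assumption, not something shown in these examples:

def optimistic_init():
    # Hypothetical: start with high value estimates to encourage exploration.
    TabularTDAgent.initial_q = 1.0    # attribute name assumed

def pessimistic_init():
    # Hypothetical: start with low value estimates.
    TabularTDAgent.initial_q = -1.0   # attribute name assumed

# e.g. self.proc(0, 'optimistic', optimistic_init, 'grid', ['S..', '...', '..G'], 300)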
Example no. 5
def run(executor,
        follower,
        POMDPs,
        Robot,
        Recognizer,
        Instructions,
        pomdp_name='Grid'):
    ActionQ = Queue.Queue(1)
    ObservationQ = Queue.Queue(1)
    follower.NLUQueue = NLTKFrame.NLUQueue = executor.NLUQueue = Queue.Queue(
        25)
    instructionQ = InstructionIDEntry.instructionQ = Queue.Queue(20)

    pomdpProxy = QueuePOMDPProxy(ActionQ, ObservationQ, str2meaning, False)
    pomdp = POMDPs[pomdp_name]
    pomdpProxy.setPOMDP(pomdp)
    executor.setRobot(Robot(pomdpProxy, pomdp_name, Recognizer))
    executor.robot.NLUQueue = executor.NLUQueue

    rli = LoggingRLI(name='InstructionFollower',
                     rename_old_data=False)  #, gui_button_orient='vertical'
    rli.init(FollowerAgentProxy(ActionQ, ObservationQ),
             MarkovLocPOMDPWorld(pomdp=pomdp, crumbs=True))
    Instructions = [
        i for i in Instructions if '_%s%d_' % (pomdp_name, pomdp.PosSet) in i
    ]
    CorpusDirectors = {}
    for ri in Instructions:
        CorpusDirectors[ri.split('-')[1].split('_')[0]] = True
    CorpusDirectors = CorpusDirectors.keys()
    CorpusDirectors.sort()
    DirectorRow.button_texts = CorpusDirectors + ['INPUT']
    EnvRow.button_texts = ['%s%d' % (pomdp_name, pomdp.PosSet)]
    thread.start_new_thread(follower.run, (Instructions, instructionQ))
    rli.gui(
        InstructionIDInfo,
        NewColumn,
        ImageView,
        FaceGridWorldDisplay,
        NewColumn,
        NLTKFrame,
        NewColumn,
        ViewDescriptionList,
        SensationList,
        ActionConditionList,
        ActionList,
        NewColumn,
    )
    return 0
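
The instruction filtering and director extraction above are easier to see on a concrete string. The ID below is purely invented to illustrate the split logic, not the real corpus naming scheme:

# Hypothetical instruction ID: prefix, director tag, map name plus PosSet, index.
ri = 'Inst-EDA_Grid0_1'
'_%s%d_' % ('Grid', 0) in ri              # True, so ri passes the filter
ri.split('-')[1].split('_')[0]            # -> 'EDA', the director tag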
Example no. 6
def run(Executor, Robot, Routes, Pomdp, map_dir='Maps', Instructions=False):
    ActionQ = Queue.Queue(1)
    ObservationQ = Queue.Queue(1)
    if Instructions:
        instructionQ = InstructionIDEntry.instructionQ = Queue.Queue(20)
        controls = (
            InstructionIDInfo,
            NewColumn,
        )
    else:
        Executor.instructionQ = InstructionNav.instructionQ = RouteIDEntry.instructionQ = Queue.Queue(
            20)
        controls = (
            InstructionNav,
            RouteIDInfo,
            NewColumn,
        )

    pomdpProxy = QueuePOMDPProxy(ActionQ, ObservationQ, str2meaning, False)
    Pomdp.map_dir = map_dir
    pomdpProxy.setPOMDP(Pomdp)
    Executor.setRobot(Robot(pomdpProxy, Pomdp.name))

    rli = LoggingRLI(name='WheelchairAgent', rename_old_data=False)
    rli.init(
        AgentProxy(ActionQ, ObservationQ),
        MarkovLocPOMDPWorld(pomdp=Pomdp,
                            crumbs=True,
                            recolor_crumbs_on_pose_set=True))

    # NOTE: the original assignment of these values in Matt's code could not
    # be located, so they are set here.  This is fragile: the values are
    # eventually overwritten somewhere else.
    rli.agent.new_id = True
    rli.agent.StartPlace = Executor.Start
    rli.agent.DestPlace = Executor.Target

    thread.start_new_thread(Executor.run, (Routes, ))

    guiColumns = controls + (
        ImageView,
        FaceGridWorldDisplay,
        NewColumn,
        SensationList,
        ActionList,
        NewColumn,
    )

    rli.gui(*guiColumns)
    return 0
Example no. 7
class TestTDGridWorld(unittest.TestCase):
    '''
    An abstract test class for RL agent classes; subclasses are expected
    to define self.agent (in setUp_agent) to be various kinds of agents.
    '''
    grid = ['S..', '...', '..G']

    correct_reward = -3
    correct_length = 4
    num_episodes = 300

    def setUp(self):
        ParameterizedObject.print_level = WARNING

        TDAgent.action_selection = 'epsilon_greedy'
        TDAgent.initial_epsilon = 1.0
        TDAgent.min_epsilon = 0.0

        self.env = gridworld.GridWorld(grid=self.grid)
        self.setUp_agent()
        self.rli = LoggingRLI(name=self.__class__.__name__)
        self.rli.init(self.agent, self.env)
        self.rli.episodes(self.num_episodes, 100)

    def testMain(self):
        self.myTestAgent()

        self.assertEqual(self.rli.episode_data['length'][-1],
                         self.correct_length)
        self.assertEqual(self.rli.episode_data['reward'][-1],
                         self.correct_reward)

        steps = self.rli.episode_data['length'][-1]

        # Check that the agent survives a pickle round trip: detach it from
        # the simulation, save and reload it, then verify that the reloaded
        # agent reproduces the previous episode length.
        self.agent.sim = None
        name = tmpnam()
        pkl.save(self.agent, name)
        new_agent = pkl.load(name)
        new_agent.sim = self.rli
        self.rli.agent = new_agent
        self.rli.episodes(2, 100)
        self.assertEqual(steps, self.rli.episode_data['length'][-1])

    def myTestAgent(self):
        pass
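
TestTDGridWorld never defines setUp_agent, and myTestAgent is a no-op hook. A sketch of what a concrete subclass might look like, with constructor arguments borrowed from Example no. 4 (the subclass itself is hypothetical):

class TestTabularTDGridWorld(TestTDGridWorld):
    '''
    Hypothetical concrete subclass: exercises a TabularTDAgent on the
    3x3 grid defined by the abstract test class above.
    '''

    def setUp_agent(self):
        # Constructor arguments as used in Example no. 4.
        self.agent = TabularTDAgent(actions=self.env.actions,
                                    num_features=self.env.num_states)

    def myTestAgent(self):
        # Agent-specific assertions would go here.
        pass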
Example no. 8

################################################
# Run an interaction between the agent and the environment.
#

# Instantiate an agent and an environment
agent = SOMTestAgent()
env = SOMTestEnvironment()

# Instantiate a Reinforcement Learning Interface.  An RLI controls the
# interaction between agent and environment, passing sensation and
# reward from the environment to the agent, and actions from the agent
# to the environment.  In this experiment, the actions and reward are
# meaningless, and only the sensations, 2D vectors, are important.
#
# The LoggingRLI class includes variable logging and GUI capabilities.

rli = LoggingRLI(name='GNGvSOM_experiment')

# Init the RLI
rli.init(agent, env)

# Run the RLI gui with two components, a SOM display and a GNG
# display.  The RLI gui takes a list of functions that take two
# parameters, the rli's GUI frame (root) and the rli object (rli), and
# return instances of Tkinter.Frame that can be packed into the RLI's
# GUI frame.
#
rli.gui(lambda root, rli: SOM2DDisplay(root, rli.agent.som),
        lambda root, rli: GNG2DDisplay(root, gng=rli.agent.gng))
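
As the comment above says, each GUI component is just a callable taking (root, rli) and returning a Tkinter.Frame. A minimal hand-rolled component of that shape, purely illustrative:

import Tkinter

def title_panel(root, rli):
    # Hypothetical extra component: a frame holding a static label.  Anything
    # with this (root, rli) -> Tkinter.Frame shape can be passed to rli.gui()
    # alongside the SOM and GNG displays above.
    frame = Tkinter.Frame(root)
    Tkinter.Label(frame, text='GNG vs SOM experiment').pack()
    return frame

# rli.gui(lambda root, rli: SOM2DDisplay(root, rli.agent.som),
#         lambda root, rli: GNG2DDisplay(root, gng=rli.agent.gng),
#         title_panel)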