Beispiel #1
0
def main():
    """Assemble the pendulum demo, open its GUI, and return the interface.

    A ``PendulumEZ`` environment and a ``UniformTiledAgent`` are attached
    to a ``LoggingRLI`` named "Pendulum Demo".  A per-step variable called
    'last value' (computed by ``agent_last_value``) is registered before
    the interface is initialised.  The GUI is started with ``PendulumGUI``
    and two ``VarPlotter`` widgets.

    Returns:
        The initialised ``LoggingRLI`` instance.
    """
    # The environment supplies the action set handed to the agent.
    pendulum = PendulumEZ()

    # Uniformly tiled agent: 8 tilings, 1024 features, and a tile width
    # of 2*pi/16.
    learner = UniformTiledAgent(
        actions=pendulum.actions,
        num_tilings=8,
        num_features=1024,
        tile_width=2*pi/16,
    )

    # Logging reinforcement-learning interface tying agent and environment
    # together.
    interface = LoggingRLI(name="Pendulum Demo", rename_old_data=False)

    # The step variable has to be registered before init() is called,
    # mirroring the original statement order.
    interface.add_step_variable('last value', agent_last_value)
    interface.init(learner, pendulum)

    # Settings for the two plots: episode length read from the episode
    # data, and the 'last value' step variable read from the step data.
    episode_plot = dict(var='length',
                        source='episode_data',
                        title='Steps to Reach Goal')
    step_plot = dict(var='last value',
                     source='step_data',
                     title='Q Value')

    # Launch the GUI with the pendulum display plus the two plotters.
    interface.gui(
        PendulumGUI,
        lambda root, rli: VarPlotter(root, rli=rli, **episode_plot),
        lambda root, rli: VarPlotter(root, rli=rli, **step_plot),
    )
    # Disabled in the original; kept for reference:
    #interface.episodes(2,100000)

    return interface
Beispiel #2
0
def main():
    """Assemble the gridworld demo, open its GUI, and return the interface.

    A ``GridWorld`` environment and a ``TabularTDAgent`` are attached to a
    ``LoggingRLI`` named "Gridworld Demo".  A float step variable called
    'last_value' (computed by ``agent_last_value``) is registered before
    the interface is initialised.  The GUI is started with
    ``GridWorldDisplay`` and two ``VarPlotter`` widgets.

    Returns:
        The initialised ``LoggingRLI`` instance.
    """
    # Environment first: the agent is built from its action set.
    world = GridWorld()
    learner = TabularTDAgent(actions=world.actions)

    # Logging reinforcement-learning interface for this demo.
    interface = LoggingRLI(name="Gridworld Demo", rename_old_data=False)

    # Record the value produced by agent_last_value on every step.
    interface.add_step_variable('last_value',
                                dtype=float,
                                fn=agent_last_value)

    # Attach the agent and the environment to the interface.
    interface.init(learner, world)

    # GUI widgets, in display order: the grid display, a plot of the
    # 'length' variable, and a plot of the 'last_value' step variable
    # read from 'step_data'.
    widgets = [
        GridWorldDisplay,
        lambda root, r: VarPlotter(root,
                                   var='length',
                                   title='Steps to Reach Goal',
                                   rli=r),
        lambda root, r: VarPlotter(root,
                                   var='last_value',
                                   title='Q Value',
                                   rli=r,
                                   source='step_data'),
    ]
    interface.gui(*widgets)

    # Disabled in the original; kept for reference:
    #interface.episodes(100,10000)

    return interface