def main():
    """Build a four-room MDP and a Q-learning agent, then run one visualization.

    The visualization mode is taken from ``parse_args()``. When that call
    yields a falsy value (e.g. ``None`` or ``""``) the "learning" mode is
    used, which was the mode previously hard-coded here.

    Recognized modes and what each one runs:
        "value"       -- ``mdp.visualize_value()``
        "policy"      -- value iteration, then ``mdp.visualize_policy(policy)``
        "agent"       -- ``run_single_agent_on_mdp`` for 500 episodes of 200
                         steps, then ``mdp.visualize_agent(ql_agent)``
        "learning"    -- ``mdp.visualize_learning(ql_agent)``
        "interactive" -- ``mdp.visualize_interaction()``

    Raises:
        ValueError: If the selected mode is not one of the modes above.
    """
    # Setup MDP, Agents.
    mdp = FourRoomMDP(11, 11, goal_locs=[(11, 11)], gamma=0.9, step_cost=0.0)
    ql_agent = QLearningAgent(mdp.get_actions(), epsilon=0.2, alpha=0.4)

    # Choose viz type. The parsed value used to be overwritten unconditionally
    # with "learning", which made every other branch unreachable.
    # NOTE(review): assumes parse_args() returns the mode as a string (or a
    # falsy value when nothing was supplied) -- confirm against its definition.
    viz = parse_args() or "learning"

    if viz == "value":
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy: solve the MDP with value iteration first.
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # Solve problem and show agent interaction.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        # Run experiment and make plot.
        mdp.visualize_learning(ql_agent)
    elif viz == "interactive":
        mdp.visualize_interaction()
    else:
        # Fail loudly rather than silently doing nothing on a typo.
        raise ValueError(
            "Unknown visualization type %r; expected one of: value, policy, "
            "agent, learning, interactive" % (viz,)
        )
# Example #2
# 0
    pass

# Put the directory one level above the current working directory at the
# front of the module search path, ahead of the imports that follow.
_cwd = os.getcwd()
parent_dir = os.path.abspath(os.path.join(_cwd, os.pardir))
sys.path.insert(0, parent_dir)

from simple_rl.agents import QLearningAgent, RandomAgent, RMaxAgent
from simple_rl.planning import ValueIteration
from simple_rl.tasks import GridWorldMDP
from simple_rl.run_experiments import run_agents_on_mdp

# # Setup MDP.
# mdp = GridWorldMDP(width=6, height=6, init_loc=(1, 1), goal_locs=[(6, 6)])
#
# # Setup Agents.
# ql_agent = QLearningAgent(actions=mdp.get_actions())
# rand_agent = RandomAgent(actions=mdp.get_actions())
# rmax_agent = RMaxAgent(actions=mdp.get_actions(), horizon=3, s_a_threshold=1)
#
# # Run experiment and make plot.
# run_agents_on_mdp([ql_agent, rand_agent, rmax_agent], mdp, instances=5, episodes=100, steps=40, reset_at_terminal=True,
#                   verbose=False)

from simple_rl.tasks import FourRoomMDP
from simple_rl.tasks.grid_world import grid_visualizer

# Two goal cells for the four-room task.
goal_cells = [(9, 9), (5, 4)]

# First two positional arguments are presumably the grid width and height
# (TODO confirm against FourRoomMDP's signature).
four_room_mdp = FourRoomMDP(9, 9, goal_locs=goal_cells, gamma=0.95)

# Only the interaction visualization is run; the alternatives stay disabled.
# four_room_mdp.visualize_value()
# four_room_mdp.visualize_policy()
four_room_mdp.visualize_interaction()