def main():

    # Setup MDP, Agents.
    mdp = FourRoomMDP(11, 11, goal_locs=[(11, 11)], gamma=0.9, step_cost=0.0)
    ql_agent = QLearningAgent(mdp.get_actions(), epsilon=0.2, alpha=0.4)
    viz = parse_args()

    # Choose viz type.
    viz = "learning"

    if viz == "value":
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # Solve problem and show agent interaction.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        # Run experiment and make plot.
        mdp.visualize_learning(ql_agent)
    elif viz == "interactive":
        mdp.visualize_interaction()
예제 #2
0
def main():
    # Setup MDP, Agents.
    mdp = FourRoomMDP(5, 5, goal_locs=[(5, 5)], gamma=0.99, step_cost=0.01)
    # mdp = make_grid_world_from_file("octogrid.txt", num_goals=12, randomize=False)
    ql_agent = QLearningAgent(mdp.get_actions(), epsilon=0.2, alpha=0.5) 
    rm_agent = RMaxAgent(mdp.get_actions())
    viz = parse_args()
    viz = "learning"

    if viz == "value":
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy
        value_iter = ValueIteration(mdp)
        value_iter.run_vi()
        policy = value_iter.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # Solve problem and show agent interaction.
        print("\n", str(ql_agent), "interacting with", str(mdp))
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)
    elif viz == "learning":
        # Run experiment and make plot.
        mdp.visualize_learning(ql_agent)
예제 #3
0
def main():
    # Setup MDP, Agents.
    mdp = FourRoomMDP(9, 9, goal_locs=[(9, 9)], gamma=0.95)
    ql_agent = QLearnerAgent(mdp.get_actions())

    viz = parse_args()

    if viz == "value":
        # Run experiment and make plot.
        mdp.visualize_value()
    elif viz == "policy":
        # Viz policy
        vi = ValueIteration(mdp)
        vi.run_vi()
        policy = vi.policy
        mdp.visualize_policy(policy)
    elif viz == "agent":
        # Solve problem and show agent interaction.
        print "\n", str(ql_agent), "interacting with", str(mdp)
        run_single_agent_on_mdp(ql_agent, mdp, episodes=500, steps=200)
        mdp.visualize_agent(ql_agent)