def test_policy_iteration(self):
    grid = self.get_sample_grid()
    env = Environment(grid)
    planner = PolicyIterationPlanner(env)
    result = planner.plan()
    print("Policy Iteration")
    for r in result:
        print(r)
def test_value_iteration(self):
    grid = self.get_sample_grid()
    env = Environment(grid)
    planner = ValueIterationPlanner(env)
    result = planner.plan()
    print("Value Iteration")
    for r in result:
        print(r)
Example #3
def test_run_environment(self):
    grid = self.get_sample_grid()
    env = Environment(grid)
    for i in range(100):
        state = env.reset()  # initialize agent position
        self.assertEqual(state.row, len(env.grid) - 1)
        self.assertEqual(state.column, 0)
        goal = False
        for t in range(10):
            action = random.choice(env.action_space)
            state, reward, done = env.step(action)
            self.assertTrue(0 <= state.row < len(env.grid))
            self.assertTrue(0 <= state.column < len(env.grid[0]))
            if done:
                print("Episode {}: got reward {}, {} timesteps".format(
                    i, reward, t + 1))
                goal = True
                break
        if not goal:
            print("Episode {}: no reward".format(i))
Example #4
    def post(self):
        data = tornado.escape.json_decode(self.request.body)
        grid = data["grid"]
        plan_type = data["plan"]
        move_prob = 0.8  # default value

        try:
            move_prob = float(data["prob"])
        except (KeyError, ValueError, TypeError):
            pass  # keep the default when "prob" is missing or not a number

        env = Environment(grid, move_prob=move_prob)
        if plan_type == "value":
            planner = ValueIterationPlanner(env)
        elif plan_type == "policy":
            planner = PolicyIterationPlanner(env)
        else:
            # reject unknown plan types instead of failing below with a NameError
            self.set_status(400)
            self.write({"error": "plan must be 'value' or 'policy'"})
            return

        result = planner.plan()
        planner.log.append(result)
        self.write({"log": planner.log})