Ejemplo n.º 1
0
 def test_rollout_from_node_with_horizon_2(self):
     """Check the value and the history of a two-transition rollout.

     Two transitions are queued on the model; a single rollout started
     from node 3 with a horizon of two transitions is expected to be
     worth 20 and to leave exactly two entries in the model history.
     """
     horizon = NTransitionsHorizon(2)
     self.model.transitions = [(1, None, 11.), (2, None, 10.)]
     rollout_value = self.tree._one_rollout_from_node(3, horizon)
     self.assertEqual(rollout_value, 20)
     history = self.model.transitions_history
     self.assertEqual(len(history), 2)
     # Field 1 of each entry must be 3 (the start node) and then 1;
     # presumably the state each step was sampled from -- TODO confirm.
     for step, expected in enumerate((3, 1)):
         self.assertEqual(history[step][1], expected)
Ejemplo n.º 2
0
 def test_rollout_from_node_with_horizon_1_is_reward(self):
     """Check that a one-transition rollout is worth the queued reward.

     A single transition carrying the value 3.2 is queued; the rollout
     from node 2 is expected to return that value and to record exactly
     one history entry whose field 1 equals 2.
     """
     reward = 3.2
     horizon = NTransitionsHorizon(1)
     self.model.transitions = [(1, 0, reward)]
     rollout_value = self.tree._one_rollout_from_node(2, horizon)
     self.assertEqual(rollout_value, reward)
     history = self.model.transitions_history
     self.assertEqual(len(history), 1)
     self.assertEqual(history[0][1], 2)
Ejemplo n.º 3
0
 def test_rollout_from_node_multiple_rollouts(self):
     """Check that ``rollout_from_node`` runs ``rollout_it`` rollouts.

     With ``rollout_it`` set to 10 and ten identical transitions queued
     on the model, one call on the root with a one-transition horizon
     is expected to record ten transitions, while ``n_simulations`` on
     the root is expected to be 1.
     """
     n_rollouts = 10
     self.tree.rollout_it = n_rollouts
     # One queued transition per rollout.
     self.model.transitions = [(1, 1, 11.)] * n_rollouts
     self.tree.rollout_from_node(self.tree.root, NTransitionsHorizon(n=1))
     self.assertEqual(len(self.model.transitions_history), n_rollouts)
     self.assertEqual(self.tree.root.n_simulations, 1)
Ejemplo n.º 4
0
 def test_simulate_from_node_with_horizon_1(self):
     """Check one simulation from the root with a one-transition horizon.

     After the simulation the model must have recorded one transition,
     the tree must print as a single branch and the root must report
     one simulation.
     """
     self.model.transitions = [(1, 1, 11.)]
     successor = np.zeros(10)
     successor[1] = 1.
     self.model.successors = [successor]
     self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=1)
     self.tree.simulate_from_node(self.tree.root)
     history = self.model.transitions_history
     self.assertEqual(len(history), 1)
     # Field 0 of the recorded transition labels the root's only child in
     # the tree's string form (presumably the chosen action -- confirm).
     first_field = history[0][0]
     expected_repr = "[{}: [1: []]]".format(first_field)
     root = self.tree.root
     self.assertEqual(str(root), expected_repr)
     self.assertEqual(root.n_simulations, 1)
Ejemplo n.º 5
0
 def test_simulate_from_node_with_horizon_1(self):
     """Check one simulation from the root with a one-transition horizon.

     After the simulation the model must have recorded one transition,
     the tree must print as a single branch and the root must report
     one simulation.
     """
     self.model.transitions = [(1, 1, 11.)]
     successor = np.zeros(10)
     successor[1] = 1.
     self.model.successors = [successor]
     self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=1)
     self.tree.simulate_from_node(self.tree.root)
     history = self.model.transitions_history
     self.assertEqual(len(history), 1)
     # Field 0 of the recorded transition labels the root's only child in
     # the tree's string form (presumably the chosen action -- confirm).
     first_field = history[0][0]
     expected_repr = "[{}: [1: []]]".format(first_field)
     root = self.tree.root
     self.assertEqual(str(root), expected_repr)
     self.assertEqual(root.n_simulations, 1)
Ejemplo n.º 6
0
 def test_horizon_generator_is_one(self):
     """Check what ``tree.horizon_gen()`` yields for two policy setups.

     The fixture policy must produce a ``Horizon`` with ``n == 5``; a
     policy built with an explicit ``NTransitionsHorizon`` generator for
     13 transitions must produce an ``NTransitionsHorizon`` with
     ``n == 13``.
     """
     # Default, from int
     default_horizon = self.policy.tree.horizon_gen()
     self.assertIsInstance(default_horizon, Horizon)
     self.assertEqual(default_horizon.n, 5)
     # Explicit generator
     explicit_gen = NTransitionsHorizon.generator(self.pomdp, 13)
     runner = POMCPPolicyRunner(self.pomdp, iterations=20,
                                horizon=explicit_gen)
     explicit_horizon = runner.tree.horizon_gen()
     self.assertIsInstance(explicit_horizon, NTransitionsHorizon)
     self.assertEqual(explicit_horizon.n, 13)
Ejemplo n.º 7
0
    def test_simulate_from_node_with_horizon_3(self):
        """Run three 3-transition simulations and check the tree each time.

        The discount is set to 1 and the root's ``get_best_action`` is
        replaced so that it always answers 1. After every simulation the
        test checks the number of recorded transitions, the string form
        of the tree, the root's simulation count and the
        ``_avg.total_value`` stored on the nodes along the visited paths.
        """
        self.model.discount = 1.
        start_belief = np.zeros(10)
        start_belief[1] = 1.

        def always_one(exploration=None, relative_exploration=None):
            return 1

        def total_at(path):
            # Total value stored on the node reached through ``path``.
            return self.tree.get_node(path)._avg.total_value

        def simulate(transitions):
            # Queue the scripted transitions, simulate once from the root
            # and check that three transitions were recorded.
            self.model.transitions = transitions
            self.model.successors = [start_belief]
            self.tree.simulate_from_node(self.tree.root)
            self.assertEqual(len(self.model.transitions_history), 3)

        self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=3)
        # Always use action "1" at first
        self.tree.root.get_best_action = always_one

        # First run
        simulate([(1, 1, 11.), (2, 0, 13.), (4, 0, 0.)])
        self.assertEqual(str(self.tree.root), "[1: [1: []]]")
        self.assertEqual(self.tree.root.n_simulations, 1)
        self.assertEqual(self.tree.root._avg.total_value, 24.)
        for path, expected in (([1], 24.), ([1, 1], 13.)):
            self.assertEqual(total_at(path), expected)

        # Second run
        self.model.reset()
        simulate([(1, 1, 3.), (2, 0, 1.), (4, 0, 5.)])
        # Field 0 of the second recorded transition labels the new subtree
        # (presumably the action picked at depth two -- confirm).
        step2_key = self.model.transitions_history[1][0]
        self.assertEqual(str(self.tree.root),
                         "[1: [1: [{}: [0: []]]]]".format(step2_key))
        self.assertEqual(self.tree.root.n_simulations, 2)
        self.assertEqual(self.tree.root._avg.total_value, 33.)
        second_run_totals = (
            ([1], 33.),
            ([1, 1], 19.),
            ([1, 1, step2_key], 6.),
            ([1, 1, step2_key, 0], 5.),
        )
        for path, expected in second_run_totals:
            self.assertEqual(total_at(path), expected)

        # Third run
        self.model.reset()
        simulate([(1, 0, 2.), (2, 1, 3.), (4, 0, 4.)])
        self.assertEqual(str(self.tree.root),
                         "[1: [0: [], 1: [{}: [0: []]]]]".format(step2_key))
        self.assertEqual(self.tree.root.n_simulations, 3)
        self.assertEqual(self.tree.root._avg.total_value, 42.)
        third_run_totals = (
            ([1], 42.),
            ([1, 1], 19.),
            ([1, 1, step2_key], 6.),
            ([1, 1, step2_key, 0], 5.),
            ([1, 0], 7.),
        )
        for path, expected in third_run_totals:
            self.assertEqual(total_at(path), expected)
Ejemplo n.º 8
0
 def test_horizon_generator_is_one(self):
     """Check what ``tree.horizon_gen()`` yields for two policy setups.

     The fixture policy must produce a ``Horizon`` with ``n == 5``; a
     policy built with an explicit ``NTransitionsHorizon`` generator for
     13 transitions must produce an ``NTransitionsHorizon`` with
     ``n == 13``.
     """
     # Default, from int
     default_horizon = self.policy.tree.horizon_gen()
     self.assertIsInstance(default_horizon, Horizon)
     self.assertEqual(default_horizon.n, 5)
     # Explicit generator
     explicit_gen = NTransitionsHorizon.generator(self.pomdp, 13)
     runner = POMCPPolicyRunner(self.pomdp, iterations=20,
                                horizon=explicit_gen)
     explicit_horizon = runner.tree.horizon_gen()
     self.assertIsInstance(explicit_horizon, NTransitionsHorizon)
     self.assertEqual(explicit_horizon.n, 13)
Ejemplo n.º 9
0
    def test_simulate_from_node_with_horizon_3(self):
        """Run three 3-transition simulations and check the tree each time.

        The discount is set to 1 and the root's ``get_best_action`` is
        replaced so that it always answers 1. After every simulation the
        test checks the number of recorded transitions, the string form
        of the tree, the root's simulation count and the
        ``_avg.total_value`` stored on the nodes along the visited paths.
        """
        self.model.discount = 1.
        start_belief = np.zeros(10)
        start_belief[1] = 1.

        def always_one(exploration=None, relative_exploration=None):
            return 1

        def total_at(path):
            # Total value stored on the node reached through ``path``.
            return self.tree.get_node(path)._avg.total_value

        def simulate(transitions):
            # Queue the scripted transitions, simulate once from the root
            # and check that three transitions were recorded.
            self.model.transitions = transitions
            self.model.successors = [start_belief]
            self.tree.simulate_from_node(self.tree.root)
            self.assertEqual(len(self.model.transitions_history), 3)

        self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=3)
        # Always use action "1" at first
        self.tree.root.get_best_action = always_one

        # First run
        simulate([(1, 1, 11.), (2, 0, 13.), (4, 0, 0.)])
        self.assertEqual(str(self.tree.root), "[1: [1: []]]")
        self.assertEqual(self.tree.root.n_simulations, 1)
        self.assertEqual(self.tree.root._avg.total_value, 24.)
        for path, expected in (([1], 24.), ([1, 1], 13.)):
            self.assertEqual(total_at(path), expected)

        # Second run
        self.model.reset()
        simulate([(1, 1, 3.), (2, 0, 1.), (4, 0, 5.)])
        # Field 0 of the second recorded transition labels the new subtree
        # (presumably the action picked at depth two -- confirm).
        step2_key = self.model.transitions_history[1][0]
        self.assertEqual(str(self.tree.root),
                         "[1: [1: [{}: [0: []]]]]".format(step2_key))
        self.assertEqual(self.tree.root.n_simulations, 2)
        self.assertEqual(self.tree.root._avg.total_value, 33.)
        second_run_totals = (
            ([1], 33.),
            ([1, 1], 19.),
            ([1, 1, step2_key], 6.),
            ([1, 1, step2_key, 0], 5.),
        )
        for path, expected in second_run_totals:
            self.assertEqual(total_at(path), expected)

        # Third run
        self.model.reset()
        simulate([(1, 0, 2.), (2, 1, 3.), (4, 0, 4.)])
        self.assertEqual(str(self.tree.root),
                         "[1: [0: [], 1: [{}: [0: []]]]]".format(step2_key))
        self.assertEqual(self.tree.root.n_simulations, 3)
        self.assertEqual(self.tree.root._avg.total_value, 42.)
        third_run_totals = (
            ([1], 42.),
            ([1, 1], 19.),
            ([1, 1, step2_key], 6.),
            ([1, 1, step2_key, 0], 5.),
            ([1, 0], 7.),
        )
        for path, expected in third_run_totals:
            self.assertEqual(total_at(path), expected)
Ejemplo n.º 10
0
 def test_simulate_from_node_with_horizon_0(self):
     """Check a simulation from the root with a zero-transition horizon.

     No transition may be recorded, the tree must print as ``[]`` and
     the root must report zero simulations.
     """
     zero_horizon_gen = NTransitionsHorizon.generator(self.model, n=0)
     self.tree.horizon_gen = zero_horizon_gen
     self.tree.simulate_from_node(self.tree.root)
     self.assertEqual(len(self.model.transitions_history), 0)
     root = self.tree.root
     self.assertEqual(str(root), "[]")
     self.assertEqual(root.n_simulations, 0)
Ejemplo n.º 11
0
 def test_simulate_from_node_with_horizon_0(self):
     """Check a simulation from the root with a zero-transition horizon.

     No transition may be recorded, the tree must print as ``[]`` and
     the root must report zero simulations.
     """
     zero_horizon_gen = NTransitionsHorizon.generator(self.model, n=0)
     self.tree.horizon_gen = zero_horizon_gen
     self.tree.simulate_from_node(self.tree.root)
     self.assertEqual(len(self.model.transitions_history), 0)
     root = self.tree.root
     self.assertEqual(str(root), "[]")
     self.assertEqual(root.n_simulations, 0)
Ejemplo n.º 12
0
 def test_rollout_from_node_with_horizon_0_is_0(self):
     """Check that a zero-transition rollout is worth 0.

     The rollout from node 1 must return 0 and the model must not have
     recorded any transition.
     """
     horizon = NTransitionsHorizon(0)
     rollout_value = self.tree._one_rollout_from_node(1, horizon)
     self.assertEqual(rollout_value, 0)
     self.assertEqual(len(self.model.transitions_history), 0)