def test_rollout_from_node_with_horizon_2(self):
    """Roll out two scripted transitions from node 3 and check value and history.

    The model is scripted with rewards 11 and 10 and the rollout must
    return 20.
    NOTE(review): 20 matches 11 + 0.9 * 10, so the fixture presumably uses
    a 0.9 discount -- confirm against setUp.
    """
    two_steps = NTransitionsHorizon(2)
    self.model.transitions = [(1, None, 11.), (2, None, 10.)]

    rollout_value = self.tree._one_rollout_from_node(3, two_steps)
    self.assertEqual(rollout_value, 20)

    # Exactly one history entry per transition allowed by the horizon.
    self.assertEqual(len(self.model.transitions_history), 2)
    # Second field of each entry: 3 is the node the rollout started from,
    # 1 is the first element of the first scripted transition.
    self.assertEqual(self.model.transitions_history[0][1], 3)
    self.assertEqual(self.model.transitions_history[1][1], 1)
def test_rollout_from_node_with_horizon_1_is_reward(self):
    """A one-transition rollout must return exactly the scripted reward."""
    reward = 3.2
    single_step = NTransitionsHorizon(1)
    self.model.transitions = [(1, 0, reward)]

    rollout_value = self.tree._one_rollout_from_node(2, single_step)
    self.assertEqual(rollout_value, reward)

    # Only one transition was sampled, and it was requested from node 2.
    self.assertEqual(len(self.model.transitions_history), 1)
    self.assertEqual(self.model.transitions_history[0][1], 2)
def test_rollout_from_node_multiple_rollouts(self):
    """Check that ``rollout_from_node`` runs ``rollout_it`` rollouts.

    With ``rollout_it`` set to 10 and a one-transition horizon, the model
    history must contain 10 entries afterwards, while the root node
    reports a single simulation.
    """
    # Single source of truth for the rollout count, so the scripted
    # transitions and the expected history length cannot drift apart.
    n_rollouts = 10
    self.tree.rollout_it = n_rollouts
    self.model.transitions = [(1, 1, 11.)] * n_rollouts

    self.tree.rollout_from_node(self.tree.root, NTransitionsHorizon(n=1))

    self.assertEqual(len(self.model.transitions_history), n_rollouts)
    self.assertEqual(self.tree.root.n_simulations, 1)
def test_simulate_from_node_with_horizon_1(self):
    """Simulate once from the root with a one-transition horizon.

    After the simulation the tree must contain a single branch and the
    root must report one simulation.
    """
    self.model.transitions = [(1, 1, 11.)]

    # Length-10 vector with all mass on index 1, used as the only successor.
    successor = np.zeros(10)
    successor[1] = 1.
    self.model.successors = [successor]

    self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=1)
    self.tree.simulate_from_node(self.tree.root)

    self.assertEqual(len(self.model.transitions_history), 1)
    # First field of the recorded transition -- presumably the action the
    # tree selected (TODO confirm); it keys the new branch under the root.
    first_field = self.model.transitions_history[0][0]
    expected_repr = "[{}: [1: []]]".format(first_field)
    self.assertEqual(str(self.tree.root), expected_repr)
    self.assertEqual(self.tree.root.n_simulations, 1)
def test_horizon_generator_is_one(self):
    """Check the horizons produced by the policy tree's ``horizon_gen``.

    Covers the fixture policy and a policy built with an explicit
    ``NTransitionsHorizon`` generator.
    NOTE(review): the name says "is_one" but the asserted horizon sizes are
    5 and 13 -- consider renaming.
    """
    # Default, from int
    default_horizon = self.policy.tree.horizon_gen()
    self.assertIsInstance(default_horizon, Horizon)
    self.assertEqual(default_horizon.n, 5)

    # Explicit generator
    explicit_gen = NTransitionsHorizon.generator(self.pomdp, 13)
    runner = POMCPPolicyRunner(self.pomdp, iterations=20,
                               horizon=explicit_gen)
    explicit_horizon = runner.tree.horizon_gen()
    self.assertIsInstance(explicit_horizon, NTransitionsHorizon)
    self.assertEqual(explicit_horizon.n, 13)
def test_simulate_from_node_with_horizon_3(self):
    """Run three simulations with a three-transition horizon and check the tree.

    The discount is set to 1, and the expected totals match plain sums of
    the scripted rewards accumulated along each visited path. After every
    run the test checks the history length, the tree's string form, the
    root's simulation count and the accumulated totals.
    """
    self.model.discount = 1.
    point_belief = np.zeros(10)
    point_belief[1] = 1.

    def always_one(exploration=None, relative_exploration=None):
        return 1

    self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=3)
    # Always use action "1" at first
    self.tree.root.get_best_action = always_one

    def total_at(path):
        # Accumulated value stored on the node reached by `path`.
        return self.tree.get_node(path)._avg.total_value

    # First run
    self.model.transitions = [(1, 1, 11.), (2, 0, 13.), (4, 0, 0.)]
    self.model.successors = [point_belief]
    self.tree.simulate_from_node(self.tree.root)
    self.assertEqual(len(self.model.transitions_history), 3)
    self.assertEqual(str(self.tree.root), "[1: [1: []]]")
    self.assertEqual(self.tree.root.n_simulations, 1)
    self.assertEqual(self.tree.root._avg.total_value, 24.)
    self.assertEqual(total_at([1]), 24.)
    self.assertEqual(total_at([1, 1]), 13.)

    # Second run
    self.model.reset()
    self.model.transitions = [(1, 1, 3.), (2, 0, 1.), (4, 0, 5.)]
    self.model.successors = [point_belief]
    self.tree.simulate_from_node(self.tree.root)
    self.assertEqual(len(self.model.transitions_history), 3)
    # First field of the second recorded transition; it labels the branch
    # added below node [1, 1] in this run.
    a1 = self.model.transitions_history[1][0]
    self.assertEqual(str(self.tree.root),
                     "[1: [1: [{}: [0: []]]]]".format(a1))
    self.assertEqual(self.tree.root.n_simulations, 2)
    self.assertEqual(self.tree.root._avg.total_value, 33.)
    self.assertEqual(total_at([1]), 33.)
    self.assertEqual(total_at([1, 1]), 19.)
    self.assertEqual(total_at([1, 1, a1]), 6.)
    self.assertEqual(total_at([1, 1, a1, 0]), 5.)

    # Third run
    self.model.reset()
    self.model.transitions = [(1, 0, 2.), (2, 1, 3.), (4, 0, 4.)]
    self.model.successors = [point_belief]
    self.tree.simulate_from_node(self.tree.root)
    self.assertEqual(len(self.model.transitions_history), 3)
    self.assertEqual(str(self.tree.root),
                     "[1: [0: [], 1: [{}: [0: []]]]]".format(a1))
    self.assertEqual(self.tree.root.n_simulations, 3)
    self.assertEqual(self.tree.root._avg.total_value, 42.)
    self.assertEqual(total_at([1]), 42.)
    # The [1, 1] subtree was not visited in this run, so its totals are
    # unchanged from the second run.
    self.assertEqual(total_at([1, 1]), 19.)
    self.assertEqual(total_at([1, 1, a1]), 6.)
    self.assertEqual(total_at([1, 1, a1, 0]), 5.)
    self.assertEqual(total_at([1, 0]), 7.)
def test_horizon_generator_is_one(self):
    """Verify what ``policy.tree.horizon_gen()`` yields in two setups.

    First for the fixture policy, then for a runner given an explicit
    ``NTransitionsHorizon`` generator.
    NOTE(review): despite "is_one" in the name, the horizon sizes asserted
    here are 5 and 13.
    """
    # Default, from int
    from_default = self.policy.tree.horizon_gen()
    self.assertIsInstance(from_default, Horizon)
    self.assertEqual(from_default.n, 5)

    # Explicit generator
    custom_policy = POMCPPolicyRunner(
        self.pomdp,
        iterations=20,
        horizon=NTransitionsHorizon.generator(self.pomdp, 13),
    )
    from_generator = custom_policy.tree.horizon_gen()
    self.assertIsInstance(from_generator, NTransitionsHorizon)
    self.assertEqual(from_generator.n, 13)
def test_simulate_from_node_with_horizon_3(self):
    """Simulate three times from the root with a horizon of three transitions.

    The discount is forced to 1, and the expected totals are consistent
    with undiscounted sums of the scripted rewards. Each run is followed by
    checks on the history length, the printed tree, the root's simulation
    count and the per-node totals.
    """
    self.model.discount = 1.
    unit_belief = np.zeros(10)
    unit_belief[1] = 1.

    def pick_one(exploration=None, relative_exploration=None):
        return 1

    self.tree.horizon_gen = NTransitionsHorizon.generator(self.model, n=3)
    # Always use action "1" at first
    self.tree.root.get_best_action = pick_one

    def run_once(scripted):
        # Load the scripted transitions and run one simulation from the root.
        self.model.transitions = scripted
        self.model.successors = [unit_belief]
        self.tree.simulate_from_node(self.tree.root)
        self.assertEqual(len(self.model.transitions_history), 3)

    def assert_totals(expected):
        # `expected` holds (path from root, total value) pairs, checked in order.
        for path, total in expected:
            self.assertEqual(self.tree.get_node(path)._avg.total_value,
                             total)

    # First run
    run_once([(1, 1, 11.), (2, 0, 13.), (4, 0, 0.)])
    self.assertEqual(str(self.tree.root), "[1: [1: []]]")
    self.assertEqual(self.tree.root.n_simulations, 1)
    self.assertEqual(self.tree.root._avg.total_value, 24.)
    assert_totals([([1], 24.), ([1, 1], 13.)])

    # Second run
    self.model.reset()
    run_once([(1, 1, 3.), (2, 0, 1.), (4, 0, 5.)])
    # First field of the second recorded transition; it labels the branch
    # that this run adds below node [1, 1].
    a1 = self.model.transitions_history[1][0]
    self.assertEqual(str(self.tree.root),
                     "[1: [1: [{}: [0: []]]]]".format(a1))
    self.assertEqual(self.tree.root.n_simulations, 2)
    self.assertEqual(self.tree.root._avg.total_value, 33.)
    assert_totals([
        ([1], 33.),
        ([1, 1], 19.),
        ([1, 1, a1], 6.),
        ([1, 1, a1, 0], 5.),
    ])

    # Third run
    self.model.reset()
    run_once([(1, 0, 2.), (2, 1, 3.), (4, 0, 4.)])
    self.assertEqual(str(self.tree.root),
                     "[1: [0: [], 1: [{}: [0: []]]]]".format(a1))
    self.assertEqual(self.tree.root.n_simulations, 3)
    self.assertEqual(self.tree.root._avg.total_value, 42.)
    assert_totals([
        ([1], 42.),
        # The [1, 1] subtree keeps the totals from the second run.
        ([1, 1], 19.),
        ([1, 1, a1], 6.),
        ([1, 1, a1, 0], 5.),
        ([1, 0], 7.),
    ])
def test_simulate_from_node_with_horizon_0(self):
    """With a zero-transition horizon, simulating must leave everything untouched.

    No transition is recorded, the tree prints as empty and the root
    reports no simulation.
    """
    zero_horizon_gen = NTransitionsHorizon.generator(self.model, n=0)
    self.tree.horizon_gen = zero_horizon_gen

    self.tree.simulate_from_node(self.tree.root)

    self.assertEqual(len(self.model.transitions_history), 0)
    self.assertEqual(str(self.tree.root), "[]")
    self.assertEqual(self.tree.root.n_simulations, 0)
def test_rollout_from_node_with_horizon_0_is_0(self):
    """A rollout with a zero-transition horizon returns 0 and samples nothing."""
    empty_horizon = NTransitionsHorizon(0)

    rollout_value = self.tree._one_rollout_from_node(1, empty_horizon)

    self.assertEqual(rollout_value, 0)
    self.assertEqual(len(self.model.transitions_history), 0)