def test_act_tau_0(self):
    """Check ``Agent.act`` with ``tau=0`` against a hand-built root with three edges.

    The MCTS object is replaced by a ``MagicMock`` so that only the statistics
    placed on the root edges drive the result. The assertions pin the returned
    action, the returned value, and the ``pi`` entries at the three action ids
    (each equal to that edge's share of the total visit count).
    """
    settings = {
        'ALPHA': 0.8,
        'CPUCT': 1,
        'EPSILON': 0.2,
        'ACTION_SIZE': 32 * 4 * 7,
        'MCTS_SIMULATIONS': 3
    }
    encoder = ActionEncoder(DirectionResolver())
    agent = Agent(model=None,
                  action_encoder=encoder,
                  state_encoder=StateEncoder(),
                  name='player1',
                  config=settings)

    start = Game()
    root = Node(start)

    # One row per root edge: (prior, action id, visit count N, mean value Q).
    # Row i leads to the position reached by the i-th possible opening move.
    edge_specs = [
        (0.33, 8, 10, 0.2),
        (0.5, 104, 20, 0.5),
        (0.17, 9, 15, 0.3),
    ]
    for move_index, (prior, action_id, visits, q_value) in enumerate(edge_specs):
        child = Node(start.move(start.get_possible_moves()[move_index]))
        edge = Edge(root, child, prior, action_id)
        edge.stats['N'] = visits
        edge.stats['Q'] = q_value
        root.edges.append(edge)

    # Stub out everything that would otherwise run real simulations.
    agent.prepare_mcts_for_next_action = MagicMock()
    fake_mcts = MagicMock()
    fake_mcts.root = root
    fake_mcts.evaluate_leaf.return_value = 0.7
    agent.mcts = fake_mcts
    fake_mcts.move_to_leaf.return_value = (root, 0.5, False, [])

    action, pi, value = agent.act(start, tau=0)

    # Expected action/value match the edge with the largest N (N=20, Q=0.5).
    self.assertEqual(action, [9, 14])
    self.assertEqual(value, 0.5)
    self.assertEqual(pi[8], 10 / (10 + 20 + 15))
    self.assertEqual(pi[9], 15 / (10 + 20 + 15))
    self.assertEqual(pi[8 + 3 * 32], 20 / (10 + 20 + 15))
def test_puct_non_root_node(self):
    """PUCT on a non-root node with unvisited edges selects action 29.

    Seven edges with slightly different priors are attached to the parent;
    none has its statistics modified. Action 29 carries the largest prior
    (0.14805108) and is the expected selection.
    """
    np.random.seed(1)
    # NOTE(review): arguments presumably are (alpha, epsilon, cpuct) — confirm
    # against the PUCT constructor.
    puct = PUCT(0.8, 0.2, 1)
    game = Game()
    parent_node = Node(game)

    # (prior, action id) for the i-th possible move from the start position.
    priors_and_actions = [
        (0.14805108, 29),
        (0.14307857, 35),
        (0.14475949, 37),
        (0.1387326, 38),
        (0.14208362, 39),
        (0.14188258, 40),
        (0.14141211, 41),
    ]
    for move_index, (prior, action_id) in enumerate(priors_and_actions):
        child = Node(game.move(game.get_possible_moves()[move_index]))
        parent_node.edges.append(Edge(parent_node, child, prior, action_id))

    simulation_edge = puct.puct(parent_node, is_root=False)

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(simulation_edge.action, 29)
def test_puct_root_node(self):
    """PUCT on a root node with seven equal-prior edges selects action 35.

    The NumPy RNG is seeded first so that any randomness used by
    ``puct(..., is_root=True)`` is reproducible.
    """
    np.random.seed(1)
    # NOTE(review): arguments presumably are (alpha, epsilon, cpuct) — confirm
    # against the PUCT constructor.
    puct = PUCT(0.8, 0.2, 1)
    game = Game()
    parent_node = Node(game)

    uniform_prior = 0.14285715
    # Action ids 35..41 are assigned to the first seven possible moves in order.
    for move_index, action_id in enumerate(range(35, 42)):
        child = Node(game.move(game.get_possible_moves()[move_index]))
        parent_node.edges.append(
            Edge(parent_node, child, uniform_prior, action_id))

    simulation_edge = puct.puct(parent_node, is_root=True)

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(simulation_edge.action, 35)
def test_id_black(self):
    """``Game.id()`` after the first possible opening move matches a fixed 33-vector.

    Expected layout: entries 0-12 are 1 except entry 8 which is 0, entries
    13-19 are 0, entries 20-31 are 2, and the final entry (32) is 2.
    """
    opening = Game()
    after_first_move = opening.move(opening.get_possible_moves()[0])

    expected = np.zeros(33)
    expected[:13] = 1
    expected[8] = 0
    expected[20:32] = 2
    # NOTE(review): the last slot presumably encodes the side to move — confirm
    # against Game.id().
    expected[32] = 2

    self.assertTrue(np.array_equal(after_first_move.id(), expected))
def test_move_to_leaf(self):
    """``MCTS.move_to_leaf`` follows the edge returned by PUCT to its child.

    The PUCT selector is replaced by a ``MagicMock`` that always returns the
    second root edge, so the leaf is expected to be that edge's target node,
    with value 0 and a falsy ``done`` flag.
    """
    game = Game()
    root = Node(game)
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)
    puct = MagicMock()
    mcts.puct = puct

    # One edge per opening move 0..2; the action id comes from the encoder.
    children = []
    for move_index, prior in enumerate((0.33, 0.34, 0.33)):
        child = Node(game.move(game.get_possible_moves()[move_index]))
        action_id = action_encoder.convert_move_to_action_id(
            game.get_possible_moves()[move_index])
        root.edges.append(Edge(root, child, prior, action_id))
        children.append(child)

    # Force selection of the second edge regardless of statistics.
    puct.puct.return_value = root.edges[1]

    leaf, value, done, _breadcrumbs = mcts.move_to_leaf()

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    # The former `False == 0` / `True == 1` checks were tautologies and are dropped.
    self.assertEqual(leaf, children[1])
    self.assertEqual(value, 0)
    self.assertEqual(done, 0)
def test_backfill(self):
    """``MCTS.backfill`` updates N, W and Q on every edge of the breadcrumb path.

    A two-ply path root -> child1 -> child2 is built by hand. Backfilling a
    leaf value of -1 is expected to raise ``edge2``'s W by 1 (1 -> 2) and to
    lower ``edge1``'s W by 1 (-> -1), with N incremented on both and Q equal
    to W / N.
    """
    game_root = Game()
    root = Node(game_root)
    action_encoder = ActionEncoder(DirectionResolver())

    position1 = game_root.move(game_root.get_possible_moves()[0])
    child1 = Node(position1)
    edge1 = Edge(
        root, child1, 0.3,
        action_encoder.convert_move_to_action_id(
            game_root.get_possible_moves()[0]))

    position2 = position1.move(position1.get_possible_moves()[0])
    child2 = Node(position2)
    # NOTE(review): this action id is derived from a root move rather than the
    # move actually played from position1; it does not affect the assertions
    # below, but looks like a copy-paste — confirm before relying on it.
    edge2 = Edge(
        child1, child2, 0.2,
        action_encoder.convert_move_to_action_id(
            game_root.get_possible_moves()[0]))
    edge2.stats['N'] = 4
    edge2.stats['W'] = 1

    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)

    mcts.backfill(child2, -1, [edge2, edge1])

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(edge2.stats['N'], 5)
    self.assertEqual(edge2.stats['W'], 2)
    self.assertEqual(edge2.stats['Q'], 2 / 5)
    self.assertEqual(edge1.stats['N'], 1)
    self.assertEqual(edge1.stats['W'], -1)
    self.assertEqual(edge1.stats['Q'], -1)
def test_puct_non_root_node_exploration(self):
    """PUCT on a non-root node prefers the rarely visited edge (action 38).

    All edges have N=100 except the one for action 38, which has N=10 and
    also the smallest prior; it is still the expected selection.
    """
    np.random.seed(1)
    game = Game()
    # NOTE(review): arguments presumably are (alpha, epsilon, cpuct) — confirm
    # against the PUCT constructor.
    puct = PUCT(0.8, 0.2, 1)
    parent_node = Node(game)

    # (prior, action id, visit count N) for the i-th possible opening move.
    edge_specs = [
        (0.14805108, 29, 100),
        (0.14307857, 35, 100),
        (0.14475949, 37, 100),
        (0.1387326, 38, 10),
        (0.14208362, 39, 100),
        (0.14188258, 40, 100),
        (0.14141211, 41, 100),
    ]
    for move_index, (prior, action_id, visits) in enumerate(edge_specs):
        child = Node(game.move(game.get_possible_moves()[move_index]))
        edge = Edge(parent_node, child, prior, action_id)
        edge.stats['N'] = visits
        parent_node.edges.append(edge)

    simulation_edge = puct.puct(parent_node, is_root=False)

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(simulation_edge.action, 38)
class TestPossibleMoves(unittest.TestCase):
    """Replays a scripted game and checks the legal-move list before every move."""

    def test_possible_moves(self):
        """Walk through a full scripted game, asserting the possible moves at each ply.

        Each table row is ``(expected possible moves, move to play)``. The
        board is rendered once at the start and once more just before the
        first multi-jump sequence. After the last move no moves remain.
        """
        self.game = Game()
        self.game.render()

        opening_script = [
            ([[9, 13], [9, 14], [10, 14], [10, 15], [11, 15], [11, 16],
              [12, 16]], [10, 14]),
            ([[21, 17], [22, 17], [22, 18], [23, 18], [23, 19], [24, 19],
              [24, 20]], [23, 18]),
            ([[14, 23]], [14, 23]),
            ([[26, 19], [27, 18]], [27, 18]),
            ([[6, 10], [7, 10], [9, 13], [9, 14], [11, 15], [11, 16],
              [12, 16]], [9, 13]),
            ([[18, 14], [18, 15], [21, 17], [22, 17], [24, 19], [24, 20],
              [26, 23], [31, 27], [32, 27]], [21, 17]),
            ([[5, 9], [6, 9], [6, 10], [7, 10], [11, 15], [11, 16],
              [12, 16]], [6, 10]),
            ([[17, 14], [18, 14], [18, 15], [24, 19], [24, 20], [25, 21],
              [26, 23], [31, 27], [32, 27]], [18, 14]),
            ([[1, 6], [2, 6], [5, 9], [10, 15], [11, 15], [11, 16],
              [12, 16]], [2, 6]),
            ([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23],
              [31, 27], [32, 27]], [31, 27]),
            ([[5, 9], [6, 9], [10, 15], [11, 15], [11, 16], [12, 16]],
             [11, 16]),
            ([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23],
              [27, 23]], [22, 18]),
        ]

        remaining_script = [
            ([[13, 22]], [13, 22]),
            # double jump where 10-17 is also in a jumpable position if not
            # for piece restriction
            ([[22, 31]], [22, 31]),
            ([[14, 9], [18, 15], [24, 19], [24, 20], [25, 21], [25, 22],
              [27, 23], [30, 26]], [24, 19]),
            ([[10, 17], [16, 23], [31, 24]], [31, 24]),
            ([[24, 15]], [24, 15]),
            ([[15, 22]], [15, 22]),
            ([[25, 18]], [25, 18]),
            ([[10, 17]], [10, 17]),
            ([[18, 14], [18, 15], [28, 24], [29, 25], [30, 25], [30, 26],
              [32, 27]], [29, 25]),
            ([[5, 9], [6, 9], [6, 10], [7, 10], [7, 11], [8, 11], [16, 19],
              [16, 20], [17, 21], [17, 22]], [17, 21]),
            ([[18, 14], [18, 15], [25, 22], [28, 24], [30, 26], [32, 27]],
             [30, 26]),
            ([[21, 30]], [21, 30]),
            ([[18, 14], [18, 15], [26, 22], [26, 23], [28, 24], [32, 27]],
             [18, 15]),
            ([[30, 23]], [30, 23]),
            ([[15, 10], [15, 11], [28, 24], [32, 27]], [15, 11]),
            ([[8, 15]], [8, 15]),
            ([[28, 24], [32, 27]], [28, 24]),
            ([[3, 8], [4, 8], [5, 9], [6, 9], [6, 10], [7, 10], [7, 11],
              [15, 18], [15, 19], [16, 19], [16, 20], [23, 26], [23, 27],
              [23, 18], [23, 19]], [4, 8]),
            ([[24, 19], [24, 20], [32, 27], [32, 28]], [24, 19]),
            ([[15, 24]], [15, 24]),
            ([[32, 27], [32, 28]], [32, 27]),
            ([[23, 32], [24, 31]], [23, 32]),
        ]

        for legal_moves, chosen_move in opening_script:
            self.expect(legal_moves).move(chosen_move)

        self.game.render()

        for legal_moves, chosen_move in remaining_script:
            self.expect(legal_moves).move(chosen_move)

        # Final position: no possible moves are reported.
        self.expect([])

    def move(self, move):
        """Play ``move`` and keep the game object returned by ``Game.move``."""
        next_position = self.game.move(move)
        self.game = next_position

    def expect(self, expected_possible_moves):
        """Assert the current possible moves; return ``self`` so ``.move`` can be chained."""
        actual_moves = self.game.get_possible_moves()
        self.assertEqual(actual_moves, expected_possible_moves)
        return self