def test_possible_moves(self):
    """Replay a scripted game, checking the legal-move list before each move.

    Every step hands the expected move list to ``self.expect`` and then plays
    the chosen move through the object ``expect`` returns. The board is
    rendered at the start and again after the twelfth move, and the final
    position is expected to offer no moves at all.
    """
    self.game = Game()
    self.game.render()

    # (expected legal moves, move actually played)
    opening = [
        ([[9, 13], [9, 14], [10, 14], [10, 15], [11, 15], [11, 16], [12, 16]],
         [10, 14]),
        ([[21, 17], [22, 17], [22, 18], [23, 18], [23, 19], [24, 19], [24, 20]],
         [23, 18]),
        ([[14, 23]], [14, 23]),
        ([[26, 19], [27, 18]], [27, 18]),
        ([[6, 10], [7, 10], [9, 13], [9, 14], [11, 15], [11, 16], [12, 16]],
         [9, 13]),
        ([[18, 14], [18, 15], [21, 17], [22, 17], [24, 19], [24, 20], [26, 23],
          [31, 27], [32, 27]],
         [21, 17]),
        ([[5, 9], [6, 9], [6, 10], [7, 10], [11, 15], [11, 16], [12, 16]],
         [6, 10]),
        ([[17, 14], [18, 14], [18, 15], [24, 19], [24, 20], [25, 21], [26, 23],
          [31, 27], [32, 27]],
         [18, 14]),
        ([[1, 6], [2, 6], [5, 9], [10, 15], [11, 15], [11, 16], [12, 16]],
         [2, 6]),
        ([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23], [31, 27],
          [32, 27]],
         [31, 27]),
        ([[5, 9], [6, 9], [10, 15], [11, 15], [11, 16], [12, 16]],
         [11, 16]),
        ([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23], [27, 23]],
         [22, 18]),
    ]
    for legal_moves, chosen in opening:
        self.expect(legal_moves).move(chosen)

    self.game.render()

    remainder = [
        ([[13, 22]], [13, 22]),
        # double jump where 10-17 is also in a jumpable position if not for
        # piece restriction
        ([[22, 31]], [22, 31]),
        ([[14, 9], [18, 15], [24, 19], [24, 20], [25, 21], [25, 22], [27, 23],
          [30, 26]],
         [24, 19]),
        ([[10, 17], [16, 23], [31, 24]], [31, 24]),
        ([[24, 15]], [24, 15]),
        ([[15, 22]], [15, 22]),
        ([[25, 18]], [25, 18]),
        ([[10, 17]], [10, 17]),
        ([[18, 14], [18, 15], [28, 24], [29, 25], [30, 25], [30, 26], [32, 27]],
         [29, 25]),
        ([[5, 9], [6, 9], [6, 10], [7, 10], [7, 11], [8, 11], [16, 19],
          [16, 20], [17, 21], [17, 22]],
         [17, 21]),
        ([[18, 14], [18, 15], [25, 22], [28, 24], [30, 26], [32, 27]],
         [30, 26]),
        ([[21, 30]], [21, 30]),
        ([[18, 14], [18, 15], [26, 22], [26, 23], [28, 24], [32, 27]],
         [18, 15]),
        ([[30, 23]], [30, 23]),
        ([[15, 10], [15, 11], [28, 24], [32, 27]], [15, 11]),
        ([[8, 15]], [8, 15]),
        ([[28, 24], [32, 27]], [28, 24]),
        ([[3, 8], [4, 8], [5, 9], [6, 9], [6, 10], [7, 10], [7, 11], [15, 18],
          [15, 19], [16, 19], [16, 20], [23, 26], [23, 27], [23, 18], [23, 19]],
         [4, 8]),
        ([[24, 19], [24, 20], [32, 27], [32, 28]], [24, 19]),
        ([[15, 24]], [15, 24]),
        ([[32, 27], [32, 28]], [32, 27]),
        ([[23, 32], [24, 31]], [23, 32]),
    ]
    for legal_moves, chosen in remainder:
        self.expect(legal_moves).move(chosen)

    # Nothing is left to play in the final position.
    self.expect([])
def test_predict(self):
    """Check ``MCTS.predict_state_value`` on the initial position.

    The model is a mock whose ``predict`` returns a 1x1 array holding 0.25 and
    a 1x896 array of increasing numbers (presumably the value and policy heads
    -- confirm against the model). The test asserts that:

    * the returned value is 0.25,
    * the allowed actions are exactly the encoded ids of the moves reported by
      ``get_possible_moves_from_current_player_perspective``,
    * every allowed action has probability above 0.01 and every other action
      has probability below ``exp(-40)``.
    """
    game_root = Game()
    root = Node(game_root)
    model = MagicMock()
    # The shape is passed positionally: the ``newshape`` keyword of
    # ``np.reshape`` is deprecated in recent NumPy releases.
    prediction = [
        np.array([[0.25]]),
        np.reshape(np.arange(0.001, 0.897, step=0.001), (1, 896)),
    ]
    model.predict.return_value = prediction
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=action_encoder)

    value, probs, allowed_actions = mcts.predict_state_value(game_root)

    self.assertEqual(value, 0.25)
    self.assertCountEqual(
        allowed_actions,
        action_encoder.convert_moves_to_action_ids(
            game_root.get_possible_moves_from_current_player_perspective()))
    for idx, prob in enumerate(probs):
        if idx in allowed_actions:
            self.assertGreater(prob, 0.01)
        else:
            self.assertLess(prob, np.exp(-40))
def test_id_white(self):
    """A freshly constructed game must report the expected 33-entry id.

    Expected layout: entries 0..11 are 1, entries 20..31 are 2, everything in
    between is 0, and the last entry is 1 (presumably the side to move --
    confirm against ``Game.id``).
    """
    game = Game()

    expected_id = np.zeros(33)
    expected_id[:12] = 1
    expected_id[20:32] = 2
    expected_id[32] = 1

    self.assertTrue(np.array_equal(game.id(), expected_id))
def test_id_black(self):
    """After the first listed move is played, the id must reflect the change.

    Expected layout: entries 0..12 are 1 except index 8, which is 0; entries
    20..31 are 2; the last entry is 2 (presumably the side to move -- confirm
    against ``Game.id``).
    """
    start = Game()
    game = start.move(start.get_possible_moves()[0])

    expected_id = np.zeros(33)
    expected_id[:13] = 1
    expected_id[8] = 0
    expected_id[20:32] = 2
    expected_id[32] = 2

    self.assertTrue(np.array_equal(game.id(), expected_id))
def test_move_with_jumps(self):
    """``move_with_additional_jumps`` must record every move in ``game.moves``.

    The first six moves each append exactly one entry to the history. The
    seventh call, ``[9, 18]``, is expected to leave two new entries behind:
    ``[9, 18]`` followed by ``[18, 27]``.
    """
    game = Game()
    history = []

    single_steps = ((12, 16), (23, 18), (8, 12), (27, 23), (4, 8), (18, 14))
    for step in single_steps:
        # Fresh lists for both the call and the expectation, so neither can
        # alias the other.
        game = game.move_with_additional_jumps(list(step))
        history.append(list(step))
        self.assertEqual(game.moves, history)

    game = game.move_with_additional_jumps([9, 18])
    self.assertEqual(game.moves, history + [[9, 18], [18, 27]])
def test_evaluate_leaf(self):
    """Check that ``MCTS.evaluate_leaf`` expands the root from model output.

    The model is a mock whose ``predict`` returns a 1x1 array holding 0.25 and
    a 1x896 array of increasing numbers. After evaluating the root, the test
    expects the returned value to be 0.25, seven edges on the root, and the
    first two edges to carry action ids 8 and 104 with priors equal to the
    matching entries of the probabilities from ``predict_state_value``.
    """
    game_root = Game()
    root = Node(game_root)
    model = MagicMock()
    # The shape is passed positionally: the ``newshape`` keyword of
    # ``np.reshape`` is deprecated in recent NumPy releases.
    prediction = [
        np.array([[0.25]]),
        np.reshape(np.arange(0.001, 0.897, step=0.001), (1, 896)),
    ]
    model.predict.return_value = prediction
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=action_encoder)

    _, probs, _ = mcts.predict_state_value(game_root)
    value = mcts.evaluate_leaf(root)

    self.assertEqual(value, 0.25)
    self.assertEqual(len(root.edges), 7)
    self.assertEqual(root.edges[0].action, 8)
    self.assertEqual(root.edges[0].stats['P'], probs[8])
    self.assertEqual(root.edges[1].action, 104)
    self.assertEqual(root.edges[1].stats['P'], probs[104])
class TestWinner(unittest.TestCase):
    """Scripted games checking what ``Game.get_winner`` reports.

    Each test replays a fixed move list. ``get_winner()`` must be ``None``
    after every move played through ``make_non_winning_moves``; the decisive
    tests then play one more move and check the reported winner.
    """

    def setUp(self):
        """Start every test from a freshly constructed game."""
        self.game = Game()

    def test_player_1_wins(self):
        """The final move ``[20, 27]`` must make player 1 the winner."""
        self.make_non_winning_moves([
            [11, 15], [21, 17], [8, 11], [25, 21], [4, 8], [29, 25], [12, 16],
            [22, 18], [15, 22], [22, 29], [30, 25], [29, 22], [22, 13],
            [23, 18], [8, 12], [26, 23], [16, 20], [31, 26], [3, 8], [24, 19],
            [10, 14], [21, 17], [13, 22], [22, 31], [31, 24], [24, 15],
            [15, 22], [32, 27], [9, 13], [23, 18], [14, 23], [23, 32],
            [28, 24],
        ])
        self.move([20, 27]).expect(1)

    def test_player_2_wins(self):
        """The final move ``[7, 16]`` must make player 2 the winner."""
        self.make_non_winning_moves([
            [10, 14], [22, 17], [9, 13], [17, 10], [6, 15], [23, 18], [15, 22],
            [25, 18], [13, 17], [21, 14], [5, 9], [14, 5], [1, 6], [5, 1],
            [11, 15], [1, 10], [10, 19], [12, 16], [19, 12], [7, 10], [26, 23],
            [10, 14], [18, 9], [3, 7], [12, 3], [3, 10], [2, 6], [9, 2],
            [4, 8], [2, 7], [8, 11],
        ])
        self.move([7, 16]).expect(2)

    def test_win_by_no_legal_moves(self):
        """The final move ``[23, 16]`` must make player 1 the winner."""
        self.make_non_winning_moves([
            [11, 15], [22, 18], [15, 22], [25, 18], [12, 16], [18, 14],
            [9, 18], [23, 14], [10, 17], [21, 14], [5, 9], [14, 5], [6, 9],
            [29, 25], [9, 13], [25, 22], [2, 6], [22, 18], [13, 17], [27, 23],
            [17, 21], [24, 19], [8, 12], [30, 25], [21, 30], [28, 24], [4, 8],
            [18, 14], [6, 10], [32, 27], [10, 17], [23, 18], [16, 23],
            [23, 32], [24, 19], [30, 23], [23, 14], [31, 27], [32, 23],
        ])
        self.move([23, 16]).expect(1)

    def test_draw(self):
        """No winner may be reported at any point of this long game."""
        self.make_non_winning_moves([
            [10, 14], [22, 17], [9, 13], [17, 10], [7, 14], [25, 22], [6, 10],
            [29, 25], [1, 6], [22, 18], [6, 9], [24, 19], [2, 6], [28, 24],
            [11, 16], [24, 20], [8, 11], [32, 28], [4, 8], [27, 24], [3, 7],
            [31, 27], [13, 17], [25, 22], [9, 13], [18, 9], [9, 2], [10, 14],
            [22, 18], [5, 9], [19, 15], [16, 19], [23, 16], [12, 19], [30, 25],
            [14, 23], [23, 32], [21, 14], [14, 5], [11, 18], [2, 11], [11, 4],
            [19, 23], [26, 19], [13, 17], [25, 21], [17, 22], [21, 17],
            [22, 25], [17, 14], [18, 22], [5, 1], [22, 26], [4, 8], [26, 31],
            [19, 15], [25, 30], [8, 11], [31, 26], [1, 6], [26, 23], [24, 19],
            [23, 16], [16, 7], [14, 10], [7, 14], [15, 10], [14, 7], [28, 24],
            [32, 28], [20, 16], [28, 19], [19, 12], [6, 9], [7, 10], [9, 13],
            [10, 7], [13, 9], [7, 3], [9, 6], [3, 7], [6, 1], [7, 11], [1, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8],
        ])

    def make_non_winning_moves(self, moves):
        """Play ``moves`` in order, asserting after each that nobody has won."""
        for move in moves:
            self.move(move).expect(None)

    def move(self, move):
        """Advance ``self.game`` by ``move`` and return ``self`` for chaining."""
        self.game = self.game.move(move)
        return self

    def expect(self, value):
        """Assert that the current game reports ``value`` as its winner.

        ``None`` is checked by identity. Any other value is compared by
        equality, because an identity check on integers only passes thanks to
        the interpreter's small-integer caching.
        """
        winner = self.game.get_winner()
        if value is None:
            self.assertIsNone(winner)
        else:
            self.assertEqual(winner, value)
def test_move_to_leaf(self):
    """``move_to_leaf`` must follow the edge selected by the PUCT policy.

    The root receives three edges, one per each of the first three possible
    moves, and the PUCT component is replaced by a mock that always selects
    the second edge. The test expects the walk to end on that edge's child
    with a value of 0 and a falsy ``done`` flag.
    """
    game = Game()
    root = Node(game)
    action_encoder = ActionEncoder(DirectionResolver())
    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)
    puct = MagicMock()
    mcts.puct = puct

    moves = game.get_possible_moves()[:3]
    children = [Node(game.move(move)) for move in moves]
    priors = (0.33, 0.34, 0.33)
    edges = [
        Edge(root, child, prior,
             action_encoder.convert_move_to_action_id(move))
        for child, prior, move in zip(children, priors, moves)
    ]
    root.edges.extend(edges)
    puct.puct.return_value = edges[1]

    leaf, value, done, _breadcrumbs = mcts.move_to_leaf()

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(leaf, children[1])
    self.assertEqual(value, 0)
    self.assertEqual(done, 0)
def test_act_tau_0(self):
    """With ``tau=0`` the agent must pick the action of the most visited edge.

    The root gets three hand-built edges with visit counts 10, 20 and 15. The
    MCTS object and ``prepare_mcts_for_next_action`` are mocked, so only the
    agent's own selection logic runs. The test expects the move ``[9, 14]``,
    a value of 0.5, and a ``pi`` whose entries equal each edge's share of the
    total visit count.
    """
    config = {
        'ALPHA': 0.8,
        'CPUCT': 1,
        'EPSILON': 0.2,
        'ACTION_SIZE': 32 * 4 * 7,
        'MCTS_SIMULATIONS': 3
    }
    action_encoder = ActionEncoder(DirectionResolver())
    agent = Agent(model=None,
                  action_encoder=action_encoder,
                  state_encoder=StateEncoder(),
                  name='player1',
                  config=config)
    game_root = Game()
    root_node = Node(game_root)

    # (index into possible moves, prior, action id, visit count N, Q)
    edge_specs = (
        (0, 0.33, 8, 10, 0.2),
        (1, 0.5, 104, 20, 0.5),
        (2, 0.17, 9, 15, 0.3),
    )
    for move_index, prior, action_id, visits, q_value in edge_specs:
        child = Node(game_root.move(game_root.get_possible_moves()[move_index]))
        edge = Edge(root_node, child, prior, action_id)
        edge.stats['N'] = visits
        edge.stats['Q'] = q_value
        root_node.edges.append(edge)

    agent.prepare_mcts_for_next_action = MagicMock()
    mcts = MagicMock()
    mcts.root = root_node
    mcts.evaluate_leaf.return_value = 0.7
    agent.mcts = mcts
    mcts.move_to_leaf.return_value = (root_node, 0.5, False, [])

    action, pi, value = agent.act(game_root, tau=0)

    self.assertEqual(action, [9, 14])
    self.assertEqual(value, 0.5)
    self.assertEqual(pi[8], 10/(10 + 20 + 15))
    self.assertEqual(pi[9], 15/(10 + 20 + 15))
    self.assertEqual(pi[8 + 3*32], 20/(10 + 20 + 15))
def test_integration(self):
    """Smoke test: run the MCTS prediction path against a real network.

    Builds a ``Residual_CNN`` with six identical hidden layer specs and checks
    only that ``predict_state_value`` and ``evaluate_leaf`` run on the initial
    position; no results are asserted.
    """
    HIDDEN_CNN_LAYERS = [
        {'filters': 75, 'kernel_size': (4, 4)} for _ in range(6)
    ]
    model = Residual_CNN(0.0001,
                         0.1, (2, 4, 8),
                         32 * 4,
                         HIDDEN_CNN_LAYERS,
                         momentum=0.9)
    game_root = Game()
    root = Node(game_root)
    search_config = {'ALPHA': 0.8, 'CPUCT': 1, 'EPSILON': 0.2}
    mcts = MCTS(root,
                config=search_config,
                model=model,
                state_encoder=StateEncoder(),
                action_encoder=ActionEncoder(DirectionResolver()))

    mcts.predict_state_value(game_root)
    mcts.evaluate_leaf(root)
def test_backfill(self):
    """``backfill`` must update N, W and Q on every edge along the path.

    A two-edge path root -> child1 -> child2 is built by hand; the deeper edge
    starts with N=4 and W=1. After backfilling the value -1 from ``child2``
    along ``[edge2, edge1]`` the test expects edge2 to hold N=5, W=2, Q=2/5
    and edge1 to hold N=1, W=-1, Q=-1.
    """
    game_root = Game()
    root = Node(game_root)
    action_encoder = ActionEncoder(DirectionResolver())

    position1 = game_root.move(game_root.get_possible_moves()[0])
    child1 = Node(position1)
    edge1 = Edge(
        root, child1, 0.3,
        action_encoder.convert_move_to_action_id(
            game_root.get_possible_moves()[0]))

    position2 = position1.move(position1.get_possible_moves()[0])
    child2 = Node(position2)
    # NOTE(review): the action id below is derived from the root's first move,
    # not from position1's first move -- confirm whether that is intended. It
    # does not affect the statistics asserted here.
    edge2 = Edge(
        child1, child2, 0.2,
        action_encoder.convert_move_to_action_id(
            game_root.get_possible_moves()[0]))
    edge2.stats['N'] = 4
    edge2.stats['W'] = 1

    mcts = MCTS(root,
                config={
                    'ALPHA': 0.8,
                    'CPUCT': 1,
                    'EPSILON': 0.2
                },
                model=None,
                state_encoder=None,
                action_encoder=action_encoder)

    mcts.backfill(child2, -1, [edge2, edge1])

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(edge2.stats['N'], 5)
    self.assertEqual(edge2.stats['W'], 2)
    self.assertEqual(edge2.stats['Q'], 2 / 5)
    self.assertEqual(edge1.stats['N'], 1)
    self.assertEqual(edge1.stats['W'], -1)
    self.assertEqual(edge1.stats['Q'], -1)
def test_puct_root_node(self):
    """PUCT at the root must pick action 35 for seven equal-prior edges.

    NumPy's global random generator is seeded with 1 before the ``PUCT``
    object is created, so the expected action relies on whatever randomness
    ``puct`` draws with ``is_root=True`` being reproducible under that seed.
    """
    np.random.seed(1)
    puct = PUCT(0.8, 0.2, 1)
    game = Game()
    parent_node = Node(game)

    moves = game.get_possible_moves()
    # One edge per each of the first seven moves, with action ids 35..41 and
    # the same prior on every edge.
    for move_index, action_id in enumerate(range(35, 42)):
        child = Node(game.move(moves[move_index]))
        parent_node.edges.append(
            Edge(parent_node, child, 0.14285715, action_id))

    simulation_edge = puct.puct(parent_node, is_root=True)

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(simulation_edge.action, 35)
def test_moves_from_current_player_perspective_black(self):
    """Check the perspective-adjusted move list after the opening ``[9, 13]``.

    The order of the returned moves is not significant; only the contents are
    compared.
    """
    game = Game().move([9, 13])

    moves = game.get_possible_moves_from_current_player_perspective()

    expected = [
        [9, 13], [9, 14], [10, 14], [10, 15], [11, 15], [11, 16], [12, 16],
    ]
    self.assertCountEqual(moves, expected)
def test_puct_non_root_node(self):
    """Away from the root, PUCT must pick the edge with the highest prior.

    All seven edges keep their default statistics, and the edge with action 29
    carries the largest prior (0.14805108), so it is the expected selection.
    """
    np.random.seed(1)
    puct = PUCT(0.8, 0.2, 1)
    game = Game()
    parent_node = Node(game)

    moves = game.get_possible_moves()
    # (prior, action id) for each of the first seven possible moves, in order.
    edge_specs = (
        (0.14805108, 29),
        (0.14307857, 35),
        (0.14475949, 37),
        (0.1387326, 38),
        (0.14208362, 39),
        (0.14188258, 40),
        (0.14141211, 41),
    )
    for move, (prior, action_id) in zip(moves, edge_specs):
        parent_node.edges.append(
            Edge(parent_node, Node(game.move(move)), prior, action_id))

    simulation_edge = puct.puct(parent_node, is_root=False)

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(simulation_edge.action, 29)
def test_puct_non_root_node_exploration(self):
    """PUCT must favour a rarely visited edge over heavily visited ones.

    Six edges are given a visit count of 100 and the edge with action 38 only
    10. Although that edge has the smallest prior, it is the expected
    selection.
    """
    np.random.seed(1)
    game = Game()
    puct = PUCT(0.8, 0.2, 1)
    parent_node = Node(game)

    moves = game.get_possible_moves()
    # (prior, action id, visit count N) for the first seven possible moves.
    edge_specs = (
        (0.14805108, 29, 100),
        (0.14307857, 35, 100),
        (0.14475949, 37, 100),
        (0.1387326, 38, 10),
        (0.14208362, 39, 100),
        (0.14188258, 40, 100),
        (0.14141211, 41, 100),
    )
    for move, (prior, action_id, visits) in zip(moves, edge_specs):
        edge = Edge(parent_node, Node(game.move(move)), prior, action_id)
        edge.stats['N'] = visits
        parent_node.edges.append(edge)

    simulation_edge = puct.puct(parent_node, is_root=False)

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(simulation_edge.action, 38)
class TestPossibleMoves(unittest.TestCase):
    """Replays one scripted game and checks the legal moves at every turn."""

    def test_possible_moves(self):
        """Assert ``get_possible_moves()`` before each move of a fixed game.

        The board is rendered at the start and again after the twelfth move;
        the final position is expected to have no possible moves.
        """
        self.game = Game()
        self.game.render()

        self.expect([[9, 13], [9, 14], [10, 14], [10, 15], [11, 15], [11, 16],
                     [12, 16]])
        self.move([10, 14])
        self.expect([[21, 17], [22, 17], [22, 18], [23, 18], [23, 19],
                     [24, 19], [24, 20]])
        self.move([23, 18])
        self.expect([[14, 23]])
        self.move([14, 23])
        self.expect([[26, 19], [27, 18]])
        self.move([27, 18])
        self.expect([[6, 10], [7, 10], [9, 13], [9, 14], [11, 15], [11, 16],
                     [12, 16]])
        self.move([9, 13])
        self.expect([[18, 14], [18, 15], [21, 17], [22, 17], [24, 19],
                     [24, 20], [26, 23], [31, 27], [32, 27]])
        self.move([21, 17])
        self.expect([[5, 9], [6, 9], [6, 10], [7, 10], [11, 15], [11, 16],
                     [12, 16]])
        self.move([6, 10])
        self.expect([[17, 14], [18, 14], [18, 15], [24, 19], [24, 20],
                     [25, 21], [26, 23], [31, 27], [32, 27]])
        self.move([18, 14])
        self.expect([[1, 6], [2, 6], [5, 9], [10, 15], [11, 15], [11, 16],
                     [12, 16]])
        self.move([2, 6])
        self.expect([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21],
                     [26, 23], [31, 27], [32, 27]])
        self.move([31, 27])
        self.expect([[5, 9], [6, 9], [10, 15], [11, 15], [11, 16], [12, 16]])
        self.move([11, 16])
        self.expect([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21],
                     [26, 23], [27, 23]])
        self.move([22, 18])

        self.game.render()

        self.expect([[13, 22]])
        self.move([13, 22])
        self.expect([[22, 31]])
        # double jump where 10-17 is also in a jumpable position if not for
        # piece restriction
        self.move([22, 31])
        self.expect([[14, 9], [18, 15], [24, 19], [24, 20], [25, 21],
                     [25, 22], [27, 23], [30, 26]])
        self.move([24, 19])
        self.expect([[10, 17], [16, 23], [31, 24]])
        self.move([31, 24])
        self.expect([[24, 15]])
        self.move([24, 15])
        self.expect([[15, 22]])
        self.move([15, 22])
        self.expect([[25, 18]])
        self.move([25, 18])
        self.expect([[10, 17]])
        self.move([10, 17])
        self.expect([[18, 14], [18, 15], [28, 24], [29, 25], [30, 25],
                     [30, 26], [32, 27]])
        self.move([29, 25])
        self.expect([[5, 9], [6, 9], [6, 10], [7, 10], [7, 11], [8, 11],
                     [16, 19], [16, 20], [17, 21], [17, 22]])
        self.move([17, 21])
        self.expect([[18, 14], [18, 15], [25, 22], [28, 24], [30, 26],
                     [32, 27]])
        self.move([30, 26])
        self.expect([[21, 30]])
        self.move([21, 30])
        self.expect([[18, 14], [18, 15], [26, 22], [26, 23], [28, 24],
                     [32, 27]])
        self.move([18, 15])
        self.expect([[30, 23]])
        self.move([30, 23])
        self.expect([[15, 10], [15, 11], [28, 24], [32, 27]])
        self.move([15, 11])
        self.expect([[8, 15]])
        self.move([8, 15])
        self.expect([[28, 24], [32, 27]])
        self.move([28, 24])
        self.expect([[3, 8], [4, 8], [5, 9], [6, 9], [6, 10], [7, 10],
                     [7, 11], [15, 18], [15, 19], [16, 19], [16, 20],
                     [23, 26], [23, 27], [23, 18], [23, 19]])
        self.move([4, 8])
        self.expect([[24, 19], [24, 20], [32, 27], [32, 28]])
        self.move([24, 19])
        self.expect([[15, 24]])
        self.move([15, 24])
        self.expect([[32, 27], [32, 28]])
        self.move([32, 27])
        self.expect([[23, 32], [24, 31]])
        self.move([23, 32])

        # Nothing is left to play in the final position.
        self.expect([])

    def move(self, move):
        """Replace ``self.game`` with the game that results from ``move``."""
        next_game = self.game.move(move)
        self.game = next_game

    def expect(self, expected_possible_moves):
        """Assert the current legal moves and return ``self`` for chaining."""
        actual_moves = self.game.get_possible_moves()
        self.assertEqual(actual_moves, expected_possible_moves)
        return self
def play_matches(self, player1, player2, episodes_count, turns_until_tau0, memory=None):
    """Play ``episodes_count`` games between two agents and tally the results.

    Each episode starts from ``self.initial_state`` when it is set, otherwise
    from a new ``Game()``. Both agents have their ``mcts`` attribute reset to
    ``None`` and are randomly assigned to sides 1 and 2. On every turn the
    agent for ``state.whose_turn()`` is asked to ``act``, receiving 1 as its
    second argument while the turn counter is below ``turns_until_tau0`` and 0
    afterwards.

    When ``memory`` is given, every position is committed to short-term memory
    together with ``pi``. At the end of a game each stored record gets a
    ``'value'``: 0 if ``get_winner_for_learning()`` returned 0, otherwise -1
    for records whose side to move equals the side to move in the final state
    and 1 for the others. The records are then committed to long-term memory.

    Returns:
        A ``(scores, memory)`` tuple. ``scores`` maps each player's name and
        the key ``"drawn"`` to a count; a decided game is credited to the
        player on ``state.opposite_turn()`` of the final state.
    """
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}

    for episode in range(episodes_count):
        logging.info("Running episode: " + str(episode))

        state = Game() if self.initial_state is None else self.initial_state
        done = 0
        turn = 0
        player1.mcts = None
        player2.mcts = None

        # randint(0, 1) * 2 - 1 yields -1 or 1; 1 puts player1 on side 1.
        player1_starts = random.randint(0, 1) * 2 - 1
        if player1_starts == 1:
            first, second = player1, player2
        else:
            first, second = player2, player1
        players = {
            1: {"agent": first, "name": first.name},
            2: {"agent": second, "name": second.name},
        }

        while done == 0:
            turn += 1
            tau = 1 if turn < turns_until_tau0 else 0
            action, pi, _ = players[state.whose_turn()]['agent'].act(state, tau)

            if memory is not None:
                memory.commit_stmemory(state, pi)

            state = state.move(action)
            done = state.is_over()
            val = state.get_winner_for_learning()

            if state.is_over() == 1:
                if memory is not None:
                    for move in memory.stmemory:
                        same_side = (
                            move['state'].whose_turn() == state.whose_turn())
                        if val == 0:
                            move['value'] = 0
                        elif same_side:
                            move['value'] = -1
                        else:
                            move['value'] = 1
                    memory.commit_ltmemory()

                if val != 0:
                    winner_name = players[state.opposite_turn()]['name']
                    scores[winner_name] = scores[winner_name] + 1
                else:
                    scores['drawn'] = scores['drawn'] + 1

    return scores, memory
class TestGameOver(unittest.TestCase):
    """Scripted games checking when ``Game.is_over`` flips to ``True``.

    Every move played through ``make_non_final_moves`` must leave the game
    unfinished; each scenario then plays one last move that must end it.
    """

    def setUp(self):
        """Start every test from a freshly constructed game."""
        self.game = Game()

    def test_new_game_not_over(self):
        """A game with no moves played is not over."""
        self.expect(False)

    def test_win_by_capture(self):
        """The final move ``[22, 15]`` must end the game."""
        lead_up = [
            [10, 14], [23, 18], [14, 23], [26, 19], [11, 15], [19, 10],
            [6, 15], [22, 18], [15, 22], [25, 18], [9, 13], [21, 17],
            [13, 22], [31, 26], [22, 31], [24, 19], [31, 24], [24, 15],
            [15, 22], [29, 25], [22, 29], [30, 25], [29, 22], [28, 24],
            [12, 16], [32, 27], [16, 20], [27, 23], [20, 27], [23, 18],
        ]
        self.make_non_final_moves(lead_up)
        self.move([22, 15])
        self.expect(True)

    def test_win_by_no_legal_moves(self):
        """The final move ``[23, 16]`` must end the game."""
        lead_up = [
            [11, 15], [22, 18], [15, 22], [25, 18], [12, 16], [18, 14],
            [9, 18], [23, 14], [10, 17], [21, 14], [5, 9], [14, 5], [6, 9],
            [29, 25], [9, 13], [25, 22], [2, 6], [22, 18], [13, 17], [27, 23],
            [17, 21], [24, 19], [8, 12], [30, 25], [21, 30], [28, 24], [4, 8],
            [18, 14], [6, 10], [32, 27], [10, 17], [23, 18], [16, 23],
            [23, 32], [24, 19], [30, 23], [23, 14], [31, 27], [32, 23],
        ]
        self.make_non_final_moves(lead_up)
        self.move([23, 16])
        self.expect(True)

    def test_move_limit_draw(self):
        """After a long run of repeated moves, ``[11, 8]`` must end the game."""
        lead_up = [
            [10, 14], [22, 17], [9, 13], [17, 10], [7, 14], [25, 22], [6, 10],
            [29, 25], [1, 6], [22, 18], [6, 9], [24, 19], [2, 6], [28, 24],
            [11, 16], [24, 20], [8, 11], [32, 28], [4, 8], [27, 24], [3, 7],
            [31, 27], [13, 17], [25, 22], [9, 13], [18, 9], [9, 2], [10, 14],
            [22, 18], [5, 9], [19, 15], [16, 19], [23, 16], [12, 19], [30, 25],
            [14, 23], [23, 32], [21, 14], [14, 5], [11, 18], [2, 11], [11, 4],
            [19, 23], [26, 19], [13, 17], [25, 21], [17, 22], [21, 17],
            [22, 25], [17, 14], [18, 22], [5, 1], [22, 26], [4, 8], [26, 31],
            [19, 15], [25, 30], [8, 11], [31, 26], [1, 6], [26, 23], [24, 19],
            [23, 16], [16, 7], [14, 10], [7, 14], [15, 10], [14, 7], [28, 24],
            [32, 28], [20, 16], [28, 19], [19, 12], [6, 9], [7, 10], [9, 13],
            [10, 7], [13, 9], [7, 3], [9, 6], [3, 7], [6, 1], [7, 11], [1, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
            [11, 8], [6, 9], [8, 11], [9, 6],
        ]
        self.make_non_final_moves(lead_up)
        self.move([11, 8])
        self.expect(True)

    def make_non_final_moves(self, moves):
        """Play ``moves`` in order, asserting after each that play continues."""
        for step in moves:
            self.move(step)
            self.expect(False)

    def move(self, move):
        """Advance ``self.game`` by ``move`` and return ``self`` for chaining."""
        next_game = self.game.move(move)
        self.game = next_game
        return self

    def expect(self, value):
        """Assert that ``is_over()`` returns exactly the object ``value``."""
        finished = self.game.is_over()
        self.assertIs(finished, value)
def setUp(self):
    """Give every test a freshly constructed game in ``self.game``."""
    fresh_game = Game()
    self.game = fresh_game
return x1, x2, y1, y2 def row_and_column(self, position): row = (position - 1) // 4 column = (3 - ((position - 1) % 4)) * 2 if row % 2 == 1: column = column + 1 return row, column def read_agent(version): nn = Residual_CNN(config['REG_CONST'], config['LEARNING_RATE'], (2, 4, 8), config['ACTION_SIZE'], config['HIDDEN_CNN_LAYERS'], config['MOMENTUM']) m_tmp = nn.read(version) nn.model.set_weights(m_tmp.get_weights()) player = Agent(nn, ActionEncoder(DirectionResolver()), StateEncoder(), name='player' + str(version), config=config) return player if __name__ == "__main__": game = Game() player1 = read_agent(config['GUI_PLAYERS'][0]) player2 = read_agent(config['GUI_PLAYERS'][1]) gui = Gui(game, player1, player2)