예제 #1
0
	def test_act_tau_0(self):
		"""Check Agent.act(tau=0) against a hand-built root with three edges.

		The MCTS object is replaced by a MagicMock whose root carries three
		edges with preset visit counts (N) and values (Q). The test expects
		the returned move to be [9, 14], the returned value to be 0.5, and
		pi at each edge's action id to equal that edge's share of the total
		visit count.
		"""
		agent = Agent(
			model=None,
			action_encoder=ActionEncoder(DirectionResolver()),
			state_encoder=StateEncoder(),
			name='player1',
			config={
				'ALPHA': 0.8,
				'CPUCT': 1,
				'EPSILON': 0.2,
				'ACTION_SIZE': 32 * 4 * 7,
				'MCTS_SIMULATIONS': 3
			},
		)
		game_root = Game()
		root_node = Node(game_root)

		# One row per root edge: (index into possible moves, prior, action id, N, Q).
		edge_specs = (
			(0, 0.33, 8, 10, 0.2),
			(1, 0.5, 104, 20, 0.5),
			(2, 0.17, 9, 15, 0.3),
		)
		for move_index, prior, action_id, visits, q_value in edge_specs:
			successor = game_root.move(game_root.get_possible_moves()[move_index])
			edge = Edge(root_node, Node(successor), prior, action_id)
			edge.stats['N'] = visits
			edge.stats['Q'] = q_value
			root_node.edges.append(edge)

		# Stub out the search so act() only sees the statistics set above.
		agent.prepare_mcts_for_next_action = MagicMock()
		fake_mcts = MagicMock()
		fake_mcts.root = root_node
		fake_mcts.evaluate_leaf.return_value = 0.7
		agent.mcts = fake_mcts
		fake_mcts.move_to_leaf.return_value = (root_node, 0.5, False, [])

		action, pi, value = agent.act(game_root, tau=0)

		total_visits = 10 + 20 + 15
		self.assertEqual(action, [9, 14])
		self.assertEqual(value, 0.5)
		self.assertEqual(pi[8], 10 / total_visits)
		self.assertEqual(pi[9], 15 / total_visits)
		# 104 == 8 + 3*32, the action id given to the second edge.
		self.assertEqual(pi[8 + 3 * 32], 20 / total_visits)
예제 #2
0
파일: test_PUCT.py 프로젝트: evgeniy44/mcts
    def test_puct_non_root_node(self):
        """PUCT on a non-root node with untouched edge stats.

        Seven edges are attached to the parent, each with a different prior
        and no statistics modified after construction. The edge with the
        largest prior (0.14805108, action 29) is expected to be selected.
        """
        # NOTE(review): the seed is presumably only relevant if PUCT draws
        # random noise; kept so the test stays deterministic either way.
        np.random.seed(1)
        puct = PUCT(0.8, 0.2, 1)
        game = Game()
        parent_node = Node(game)

        # (prior, action id) for the edge built from the i-th possible move.
        priors_and_actions = [
            (0.14805108, 29),
            (0.14307857, 35),
            (0.14475949, 37),
            (0.1387326, 38),
            (0.14208362, 39),
            (0.14188258, 40),
            (0.14141211, 41),
        ]
        for move_index, (prior, action) in enumerate(priors_and_actions):
            child = Node(game.move(game.get_possible_moves()[move_index]))
            parent_node.edges.append(Edge(parent_node, child, prior, action))

        simulation_edge = puct.puct(parent_node, is_root=False)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(simulation_edge.action, 29)
예제 #3
0
파일: test_PUCT.py 프로젝트: evgeniy44/mcts
    def test_puct_root_node(self):
        """PUCT on the root node with seven equal priors.

        All edges share the prior 0.14285715 and have no statistics modified
        after construction. With the NumPy RNG seeded to 1, the edge with
        action 35 is expected to be selected.
        """
        # NOTE(review): presumably root selection mixes in random noise, which
        # is why the seed matters here -- confirm against PUCT.puct.
        np.random.seed(1)
        puct = PUCT(0.8, 0.2, 1)
        game = Game()
        parent_node = Node(game)

        uniform_prior = 0.14285715
        # Action ids 35..41 are assigned to possible moves 0..6 in order.
        for move_index, action in enumerate(range(35, 42)):
            child = Node(game.move(game.get_possible_moves()[move_index]))
            parent_node.edges.append(
                Edge(parent_node, child, uniform_prior, action))

        simulation_edge = puct.puct(parent_node, is_root=True)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(simulation_edge.action, 35)
예제 #4
0
 def test_id_black(self):
     """Check Game.id() after the first possible move has been played.

     The expected id is a 33-element vector: ones at indices 0-12 except
     index 8 (zero), zeros at 13-19, and twos at indices 20-32.
     """
     game = Game()
     first_move = game.get_possible_moves()[0]
     game = game.move(first_move)

     # NOTE(review): index 32 presumably marks the side to move -- confirm
     # against Game.id().
     expected_id = np.array(
         [1.0] * 8 + [0.0] + [1.0] * 4 + [0.0] * 7 + [2.0] * 13)

     self.assertTrue(np.array_equal(game.id(), expected_id))
예제 #5
0
    def test_move_to_leaf(self):
        """move_to_leaf() follows the edge chosen by PUCT down to its child.

        The root gets three edges (built from the first three possible
        moves) and the MCTS's ``puct`` attribute is replaced by a mock that
        always returns the second edge. The returned leaf must be that
        edge's child, with value 0 and done equal to 0.
        """
        game = Game()
        root = Node(game)
        action_encoder = ActionEncoder(DirectionResolver())
        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=None,
                    state_encoder=None,
                    action_encoder=action_encoder)

        puct = MagicMock()
        mcts.puct = puct

        children = []
        edges = []
        for move_index, prior in enumerate((0.33, 0.34, 0.33)):
            child = Node(game.move(game.get_possible_moves()[move_index]))
            action_id = action_encoder.convert_move_to_action_id(
                game.get_possible_moves()[move_index])
            edge = Edge(root, child, prior, action_id)
            children.append(child)
            edges.append(edge)
            root.edges.append(edge)

        # Force the selection step to pick the second edge.
        puct.puct.return_value = edges[1]

        # The breadcrumbs element of the result is not checked by this test.
        leaf, value, done, _breadcrumbs = mcts.move_to_leaf()

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        # The former assertEquals(False, 0) / assertEquals(True, 1) checks were
        # tautologies that exercised no code under test and have been dropped.
        self.assertEqual(leaf, children[1])
        self.assertEqual(value, 0)
        self.assertEqual(done, 0)
예제 #6
0
    def test_backfill(self):
        """backfill() updates N, W and Q on every edge in the breadcrumbs.

        A two-edge path root -> child1 -> child2 is built; edge2 starts with
        N=4, W=1. After ``backfill(child2, -1, [edge2, edge1])`` the test
        expects edge2 to have N=5, W=2, Q=2/5 and edge1 to have N=1, W=-1,
        Q=-1, i.e. the value is applied with opposite sign on the two edges.
        """
        game_root = Game()
        root = Node(game_root)
        action_encoder = ActionEncoder(DirectionResolver())

        position1 = game_root.move(game_root.get_possible_moves()[0])
        child1 = Node(position1)
        edge1 = Edge(
            root, child1, 0.3,
            action_encoder.convert_move_to_action_id(
                game_root.get_possible_moves()[0]))

        position2 = position1.move(position1.get_possible_moves()[0])
        child2 = Node(position2)
        # NOTE(review): the action id below is derived from the root's first
        # move although this edge leaves child1; it looks like a copy-paste
        # leftover. It does not influence the statistics asserted here.
        edge2 = Edge(
            child1, child2, 0.2,
            action_encoder.convert_move_to_action_id(
                game_root.get_possible_moves()[0]))
        edge2.stats['N'] = 4
        edge2.stats['W'] = 1

        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=None,
                    state_encoder=None,
                    action_encoder=action_encoder)

        mcts.backfill(child2, -1, [edge2, edge1])

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(edge2.stats['N'], 5)
        self.assertEqual(edge2.stats['W'], 2)
        self.assertEqual(edge2.stats['Q'], 2 / 5)

        self.assertEqual(edge1.stats['N'], 1)
        self.assertEqual(edge1.stats['W'], -1)
        self.assertEqual(edge1.stats['Q'], -1)
예제 #7
0
파일: test_PUCT.py 프로젝트: evgeniy44/mcts
    def test_puct_non_root_node_exploration(self):
        """PUCT on a non-root node prefers the least-visited edge here.

        Seven edges with similar priors are attached; six have N=100 and one
        (action 38) has N=10. The edge with action 38 is expected to be
        selected even though its prior is the smallest of the seven.
        """
        np.random.seed(1)
        game = Game()
        puct = PUCT(0.8, 0.2, 1)
        parent_node = Node(game)

        # (prior, action id, visit count N) for the i-th possible move.
        edge_specs = [
            (0.14805108, 29, 100),
            (0.14307857, 35, 100),
            (0.14475949, 37, 100),
            (0.1387326, 38, 10),
            (0.14208362, 39, 100),
            (0.14188258, 40, 100),
            (0.14141211, 41, 100),
        ]
        for move_index, (prior, action, visits) in enumerate(edge_specs):
            child = Node(game.move(game.get_possible_moves()[move_index]))
            edge = Edge(parent_node, child, prior, action)
            edge.stats['N'] = visits
            parent_node.edges.append(edge)

        simulation_edge = puct.puct(parent_node, is_root=False)

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(simulation_edge.action, 38)
예제 #8
0
class TestPossibleMoves(unittest.TestCase):
    """Scripted-game test for Game.get_possible_moves().

    A fixed sequence of moves is played from a new Game. Before each move the
    full list of possible moves is compared against a hard-coded expectation,
    using the chainable ``expect(...).move(...)`` helpers defined below.
    """

    def test_possible_moves(self):
        """Play the scripted game and verify the possible moves at every ply.

        Each line asserts the current possible moves and then plays one of
        them. The final ``expect([])`` asserts that no moves remain after the
        last move has been played.
        """
        self.game = Game()
        # NOTE(review): render() presumably prints the board as a debugging
        # aid; its return value is not used.
        self.game.render()

        self.expect([[9, 13], [9, 14], [10, 14], [10, 15], [11, 15], [11, 16],
                     [12, 16]]).move([10, 14])
        self.expect([[21, 17], [22, 17], [22, 18], [23, 18], [23, 19],
                     [24, 19], [24, 20]]).move([23, 18])
        self.expect([[14, 23]]).move([14, 23])
        self.expect([[26, 19], [27, 18]]).move([27, 18])
        self.expect([[6, 10], [7, 10], [9, 13], [9, 14], [11, 15], [11, 16],
                     [12, 16]]).move([9, 13])
        self.expect([[18, 14], [18, 15], [21, 17], [22, 17], [24, 19],
                     [24, 20], [26, 23], [31, 27], [32, 27]]).move([21, 17])
        self.expect([[5, 9], [6, 9], [6, 10], [7, 10], [11, 15], [11, 16],
                     [12, 16]]).move([6, 10])
        self.expect([[17, 14], [18, 14], [18, 15], [24, 19], [24, 20],
                     [25, 21], [26, 23], [31, 27], [32, 27]]).move([18, 14])
        self.expect([[1, 6], [2, 6], [5, 9], [10, 15], [11, 15], [11, 16],
                     [12, 16]]).move([2, 6])
        self.expect([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23],
                     [31, 27], [32, 27]]).move([31, 27])
        self.expect([[5, 9], [6, 9], [10, 15], [11, 15], [11, 16],
                     [12, 16]]).move([11, 16])
        self.expect([[14, 9], [22, 18], [24, 19], [24, 20], [25, 21], [26, 23],
                     [27, 23]]).move([22, 18])
        self.game.render()
        self.expect([[13, 22]]).move([13, 22])

        self.expect([[22, 31]]).move(
            [22, 31]
        )  # double jump: 10-17 would also be jumpable if not for the piece restriction
        self.expect([[14, 9], [18, 15], [24, 19], [24, 20], [25, 21], [25, 22],
                     [27, 23], [30, 26]]).move([24, 19])
        self.expect([[10, 17], [16, 23], [31, 24]]).move([31, 24])
        self.expect([[24, 15]]).move([24, 15])
        self.expect([[15, 22]]).move([15, 22])
        self.expect([[25, 18]]).move([25, 18])
        self.expect([[10, 17]]).move([10, 17])
        self.expect([[18, 14], [18, 15], [28, 24], [29, 25], [30, 25],
                     [30, 26], [32, 27]]).move([29, 25])
        self.expect([[5, 9], [6, 9], [6, 10], [7, 10], [7, 11], [8, 11],
                     [16, 19], [16, 20], [17, 21], [17, 22]]).move([17, 21])
        self.expect([[18, 14], [18, 15], [25, 22], [28, 24], [30, 26],
                     [32, 27]]).move([30, 26])
        self.expect([[21, 30]]).move([21, 30])
        self.expect([[18, 14], [18, 15], [26, 22], [26, 23], [28, 24],
                     [32, 27]]).move([18, 15])
        self.expect([[30, 23]]).move([30, 23])
        self.expect([[15, 10], [15, 11], [28, 24], [32, 27]]).move([15, 11])
        self.expect([[8, 15]]).move([8, 15])
        self.expect([[28, 24], [32, 27]]).move([28, 24])
        self.expect([[3, 8], [4, 8], [5, 9], [6, 9], [6, 10], [7, 10], [7, 11],
                     [15, 18], [15, 19], [16, 19], [16, 20], [23, 26],
                     [23, 27], [23, 18], [23, 19]]).move([4, 8])
        self.expect([[24, 19], [24, 20], [32, 27], [32, 28]]).move([24, 19])
        self.expect([[15, 24]]).move([15, 24])
        self.expect([[32, 27], [32, 28]]).move([32, 27])
        self.expect([[23, 32], [24, 31]]).move([23, 32])
        # No possible moves are expected once the final move has been played.
        self.expect([])

    def move(self, move):
        """Play ``move`` and replace ``self.game`` with the value Game.move returns."""
        self.game = self.game.move(move)

    def expect(self, expected_possible_moves):
        """Assert the current possible moves equal ``expected_possible_moves``.

        Returns ``self`` so that a ``.move(...)`` call can be chained.
        """
        self.assertEqual(self.game.get_possible_moves(),
                         expected_possible_moves)
        return self