Example n. 1
0
    def test_predict(self):
        """predict_state_value should unpack the network output into a scalar
        value, a probability vector masked to legal actions, and the list of
        allowed action ids."""
        game = Game()
        mocked_model = MagicMock()
        mocked_model.predict.return_value = [
            np.array([[0.25]]),
            np.reshape(np.arange(0.001, 0.897, step=0.001), newshape=(1, 896))
        ]

        encoder = ActionEncoder(DirectionResolver())
        mcts = MCTS(Node(game),
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=mocked_model,
                    state_encoder=StateEncoder(),
                    action_encoder=encoder)

        value, probs, allowed_actions = mcts.predict_state_value(game)

        self.assertEqual(value, 0.25)
        expected_ids = encoder.convert_moves_to_action_ids(
            game.get_possible_moves_from_current_player_perspective())
        self.assertCountEqual(allowed_actions, expected_ids)
        # Legal actions keep meaningful mass; illegal ones are masked to
        # (numerically) zero.
        for action_id, prob in enumerate(probs):
            if action_id in allowed_actions:
                self.assertTrue(prob > 0.01)
            else:
                self.assertTrue(prob < np.exp(-40))
Example n. 2
0
    def test_convert_to_one_hot_3(self):
        """Move [1, 5] must map to index 0 of the 1x128 one-hot encoding."""
        encoder = ActionEncoder(DirectionResolver())
        one_hot = encoder.convert_action_to_one_hot([1, 5])

        expected = np.zeros((1, 32 * 4))
        expected[0, 0] = 1
        # The encoder returns a sparse matrix; densify before comparing.
        self.assertTrue(np.array_equal(one_hot.toarray(), expected))
Example n. 3
0
    def test_evaluate_leaf(self):
        """Expanding a leaf should return the network value and attach one
        edge per legal action, each seeded with its prior probability."""
        game = Game()
        root = Node(game)
        mocked_model = MagicMock()
        mocked_model.predict.return_value = [
            np.array([[0.25]]),
            np.reshape(np.arange(0.001, 0.897, step=0.001), newshape=(1, 896))
        ]

        encoder = ActionEncoder(DirectionResolver())
        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=mocked_model,
                    state_encoder=StateEncoder(),
                    action_encoder=encoder)

        _, probs, _ = mcts.predict_state_value(game)
        value = mcts.evaluate_leaf(root)

        self.assertEqual(value, 0.25)
        self.assertEqual(len(root.edges), 7)
        # Edges follow the order of the allowed actions; their priors come
        # straight from the (masked) probability vector.
        self.assertEqual(root.edges[0].action, 8)
        self.assertEqual(root.edges[0].stats['P'], probs[8])
        self.assertEqual(root.edges[1].action, 104)
        self.assertEqual(root.edges[1].stats['P'], probs[104])
Example n. 4
0
    def test_move_to_leaf(self):
        """move_to_leaf should follow the PUCT-selected edge down to an
        unexpanded child and report a zero value / not-done for a
        non-terminal leaf."""
        game = Game()
        root = Node(game)
        action_encoder = ActionEncoder(DirectionResolver())
        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=None,
                    state_encoder=None,
                    action_encoder=action_encoder)

        # Stub the selection policy so the traversal is deterministic.
        puct = MagicMock()
        mcts.puct = puct

        child1 = Node(game.move(game.get_possible_moves()[0]))
        child2 = Node(game.move(game.get_possible_moves()[1]))
        child3 = Node(game.move(game.get_possible_moves()[2]))
        edge1 = Edge(
            root, child1, 0.33,
            action_encoder.convert_move_to_action_id(
                game.get_possible_moves()[0]))
        edge2 = Edge(
            root, child2, 0.34,
            action_encoder.convert_move_to_action_id(
                game.get_possible_moves()[1]))
        edge3 = Edge(
            root, child3, 0.33,
            action_encoder.convert_move_to_action_id(
                game.get_possible_moves()[2]))
        root.edges.append(edge1)
        root.edges.append(edge2)
        root.edges.append(edge3)
        puct.puct.return_value = edge2

        leaf, value, done, breadcrumbs = mcts.move_to_leaf()

        # assertEquals is a deprecated alias removed in Python 3.12; use
        # assertEqual.  The original assertEquals(False, 0) and
        # assertEquals(True, 1) were vacuous (bool == int in Python) and
        # have been removed.
        self.assertEqual(leaf, child2)
        self.assertEqual(value, 0)
        self.assertEqual(done, 0)
Example n. 5
0
def read_agent(version):
    """Load the saved network weights for *version* and wrap them in an Agent.

    The agent is named 'player<version>' and shares the module-level config.
    """
    network = Residual_CNN(config['REG_CONST'], config['LEARNING_RATE'],
                           (2, 4, 8), config['ACTION_SIZE'],
                           config['HIDDEN_CNN_LAYERS'], config['MOMENTUM'])
    checkpoint = network.read(version)
    network.model.set_weights(checkpoint.get_weights())
    return Agent(network,
                 ActionEncoder(DirectionResolver()),
                 StateEncoder(),
                 name='player' + str(version),
                 config=config)
Example n. 6
0
    def test_backfill(self):
        """backfill should update N/W/Q along the breadcrumb path, flipping
        the sign of the value at alternating depths."""
        game_root = Game()
        root = Node(game_root)
        action_encoder = ActionEncoder(DirectionResolver())
        position1 = game_root.move(game_root.get_possible_moves()[0])
        child1 = Node(position1)
        edge1 = Edge(
            root, child1, 0.3,
            action_encoder.convert_move_to_action_id(
                game_root.get_possible_moves()[0]))

        position2 = position1.move(position1.get_possible_moves()[0])
        child2 = Node(position2)
        edge2 = Edge(
            child1, child2, 0.2,
            action_encoder.convert_move_to_action_id(
                game_root.get_possible_moves()[0]))
        # Pre-seed stats so the update is visible on non-zero counters.
        edge2.stats['N'] = 4
        edge2.stats['W'] = 1

        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=None,
                    state_encoder=None,
                    action_encoder=action_encoder)

        mcts.backfill(child2, -1, [edge2, edge1])

        # assertEquals is a deprecated alias removed in Python 3.12;
        # assertEqual is the supported spelling.
        self.assertEqual(edge2.stats['N'], 5)
        self.assertEqual(edge2.stats['W'], 2)
        self.assertEqual(edge2.stats['Q'], 2 / 5)

        self.assertEqual(edge1.stats['N'], 1)
        self.assertEqual(edge1.stats['W'], -1)
        self.assertEqual(edge1.stats['Q'], -1)
Example n. 7
0
	def test_act_tau_0(self):
		"""With tau=0 the agent deterministically picks the most-visited
		edge and pi is proportional to the visit counts."""
		config = {
			'ALPHA': 0.8,
			'CPUCT': 1,
			'EPSILON': 0.2,
			'ACTION_SIZE': 32 * 4 * 7,
			'MCTS_SIMULATIONS': 3
		}
		encoder = ActionEncoder(DirectionResolver())
		agent = Agent(model=None, action_encoder=encoder, state_encoder=StateEncoder(), name='player1', config=config)
		game_root = Game()
		root_node = Node(game_root)

		# Three expanded edges with hand-set stats; action 104 (the second
		# move) has the highest visit count and must win the argmax.
		edge_specs = [
			(0, 0.33, 8, 10, 0.2),
			(1, 0.5, 104, 20, 0.5),
			(2, 0.17, 9, 15, 0.3),
		]
		for move_index, prior, action_id, visits, q_value in edge_specs:
			child = Node(game_root.move(game_root.get_possible_moves()[move_index]))
			edge = Edge(root_node, child, prior, action_id)
			edge.stats['N'] = visits
			edge.stats['Q'] = q_value
			root_node.edges.append(edge)

		# Replace the real search with a mock so act() only aggregates stats.
		agent.prepare_mcts_for_next_action = MagicMock()
		mcts = MagicMock()
		mcts.root = root_node
		mcts.evaluate_leaf.return_value = 0.7
		agent.mcts = mcts
		mcts.move_to_leaf.return_value = (root_node, 0.5, False, [])

		action, pi, value = agent.act(game_root, tau=0)

		self.assertEqual(action, [9, 14])
		self.assertEqual(value, 0.5)
		total_visits = 10 + 20 + 15
		self.assertEqual(pi[8], 10 / total_visits)
		self.assertEqual(pi[9], 15 / total_visits)
		self.assertEqual(pi[8 + 3 * 32], 20 / total_visits)
Example n. 8
0
    def test_integration(self):
        """Smoke test: run a real residual network (no mocks) through
        predict_state_value and evaluate_leaf; passes if nothing raises."""
        # Six identical conv blocks; build distinct dicts so none share state.
        hidden_cnn_layers = [
            {'filters': 75, 'kernel_size': (4, 4)} for _ in range(6)
        ]
        model = Residual_CNN(0.0001,
                             0.1, (2, 4, 8),
                             32 * 4,
                             hidden_cnn_layers,
                             momentum=0.9)
        game_root = Game()
        root = Node(game_root)
        mcts = MCTS(root,
                    config={
                        'ALPHA': 0.8,
                        'CPUCT': 1,
                        'EPSILON': 0.2
                    },
                    model=model,
                    state_encoder=StateEncoder(),
                    action_encoder=ActionEncoder(DirectionResolver()))

        mcts.predict_state_value(game_root)
        mcts.evaluate_leaf(root)
Example n. 9
0
                       config['ACTION_SIZE'], config['HIDDEN_CNN_LAYERS'],
                       config['MOMENTUM'])

# Either resume both networks from a saved checkpoint, or start fresh and
# sync the best network to the current one.
if "INITIAL_MODEL_VERSION" in config:
    best_player_version = config["INITIAL_MODEL_VERSION"]
    logging.info('LOADING MODEL VERSION ' +
                 str(config["INITIAL_MODEL_VERSION"]) + '...')
    # Load the checkpoint once and copy its weights into both networks.
    m_tmp = best_NN.read(best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
else:
    best_player_version = 0
    # No checkpoint: the freshly initialised current network becomes the
    # starting point for the best network as well.
    best_NN.model.set_weights(current_NN.model.get_weights())

# Two agents share encoders and config but wrap different networks.
current_player = Agent(current_NN,
                       ActionEncoder(DirectionResolver()),
                       StateEncoder(),
                       name='current_player',
                       config=config)
best_player = Agent(best_NN,
                    ActionEncoder(DirectionResolver()),
                    StateEncoder(),
                    name='best_player',
                    config=config)

# Counter for the training loop that follows.
iteration = 0

while 1:

    iteration += 1
    logging.info('ITERATION NUMBER ' + str(iteration))
Example n. 10
0
 def test_convert_action_id_to_true_position_and_direction_4(self):
     """Action id 105 seen from player 2 decodes to square 23, direction 2."""
     encoder = ActionEncoder(DirectionResolver())
     decoded = encoder.convert_action_id_to_true_position_and_direction(105, 2)
     self.assertEqual((23, 2), decoded)
Example n. 11
0
 def test_convert_action_id_to_position_and_direction_2(self):
     """Action id 105 decodes to board position 10, direction 4."""
     encoder = ActionEncoder(DirectionResolver())
     decoded = encoder.convert_action_id_to_position_and_direction(105)
     self.assertEqual((10, 4), decoded)
Example n. 12
0
 def test_convert_position_direction_distance_to_move_1(self):
     """From square 10, direction 4 at distance 1 yields the move [10, 15]."""
     encoder = ActionEncoder(DirectionResolver())
     self.assertEqual(
         encoder.convert_direction_and_distance_to_move(10, 4, 1), [10, 15])
Example n. 13
0
 def test_convert_moves_to_action_ids(self):
     """Each move encodes independently; resulting ids are order-insensitive."""
     encoder = ActionEncoder(DirectionResolver())
     action_ids = encoder.convert_moves_to_action_ids([[10, 17], [10, 15]])
     self.assertCountEqual(action_ids, [9, 105])