Exemple #1
0
def select_play(board, energy, mcts_tree, temperature, model_indicator, gpuid):
    """Run the MCTS simulations for one move and select a child of the root.

    ``async_simulate2`` is called ``int(conf['MCTS_SIMULATIONS'] /
    conf['ENERGY'])`` times, each time on a fresh copy of ``board``.  A move
    is then picked among the keys of ``mcts_tree['subtree']``:

    * ``temperature == 1``: sampled at random with probability proportional
      to each child's ``'count'``; children whose count is zero are excluded.
    * ``temperature == 0``: the child with the largest
      ``(count, mean_value, move)`` tuple, i.e. highest count, ties broken by
      ``mean_value`` and then by the move key itself.

    Args:
        board: Array indexed as ``board[0, 0, 0, -1]``; copied before every
            simulation so the caller's board is not handed to the simulator.
        energy: Forwarded unchanged to ``async_simulate2``.
        mcts_tree: Dict with a ``'subtree'`` mapping of move -> child dict;
            each child dict is read for ``'count'`` and ``'mean_value'``.
        temperature: ``1`` for count-proportional sampling, ``0`` for the
            greedy choice.
        model_indicator: Forwarded unchanged to ``async_simulate2``.
        gpuid: Forwarded to ``async_simulate2`` and included in the debug log.

    Returns:
        The selected key of ``mcts_tree['subtree']``.

    Raises:
        ValueError: If ``temperature`` is neither ``0`` nor ``1``.
    """
    start = datetime.datetime.now()
    # NOTE(review): the number of rounds is derived from conf['ENERGY'], not
    # from the ``energy`` argument - confirm this is intentional.
    for _ in range(int(conf['MCTS_SIMULATIONS'] / conf['ENERGY'])):
        # board[0, 0, 0, -1] is presumably the player-to-move marker - TODO
        # confirm against async_simulate2's signature.
        async_simulate2(mcts_tree, np.copy(board), model_indicator, energy,
                        board[0, 0, 0, -1], gpuid)
    end = datetime.datetime.now()

    # Diagnostics are best-effort: a failure here must not abort the move.
    try:
        d = tree_depth(mcts_tree)
        logger.debug(
            "TIME PER MOVE: %s   tree depth: %s    1st level children: %s - %s",
            end - start, d, len(mcts_tree['subtree']), gpuid)
    except Exception as ex:
        logger.error(ex)

    children = mcts_tree['subtree']
    if temperature == 1:
        total_n = sum(dic['count'] for dic in children.values())
        # Only children with a non-zero count take part in the draw.
        visited = [(move, dic['count'])
                   for move, dic in children.items() if dic['count']]
        moves = [move for move, _ in visited]
        ps = [count / float(total_n) for _, count in visited]
        selected_a = np.random.choice(moves, size=1, p=ps)[0]
    elif temperature == 0:
        _, _, selected_a = max((dic['count'], dic['mean_value'], a)
                               for a, dic in children.items())
    else:
        # Previously this fell through and failed with an UnboundLocalError
        # on ``return selected_a``; fail with an explicit message instead.
        raise ValueError(
            "Unsupported temperature: %r (expected 0 or 1)" % (temperature,))

    return selected_a
Exemple #2
0
    def test_nested_selected(self):
        """Check the counts and values ``simulate`` writes on a nested tree.

        The root has two children: child 0 (``p=1``), which itself has
        children 1 (``p=0``) and 2 (``p=1``), and child 1 (``p=0``), a leaf.
        After one ``simulate`` call with ``mcts_batch_size=2`` the test
        expects child 0 to have count 2, value 2 and mean_value 1, each of
        its two children to have count 1, and root child 1 to stay at count 0
        and value 0.
        """
        model = self.model
        board = self.board

        tree = {
            'count': 0,
            'mean_value': 0,
            'value': 0,
            'parent': None,
            'subtree': {
                0: {
                    'count': 0,
                    'p': 1,
                    'value': 0,
                    'mean_value': 0,
                    'subtree': {
                        1: {
                            'count': 0,
                            'p': 0,
                            'mean_value': 0,
                            'value': 0,
                            'subtree': {},
                        },
                        2: {
                            'count': 0,
                            'p': 1,
                            'mean_value': 0,
                            'value': 0,
                            'subtree': {},
                        }
                    }
                },
                1: {
                    'count': 0,
                    'p': 0,
                    'mean_value': 0,
                    'value': 0,
                    'subtree': {},
                }
            }
        }
        # Parent links cannot be expressed inside the literal, so wire them
        # up once the nested dicts exist.
        tree['subtree'][0]['parent'] = tree
        tree['subtree'][0]['subtree'][1]['parent'] = tree['subtree'][0]
        tree['subtree'][0]['subtree'][2]['parent'] = tree['subtree'][0]
        tree['subtree'][1]['parent'] = tree

        # Sanity-check the fixture before running the simulation; use the
        # TestCase assertion so the check survives ``python -O`` and matches
        # the assertions below.
        d = tree_depth(tree)
        self.assertEqual(d, 3)

        simulate(tree, board, model, mcts_batch_size=2, original_player=1)
        self.assertEqual(tree['subtree'][0]['count'], 2)
        self.assertEqual(tree['subtree'][0]['subtree'][1]['count'], 1)
        self.assertEqual(tree['subtree'][0]['subtree'][2]['count'], 1)
        self.assertEqual(tree['subtree'][1]['count'], 0)
        self.assertEqual(tree['subtree'][0]['value'], 2)
        self.assertEqual(tree['subtree'][0]['mean_value'], 1)
        self.assertEqual(tree['subtree'][1]['value'], 0)
Exemple #3
0
def select_play(policy, board, mcts_simulations, mcts_tree, temperature,
                model):
    """Pick a move by running ``mcts_decision`` on the masked policy.

    The mask returned by ``legal_moves(board)`` is applied to ``policy`` via
    ``numpy.ma.masked_array`` before the policy is handed to
    ``mcts_decision``.

    Args:
        policy: Policy array; wrapped in a masked array, the caller's object
            is not modified by this function itself.
        board: Board passed to ``legal_moves`` and ``mcts_decision``.
        mcts_simulations: Forwarded unchanged to ``mcts_decision``.
        mcts_tree: Forwarded unchanged to ``mcts_decision``.
        temperature: Forwarded unchanged to ``mcts_decision``.
        model: Forwarded unchanged to ``mcts_decision``.

    Returns:
        Whatever ``mcts_decision`` returns (named ``index`` by the original
        code, presumably the index of the chosen move - TODO confirm).
    """
    # The former timing and tree-depth locals only fed a commented-out debug
    # print, so they were dropped as dead code.
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    return mcts_decision(policy, board, mcts_simulations, mcts_tree,
                         temperature, model)
Exemple #4
0
 def test_tree_depth(self):
     """``tree_depth`` must report a depth of 3 for the fixture tree."""
     self.assertEqual(tree_depth(self.tree), 3)
Exemple #5
0
 def test_tree_depth(self):
     """``tree_depth`` must report a depth of 2 for the fixture tree."""
     assert tree_depth(self.tree) == 2
Exemple #6
0
    def test_model_evaluation_other_nested(self):
        """Check which boards ``simulate`` sends to the model for a nested tree.

        The root has a leaf child 0 (``p=1``) and a child 1 (``p=0``) that
        owns two leaves, 0 (``p=0``) and 2 (``p=1``).  A stub model asserts
        that the batch it receives holds, in order, the board obtained after
        ``make_play(0, 0, ...)`` and the board obtained after
        ``make_play(1, 0, ...)`` followed by ``make_play(2, 0, ...)``.
        """
        tree = {
            'count': 0,
            'mean_value': 0,
            'value': 0,
            'parent': None,
            'subtree': {
                0: {
                    'count': 0,
                    'p': 1,
                    'value': 0,
                    'mean_value': 0,
                    'subtree': {},
                },
                1: {
                    'count': 0,
                    'p': 0,
                    'mean_value': 0,
                    'value': 0,
                    'subtree': {
                        0: {
                            'count': 0,
                            'p': 0,
                            'mean_value': 0,
                            'value': 0,
                            'subtree': {},
                        },
                        2: {
                            'count': 0,
                            'p': 1,
                            'mean_value': 0,
                            'value': 0,
                            'subtree': {},
                        }
                    }
                }
            }
        }
        # Parent links cannot be expressed inside the literal, so wire them
        # up once the nested dicts exist.
        tree['subtree'][0]['parent'] = tree
        tree['subtree'][1]['parent'] = tree
        tree['subtree'][1]['subtree'][0]['parent'] = tree['subtree'][1]
        tree['subtree'][1]['subtree'][2]['parent'] = tree['subtree'][1]

        # Sanity-check the fixture; use the TestCase assertion so the check
        # survives ``python -O`` and matches the other assertions here.
        d = tree_depth(tree)
        self.assertEqual(d, 3)

        board = self.board
        size = conf['SIZE']

        # Expected first batch entry: a fresh game after one play.
        test_board1, player = game_init()
        make_play(0, 0, test_board1)

        # Expected second batch entry: a fresh game after two plays.
        test_board2, player = game_init()
        make_play(1, 0, test_board2)
        make_play(2, 0, test_board2)

        class DummyModel(object):
            # The first parameter is named ``_`` on purpose so that ``self``
            # inside the method still refers to the enclosing test case.
            def predict_on_batch(_, X):
                size = conf['SIZE']
                board1 = X[0].reshape(1, size, size, 17)
                board2 = X[1].reshape(1, size, size, 17)
                self.assertTrue(np.array_equal(board1, test_board1))
                self.assertTrue(np.array_equal(board2, test_board2))
                batch_size = X.shape[0]
                # Policy puts all mass on index 0; value is 1 for every row.
                policy = np.zeros((batch_size, size * size + 1),
                                  dtype=np.float32)
                policy[:, 0] = 1

                value = np.zeros((batch_size, 1), dtype=np.float32)
                value[:] = 1
                return policy, value

        model = DummyModel()

        simulate(tree, board, model, mcts_batch_size=2, original_player=1)