def select_play(board, energy, mcts_tree, temperature, model_indicator, gpuid):
    """Run MCTS simulations from ``mcts_tree`` and pick one of its child moves.

    ``async_simulate2`` is called ``int(conf['MCTS_SIMULATIONS'] / conf['ENERGY'])``
    times, each time on a fresh copy of ``board``; afterwards a key of
    ``mcts_tree['subtree']`` is selected from the children's visit counts.

    Args:
        board: Board array. ``board[0, 0, 0, -1]`` is forwarded to
            ``async_simulate2`` (presumably the player to move -- TODO confirm).
        energy: Forwarded unchanged to ``async_simulate2``.
        mcts_tree: Root node dict whose ``'subtree'`` maps move -> child dict
            carrying ``'count'`` and ``'mean_value'``.
        temperature: ``1`` samples a move with probability proportional to its
            visit count; ``0`` takes the most visited move, breaking ties by
            ``'mean_value'`` and then by the move itself.
        model_indicator: Forwarded unchanged to ``async_simulate2``.
        gpuid: Forwarded to ``async_simulate2`` and included in the debug log.

    Returns:
        The selected move (a key of ``mcts_tree['subtree']``).

    Raises:
        ValueError: If ``temperature`` is neither 0 nor 1, or if
            ``temperature`` is 1 and no child has a non-zero visit count.
    """
    # Fail fast: previously an unsupported temperature ran every simulation
    # and then died with UnboundLocalError on the return statement.
    if temperature not in (0, 1):
        raise ValueError(
            "temperature must be 0 or 1, got %r" % (temperature,))

    start = datetime.datetime.now()
    # NOTE(review): the simulation count uses conf['ENERGY'], not the `energy`
    # argument -- confirm this is intended.
    for _ in range(int(conf['MCTS_SIMULATIONS'] / conf['ENERGY'])):
        async_simulate2(mcts_tree, np.copy(board), model_indicator, energy,
                        board[0, 0, 0, -1], gpuid)
    end = datetime.datetime.now()

    # Diagnostics are best-effort: a failure here must not abort move selection.
    try:
        d = tree_depth(mcts_tree)
        logger.debug(
            "TIME PER MOVE: %s tree depth: %s 1st level children: %s - %s",
            end - start, d, len(mcts_tree['subtree']), gpuid)
    except Exception as ex:
        logger.error(ex)

    children = mcts_tree['subtree']

    if temperature == 1:
        # Only visited children can be sampled; unvisited ones have p == 0.
        visited = [(move, child['count'])
                   for move, child in children.items() if child['count']]
        if not visited:
            raise ValueError("no visited child move to sample from")
        total_n = float(sum(count for _, count in visited))
        moves = [move for move, _ in visited]
        ps = [count / total_n for _, count in visited]
        return np.random.choice(moves, size=1, p=ps)[0]

    # temperature == 0: greedy choice on (count, mean_value, move).
    _, _, selected_a = max(
        (child['count'], child['mean_value'], move)
        for move, child in children.items())
    return selected_a
def test_nested_selected(self):
    """Check counts and values after one ``simulate`` call on a nested tree.

    The hand-built tree has two root moves (0 and 1); move 0 has two
    children (1 and 2). After ``simulate`` with ``mcts_batch_size=2`` the
    test expects move 0 to have been visited twice, each of its children
    once, and move 1 not at all.
    """
    model = self.model
    board = self.board
    # 'p' values are presumably move priors -- TODO confirm against simulate().
    tree = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'subtree': {
            0: {
                'count': 0,
                'p': 1,
                'value': 0,
                'mean_value': 0,
                'subtree': {
                    1: {
                        'count': 0,
                        'p': 0,
                        'mean_value': 0,
                        'value': 0,
                        'subtree': {},
                    },
                    2: {
                        'count': 0,
                        'p': 1,
                        'mean_value': 0,
                        'value': 0,
                        'subtree': {},
                    },
                },
            },
            1: {
                'count': 0,
                'p': 0,
                'mean_value': 0,
                'value': 0,
                'subtree': {},
            },
        },
    }
    # Back-links cannot be expressed in the literal, so wire them afterwards.
    tree['subtree'][0]['parent'] = tree
    tree['subtree'][0]['subtree'][1]['parent'] = tree['subtree'][0]
    tree['subtree'][0]['subtree'][2]['parent'] = tree['subtree'][0]
    tree['subtree'][1]['parent'] = tree

    d = tree_depth(tree)
    # assertEqual instead of a bare assert: consistent with the checks below
    # and not stripped when Python runs with -O.
    self.assertEqual(d, 3)

    simulate(tree, board, model, mcts_batch_size=2, original_player=1)

    self.assertEqual(tree['subtree'][0]['count'], 2)
    self.assertEqual(tree['subtree'][0]['subtree'][1]['count'], 1)
    self.assertEqual(tree['subtree'][0]['subtree'][2]['count'], 1)
    self.assertEqual(tree['subtree'][1]['count'], 0)
    self.assertEqual(tree['subtree'][0]['value'], 2)
    self.assertEqual(tree['subtree'][0]['mean_value'], 1)
    self.assertEqual(tree['subtree'][1]['value'], 0)
def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    """Pick a move index by delegating to ``mcts_decision`` on a masked policy.

    ``policy`` is wrapped in a masked array using the mask returned by
    ``legal_moves(board)`` (which entries that mask hides is defined by
    ``legal_moves`` -- confirm there), and every other argument is forwarded
    unchanged.

    Args:
        policy: Policy values to be masked.
        board: Board passed to ``legal_moves`` and ``mcts_decision``.
        mcts_simulations: Forwarded to ``mcts_decision``.
        mcts_tree: Forwarded to ``mcts_decision``.
        temperature: Forwarded to ``mcts_decision``.
        model: Forwarded to ``mcts_decision``.

    Returns:
        The index returned by ``mcts_decision``.
    """
    # The former timing / tree_depth instrumentation only fed a commented-out
    # print; it was removed so an unused tree traversal can no longer slow
    # down or break move selection.
    masked_policy = ma.masked_array(policy, mask=legal_moves(board))
    return mcts_decision(masked_policy, board, mcts_simulations, mcts_tree,
                         temperature, model)
def test_tree_depth(self):
    """``tree_depth`` must report 3 for the fixture tree ``self.tree``."""
    self.assertEqual(tree_depth(self.tree), 3)
def test_tree_depth(self):
    """``tree_depth`` must report 2 for the fixture tree ``self.tree``."""
    depth = tree_depth(self.tree)
    assert depth == 2
def test_model_evaluation_other_nested(self):
    """Check which boards ``simulate`` sends to the model for a nested tree.

    The tree has two root moves (0 and 1); move 1 has two children (0 and
    2). The verification happens inside ``DummyModel.predict_on_batch``: the
    first two batch entries must equal two reference boards built with
    ``game_init`` / ``make_play``.
    """
    tree = {
        'count': 0,
        'mean_value': 0,
        'value': 0,
        'parent': None,
        'subtree': {
            0: {
                'count': 0,
                'p': 1,
                'value': 0,
                'mean_value': 0,
                'subtree': {},
            },
            1: {
                'count': 0,
                'p': 0,
                'mean_value': 0,
                'value': 0,
                'subtree': {
                    0: {
                        'count': 0,
                        'p': 0,
                        'mean_value': 0,
                        'value': 0,
                        'subtree': {},
                    },
                    2: {
                        'count': 0,
                        'p': 1,
                        'mean_value': 0,
                        'value': 0,
                        'subtree': {},
                    },
                },
            },
        },
    }
    # Back-links cannot be expressed in the literal, so wire them afterwards.
    tree['subtree'][0]['parent'] = tree
    tree['subtree'][1]['parent'] = tree
    tree['subtree'][1]['subtree'][0]['parent'] = tree['subtree'][1]
    tree['subtree'][1]['subtree'][2]['parent'] = tree['subtree'][1]

    d = tree_depth(tree)
    # assertEqual instead of a bare assert: consistent with the assertTrue
    # checks below and not stripped when Python runs with -O.
    self.assertEqual(d, 3)

    board = self.board

    # Reference boards the model is expected to receive, in batch order.
    test_board1, _ = game_init()
    make_play(0, 0, test_board1)

    test_board2, _ = game_init()
    make_play(1, 0, test_board2)
    make_play(2, 0, test_board2)

    class DummyModel(object):
        # The instance parameter is named `_` so that `self` inside the
        # method still refers to the enclosing test case.
        def predict_on_batch(_, X):
            size = conf['SIZE']
            board1 = X[0].reshape(1, size, size, 17)
            board2 = X[1].reshape(1, size, size, 17)
            self.assertTrue(np.array_equal(board1, test_board1))
            self.assertTrue(np.array_equal(board2, test_board2))

            batch_size = X.shape[0]
            # All policy weight on index 0 and a value of 1 for every entry.
            policy = np.zeros((batch_size, size * size + 1), dtype=np.float32)
            policy[:, 0] = 1
            value = np.ones((batch_size, 1), dtype=np.float32)
            return policy, value

    model = DummyModel()
    simulate(tree, board, model, mcts_batch_size=2, original_player=1)