def add_data_to_move_buffer_with_8_symmetries(self, own, enemy, policy):
    """Append all eight symmetric variants of one training sample to ``self.moves``.

    The variants are the 2 x 4 combinations of an optional vertical flip
    followed by 0-3 right rotations. The boards and the policy go through
    the same transform so they stay aligned.

    :param own: board of the side to move, in whatever form ``flip_vertical``
        and ``rotate90`` accept (presumably a bitboard -- confirm with caller)
    :param enemy: opponent's board, same form as ``own``
    :param policy: array with 64 entries; it is reshaped to (8, 8) here
    :return: None; each appended record is
        ``[(own_variant, enemy_variant), policy_variant_as_list_of_64]``
    """
    for mirrored in (False, True):
        for quarter_turns in range(4):
            own_sym, enemy_sym = own, enemy
            policy_sym = policy.reshape((8, 8))

            # Order matters: flip first, then rotate.
            if mirrored:
                own_sym, enemy_sym = flip_vertical(own_sym), flip_vertical(enemy_sym)
                policy_sym = np.flipud(policy_sym)

            # Boards are turned one quarter at a time ...
            turns_left = quarter_turns
            while turns_left > 0:
                own_sym = rotate90(own_sym)
                enemy_sym = rotate90(enemy_sym)
                turns_left -= 1
            # ... while the policy grid is turned in a single call; a negative
            # k makes np.rot90 rotate clockwise instead of counterclockwise.
            if quarter_turns > 0:
                policy_sym = np.rot90(policy_sym, k=-quarter_turns)

            flat_policy = list(policy_sym.reshape((64, )))
            self.moves.append([(own_sym, enemy_sym), flat_policy])
async def expand_and_evaluate(self, env):
    """Expand a new leaf: evaluate it with the predictor and store its policy.

    The position is evaluated under a randomly chosen board symmetry
    (optional vertical flip followed by 0-3 right rotations). The returned
    policy is mapped back to the original orientation and stored in
    ``self.var_p`` under both ``counter_key(env)`` and
    ``another_side_counter_key(env)``.

    ``key`` is kept in ``self.now_expanding`` while the evaluation is in
    flight and is always released on exit, including when the prediction
    raises or the task is cancelled. It is added to ``self.expanded`` only
    when the evaluation succeeds.

    :param ReversiEnv env: environment whose current position is the leaf
    :return: leaf_v, the predicted value converted to ``float``
    """
    key = self.counter_key(env)
    another_side_key = self.another_side_counter_key(env)
    self.now_expanding.add(key)
    try:
        black, white = env.board.black, env.board.white

        # (di(p), v) = fθ(di(sL))
        # Random symmetry, applied in the order flip -> rotate.
        is_flip_vertical = random() < 0.5
        rotate_right_num = int(random() * 4)
        if is_flip_vertical:
            black, white = flip_vertical(black), flip_vertical(white)
        for _ in range(rotate_right_num):
            # rotate90: rotate bitboard RIGHT 1 time
            black, white = rotate90(black), rotate90(white)

        black_ary = bit_to_array(black, 64).reshape((8, 8))
        white_ary = bit_to_array(white, 64).reshape((8, 8))
        # The side to move is always the first plane of the input.
        if env.next_player == Player.black:
            state = [black_ary, white_ary]
        else:
            state = [white_ary, black_ary]

        future = await self.predict(np.array(state))  # type: Future
        await future
        leaf_p, leaf_v = future.result()

        # Undo the symmetry on leaf_p in the reverse order: rotate -> flip.
        if rotate_right_num > 0 or is_flip_vertical:
            leaf_p = leaf_p.reshape((8, 8))
            if rotate_right_num > 0:
                # rot90: rotate matrix LEFT k times
                leaf_p = np.rot90(leaf_p, k=rotate_right_num)
            if is_flip_vertical:
                leaf_p = np.flipud(leaf_p)
            leaf_p = leaf_p.reshape((64, ))

        # P is the value for next_player (black or white).
        self.var_p[key] = leaf_p
        self.var_p[another_side_key] = leaf_p
        self.expanded.add(key)
        return float(leaf_v)
    finally:
        # Always clear the in-progress marker so a failed or cancelled
        # evaluation does not leave the key flagged as expanding forever.
        # discard() is used so the cleanup cannot mask the original exception.
        self.now_expanding.discard(key)