def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Parameters
    ----------
    zip_file_name : str
        Name of the compressed archive; it is handed to ``self.unzip_data``,
        whose return value is opened as a tar file inside ``self.data_dir``.
    data_file_name : str
        Base name (inside ``self.data_dir``) used for the output files.
    game_list : iterable of int
        Indices of the games to process.  Game ``i`` is read from entry
        ``i + 1`` of the archive's member names.

    Raises
    ------
    ValueError
        If a selected archive member does not end in ``.sgf``.

    Notes
    -----
    Features and labels are written with ``np.save`` to
    ``<base>_features_<k>`` / ``<base>_labels_<k>`` in chunks of 1024
    examples.  A trailing remainder smaller than one chunk is not written.
    """
    tar_file = self.unzip_data(zip_file_name)
    shape = self.encoder.shape()
    counter = 0

    # Open the archive as a context manager so it is closed even when a
    # game fails to parse.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        total_examples = self.num_total_examples(
            zip_file, game_list, name_list)

        # One leading axis for the examples, followed by the encoder shape.
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        for index in game_list:
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            game_state, first_move_done = self.get_handicap(sgf)

            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                if color is None:
                    # Node without a move: nothing to encode or apply.
                    continue

                point = None
                if move_tuple is not None:
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                else:
                    move = Move.pass_turn()

                # Passes have no point and are therefore never recorded.
                if first_move_done and point is not None:
                    features[counter] = self.encoder.encode(game_state)
                    labels[counter] = self.encoder.encode_point(point)
                    counter += 1
                game_state = game_state.apply_move(move)
                first_move_done = True

    # Drop rows that were allocated but never filled (e.g. for the pass
    # moves skipped above); otherwise they would be stored as all-zero
    # examples with label 0.
    features = features[:counter]
    labels = labels[:counter]

    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    while features.shape[0] >= chunksize:
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)
def process_zip(self, zip_file_name, data_file_name, game_list):
    """Turn the selected games of an archive into feature/label chunks.

    Parameters
    ----------
    zip_file_name : str
        Name of the compressed archive, passed to ``self.unzip_data``.
    data_file_name : str
        Base name (inside ``self.data_dir``) of the files to write.
    game_list : iterable of int
        Indices of the games to process; game ``i`` is looked up at
        position ``i + 1`` of the archive's member names.

    Raises
    ------
    ValueError
        If a selected archive member does not end in ``.sgf``.

    Notes
    -----
    Only complete chunks of 1024 examples are written; a smaller trailing
    remainder is dropped.
    """
    tar_file = self.unzip_data(zip_file_name)
    # Infer the shape of features and labels from the encoder we use.
    shape = self.encoder.shape()
    counter = 0

    # The ``with`` block guarantees the archive is closed, also on errors.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        # Determine the total number of moves in all games in this zip file.
        total_examples = self.num_total_examples(
            zip_file, game_list, name_list)

        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        for index in game_list:
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            # Read the SGF content of the archive member and parse it.
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            # Initial game state and first-move flag as reported by
            # self.get_handicap for this record.
            game_state, first_move_done = self.get_handicap(sgf)

            # Iterate over all moves in the SGF file.
            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                point = None
                if color is not None:
                    if move_tuple is not None:
                        # Read the coordinates of the stone to be played...
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        # ... or pass, if there is none.
                        move = Move.pass_turn()
                    if first_move_done and point is not None:
                        # We encode the current game state as features...
                        features[counter] = self.encoder.encode(game_state)
                        # ... and the next move as label for the features.
                        labels[counter] = self.encoder.encode_point(point)
                        counter += 1
                    # Afterwards the move is applied to the board and we
                    # proceed with the next one.
                    game_state = game_state.apply_move(move)
                    first_move_done = True

    # Keep only the rows that were actually filled.  Passes are skipped
    # above, so the pre-allocated arrays can be longer than ``counter`` and
    # would otherwise contribute all-zero examples with label 0.
    features = features[:counter]
    labels = labels[:counter]

    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    # We process features and labels in chunks of size 1024.
    while features.shape[0] >= chunksize:
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        # The current chunk is cut off from features and labels...
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]
        # ... and then stored in a separate file.
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)
def num_total_examples(self, zip_file, game_list, name_list):
    """
    Count the moves contributed by the selected games of an archive.

    Parameters
    ----------
    zip_file : object
        Opened archive; ``zip_file.extractfile(name).read()`` must return
        the SGF content of the member called ``name``.
    game_list : list
        Indices of the game records that are needed.
    name_list : list
        Names of the files contained in the archive.

    Returns
    -------
    total_examples : int
        Total number of moves over all selected games.  Every node that
        carries a colour counts, except that the first such node of a game
        is skipped when ``self.get_handicap`` reports that no first move
        has been made yet.

    Raises
    ------
    ValueError
        If a selected member name does not end in ``.sgf``.
    """
    grand_total = 0

    # Walk through the moves of every requested game record.
    for game_index in game_list:
        # Presumably name_list[0] is a special, non-game entry, hence the
        # offset of one -- TODO confirm against the archive layout.
        member_name = name_list[game_index + 1]
        if not member_name.endswith('.sgf'):
            raise ValueError(member_name + ' is not a valid sgf')

        raw_sgf = zip_file.extractfile(member_name).read()
        record = Sgf_game.from_string(raw_sgf)
        _, seen_first_move = self.get_handicap(record)

        # The game is not replayed here; the moves are only counted.
        for node in record.main_sequence_iter():
            colour, _ = node.get_move()
            if colour is None:
                continue
            if seen_first_move:
                grand_total += 1
            seen_first_move = True

    return grand_total
def num_total_examples(self, zip_file, game_list, name_list):
    """Return how many moves the selected games of an archive contain.

    ``zip_file`` must offer ``extractfile(name).read()``; ``game_list``
    holds game indices and game ``i`` is read from ``name_list[i + 1]``.
    A ``ValueError`` is raised for a selected name not ending in ``.sgf``.
    """

    def moves_in(entry_name):
        # Number of counted moves in a single archive member.
        if not entry_name.endswith('.sgf'):
            raise ValueError(entry_name + ' is not a valid sgf')
        parsed = Sgf_game.from_string(zip_file.extractfile(entry_name).read())
        _, already_started = self.get_handicap(parsed)
        coloured_nodes = sum(
            1
            for node in parsed.main_sequence_iter()
            if node.get_move()[0] is not None
        )
        # When no first move has been made yet, the first coloured node
        # only flips the flag and is not counted itself.
        if already_started or coloured_nodes == 0:
            return coloured_nodes
        return coloured_nodes - 1

    return sum(moves_in(name_list[position + 1]) for position in game_list)
from dlgo.goboard import GameState, Move
from dlgo.gotypes import Point
from dlgo.utils import print_board
from dlgo.gosgf.sgf import Sgf_game

# A short game record in SGF; its SZ[9] property declares a 9x9 board.
sgf_content = "(;GM[1]FF[4]SZ[9];B[ee];W[ef];B[ff];W[df];B[fe];W[fc];B[ec];W[gd];B[fb])"

sgf_game = Sgf_game.from_string(sgf_content)

# Create the board with the size stated in the record instead of a
# hard-coded 19, so the replay happens on the board the game was played on.
game_state = GameState.new_game(sgf_game.get_size())

# Replay the main line of the record move by move.
for item in sgf_game.main_sequence_iter():
    color, move_tuple = item.get_move()
    # Nodes without a colour or without coordinates are skipped.
    if color is not None and move_tuple is not None:
        row, col = move_tuple
        # SGF coordinates are shifted by one to build the Point.
        point = Point(row + 1, col + 1)
        move = Move.play(point)
        game_state = game_state.apply_move(move)
        print_board(game_state.board)
def process_zip(self, zip_file_name, data_file_name, game_list):
    """
    Unpack a .tar.gz file, convert only the required games into features
    and labels, and save them under the given name.

    Every 1024 move examples are stored in one file of their own, so the
    data can later be loaded piece by piece instead of all at once.  A
    trailing remainder of fewer than 1024 examples is not written.

    Parameters
    ----------
    zip_file_name : str
        Path of the file to unpack (passed to ``self.unzip_data``).
    data_file_name : str
        Base name under which features and labels are saved.
    game_list : list
        Indices of the games selected from the unpacked archive.

    Raises
    ------
    ValueError
        If a selected archive member does not end in ``.sgf``.
    """
    tar_file = self.unzip_data(zip_file_name)
    shape = self.encoder.shape()
    counter = 0  # number of recorded moves

    # Open the archive in a ``with`` block so it is always closed again.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        # Number of move examples needed for the selected games.
        total_examples = self.num_total_examples(
            zip_file, game_list, name_list)

        # Prepare empty features and labels.
        feature_shape = np.insert(shape, 0, total_examples)
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        # Replay every selected game and record it along the way.
        for index in game_list:
            # Read the game record of the selected sgf file.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            # Extract the member as a file object.
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            # Apply the handicap.
            game_state, first_move_done = self.get_handicap(sgf)

            # Replay the game.
            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                if color is None:
                    # Not a move.
                    continue

                # Reset per node so a pass can never reuse the point of an
                # earlier move as its label.
                point = None
                if move_tuple is not None:
                    # A stone is played.
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                else:
                    # Pass.
                    move = Move.pass_turn()

                # The first move is not recorded while first_move_done is
                # still false (original note: the board is empty then).
                # Passes have no point to label and are skipped as well.
                if first_move_done and point is not None:
                    # Current board position as the features ...
                    features[counter] = self.encoder.encode(game_state)
                    # ... and this turn's move as the label.
                    labels[counter] = self.encoder.encode_point(point)
                    counter += 1

                # Apply the move.
                game_state = game_state.apply_move(move)
                first_move_done = True

    # Keep only the rows that were filled; unfilled rows would otherwise be
    # saved as all-zero examples with label 0.
    features = features[:counter]
    labels = labels[:counter]

    # Placeholders for the names of the files to save.
    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    # Do not save everything into one file; split it up by chunk_size.
    chunk_size = 1024
    for chunk in range(features.shape[0] // chunk_size):
        start = chunk * chunk_size
        stop = start + chunk_size
        # Save the current slice of features and labels.
        np.save(feature_file_base % chunk, features[start:stop])
        np.save(label_file_base % chunk, labels[start:stop])