def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Every selected SGF game is replayed move by move. For each played
    stone (after the first move of the game) the position before the move
    is encoded as a feature and the move itself as its label.

    Args:
        zip_file_name: Archive name handed to ``self.unzip_data``.
        data_file_name: Base name of the output files written under
            ``self.data_dir``.
        game_list: Indices of the games to encode; index ``i`` selects
            ``name_list[i + 1]`` of the archive's member names.

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.

    Note:
        Only full chunks of 1024 examples are written; a remainder
        shorter than one chunk is not saved.
    """
    tar_file = self.unzip_data(zip_file_name)
    # The context manager closes the archive even when a game fails to parse.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        # Determine the total number of moves in all selected games.
        total_examples = self.num_total_examples(zip_file, game_list, name_list)

        shape = self.encoder.shape()
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        counter = 0
        for index in game_list:
            # NOTE(review): the +1 presumably skips a leading directory
            # entry in the member list - confirm against the archives.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            # Read the SGF content from the extracted archive member.
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            # Initial game state as returned by the handicap helper.
            game_state, first_move_done = self.get_handicap(sgf)

            # Iterate over all moves in the SGF file.
            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                if color is None:
                    continue
                point = None
                if move_tuple is not None:
                    # Read the coordinates of the stone that was played.
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                else:
                    move = Move.pass_turn()
                if first_move_done and point is not None:
                    # Encode the current game state as the feature ...
                    features[counter] = self.encoder.encode(game_state)
                    # ... and the move as the label of that feature.
                    labels[counter] = self.encoder.encode_point(point)
                    counter += 1
                # Apply the move to the board before handling the next one.
                game_state = game_state.apply_move(move)
                first_move_done = True

    # The buffers are sized by num_total_examples, which can exceed the
    # number of positions actually encoded (pass moves are skipped above);
    # drop the unused all-zero tail so it is not saved as training data.
    features = features[:counter]
    labels = labels[:counter]

    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    while features.shape[0] >= chunksize:
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)
def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Every selected SGF game is replayed move by move. For each played
    stone (after the first move of the game) the position before the move
    is encoded as a feature and the move itself as its label. Progress
    information is printed to stdout.

    Args:
        zip_file_name: Archive name handed to ``self.unzip_data``.
        data_file_name: Base name of the output files written under
            ``self.data_dir``.
        game_list: Indices of the games to encode; index ``i`` selects
            ``name_list[i + 1]`` of the archive's member names.

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.

    Note:
        Only full chunks of 1024 examples are written; a remainder
        shorter than one chunk is not saved.
    """
    tar_file = self.unzip_data(zip_file_name)
    # The context manager closes the archive on every exit path; the
    # previous explicit close() was skipped whenever an exception was raised.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        total_examples = self.num_total_examples(zip_file, game_list, name_list)

        shape = self.encoder.shape()
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        counter = 0
        for index in game_list:
            # NOTE(review): the +1 presumably skips a leading directory
            # entry in the member list - confirm against the archives.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not valid sgf')
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            game_state, first_move_done = self.get_handicap(sgf)

            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                if color is None:
                    continue
                point = None
                if move_tuple is not None:
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                else:
                    move = Move.pass_turn()
                if first_move_done and point is not None:
                    # Feature: position before the move; label: the move.
                    features[counter] = self.encoder.encode(game_state)
                    labels[counter] = self.encoder.encode_point(point)
                    counter += 1
                game_state = game_state.apply_move(move)
                first_move_done = True

    print('features and labels size is ', counter)

    # The buffers are sized by num_total_examples, which can exceed the
    # number of positions actually encoded (pass moves are skipped above);
    # drop the unused all-zero tail so it is not saved as training data.
    features = features[:counter]
    labels = labels[:counter]

    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0
    chunksize = 1024
    print(features.shape)
    while features.shape[0] >= chunksize:
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)
def num_total_examples(self, zip_file, game_list, name_list):
    """Count the training examples the selected games will produce.

    The count mirrors the condition ``process_zip`` uses when it encodes
    a position: the node carries a colour, it is not the first move of
    the game, and the move is an actual stone placement. Pass moves
    (``get_move`` yields ``None`` for the move) are not counted, because
    ``process_zip`` never encodes them; counting them would over-allocate
    the feature and label buffers.

    Args:
        zip_file: Open archive providing ``extractfile(name)``.
        game_list: Indices of the games to count; index ``i`` selects
            ``name_list[i + 1]``.
        name_list: Member names of the archive.

    Returns:
        The total number of examples as an int (0 for an empty
        ``game_list``).

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.
    """
    total_examples = 0
    for index in game_list:
        name = name_list[index + 1]
        if not name.endswith('.sgf'):
            raise ValueError(name + ' is not a valid sgf')

        sgf_content = zip_file.extractfile(name).read()
        sgf = Sgf_game.from_string(sgf_content)
        # Only the first_move_done flag is needed here; the game state
        # returned alongside it is not used for counting.
        _, first_move_done = self.get_handicap(sgf)

        for item in sgf.main_sequence_iter():
            color, move = item.get_move()
            if color is None:
                continue
            if first_move_done and move is not None:
                total_examples += 1
            first_move_done = True
    return total_examples
def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Every selected SGF game is replayed move by move. For each played
    stone (after the first move of the game) the position before the move
    is encoded as a feature and the move itself as its label.

    Args:
        zip_file_name: Archive name handed to ``self.unzip_data``.
        data_file_name: Base name of the output files written under
            ``self.data_dir``.
        game_list: Indices of the games to encode; index ``i`` selects
            ``name_list[i + 1]`` of the archive's member names.

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.

    Note:
        Only full chunks of 1024 examples are written; a remainder
        shorter than one chunk is not saved.
    """
    tar_file = self.unzip_data(zip_file_name)
    # The context manager closes the archive even when a game fails to parse.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        total_examples = self.num_total_examples(zip_file, game_list, name_list)  # <1>

        shape = self.encoder.shape()  # <2>
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        counter = 0
        for index in game_list:
            # NOTE(review): the +1 presumably skips a leading directory
            # entry in the member list - confirm against the archives.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)  # <3>

            game_state, first_move_done = self.get_handicap(sgf)  # <4>

            for item in sgf.main_sequence_iter():  # <5>
                color, move_tuple = item.get_move()
                point = None
                if color is not None:
                    if move_tuple is not None:  # <6>
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        move = Move.pass_turn()  # <7>
                    if first_move_done and point is not None:
                        features[counter] = self.encoder.encode(game_state)  # <8>
                        labels[counter] = self.encoder.encode_point(point)  # <9>
                        counter += 1
                    game_state = game_state.apply_move(move)  # <10>
                    first_move_done = True
    # <1> Determine the total number of moves in all games in this zip file.
    # <2> Infer the shape of features and labels from the encoder we use.
    # <3> Read the SGF content as string, after extracting the zip file.
    # <4> Infer the initial game state by applying all handicap stones.
    # <5> Iterate over all moves in the SGF file.
    # <6> Read the coordinates of the stone to be played...
    # <7> ... or pass, if there is none.
    # <8> We encode the current game state as features...
    # <9> ... and the next move as label for the features.
    # <10> Afterwards the move is applied to the board and we proceed with the next one.
    # end::read_sgf_files[]

    # The buffers are sized by num_total_examples, which can exceed the
    # number of positions actually encoded (pass moves are skipped above);
    # drop the unused all-zero tail so it is not saved as training data.
    features = features[:counter]
    labels = labels[:counter]

    # tag::store_features_and_labels[]
    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    while features.shape[0] >= chunksize:  # <1>
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]  # <2>
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)  # <3>
def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Every selected SGF game is replayed move by move. For each played
    stone (after the first move of the game) the position before the move
    is encoded as a feature and the move itself as its label.

    Args:
        zip_file_name: Archive name handed to ``self.unzip_data``.
        data_file_name: Base name of the output files written under
            ``self.data_dir``.
        game_list: Indices of the games to encode; index ``i`` selects
            ``name_list[i + 1]`` of the archive's member names.

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.

    Note:
        Only full chunks of 1024 examples are written; a remainder
        shorter than one chunk is not saved.
    """
    tar_file = self.unzip_data(zip_file_name)
    # The context manager closes the archive even when a game fails to parse.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        # Determine the total number of moves of all games in this zip file.
        total_examples = self.num_total_examples(zip_file, game_list, name_list)  # <1>

        # Infer the shape of features and labels from the encoder in use.
        shape = self.encoder.shape()  # <2>
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        counter = 0
        for index in game_list:
            # NOTE(review): the +1 presumably skips a leading directory
            # entry in the member list - confirm against the archives.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            sgf_content = zip_file.extractfile(name).read()
            # Read the SGF content extracted from the archive.
            sgf = Sgf_game.from_string(sgf_content)  # <3>

            # Infer the initial game state by applying all handicap stones.
            game_state, first_move_done = self.get_handicap(sgf)  # <4>

            # Iterate over all moves in the SGF file.
            for item in sgf.main_sequence_iter():  # <5>
                color, move_tuple = item.get_move()
                point = None
                if color is not None:
                    # Read the coordinates of the stone to be played ...
                    if move_tuple is not None:  # <6>
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        # ... or pass, if there is none.
                        move = Move.pass_turn()  # <7>
                    if first_move_done and point is not None:
                        # Encode the current game state as the feature.
                        features[counter] = self.encoder.encode(game_state)  # <8>
                        # Encode the next move as the label of that feature.
                        labels[counter] = self.encoder.encode_point(point)  # <9>
                        counter += 1
                    # Then apply the move to the board and move on.
                    game_state = game_state.apply_move(move)  # <10>
                    first_move_done = True
    # <1> Determine the total number of moves in all games in this zip file.
    # <2> Infer the shape of features and labels from the encoder we use.
    # <3> Read the SGF content as string, after extracting the zip file.
    # <4> Infer the initial game state by applying all handicap stones.
    # <5> Iterate over all moves in the SGF file.
    # <6> Read the coordinates of the stone to be played...
    # <7> ... or pass, if there is none.
    # <8> We encode the current game state as features...
    # <9> ... and the next move as label for the features.
    # <10> Afterwards the move is applied to the board and we proceed with the next one.
    # end::read_sgf_files[]

    # The buffers are sized by num_total_examples, which can exceed the
    # number of positions actually encoded (pass moves are skipped above);
    # drop the unused all-zero tail so it is not saved as training data.
    features = features[:counter]
    labels = labels[:counter]

    # tag::store_features_and_labels[]
    # Store features and labels locally in small chunks. Smaller chunks
    # are used because the arrays can become very large quickly, and small
    # files are more flexible to work with later.
    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    # Process features and labels in chunks of 1024 examples.
    while features.shape[0] >= chunksize:  # <1>
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        # The current chunk is cut off from the features and labels ...
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]  # <2>
        # ... and each is saved to its own file.
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)  # <3>
def process_zip(self, zip_file_name, data_file_name, game_list):
    """Encode the selected games of one archive and save them in chunks.

    Every selected SGF game is replayed move by move. For each played
    stone (after the first move of the game) the position before the move
    is encoded as a feature and the move itself as its label. Encoders
    whose name starts with ``'my'`` additionally receive a ``Board_Ext``
    that is kept in step with the game. Progress and timing information
    is printed to stdout.

    Args:
        zip_file_name: Archive name handed to ``self.unzip_data``.
        data_file_name: Base name of the output files written under
            ``self.data_dir``.
        game_list: Indices of the games to encode; index ``i`` selects
            ``name_list[i + 1]`` of the archive's member names.

    Raises:
        ValueError: If a selected archive member does not end in ``.sgf``.

    Note:
        Only full chunks of 1024 examples are written; a remainder
        shorter than one chunk is not saved. A stone-count based filter
        for opening / middle game / endgame positions existed here only
        as commented-out code and was never active.
    """
    tar_file = self.unzip_data(zip_file_name)
    # The context manager closes the archive even when a game fails to parse.
    with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
        name_list = zip_file.getnames()
        total_examples = self.num_total_examples(zip_file, game_list, name_list)

        shape = self.encoder.shape()
        feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
        features = np.zeros(feature_shape)
        labels = np.zeros((total_examples,))

        # Encoders named 'my...' take the extended board as a second argument.
        uses_board_ext = self.encoder.name()[:2] == 'my'

        counter = 0
        for index in game_list:
            # NOTE(review): the +1 presumably skips a leading directory
            # entry in the member list - confirm against the archives.
            name = name_list[index + 1]
            if not name.endswith('.sgf'):
                raise ValueError(name + ' is not a valid sgf')
            sgf_content = zip_file.extractfile(name).read()
            sgf = Sgf_game.from_string(sgf_content)

            game_state, first_move_done, board_ext = self.get_handicap(sgf)
            if uses_board_ext and not first_move_done:
                # No first move recorded yet (the original note says "not
                # handicap"): rebuild the extended board from the state.
                board_ext = Board_Ext(game_state.board)

            for item in sgf.main_sequence_iter():
                color, move_tuple = item.get_move()
                if color is None:
                    continue
                point = None
                if move_tuple is not None:
                    row, col = move_tuple
                    point = Point(row + 1, col + 1)
                    move = Move.play(point)
                else:
                    move = Move.pass_turn()
                if first_move_done and point is not None:
                    if uses_board_ext:
                        features[counter] = self.encoder.encode(
                            game_state, board_ext)
                    else:
                        features[counter] = self.encoder.encode(game_state)
                    labels[counter] = self.encoder.encode_point(point)
                    counter += 1
                game_state = game_state.apply_move(move)
                if uses_board_ext:
                    # NOTE(review): this is also reached for pass moves,
                    # where point is None - confirm place_stone_ext copes.
                    board_ext.place_stone_ext(game_state.board, color, point)
                first_move_done = True

    # The buffers are sized by num_total_examples, which can exceed the
    # number of positions actually encoded (pass moves are skipped above);
    # drop the unused all-zero tail so it is not saved as training data.
    features = features[:counter]
    labels = labels[:counter]

    feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
    label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

    chunk = 0  # Due to files with large content, split up after chunksize
    chunksize = 1024
    start_time_all = time.time()
    while features.shape[0] >= chunksize:
        start_time = time.time()
        feature_file = feature_file_base % chunk
        label_file = label_file_base % chunk
        chunk += 1
        current_features, features = features[:chunksize], features[chunksize:]
        current_labels, labels = labels[:chunksize], labels[chunksize:]
        np.save(feature_file, current_features)
        np.save(label_file, current_labels)
        print("Chunk = ", chunk, " File for training Current_features: ",
              feature_file)
        # time.time() already returns seconds, so no scaling is applied, and
        # the per-file figure is measured from this chunk's own start time.
        print("Time per one file = ", time.time() - start_time, ' seconds')
    print('Files preparation with proccess_zip is over\n')
    print('Full Time = ', time.time() - start_time_all, ' seconds')
    print("End chunk = ", chunk)
from dlgo.gosgf import Sgf_game
from dlgo.goboard import GameState, Move
from dlgo.gotypes import Point
from dlgo.utils import print_board
from time import sleep

# Replay a short SGF record move by move, printing the board after each
# stone with a brief pause so the game can be followed on screen.
# NOTE(review): the record declares SZ[9] but is replayed on a 19x19 board,
# and its first recorded move is White's - confirm both are intended.
sgf_example = "(;GM[1]FF[4]SZ[9];W[ef];B[ff];W[df];B[fe];W[fc];B[ec];W[gd];B[fb])"

sgf_game = Sgf_game.from_string(sgf_example)
game_state = GameState.new_game(19)

for item in sgf_game.main_sequence_iter():
    color, move_tuple = item.get_move()
    # Skip nodes without a move as well as passes.
    if color is None or move_tuple is None:
        continue
    row, col = move_tuple
    # The +1 shifts the SGF coordinates before building the Point.
    point = Point(row + 1, col + 1)
    move = Move.play(point)
    game_state = game_state.apply_move(move)
    print_board(game_state.board)
    sleep(0.3)
from dlgo.gosgf import Sgf_game
from dlgo.goboard_fast import GameState, Move
from dlgo.gotypes import Point
from dlgo.utils import print_board

# Replay a short SGF record move by move, printing the board after each
# stone that is placed.
# NOTE(review): the record declares SZ[9] but is replayed on a 19x19
# board - confirm this is intended.
sgf_content = "(;GM[1]FF[4]SZ[9];B[ee];W[ef];B[ff]" + \
    ";W[df];B[fe];W[fc];B[ec];W[gd];B[fb])"

sgf_game = Sgf_game.from_string(sgf_content)
game_state = GameState.new_game(19)

for item in sgf_game.main_sequence_iter():
    color, move_tuple = item.get_move()
    # Skip nodes without a move as well as passes.
    if color is None or move_tuple is None:
        continue
    row, col = move_tuple
    # The +1 shifts the SGF coordinates before building the Point.
    point = Point(row + 1, col + 1)
    move = Move.play(point)
    game_state = game_state.apply_move(move)
    print_board(game_state.board)