예제 #1
0
    def process_zip(self, zip_file_name, data_file_name, game_list):
        """Encode selected games from one archive and save them in chunks.

        The archive named by ``zip_file_name`` is unpacked through
        ``self.unzip_data``; each game listed in ``game_list`` is replayed
        move by move and every stone placement becomes one
        (encoded position, encoded move) training pair.

        Parameters
        ----------
        zip_file_name : str
            Name handed to ``self.unzip_data``, which returns the name of
            the tar file to open inside ``self.data_dir``.
        data_file_name : str
            Base name of the output files, written inside ``self.data_dir``
            as ``<base>_features_<n>`` and ``<base>_labels_<n>``.
        game_list : iterable of int
            Indices of the games to process. Index ``i`` maps to entry
            ``i + 1`` of the archive's name list.

        Raises
        ------
        ValueError
            If a selected archive member does not end in ``.sgf``.
        """
        tar_file = self.unzip_data(zip_file_name)

        # Open the archive in a context manager so the handle is released
        # even when a game fails to parse.
        with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
            name_list = zip_file.getnames()
            total_examples = self.num_total_examples(zip_file, game_list,
                                                     name_list)

            # Pre-allocate for the upper bound reported by
            # num_total_examples; the encoder dictates the per-sample shape.
            shape = self.encoder.shape()
            feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
            features = np.zeros(feature_shape)
            labels = np.zeros((total_examples, ))

            counter = 0
            for index in game_list:
                name = name_list[index + 1]
                if not name.endswith('.sgf'):
                    raise ValueError(name + ' is not a valid sgf')
                sgf_content = zip_file.extractfile(name).read()
                sgf = Sgf_game.from_string(sgf_content)

                game_state, first_move_done = self.get_handicap(sgf)

                for item in sgf.main_sequence_iter():
                    color, move_tuple = item.get_move()
                    if color is None:
                        # Node carries no move; nothing to replay.
                        continue

                    point = None
                    if move_tuple is not None:
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        move = Move.pass_turn()

                    # Passes have no board point to use as a label, so only
                    # stone placements are recorded.
                    if first_move_done and point is not None:
                        features[counter] = self.encoder.encode(game_state)
                        labels[counter] = self.encoder.encode_point(point)
                        counter += 1
                    game_state = game_state.apply_move(move)
                    first_move_done = True

        # The pre-allocated size also counts pass moves, which are never
        # written. Drop the unused zero-filled tail so it is not saved as
        # bogus training samples.
        features = features[:counter]
        labels = labels[:counter]

        feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
        label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

        chunk = 0  # Due to files with large content, split up after chunksize
        chunksize = 1024
        # NOTE: only full chunks are written; a trailing remainder smaller
        # than chunksize is discarded, as in the original implementation.
        while features.shape[0] >= chunksize:
            feature_file = feature_file_base % chunk
            label_file = label_file_base % chunk
            chunk += 1
            current_features, features = features[:chunksize], features[
                chunksize:]
            current_labels, labels = labels[:chunksize], labels[chunksize:]
            np.save(feature_file, current_features)
            np.save(label_file, current_labels)
예제 #2
0
    def process_zip(self, zip_file_name, data_file_name, game_list):
        """Turn the selected games of one archive into saved training chunks.

        Each game in ``game_list`` is read from the tar file produced by
        ``self.unzip_data`` and replayed. Every stone placement yields one
        sample: the encoded position before the move as feature and the
        encoded move point as label.

        Parameters
        ----------
        zip_file_name : str
            Name handed to ``self.unzip_data``, which returns the name of
            the tar file to open inside ``self.data_dir``.
        data_file_name : str
            Base name of the output files, written inside ``self.data_dir``
            as ``<base>_features_<n>`` and ``<base>_labels_<n>``.
        game_list : iterable of int
            Indices of the games to process. Index ``i`` maps to entry
            ``i + 1`` of the archive's name list.

        Raises
        ------
        ValueError
            If a selected archive member does not end in ``.sgf``.
        """
        tar_file = self.unzip_data(zip_file_name)

        # The context manager guarantees the archive is closed again.
        with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
            name_list = zip_file.getnames()
            # Upper bound on the number of samples in the selected games.
            total_examples = self.num_total_examples(zip_file, game_list, name_list)

            # Infer the shape of features and labels from the encoder we use.
            shape = self.encoder.shape()
            feature_shape = np.insert(shape, 0, np.asarray([total_examples]))
            features = np.zeros(feature_shape)
            labels = np.zeros((total_examples,))

            counter = 0
            for index in game_list:
                name = name_list[index + 1]
                if not name.endswith('.sgf'):
                    raise ValueError(name + ' is not a valid sgf')
                # Read the SGF content straight out of the archive.
                sgf_content = zip_file.extractfile(name).read()
                sgf = Sgf_game.from_string(sgf_content)

                # Initial state and the flag that says whether the first
                # move of the main sequence may already be recorded.
                game_state, first_move_done = self.get_handicap(sgf)

                # Iterate over all nodes of the main line of the SGF file.
                for item in sgf.main_sequence_iter():
                    color, move_tuple = item.get_move()
                    if color is None:
                        continue

                    point = None
                    if move_tuple is not None:
                        # Read the coordinates of the stone to be played...
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        # ... or pass, if there is none.
                        move = Move.pass_turn()

                    if first_move_done and point is not None:
                        # Encode the current game state as features and the
                        # next move as its label. Passes are not recorded.
                        features[counter] = self.encoder.encode(game_state)
                        labels[counter] = self.encoder.encode_point(point)
                        counter += 1
                    # Afterwards the move is applied to the board.
                    game_state = game_state.apply_move(move)
                    first_move_done = True

        # total_examples also counts passes, which never get a row. Cut the
        # untouched zero rows off so they are not stored as training data.
        features = features[:counter]
        labels = labels[:counter]

        feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
        label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

        chunk = 0  # Due to files with large content, split up after chunksize
        chunksize = 1024
        # Only full chunks of 1024 samples are written; a smaller remainder
        # is discarded, as in the original implementation.
        while features.shape[0] >= chunksize:
            feature_file = feature_file_base % chunk
            label_file = label_file_base % chunk
            chunk += 1
            # The current chunk is cut off from features and labels...
            current_features, features = features[:chunksize], features[chunksize:]
            current_labels, labels = labels[:chunksize], labels[chunksize:]
            # ... and then stored in a separate file.
            np.save(feature_file, current_features)
            np.save(label_file, current_labels)
예제 #3
0
파일: processor.py 프로젝트: ntwuxc/dlgo
    def num_total_examples(self, zip_file, game_list, name_list):
        """
        Count the moves contained in the selected games of an archive.

        Parameters
        ----------
        zip_file : object
            Open archive; only its ``extractfile(name)`` method is used.
        game_list : list
            Indices of the game records to count.
        name_list : list
            Member names of the archive.

        Returns
        -------
        total_examples : int
            Number of counted moves over all selected games.

        Raises
        ------
        ValueError
            If a selected member name does not end in ``.sgf``.
        """

        move_total = 0

        for game_index in game_list:
            # Entry 0 of name_list is skipped; presumably it is not a game
            # record (e.g. the archive's top-level entry) - TODO confirm.
            member = name_list[game_index + 1]
            if not member.endswith('.sgf'):
                raise ValueError(member + ' is not a valid sgf')

            record = Sgf_game.from_string(zip_file.extractfile(member).read())
            # Only the flag is needed here; the game is not replayed.
            _, counting = self.get_handicap(record)

            for node in record.main_sequence_iter():
                color, _ = node.get_move()
                if color is None:
                    continue
                if counting:
                    move_total += 1
                else:
                    # The first move seen only switches counting on.
                    counting = True

        return move_total
예제 #4
0
    def num_total_examples(self, zip_file, game_list, name_list):
        """Return how many moves the selected games contain in total.

        ``zip_file`` is an open archive (only ``extractfile`` is used),
        ``game_list`` holds the game indices to look at and ``name_list``
        the archive's member names; index ``i`` maps to ``name_list[i + 1]``.

        Every main-sequence node that carries a colour counts as a move.
        When ``self.get_handicap`` reports that no first move has been made
        yet, the first such node is not counted.

        Raises ``ValueError`` if a selected member is not an ``.sgf`` file.
        """
        grand_total = 0
        for idx in game_list:
            sgf_name = name_list[idx + 1]
            if not sgf_name.endswith('.sgf'):
                raise ValueError(sgf_name + ' is not a valid sgf')

            raw = zip_file.extractfile(sgf_name).read()
            parsed = Sgf_game.from_string(raw)
            # The returned game state is irrelevant for counting.
            _, started = self.get_handicap(parsed)

            for node in parsed.main_sequence_iter():
                colour, _ = node.get_move()
                if colour is None:
                    continue
                if started:
                    grand_total += 1
                started = True
        return grand_total
예제 #5
0
from dlgo.goboard import GameState, Move
from dlgo.gotypes import Point
from dlgo.utils import print_board
from dlgo.gosgf.sgf import Sgf_game

# A short 9x9 game record (note SZ[9]) used to demonstrate SGF replay.
sgf_content = "(;GM[1]FF[4]SZ[9];B[ee];W[ef];B[ff];W[df];B[fe];W[fc];B[ec];W[gd];B[fb])"

sgf_game = Sgf_game.from_string(sgf_content)

# Take the board size from the record instead of hard-coding 19, so the
# replayed board matches the size the SGF declares.
game_state = GameState.new_game(sgf_game.get_size())

# Replay the main line, printing the board after every stone placement.
for item in sgf_game.main_sequence_iter():

    color, move_tuple = item.get_move()
    if color is not None and move_tuple is not None:
        row, col = move_tuple
        # Point is built from row + 1 / col + 1, i.e. shifted by one
        # relative to the tuple returned by get_move().
        point = Point(row + 1, col + 1)
        move = Move.play(point)
        game_state = game_state.apply_move(move)
        print_board(game_state.board)
예제 #6
0
파일: processor.py 프로젝트: ntwuxc/dlgo
    def process_zip(self, zip_file_name, data_file_name, game_list):
        """
        Unpack an archive, encode the selected games as features and labels,
        and save them under the given base name.

        The samples are written in files of 1024 moves each, so that they
        can later be loaded piece by piece instead of all at once.

        Parameters
        ----------
        zip_file_name : str
            Name handed to ``self.unzip_data``, which returns the name of
            the tar file to open inside ``self.data_dir``.
        data_file_name : str
            Base name of the feature and label files written inside
            ``self.data_dir``.
        game_list : list
            Indices of the games to take from the archive. Index ``i`` maps
            to entry ``i + 1`` of the archive's name list.

        Raises
        ------
        ValueError
            If a selected archive member does not end in ``.sgf``.
        """

        # Unpack the file and determine how many samples to allocate for.
        tar_file = self.unzip_data(zip_file_name)

        # The context manager closes the archive even if a game fails.
        with tarfile.open(self.data_dir + '/' + tar_file) as zip_file:
            name_list = zip_file.getnames()
            total_examples = self.num_total_examples(zip_file, game_list,
                                                     name_list)

            # Prepare empty feature and label arrays.
            shape = self.encoder.shape()
            feature_shape = np.insert(shape, 0, total_examples)
            features = np.zeros(feature_shape)
            labels = np.zeros((total_examples, ))

            # Replay every selected game and record its moves.
            counter = 0  # number of samples written so far
            for index in game_list:

                # Load the game record of the selected sgf member.
                name = name_list[index + 1]
                if not name.endswith('.sgf'):
                    raise ValueError(name + ' is not a valid sgf')
                # Extract the member as a file object and read it.
                sgf_content = zip_file.extractfile(name).read()
                sgf = Sgf_game.from_string(sgf_content)

                # Apply handicap stones.
                game_state, first_move_done = self.get_handicap(sgf)

                # Replay the game.
                for item in sgf.main_sequence_iter():
                    color, move_tuple = item.get_move()
                    if color is None:
                        # Node without a move.
                        continue

                    # Reset per move so a pass can never reuse the point of
                    # an earlier move (or hit an unbound name).
                    point = None
                    if move_tuple is not None:
                        # Stone placement.
                        row, col = move_tuple
                        point = Point(row + 1, col + 1)
                        move = Move.play(point)
                    else:
                        # Pass.
                        move = Move.pass_turn()

                    # Positions are recorded only once first_move_done is
                    # set. A pass has no board point to encode as a label,
                    # so it is replayed but not recorded.
                    if first_move_done and point is not None:
                        # The current position is the feature ...
                        features[counter] = self.encoder.encode(game_state)

                        # ... and the move played in it is the label.
                        labels[counter] = self.encoder.encode_point(point)

                        counter += 1

                    # Apply the move.
                    game_state = game_state.apply_move(move)
                    first_move_done = True

        # total_examples also counts passes, which are not recorded. Drop
        # the unused zero-filled tail so it is not saved as training data.
        features = features[:counter]
        labels = labels[:counter]

        # Name templates of the files to save.
        feature_file_base = self.data_dir + '/' + data_file_name + '_features_%d'
        label_file_base = self.data_dir + '/' + data_file_name + '_labels_%d'

        # Save the data in pieces of chunk_size instead of one big file.
        # NOTE: a trailing remainder smaller than chunk_size is discarded,
        # as in the original implementation.
        chunk = 0
        chunk_size = 1024
        while features.shape[0] >= chunk_size:
            feature_file = feature_file_base % chunk
            label_file = label_file_base % chunk
            chunk += 1

            # Cut the next chunk_size samples off features and labels.
            current_features, features = features[:chunk_size], features[
                chunk_size:]
            current_labels, labels = labels[:chunk_size], labels[chunk_size:]

            # Save the pieces that were cut off.
            np.save(feature_file, current_features)
            np.save(label_file, current_labels)