Esempio n. 1
0
 def load_go_data(self, data_type='train', num_samples=1000):
     """Download the KGS index and process each sampled archive not yet on disk.

     Args:
         data_type: Suffix identifying the data split (default ``'train'``);
             it is passed to the sampler and appended to each archive's
             base name to form the processed file name.
         num_samples: Number of samples requested from the sampler.

     Returns:
         None. This variant only triggers ``process_zip`` for archives whose
         processed file is missing; it does not consolidate or return data.
     """
     kgs_index = KGSIndex(data_directory=self.data_dir)
     kgs_index.download_files()

     sampler = Sampler(data_dir=self.data_dir)
     data = sampler.draw_data(data_type, num_samples)

     # Group the sampled game indices by the archive that contains them.
     # The loop variable is deliberately not called ``index`` so it does not
     # shadow the KGSIndex instance above.
     indices_by_zip_name = {}
     for filename, game_index in data:
         indices_by_zip_name.setdefault(filename, []).append(game_index)

     # The dict keys are exactly the distinct archive names, so no separate
     # set of names is needed.
     for zip_name, game_indices in indices_by_zip_name.items():
         basename = zip_name.replace('.tar.gz', '')
         data_file_name = basename + data_type
         # Join with a path separator: plain concatenation only yields the
         # right path when ``self.data_dir`` already ends in a separator.
         if not os.path.isfile(os.path.join(self.data_dir, data_file_name)):
             self.process_zip(zip_name, data_file_name, game_indices)
Esempio n. 2
0
    def load_go_data(self,
                     data_type='train',
                     num_samples=1000,
                     use_generator=False):
        """Prepare sampled Go data and return it as a generator or in consolidated form.

        Args:
            data_type: Data split passed to the sampler and to processing.
            num_samples: Number of samples requested from the sampler.
            use_generator: When true, return a ``DataGenerator`` over the
                sampled data instead of the consolidated features and labels.

        Returns:
            A ``DataGenerator`` if ``use_generator`` is true, otherwise the
            result of ``self.consolidate_games(data_type, data)``.
        """
        KGSIndex(data_directory=self.data_dir).download_files()

        samples = Sampler(data_dir=self.data_dir).draw_data(data_type,
                                                            num_samples)

        # Map the workload to CPUs.
        self.map_to_workers(data_type, samples)

        if use_generator:
            # Either hand back a Go data generator ...
            return DataGenerator(self.data_dir, samples)

        # ... or return the consolidated data as before.
        return self.consolidate_games(data_type, samples)
Esempio n. 3
0
    def load_go_data(self, data_type='train', num_samples=1000):
        """Download, sample, process and consolidate Go game data.

        Args:
            data_type: Data split passed to the sampler; also appended to each
                archive's base name to form the processed file name.
            num_samples: Number of samples requested from the sampler.

        Returns:
            The result of ``self.consolidate_games(data_type, data)``.
        """
        kgs_index = KGSIndex(data_directory=self.data_dir)
        kgs_index.download_files()

        game_sampler = Sampler(data_dir=self.data_dir)
        sampled = game_sampler.draw_data(data_type, num_samples)

        # Collect the distinct archive names and group game indices by archive.
        archives = set()
        game_ids_by_archive = {}
        for archive, game_id in sampled:
            archives.add(archive)
            game_ids_by_archive.setdefault(archive, []).append(game_id)

        for archive in archives:
            target_name = archive.replace('.tar.gz', '') + data_type
            if os.path.isfile(self.data_dir + '/' + target_name):
                # A processed file for this archive already exists.
                continue
            self.process_zip(archive, target_name, game_ids_by_archive[archive])

        return self.consolidate_games(data_type, sampled)
Esempio n. 4
0
    def load_go_data(self,
                     data_type='train',
                     num_samples=1000,
                     use_generator=False):
        """Load sampled KGS data as a generator or as features and labels.

        Args:
            data_type: Data split passed to the sampler and to processing.
            num_samples: Number of games the sampler is asked to select.
            use_generator: Selects the return form (see Returns).

        Returns:
            ``DataGenerator(self.data_dir, data)`` when ``use_generator`` is
            true; otherwise the features and labels produced by
            ``self.consolidate_games``.
        """
        # Initialise the KGS index and download all games from KGS into the
        # data directory; data that is already present is not fetched again.
        kgs_index = KGSIndex(data_directory=self.data_dir)
        kgs_index.download_files()

        # The sampler picks the requested number of games for this data type.
        game_sampler = Sampler(data_dir=self.data_dir)
        sampled_games = game_sampler.draw_data(data_type, num_samples)

        # Map workload to CPUs.
        self.map_to_workers(data_type, sampled_games)

        # Return either the Go data generator or the features and labels.
        return (DataGenerator(self.data_dir, sampled_games)
                if use_generator
                else self.consolidate_games(data_type, sampled_games))
Esempio n. 5
0
    def load_go_data(
            self,
            data_type='train',
            num_samples=1000):
        """Download KGS games, process the sampled archives and return the data.

        Args:
            data_type: Data split passed to the sampler; also appended to each
                archive's base name to form the processed file name.
            num_samples: Number of games the sampler is asked to select.

        Returns:
            The combined features and labels from
            ``self.consolidate_games(data_type, data)``.
        """
        # Download all games from KGS into the local data directory; data that
        # is already available is not downloaded again.
        downloader = KGSIndex(data_directory=self.data_dir)
        downloader.download_files()

        # The sampler selects the requested number of games for the data type.
        picker = Sampler(data_dir=self.data_dir)
        selection = picker.draw_data(data_type, num_samples)

        seen_zips = set()
        grouped_indices = {}
        for zip_file, sgf_index in selection:
            # Gather every zip file name that occurs in the data.
            seen_zips.add(zip_file)

            # Group the indices of all SGF files by zip file name.
            if zip_file in grouped_indices:
                grouped_indices[zip_file].append(sgf_index)
            else:
                grouped_indices[zip_file] = [sgf_index]

        for zip_file in seen_zips:
            processed_name = zip_file.replace('.tar.gz', '') + data_type
            processed_path = self.data_dir + '/' + processed_name
            already_done = os.path.isfile(processed_path)
            if not already_done:
                # Each zip file is processed individually.
                self.process_zip(zip_file, processed_name,
                                 grouped_indices[zip_file])

        # Features and labels of every zip are combined and returned.
        return self.consolidate_games(data_type, selection)
Esempio n. 6
0
    def load_go_data(self, data_type='train', num_samples=1000):
        """Load features and labels for a sample of KGS games.

        All games are first downloaded from KGS to the local data directory
        (nothing is downloaded again if the data is already available). A
        ``Sampler`` then selects the requested number of games, each archive
        without a processed file is processed individually, and the results
        are aggregated.

        Args:
            data_type: Either ``'train'`` or ``'test'``.
            num_samples: Number of games to load data from.

        Returns:
            The aggregated features and labels from ``consolidate_games``.
        """
        KGSIndex(data_directory=self.data_dir).download_files()
        drawn = Sampler(data_dir=self.data_dir).draw_data(data_type, num_samples)

        # Collect the zip file names and group SGF file indices by zip name.
        zip_files = set()
        sgf_indices = {}
        for zip_file, sgf_idx in drawn:
            zip_files.add(zip_file)
            sgf_indices.setdefault(zip_file, []).append(sgf_idx)

        for zip_file in zip_files:
            stem = zip_file.replace('.tar.gz', '')
            out_name = stem + data_type
            out_path = self.data_dir + '/' + out_name
            if os.path.isfile(out_path):
                continue
            # Only archives without a processed file are processed.
            self.process_zip(zip_file, out_name, sgf_indices[zip_file])

        return self.consolidate_games(data_type, drawn)
Esempio n. 7
0
from dlgo.data.index_processor import KGSIndex

# Create a KGS index using the constructor's default arguments.
index = KGSIndex()
# Trigger the download of the indexed files.
# NOTE(review): presumably skips files that are already present locally -
# confirm against KGSIndex.download_files.
index.download_files()