def generator():
    """Yield subsampled triplet batches from the h5 sample files.

    NOTE(review): this is a closure — it reads ``self.sample_dir``,
    ``self.H5_COL_KEY``, ``self.batch_size`` and
    ``self.subsampling_functions`` from the enclosing scope.  Tuples are
    accumulated into a rolling buffer; full batches are sliced off its
    front and fed through the subsampling function(s).  A trailing
    remainder smaller than one batch is dropped.
    """
    sample_files = files_from_directory(
        os.path.abspath(self.sample_dir), file_type="h5")
    # Rolling FIFO buffer of raw (15, 773) boolean tuples.
    buffer = np.empty(shape=(0, 15, 773), dtype=bool)
    for sample_file in sample_files:
        h5_name = correct_file_ending(sample_file, 'h5')
        with h5py.File(h5_name, 'r') as hf:
            # Pull every matching table of this file into the buffer.
            for key in hf.keys():
                if self.H5_COL_KEY in key:
                    loaded = np.asarray(hf[key][:], dtype=bool)
                    buffer = np.concatenate((buffer, loaded))
        # Drain as many whole batches as the buffer currently holds.
        while len(buffer) >= self.batch_size:
            batch = buffer[:self.batch_size]
            if isinstance(self.subsampling_functions,
                          (list, tuple, np.ndarray)):
                # Several subsampling strategies: each sees the same batch.
                for fn in self.subsampling_functions:
                    yield fn(batch)
            else:
                yield self.subsampling_functions(batch)
            buffer = buffer[self.batch_size:]
def save_tuples(tuples, file, dset_num=0):
    """Append *tuples* to the h5 file *file* as one gzip-compressed dataset.

    The dataset is named ``tuples_{dset_num}`` and stored as boolean data.

    Parameters
    ----------
    tuples : array-like of bool
        Tuples to store; historically shaped ``(n, 15, 773)``.
    file : str
        Target file path; an ``.h5`` ending is appended if missing.
    dset_num : int, optional
        Suffix distinguishing successive chunks written to the same file.
    """
    fname = correct_file_ending(file, "h5")
    # Materialize once so the dataset shape follows the data instead of
    # being hard-coded to (len(tuples), 15, 773) as before — same result
    # for the historical shape, but no silent mismatch for others.
    data = np.asarray(tuples, dtype=bool)
    with h5py.File(fname, "a") as f:
        dset = f.create_dataset(
            f"tuples_{dset_num}",
            shape=data.shape,
            dtype=bool,
            compression="gzip",
            compression_opts=9,
        )
        dset[:] = data
def number_samples(self):
    """Return the total number of sample tuples stored across all h5
    files in ``self.sample_dir`` (counting only tables whose key
    contains ``self.H5_COL_KEY``)."""
    sample_files = files_from_directory(
        os.path.abspath(self.sample_dir), file_type="h5")
    total = 0
    for sample_file in sample_files:
        h5_name = correct_file_ending(sample_file, 'h5')
        with h5py.File(h5_name, 'r') as hf:
            total += sum(
                len(hf[key]) for key in hf.keys()
                if self.H5_COL_KEY in key
            )
    return total
def tuples_from_file(file, table_id_prefix, tuple_indices=(0, 1, 6)):
    """Return the specified tuple columns from all relevant tables in a file.

    Parameters
    ----------
    file : str
        h5 file path; an ``.h5`` ending is appended if missing.
    table_id_prefix : str
        Only datasets whose key contains this substring are read.
    tuple_indices : sequence of int, optional
        Column indices selected from each table row.

    Returns
    -------
    numpy.ndarray of bool
        All selected rows concatenated over the matching tables.
    """
    fname = correct_file_ending(file, 'h5')
    # Default is a tuple (mutable-default pitfall avoided); h5py/numpy
    # fancy indexing needs a list, not a tuple, so convert here.
    indices = list(tuple_indices)
    tuples = []
    with h5py.File(fname, 'r') as hf:
        # (removed leftover debug print of hf.keys())
        for key in hf.keys():
            if table_id_prefix in key:
                tuples.extend(hf[key][:, indices])
    return np.asarray(tuples, dtype=bool)
def save_bb(game_list, game_id, file, dset_num=0):
    """Append flattened game positions and their game ids to an h5 file.

    Writes two parallel gzip-compressed datasets, ``position_{dset_num}``
    (bool, shape ``(n_positions, 773)``) and ``game_id_{dset_num}``
    (int64, one id per position).

    Parameters
    ----------
    game_list : list of list
        One inner list of 773-bit position vectors per game.
    game_id : list of int
        Parallel to *game_list*: the id assigned to each game.
    file : str
        Target file path; an ``.h5`` ending is appended if missing.
    dset_num : int, optional
        Suffix distinguishing successive chunks written to the same file.
    """
    fname = correct_file_ending(file, "h5")
    # Flatten games into one position list, repeating each game's id
    # once per position so the two datasets stay aligned.
    position = []
    gid = []
    for i, game in enumerate(game_list):
        for pos in game:
            position.append(pos)
            gid.append(game_id[i])
    with h5py.File(fname, "a") as f:
        data1 = f.create_dataset(
            f"position_{dset_num}",
            shape=(len(position), 773),
            dtype=bool,
            compression="gzip",
            compression_opts=9,
        )
        # BUG FIX: `np.int` was removed in NumPy 1.24; use a concrete
        # integer dtype instead.
        data2 = f.create_dataset(
            f"game_id_{dset_num}",
            shape=(len(position),),
            dtype=np.int64,
            compression="gzip",
            compression_opts=9,
        )
        data1[:] = position[:]
        data2[:] = gid[:]
def pgn_to_bitboard(pgn_file, generate_tuples=False, save_file=None,
                    tuple_file=None, chunksize=100000, game_filter=None):
    """Stream games from a pgn file, convert them to bitboard positions and
    save the results to h5 files in chunks of *chunksize* games.

    Parameters
    ----------
    pgn_file : str
        pgn file path; a ``.pgn`` ending is appended if missing.
    generate_tuples : bool, optional
        If True, also generate and save training tuples per chunk.
    save_file : str or None
        h5 path for the bitboard positions (required once a chunk is full).
    tuple_file : str or None
        h5 path for the tuples (required when *generate_tuples* is True).
    chunksize : int, optional
        Number of games accumulated before each save.
    game_filter : dict or None
        Header filter passed to ``filter_out``; falsy disables filtering.

    Returns
    -------
    int
        0 on completion.

    Raises
    ------
    ValueError
        If a required output path is missing when a chunk must be saved.
    """
    game_list = []
    game_id = []
    counter = 1
    game_index = -1
    save_number = 0
    pgn_name = correct_file_ending(pgn_file, "pgn")
    with open(pgn_name, 'r') as f:
        while True:
            next_game = chess.pgn.read_game(f)
            game_index += 1
            # BUG FIX: the original tested `game_filter is not {}`, which is
            # always True (identity against a fresh dict literal), so
            # filter_out ran even for game_filter=None/{}.  Use truthiness:
            # an empty or None filter disables filtering.
            if next_game is not None and game_filter and \
                    filter_out(next_game.headers, game_filter):
                continue
            if counter % chunksize == 0 or next_game is None:
                print("")
                if save_file is not None:
                    save_bb(game_list, game_id, save_file,
                            dset_num=save_number)
                    print("Game positions saved.")
                else:
                    # (fixed typo "bitbaord" in the error message)
                    raise ValueError("Save bitboard file path not provided.")
                if generate_tuples:
                    tup = tuple_generator(game_list)
                    print("Tuples generated.")
                    if tuple_file is not None:
                        save_tuples(tup, tuple_file, dset_num=save_number)
                        print("Tuples saved.")
                    else:
                        raise ValueError("Save tuple file path not provided.")
                print(f"\rChunk {save_number} processed.")
                save_number += 1
                counter = 1
                game_list = []
                # BUG FIX: game_id was never cleared alongside game_list, so
                # from the second chunk on, save_bb paired positions with the
                # FIRST chunk's ids (the lists are indexed in parallel).
                game_id = []
            if next_game is None:
                break  # end of file, leave the while-True loop
            temp_game = game_bb(next_game, game_nr=counter)
            if len(temp_game) > 0:
                game_list.append(temp_game)
                game_id.append(game_index)
            print(f" Games parsed: {game_index} "
                  f"Games processed: {counter}", end="\r")
            counter += 1
    return 0
def test_correct_file_ending():
    """correct_file_ending appends the suffix only when it is missing."""
    # Missing ending -> appended.
    assert ut.correct_file_ending("data/hello", "txt") == "data/hello.txt"
    # Ending already present -> path returned unchanged.
    assert ut.correct_file_ending("one_file.pyc", "pyc") == "one_file.pyc"