def run_summary(model_dir: str) -> None:
    """
    Print a summary of the dataset contained in a directory.

    The feature count and the encoding flag are read from the training set
    only; per-class example counts are printed for the training, validation
    and test sets, in that order.

    :param model_dir: Path to the folder where the train.bin, test.bin and
        validate.bin can be found
    :raises AssertionError: If model_dir or any of the three dataset files
        does not exist
    """
    # Function-scope import: keeps this block self-contained without
    # touching the file-level import section.
    from contextlib import closing

    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")

    # Raised explicitly instead of using `assert` so the checks still run
    # under `python -O`. AssertionError is kept so that existing callers
    # catching it keep working. Order matches the original checks.
    required_paths = (
        (model_dir, f"Model directory does not exist: {model_dir}"),
        (train_bin, "Train dataset does not exists!"),
        (test_bin, "Test dataset does not exists!"),
        (validate_bin, "Validation dataset does not exists!"),
    )
    for path, message in required_paths:
        if not os.path.exists(path):
            raise AssertionError(message)

    # closing() guarantees close() is called even if reading a dataset
    # raises, so no handle is leaked on the error path.
    with closing(BinaryDs(train_bin, read_only=True).open()) as train:
        train_categories = count_categories(train)
        openc = train.is_encoded()
        features = train.get_features()
    with closing(BinaryDs(validate_bin, read_only=True).open()) as val:
        val_categories = count_categories(val)
    with closing(BinaryDs(test_bin, read_only=True).open()) as test:
        test_categories = count_categories(test)

    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    print("Type: opcode encoded" if openc else "Type: raw values")
    print("--------------------")

    per_split = (
        ("Training", train_categories),
        ("Validation", val_categories),
        ("Testing", test_categories),
    )
    for label, categories in per_split:
        # Index-based access is deliberate: the container type returned by
        # count_categories is not visible here, and plain iteration would
        # behave differently if it were a mapping keyed by class id.
        for i in range(len(categories)):
            print(f"{label} examples for class {i}: {categories[i]}")
def test_wrong_encoding_readonly(self):
    # Scenario: a dataset file is created with encoded=False, then reopened
    # read-only while requesting encoded=True. The test asserts that
    # is_encoded() reports False on the reopened dataset.
    path = os.path.join(self.tmpdir, "wrongenc_readonly.bin")

    # Create the file by opening it with encoded=False and closing it
    # straight away. open() returns the dataset, so the calls can be chained.
    BinaryDs(path, encoded=False).open().close()

    with BinaryDs(path, encoded=True, read_only=True) as reopened:
        encoded_flag = reopened.is_encoded()
        self.assertFalse(encoded_flag)