Example #1
0
def run_summary(model_dir: str) -> None:
    """
    Prints a summary of the dataset contained in a directory.

    The summary shows the value returned by ``get_features()`` and whether
    the data is opcode encoded (both read from the training set), the number
    of classes found in the training set, and the per-class example counts
    of the training, validation and test sets.

    :param model_dir: Path to the folder where the train.bin, test.bin and
    validate.bin can be found
    :raises AssertionError: if the folder or any of the three files is missing
    """
    # Function-scope import so the block does not depend on the file header.
    from contextlib import closing

    # NOTE(review): these preconditions are asserts, so they are skipped
    # under `python -O`. They are kept unchanged so that callers relying on
    # AssertionError keep working.
    assert (os.path.exists(model_dir))
    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")
    assert os.path.exists(train_bin), "Train dataset does not exists!"
    assert os.path.exists(test_bin), "Test dataset does not exists!"
    assert os.path.exists(validate_bin), "Validation dataset does not exists!"

    # closing() guarantees close() is called even if reading a dataset
    # raises, so no handle is leaked on failure.
    with closing(BinaryDs(train_bin, read_only=True).open()) as train:
        train_categories = count_categories(train)
        openc = train.is_encoded()
        features = train.get_features()
    with closing(BinaryDs(validate_bin, read_only=True).open()) as val:
        val_categories = count_categories(val)
    with closing(BinaryDs(test_bin, read_only=True).open()) as test:
        test_categories = count_categories(test)

    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    if openc:
        print("Type: opcode encoded")
    else:
        print("Type: raw values")
    print("--------------------")

    # Index access (rather than direct iteration) is kept on purpose: only
    # len() and [] are known to be supported by the counts container.
    report = (
        ("Training", train_categories),
        ("Validation", val_categories),
        ("Testing", test_categories),
    )
    for label, categories in report:
        for i in range(len(categories)):
            print(f"{label} examples for class {i}: {categories[i]}")
 def test_wrong_encoding_readonly(self):
     """
     Checks that a read-only open does not override the stored encoding.

     A dataset is first created with encoded=False and closed right away.
     It is then reopened read-only while requesting encoded=True, and
     is_encoded() is expected to still report False.
     """
     path = os.path.join(self.tmpdir, "wrongenc_readonly.bin")
     # Create the dataset as non-encoded, then release it immediately.
     BinaryDs(path, encoded=False).open().close()
     # Reopen read-only with the opposite (mismatching) encoding flag.
     with BinaryDs(path, encoded=True, read_only=True) as reopened:
         encoded_flag = reopened.is_encoded()
         self.assertFalse(encoded_flag)