def test_write_to_ro(self):
    file = os.path.join(self.tmpdir, "write_ro.bin")
    dataset = BinaryDs(file, features=14).open()
    dataset.close()
    with BinaryDs(file, features=14, read_only=True) as dataset:
        with self.assertRaises(IOError):
            dataset.write(self.data_raw)
def evaluate_confusion(bs: int, file: str, fixed: int, model_path: str,
                       test_bin: str) -> None:
    """
    Evaluates the confusion matrix for a given number of features.

    :param bs: batch size
    :param file: file where the confusion matrix will be written
    :param fixed: number of features to be considered
    :param model_path: string pointing to the .h5 keras model of the
        network. If empty, defaults to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    test = BinaryDs(test_bin, read_only=True).open()
    binary = test.get_categories() <= 2
    model = load_model(model_path)
    generator = DataGenerator(test, bs, fake_pad=True, pad_len=fixed,
                              predict=True)
    expected = get_expected(bs, test)
    predicted = model.predict(generator, verbose=1)
    if binary:
        # binary models emit a single score per example: round to 0/1
        predicted = np.round(predicted).flatten().astype(np.int8)
    else:
        # multiclass models emit one probability per class: take the argmax
        predicted = np.argmax(predicted, axis=1)
    matrix = np.array(tf.math.confusion_matrix(expected, predicted))
    with open(file, "w") as f:
        np.savetxt(f, X=matrix, fmt="%d")
    test.close()
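# A minimal usage sketch for evaluate_confusion. The paths "data/model.h5"
# and "data/test.bin" are hypothetical placeholders, not part of this module:
#
#     evaluate_confusion(bs=256, file="confusion.txt", fixed=128,
#                        model_path="data/model.h5",
#                        test_bin="data/test.bin")
#
# The output file holds a single whitespace-separated integer matrix, with
# rows as expected classes and columns as predicted classes (TensorFlow's
# confusion_matrix convention).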
def test_read_write(self):
    file = os.path.join(self.tmpdir, "rw.bin")
    binary = BinaryDs(file, features=14).open()
    binary.write(self.data_raw)
    binary.close()
    with BinaryDs(file, features=14, read_only=True) as dataset:
        read = dataset.read(0, len(self.data_raw))
        self.assertEqual(read, self.data_raw)
def test_truncate_all(self):
    file = os.path.join(self.tmpdir, "truncate.bin")
    dataset = BinaryDs(file, features=14).open()
    dataset.close()
    empty_size = os.path.getsize(file)
    with BinaryDs(file, features=14) as dataset:
        dataset.write(self.data_raw2)
    self.assertGreater(os.path.getsize(file), empty_size)
    with BinaryDs(file, features=14) as dataset:
        dataset.truncate()
    self.assertEqual(os.path.getsize(file), empty_size)
def test_split(self):
    file1 = os.path.join(self.tmpdir, "splitA.bin")
    file2 = os.path.join(self.tmpdir, "splitB.bin")
    dataset1 = BinaryDs(file1, features=14).open()
    dataset1.write(self.data_raw2)
    dataset2 = BinaryDs(file2, features=14).open()
    self.assertEqual(dataset1.get_examples_no(), 8)
    self.assertEqual(dataset2.get_examples_no(), 0)
    dataset1.split(dataset2, 0.5)
    self.assertEqual(dataset1.get_examples_no(), 4)
    self.assertEqual(dataset2.get_examples_no(), 4)
    self.assertEqual(dataset1.read(0, 4), self.data_raw2[:4])
    self.assertEqual(dataset2.read(0, 4), self.data_raw2[4:])
    dataset1.close()
    dataset2.close()
def test_merge(self):
    file1 = os.path.join(self.tmpdir, "mergeA.bin")
    file2 = os.path.join(self.tmpdir, "mergeB.bin")
    dataset1 = BinaryDs(file1, features=14).open()
    dataset1.write(self.data_raw)
    dataset2 = BinaryDs(file2, features=14).open()
    dataset2.write(self.data_raw2)
    self.assertEqual(dataset1.get_examples_no(), 3)
    self.assertEqual(dataset2.get_examples_no(), 8)
    dataset1.merge(dataset2)
    self.assertEqual(dataset1.get_examples_no(), 11)
    self.assertEqual(dataset2.get_examples_no(), 0)
    self.assertEqual(dataset1.read(0, 11), self.data_raw + self.data_raw2)
    dataset1.close()
    dataset2.close()
def evaluate_incremental(bs: int, file: str, model_path: str,
                         test_bin: str) -> None:
    """
    Evaluates the accuracy incrementally (first with only 1 feature, then 3,
    then 5, and so on).

    :param bs: batch size
    :param file: file where the accuracy will be written (.csv)
    :param model_path: string pointing to the .h5 keras model of the
        network. If empty, defaults to data_dir/model.h5
    :param test_bin: path to the test dataset that will be used
    """
    cut = 1
    test = BinaryDs(test_bin, read_only=True).open()
    model = load_model(model_path)
    features = test.get_features()
    with open(file, "w") as f:
        f.write("features,accuracy\n")
    while cut <= features:
        print(f"Evaluating {cut}")
        generator = DataGenerator(test, bs, fake_pad=True, pad_len=cut)
        score = model.evaluate(generator)
        with open(file, "a") as f:
            f.write(f"{cut},{score[1]}\n")
        # step size grows with cut: small feature counts are sampled densely,
        # large ones sparsely
        if cut < 24:
            cut = cut + 2
        elif cut < 80:
            cut = cut + 22
        elif cut < 256:
            cut = cut + 33
        elif cut < 500:
            cut = cut + 61
        elif cut < features:
            cut = cut + 129
            cut = min(cut, features)
        else:
            break
    test.close()
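# A minimal usage sketch for evaluate_incremental, assuming the same
# hypothetical paths as above. It produces a two-column CSV with the header
# "features,accuracy" and one row per evaluated padding length:
#
#     evaluate_incremental(bs=256, file="incremental.csv",
#                          model_path="data/model.h5",
#                          test_bin="data/test.bin")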
def run_summary(model_dir: str) -> None:
    """
    Prints a summary of the dataset contained in a directory.

    :param model_dir: path to the folder where the train.bin, test.bin and
        validate.bin can be found
    """
    assert os.path.exists(model_dir)
    train_bin = os.path.join(model_dir, "train.bin")
    test_bin = os.path.join(model_dir, "test.bin")
    validate_bin = os.path.join(model_dir, "validate.bin")
    assert os.path.exists(train_bin), "Train dataset does not exist!"
    assert os.path.exists(test_bin), "Test dataset does not exist!"
    assert os.path.exists(validate_bin), "Validation dataset does not exist!"
    train = BinaryDs(train_bin, read_only=True).open()
    train_categories = count_categories(train)
    openc = train.is_encoded()
    features = train.get_features()
    train.close()
    val = BinaryDs(validate_bin, read_only=True).open()
    val_categories = count_categories(val)
    val.close()
    test = BinaryDs(test_bin, read_only=True).open()
    test_categories = count_categories(test)
    test.close()
    print(f"Features: {features}")
    print(f"Number of classes: {len(train_categories)}")
    if openc:
        print("Type: opcode encoded")
    else:
        print("Type: raw values")
    print("--------------------")
    for i in range(len(train_categories)):
        print(f"Training examples for class {i}: {train_categories[i]}")
    for i in range(len(val_categories)):
        print(f"Validation examples for class {i}: {val_categories[i]}")
    for i in range(len(test_categories)):
        print(f"Testing examples for class {i}: {test_categories[i]}")
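# Example invocation, assuming a directory "data/" that contains train.bin,
# test.bin and validate.bin (a hypothetical layout for illustration):
#
#     run_summary("data")
#
# The output lists the feature count, number of classes, encoding type, and
# the per-class example counts for each of the three datasets.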
def test_open_wrong_features_readonly(self):
    file = os.path.join(self.tmpdir, "open_wrong_features_readonly.bin")
    dataset = BinaryDs(file, features=1024).open()
    dataset.close()
    with BinaryDs(file, features=2048, read_only=True) as dataset:
        self.assertEqual(dataset.get_features(), 1024)
def test_open_wrong_features(self):
    file = os.path.join(self.tmpdir, "open_wrong_features.bin")
    dataset = BinaryDs(file, features=1024).open()
    dataset.close()
    with self.assertRaises(IOError):
        BinaryDs(file, features=2048).open()
def test_wrong_encoding_readonly(self):
    file = os.path.join(self.tmpdir, "wrongenc_readonly.bin")
    dataset = BinaryDs(file, encoded=False).open()
    dataset.close()
    with BinaryDs(file, encoded=True, read_only=True) as dataset:
        self.assertFalse(dataset.is_encoded())
def test_wrong_encoding(self):
    file = os.path.join(self.tmpdir, "wrongenc.bin")
    dataset = BinaryDs(file, encoded=False).open()
    dataset.close()
    with self.assertRaises(IOError):
        BinaryDs(file, encoded=True).open()