def setUp(self):
    """Load the training features and persist a freshly trained model.

    Reads ``data/train.pickle``, stores the value found under its
    ``"features"`` key as ``self.train_data``, trains a new ``Trainer`` on
    that data and saves it to ``self.model_filename``.
    """
    self.model_filename = "my_model_file.bin"

    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only acceptable because the fixture is a trusted, local file.
    with open("data/train.pickle", mode="rb") as pickle_file:
        payload = pickle.load(pickle_file)
    self.train_data = payload["features"]

    model_trainer = Trainer()
    model_trainer.train(self.train_data)
    model_trainer.save(self.model_filename)
def setUp(self):
    """Load the train/test fixtures and persist a one-component model.

    ``self.train_data`` receives the ``"features"`` entry of
    ``data/train.pickle``; ``self.test_data`` receives the whole unpickled
    content of ``data/test.pickle``. A new ``Trainer`` is then trained on
    the training features with ``n_components=1`` and saved to
    ``self.model_filename``.
    """
    self.model_filename = "my_model_file.bin"

    # Both files are opened before either one is read, as in the
    # single-statement form of the ``with``.
    with open("data/train.pickle", mode="rb") as train_file:
        with open("data/test.pickle", mode="rb") as test_file:
            train_payload = pickle.load(train_file)
            self.train_data = train_payload["features"]
            self.test_data = pickle.load(test_file)

    model_trainer = Trainer()
    model_trainer.train(self.train_data, n_components=1)
    model_trainer.save(self.model_filename)
def train_and_predict(self, n_components=1, threshold=0.001):
    """Train on ``self.train_data`` and label each sample of ``self.X_test``.

    Args:
        n_components: Forwarded to ``Trainer.train``.
        threshold: Forwarded to ``Predictor.predict`` for every sample.

    Returns:
        A list with one entry per item of ``self.X_test``: ``1`` when the
        prediction's ``'is_anomaly'`` value is truthy, ``0`` otherwise.
    """
    fitted = Trainer()
    fitted.train(self.train_data, n_components=n_components)
    detector = Predictor(trainer=fitted.model)

    labels = []
    for sample in self.X_test:
        outcome = detector.predict(sample, threshold=threshold)
        if outcome['is_anomaly']:
            labels.append(1)
        else:
            labels.append(0)
        # NOTE(review): presumably clears per-sample state on the predictor
        # so the next prediction starts fresh - confirm against Predictor.
        detector.init_result()
    return labels
def test_closs_validation(self):
    """Run 3-fold cross validation on the KDD Cup 99 ``http`` subset.

    Labels equal to ``b"normal."`` are mapped to ``0`` and every other label
    to ``1``. For each fold the shared ``Trainer`` is trained on the
    training split, its model predicts the test split, and the resulting
    accuracy must exceed 0.8.
    """
    trainer = Trainer()
    splitter = KFold(n_splits=3)

    self.features, raw_labels = fetch_kddcup99(subset="http", return_X_y=True)
    self.labels = np.array(
        [0 if raw == b"normal." else 1 for raw in raw_labels]
    )

    for train_idx, test_idx in splitter.split(self.features, self.labels):
        fold_train = self.features[train_idx]
        fold_test = self.features[test_idx]
        # Training labels are extracted but never passed to the trainer.
        fold_train_labels = self.labels[train_idx]
        expected = self.labels[test_idx]

        trainer.train(fold_train)
        predicted = trainer.model.predict(fold_test)
        accuracy = accuracy_score(expected, predicted)

        # The printed label is Japanese for "accuracy rate".
        print("正解率=", accuracy)
        assert accuracy > 0.8
def test_train_with_small_data(self):
    """Train on two three-feature samples and save the model to ``tmp.model``.

    The test has no explicit assertion; it passes when neither ``train``
    nor ``save`` raises.
    """
    samples = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
    small_trainer = Trainer()
    small_trainer.train(samples)
    small_trainer.save("tmp.model")
def test_train_with_insufficient_data(self):
    """Training on an empty list must raise ``InsufficientTrainingDataError``."""
    empty_trainer = Trainer()
    # Callable form of assertRaises: invokes ``train([])`` and checks the
    # exception type, same as the context-manager form.
    self.assertRaises(InsufficientTrainingDataError, empty_trainer.train, [])
def test_train_and_save(self):
    """Train on ``self.train_data["features"]`` and save to ``tmp.model``.

    The test has no explicit assertion; it passes when neither ``train``
    nor ``save`` raises.
    """
    model_trainer = Trainer()
    features = self.train_data["features"]
    model_trainer.train(features)
    model_trainer.save("tmp.model")
def test_train_with_small_data(self):
    """Two samples with ``n_components=3`` must raise ``SmallTrainingDataError``."""
    samples = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
    small_trainer = Trainer()
    # Callable form of assertRaises: positional and keyword arguments after
    # the callable are forwarded to ``train``.
    self.assertRaises(
        SmallTrainingDataError,
        small_trainer.train,
        samples,
        n_components=3,
    )
def test_train_with_current_size_data(self):
    """Train on 20 generated rows of the form ``[i, i + 1, i + 2]``.

    The test has no explicit assertion; it passes when ``train`` does not
    raise.
    """
    trainer = Trainer()
    rows = [[start, start + 1, start + 2] for start in range(20)]
    trainer.train(rows)
def test_train_with_small_data(self):
    """Two samples with default settings must raise ``TooMuchComponentError``."""
    samples = [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
    small_trainer = Trainer()
    # Callable form of assertRaises: invokes ``train(samples)`` and checks
    # the exception type.
    self.assertRaises(TooMuchComponentError, small_trainer.train, samples)