def vaeAD(self, encoder_neurons, decoder_neurons, epochs, contamination):
    """Fit a VAE detector on ``self.X`` and report the resulting labels.

    The four arguments are forwarded unchanged to the ``VAE`` constructor
    under the same keyword names.  After fitting, the detector's training
    labels are passed to ``generateAnomalis`` together with ``self.data``
    and ``self.label``, and ``self.evaluate()`` is called.

    Parameters
    ----------
    encoder_neurons : passed to ``VAE(encoder_neurons=...)``.
    decoder_neurons : passed to ``VAE(decoder_neurons=...)``.
    epochs : passed to ``VAE(epochs=...)``.
    contamination : passed to ``VAE(contamination=...)``.

    Returns
    -------
    None
    """
    clf = VAE(
        encoder_neurons=encoder_neurons,
        decoder_neurons=decoder_neurons,
        epochs=epochs,
        contamination=contamination,
    )
    clf.fit(self.X)

    # Binary labels of the training data (0: inliers, 1: outliers).
    y_pred = clf.labels_

    generateAnomalis(self.data, self.label, y_pred)
    self.evaluate()
def detect_outliers_VAE(df):
    '''
    Returns the negated outlier scores using Variational AutoEncoders

    The encoder layer widths are derived from the number of columns:
    with fewer than 128 columns they are (n, n // 2, n // 4), each
    clamped to at least 1; otherwise they are fixed at (128, 64, 32).
    The decoder mirrors the encoder.

    Parameters:
    -----------
    df: pd.DataFrame,
        Input data; it is cast to ``np.float32`` before fitting.

    Returns:
    --------
    The fitted detector's ``decision_scores_`` multiplied by -1.  PyOD
    documents larger decision scores as more anomalous, so after the sign
    flip the *lowest* returned values mark the strongest outliers.
    '''
    n_features = df.shape[1]
    if n_features < 128:
        # Floor division keeps the widths integral (``/`` would produce
        # floats such as 2.5), and the clamp avoids a zero-width layer
        # for very narrow inputs.
        encoder = [max(1, n_features // divisor) for divisor in (1, 2, 4)]
    else:
        encoder = [128, 64, 32]
    decoder = encoder[::-1]

    clf = VAE(contamination=0.1, encoder_neurons=encoder,
              decoder_neurons=decoder)
    df = df.astype(np.float32)
    clf.fit(df)

    outlier_score = clf.decision_scores_
    return outlier_score * -1
# Example run: fit a VAE detector on generated data and print its
# evaluation on the training and test splits.
#
# NOTE(review): `contamination` is read below but is not assigned in this
# fragment; presumably it is defined earlier in the script -- confirm.

n_train = 20000  # number of training points
n_test = 2000  # number of testing points
n_features = 300  # number of features

# Generate sample data
X_train, y_train, X_test, y_test = generate_data(
    n_train=n_train,
    n_test=n_test,
    n_features=n_features,
    contamination=contamination,
    random_state=42,
)

# train VAE detector
clf_name = 'VAE'
clf = VAE(epochs=30, contamination=contamination)
clf.fit(X_train)

# prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# predictions on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
class SolverVAECIFAR:
    """Train and score a default ``VAE`` detector on a ``CIFARVGGDataset``.

    The constructor seeds NumPy and PyTorch, loads the dataset from
    ``./data/<data_name>.npy``, and splits it with ``random_split`` into
    training and testing subsets.  ``train()`` fits the detector and
    ``test()`` computes the ROC AUC and saves it under ``result_path``.

    Parameters
    ----------
    data_name : base name (without ``.npy``) of the file under ``./data/``.
    hidden_dim, learning_rate, batch_size, max_epochs, SN, Trim, L :
        stored on the instance; none of them is forwarded to ``VAE()`` in
        this class.
    seed : seed for NumPy / PyTorch; also part of ``result_path``.
    normal_class, anomaly_ratio : forwarded to ``CIFARVGGDataset``.
    concentrated : must compare equal to 0 or 1; selects the result
        directory name and is forwarded to ``CIFARVGGDataset``.
    training_ratio : fraction of samples assigned to the training subset.

    Raises
    ------
    ValueError
        If ``concentrated`` is neither 0 nor 1.
    """

    def __init__(self, data_name, hidden_dim=256, seed=0, learning_rate=3e-4,
                 normal_class=0, anomaly_ratio=0.1, batch_size=128,
                 concentrated=0, training_ratio=0.8, SN=1, Trim=1, L=1.5,
                 max_epochs=100):
        # Resolve the result-directory name first so an unsupported value
        # fails immediately with a clear message, instead of surfacing
        # later as an unbound local variable.
        if concentrated == 1.0:
            full_data_name = 'CIFAR10_Concentrated'
        elif concentrated == 0.0:
            full_data_name = 'CIFAR10'
        else:
            raise ValueError(
                "concentrated must be 0 or 1, got {!r}".format(concentrated)
            )

        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        self.L = L
        self.result_path = "./results/{}_{}/0.0/VAE/{}/".format(
            full_data_name, normal_class, seed
        )
        data_path = "./data/" + data_name + ".npy"

        self.learning_rate = learning_rate
        self.SN = SN
        self.Trim = Trim
        self.dataset = CIFARVGGDataset(data_path,
                                       normal_class=normal_class,
                                       anomaly_ratio=anomaly_ratio,
                                       concentrated=concentrated)
        self.seed = seed
        self.hidden_dim = hidden_dim
        self.max_epochs = max_epochs
        self.data_path = data_path

        self.data_anomaly_ratio = self.dataset.__anomalyratio__()
        self.batch_size = batch_size
        self.input_dim = self.dataset.__dim__()
        self.data_normaly_ratio = 1 - self.data_anomaly_ratio

        n_sample = len(self.dataset)
        self.n_train = int(n_sample * training_ratio)
        self.n_test = n_sample - self.n_train
        print('|data dimension: {}|data noise ratio:{}'.format(
            self.dataset.__dim__(), self.data_anomaly_ratio))

        self.training_data, self.testing_data = data.random_split(
            dataset=self.dataset, lengths=[self.n_train, self.n_test]
        )

        self.ae = None
        self.discriminator = None
        self.model = None

    def train(self):
        """Fit a ``VAE`` with default settings and keep it in ``self.model``."""
        # NOTE(review): `training_data` comes from `random_split`; its
        # `.dataset` attribute is the full underlying dataset, so this fits
        # on every sample rather than only the training split -- confirm
        # whether that is intended.
        # NOTE(review): hidden_dim / learning_rate / batch_size / max_epochs
        # are stored in __init__ but not passed to VAE() here.
        self.model = VAE()
        self.model.fit(self.training_data.dataset.x)

    def test(self):
        """Score the data, print the ROC AUC and save it to ``result.npy``.

        Raises
        ------
        RuntimeError
            If called before ``train()`` (no fitted model is available).
        """
        if getattr(self, "model", None) is None:
            raise RuntimeError("train() must be called before test()")

        # NOTE(review): as in train(), `.dataset` of the subset is the full
        # dataset, so the AUC is computed over all samples -- confirm.
        y_test_scores = self.model.decision_function(
            self.testing_data.dataset.x)
        auc = roc_auc_score(self.testing_data.dataset.y, y_test_scores)
        print("AUC:{:0.4f}".format(auc))

        os.makedirs(self.result_path, exist_ok=True)
        # Only the AUC is computed; the other keys are filled with the same
        # value "for consistency" (per the original note) -- presumably so
        # the saved dict has the same keys as other result files.
        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": auc,
                "precision": auc,
                "recall": auc,
                "f1": auc,
                "auc": auc,
            },
        )
        print("result save to {}".format(self.result_path))
class TestVAE(unittest.TestCase):
    """Checks of the VAE detector's fitted attributes and prediction API."""

    def setUp(self):
        """Generate a fixed dataset and fit a fresh detector for each test."""
        self.n_train = 6000
        self.n_test = 1000
        self.n_features = 300
        self.contamination = 0.1
        self.roc_floor = 0.8

        generated = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            n_features=self.n_features,
            contamination=self.contamination,
            random_state=42,
        )
        self.X_train, self.y_train, self.X_test, self.y_test = generated

        self.clf = VAE(epochs=5, contamination=self.contamination)
        self.clf.fit(self.X_train)

    def _assert_in_unit_interval(self, proba):
        """Assert that every value in ``proba`` lies within [0, 1]."""
        assert (proba.min() >= 0)
        assert (proba.max() <= 1)

    def test_parameters(self):
        # Each attribute must exist on the fitted detector and be non-None.
        fitted_attrs = ('decision_scores_', 'labels_', 'threshold_',
                        '_mu', '_sigma', 'model_')
        for attr_name in fitted_attrs:
            assert (hasattr(self.clf, attr_name) and
                    getattr(self.clf, attr_name) is not None)

    def test_train_scores(self):
        n_scores = len(self.clf.decision_scores_)
        assert_equal(n_scores, self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # one score per test sample
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # ranking quality must reach the configured floor
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        self._assert_in_unit_interval(self.clf.predict_proba(self.X_test))

    def test_prediction_proba_linear(self):
        self._assert_in_unit_interval(
            self.clf.predict_proba(self.X_test, method='linear'))

    def test_prediction_proba_unify(self):
        self._assert_in_unit_interval(
            self.clf.predict_proba(self.X_test, method='unify'))

    def test_prediction_proba_parameter(self):
        # an unknown method name must be rejected
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        # default scoring first, then each explicitly named metric
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for metric in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=metric)

        # an unknown metric name must be rejected
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def tearDown(self):
        """No per-test cleanup is performed."""