Example #1
    def vaeAD(self, encoder_neurons, decoder_neurons, epochs, contamination):
        clf_name = 'VAE'
        clf = VAE(encoder_neurons=encoder_neurons, decoder_neurons=decoder_neurons,
                  epochs=epochs, contamination=contamination)
        clf.fit(self.X)

        # get the prediction labels and outlier scores of the training data
        y_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
        y_scores = clf.decision_scores_  # raw outlier scores
        generateAnomalis(self.data, self.label, y_pred)
        self.evaluate()
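The wrapper above relies on instance state (self.X, self.data, self.label, self.evaluate) that is not shown. A minimal standalone sketch of the same PyOD VAE workflow, using the same encoder_neurons/decoder_neurons API as these examples and purely illustrative data and layer sizes, could look like this:

import numpy as np
from pyod.models.vae import VAE

# toy data: 500 samples, 8 features (illustrative only)
X = np.random.rand(500, 8).astype(np.float32)

clf = VAE(encoder_neurons=[8, 4, 2], decoder_neurons=[2, 4, 8],
          epochs=10, contamination=0.05)
clf.fit(X)

# labels_ is obtained by thresholding decision_scores_ at threshold_,
# which is chosen so that roughly `contamination` of the training points are flagged
y_pred = clf.labels_
y_scores = clf.decision_scores_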
Example #2
def detect_outliers_VAE(df):
    ''' Returns the outlier scores computed by a Variational AutoEncoder.

    Parameters:
    -----------
    df: pd.DataFrame
        Numeric feature matrix, one row per observation.
    '''
    if df.shape[1] < 128:
        # hidden layer sizes must be integers, hence the floor division
        encoder = [df.shape[1], df.shape[1] // 2, df.shape[1] // 4]
        decoder = encoder[::-1]
    else:
        encoder = [128, 64, 32]
        decoder = encoder[::-1]

    clf = VAE(contamination=0.1, encoder_neurons=encoder, decoder_neurons=decoder)

    df = df.astype(np.float32)

    clf.fit(df)
    outlier_score = clf.decision_scores_
    # df_result = pd.DataFrame(outlier_pred, columns=['outlier_pred'])
    # scores are negated so that smaller values indicate stronger outliers
    return outlier_score * -1
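A possible way to call this helper, assuming numpy, pandas and pyod.models.vae.VAE are already imported in the module; the DataFrame here is synthetic:

rng = np.random.default_rng(0)
sample_df = pd.DataFrame(rng.random((200, 12)))  # 200 rows, 12 numeric columns

scores = detect_outliers_VAE(sample_df)
# the scores are negated PyOD decision scores, so the smallest values
# point at the most anomalous rows
most_anomalous = np.argsort(scores)[:5]
print(most_anomalous)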
Example #3
    contamination = 0.1  # fraction of outliers, as in the standard PyOD example
    n_train = 20000  # number of training points
    n_test = 2000  # number of testing points
    n_features = 300  # number of features

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=n_features,
                      contamination=contamination,
                      random_state=42)

    # train VAE detector
    clf_name = 'VAE'
    clf = VAE(epochs=30, contamination=contamination)
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)
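The snippet above is taken from inside a script and assumes the standard PyOD example imports; a minimal preamble along these lines makes it runnable:

from pyod.models.vae import VAE
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print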
Example #4
class SolverVAECIFAR():
    def __init__(self, data_name, hidden_dim=256, seed=0, learning_rate=3e-4, normal_class=0, anomaly_ratio=0.1,
                 batch_size=128, concentrated=0, training_ratio=0.8, SN=1, Trim=1, L=1.5, max_epochs=100):
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.L = L
        if concentrated == 1.0:
            full_data_name = 'CIFAR10_Concentrated'
        else:
            # any other value (including the default 0) is treated as plain CIFAR10
            full_data_name = 'CIFAR10'
        self.result_path = "./results/{}_{}/0.0/VAE/{}/".format(
            full_data_name, normal_class, seed
        )
        data_path = "./data/" + data_name + ".npy"
        self.learning_rate = learning_rate
        self.SN = SN
        self.Trim = Trim
        # self.dataset = RealGraphDataset(data_path, missing_ratio=0, radius=2)
        self.dataset = CIFARVGGDataset(data_path, normal_class=normal_class, anomaly_ratio=anomaly_ratio, concentrated=concentrated)
        self.seed = seed
        self.hidden_dim = hidden_dim
        self.max_epochs = max_epochs

        self.data_path = data_path
        self.data_anomaly_ratio = self.dataset.__anomalyratio__()
        self.batch_size = batch_size
        self.input_dim = self.dataset.__dim__()
        self.data_normaly_ratio = 1 - self.data_anomaly_ratio
        n_sample = self.dataset.__len__()
        self.n_train = int(n_sample * training_ratio)
        self.n_test = n_sample - self.n_train
        print('|data dimension: {}|data noise ratio:{}'.format(self.dataset.__dim__(), self.data_anomaly_ratio))

        self.training_data, self.testing_data = data.random_split(
            dataset=self.dataset, lengths=[self.n_train, self.n_test]
        )

        self.ae = None
        self.discriminator = None
        self.model = None



    def train(self):
        self.model = VAE()
        self.model.fit(self.training_data.dataset.x)


    def test(self):
        y_test_scores = self.model.decision_function(self.testing_data.dataset.x)
        auc = roc_auc_score(self.testing_data.dataset.y, y_test_scores)

        from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score

        print("AUC:{:0.4f}".format(
           auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": auc,
                "precision": auc,
                "recall": auc,
                "f1": auc,
                "auc": auc,
            },
        ) # for consistency
        print("result save to {}".format(self.result_path))
Example #5
class TestVAE(unittest.TestCase):
    def setUp(self):
        self.n_train = 6000
        self.n_test = 1000
        self.n_features = 300
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            n_features=self.n_features,
            contamination=self.contamination,
            random_state=42)

        self.clf = VAE(epochs=5, contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        assert (hasattr(self.clf, 'decision_scores_')
                and self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_')
                and self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and self.clf._sigma is not None)
        assert (hasattr(self.clf, 'model_') and self.clf.model_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    def tearDown(self):
        pass
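The test class depends on a handful of imports that are not shown; a plausible preamble plus the usual unittest entry point (the exact import sources may differ in the real test module) would be:

import unittest

from numpy.testing import assert_equal
from numpy.testing import assert_raises
from sklearn.metrics import roc_auc_score

from pyod.models.vae import VAE
from pyod.utils.data import generate_data

if __name__ == '__main__':
    unittest.main()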