Example #1
class TestOCSVM(unittest.TestCase):
    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_parameters(self):
        assert (hasattr(self.clf, 'decision_scores_')
                and self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_')
                and self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and self.clf._sigma is not None)
        assert (hasattr(self.clf, 'support_')
                and self.clf.support_ is not None)
        assert (hasattr(self.clf, 'support_vectors_')
                and self.clf.support_vectors_ is not None)
        assert (hasattr(self.clf, 'dual_coef_')
                and self.clf.dual_coef_ is not None)
        assert (hasattr(self.clf, 'intercept_')
                and self.clf.intercept_ is not None)

        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
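The test class above is shown without its module header. A plausible header is sketched below: the numpy, scipy, scikit-learn and pyod imports are standard, while the assert_greater-style helpers originally shipped with older scikit-learn releases (sklearn.utils.testing) and are absent from recent ones, so minimal stand-ins are defined here instead; treat the whole block as an assumption rather than the original file.

import unittest

from numpy.testing import assert_allclose, assert_array_less, assert_equal, assert_raises
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score

from pyod.models.ocsvm import OCSVM
from pyod.utils.data import generate_data


# Stand-ins for helpers this test class uses, which older scikit-learn
# versions exposed via sklearn.utils.testing (removed in 0.24+).
def assert_greater(a, b):
    assert a > b


def assert_greater_equal(a, b):
    assert a >= b


def assert_less_equal(a, b):
    assert a <= b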
Example #2
class SolverVAECIFAR():
    def __init__(self,
                 data_name,
                 hidden_dim=256,
                 seed=0,
                 learning_rate=3e-4,
                 normal_class=0,
                 anomaly_ratio=0.1,
                 batch_size=128,
                 concentrated=0,
                 training_ratio=0.8,
                 SN=1,
                 Trim=1,
                 L=1.5,
                 max_epochs=100):
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.L = L
        if concentrated == 1.0:
            full_data_name = 'CIFAR10_Concentrated'
        elif concentrated == 0.0:
            full_data_name = 'CIFAR10'
        else:
            raise ValueError("concentrated is expected to be 0 or 1")
        self.result_path = "./results/{}_{}/0.0/OCSVM/{}/".format(
            full_data_name, normal_class, seed)
        data_path = "./data/" + data_name + ".npy"
        self.learning_rate = learning_rate
        self.SN = SN
        self.Trim = Trim
        # self.dataset = RealGraphDataset(data_path, missing_ratio=0, radius=2)
        self.dataset = CIFARVGGDataset(data_path,
                                       normal_class=normal_class,
                                       anomaly_ratio=anomaly_ratio,
                                       concentrated=concentrated)
        self.seed = seed
        self.hidden_dim = hidden_dim
        self.max_epochs = max_epochs

        self.data_path = data_path
        self.data_anomaly_ratio = self.dataset.__anomalyratio__()
        self.batch_size = batch_size
        self.input_dim = self.dataset.__dim__()
        self.data_normaly_ratio = 1 - self.data_anomaly_ratio
        n_sample = self.dataset.__len__()
        self.n_train = int(n_sample * training_ratio)
        self.n_test = n_sample - self.n_train
        print('|data dimension: {}|data noise ratio:{}'.format(
            self.dataset.__dim__(), self.data_anomaly_ratio))

        self.training_data, self.testing_data = data.random_split(
            dataset=self.dataset, lengths=[self.n_train, self.n_test])

        self.ae = None
        self.discriminator = None
        self.model = None

    def train(self):
        self.model = OCSVM()
        self.model.fit(self.training_data.dataset.x)

    def test(self):
        y_test_scores = self.model.decision_function(
            self.testing_data.dataset.x)
        auc = roc_auc_score(self.testing_data.dataset.y, y_test_scores)

        from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score

        print("AUC:{:0.4f}".format(auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": auc,
                "precision": auc,
                "recall": auc,
                "f1": auc,
                "auc": auc,
            },
        )  # for consistency
        print("result save to {}".format(self.result_path))
Example #3
    # (the excerpt originally started mid-call) generate synthetic data with
    # pyod's generate_data; n_train, n_test and contamination are assumed to
    # be defined above, and the unpacking order follows the other examples
    # on this page
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train one_class_svm detector
    clf_name = 'OneClassSVM'
    clf = OCSVM()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name,
              X_train,
              y_train,
              X_test,
              y_test,
              y_train_pred,
              y_test_pred,
              show_figure=True,   # closing arguments reconstructed (excerpt was cut off)
              save_figure=False)
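The snippet above is clipped at both ends and omits its imports; assuming it follows PyOD's stock OCSVM example layout, the header it relies on would look roughly like this (a reconstruction with typical values, not the original file).

from pyod.models.ocsvm import OCSVM
from pyod.utils.data import generate_data, evaluate_print
from pyod.utils.example import visualize

contamination = 0.1  # typical value: percentage of outliers
n_train = 200        # typical value: number of training points
n_test = 100         # typical value: number of testing points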
Example #4
class TestOCSVM(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        assert_true(hasattr(self.clf, 'decision_scores_') and
                    self.clf.decision_scores_ is not None)
        assert_true(hasattr(self.clf, 'labels_') and
                    self.clf.labels_ is not None)
        assert_true(hasattr(self.clf, 'threshold_') and
                    self.clf.threshold_ is not None)
        assert_true(hasattr(self.clf, '_mu') and
                    self.clf._mu is not None)
        assert_true(hasattr(self.clf, '_sigma') and
                    self.clf._sigma is not None)
        assert_true(hasattr(self.clf, 'support_') and
                    self.clf.support_ is not None)
        assert_true(hasattr(self.clf, 'support_vectors_') and
                    self.clf.support_vectors_ is not None)
        assert_true(hasattr(self.clf, 'dual_coef_') and
                    self.clf.dual_coef_ is not None)
        assert_true(hasattr(self.clf, 'intercept_') and
                    self.clf.intercept_ is not None)

        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
Example #5
class TestOCSVM(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        if not hasattr(self.clf,
                       'decision_scores_') or self.clf.decision_scores_ is None:
            self.assertRaises(AttributeError, 'decision_scores_ is not set')
        if not hasattr(self.clf, 'labels_') or self.clf.labels_ is None:
            self.assertRaises(AttributeError, 'labels_ is not set')
        if not hasattr(self.clf, 'threshold_') or self.clf.threshold_ is None:
            self.assertRaises(AttributeError, 'threshold_ is not set')
        if not hasattr(self.clf,
                       'support_') or self.clf.support_ is None:
            self.assertRaises(AttributeError, 'support_ is not set')
        if not hasattr(self.clf,
                       'support_vectors_') or self.clf.support_vectors_ is None:
            self.assertRaises(AttributeError, 'support_vectors_ is not set')
        if not hasattr(self.clf, 'dual_coef_') or self.clf.dual_coef_ is None:
            self.assertRaises(AttributeError, 'dual_coef_ is not set')

        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')
        if not hasattr(self.clf, 'intercept_') or self.clf.intercept_ is None:
            self.assertRaises(AttributeError, 'intercept_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
Example #6
    # split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

    # fit the data with pyod's OCSVM detector
    clf_name = 'OCSVM'
    clf = OCSVM()
    clf.fit(X_train)

    # the fitted detector yields binary labels: 1 marks an outlier, 0 an inlier
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores of the training data

    # predict whether each test sample is an outlier; returns an array of 0s and 1s
    y_test_pred = clf.predict(X_test)

    y_test_scores = clf.decision_function(
        X_test)  # outlier scores of the input samples
    # use sklearn's roc_auc_score to compute the AUC, i.e. the area under the ROC curve
    try:
        sumAuc_train += sklearn.metrics.roc_auc_score(y_train,
                                                      y_train_scores,
                                                      average='macro')
        sumAuc_test += sklearn.metrics.roc_auc_score(y_test,
                                                     y_test_scores,
                                                     average='macro')
        #s=precision_score(y_train, y_train_scores, average='macro')
        i += 1
        print(sumAuc_train, sumAuc_test)
    except ValueError:
        print('1')
        pass
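For reference, a self-contained sketch of the same pattern (split, fit OCSVM, score both splits, compute AUC), with synthetic data standing in for the x and y that the excerpt assumes are defined earlier; everything here is illustrative.

import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from pyod.models.ocsvm import OCSVM

# synthetic stand-ins: 450 inliers and 50 shifted outliers in 5 dimensions
rng = np.random.RandomState(42)
x = np.vstack([rng.randn(450, 5), rng.randn(50, 5) * 3 + 6])
y = np.hstack([np.zeros(450), np.ones(50)])

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33,
                                                    random_state=42)

clf = OCSVM()
clf.fit(X_train)
train_auc = roc_auc_score(y_train, clf.decision_scores_)          # training scores
test_auc = roc_auc_score(y_test, clf.decision_function(X_test))   # test scores
print(train_auc, test_auc)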
Example #7
def run_all_models(all_array, labels, pca, data_set_name):
    picture_name = all_array.get("# img", 1)
    all_array = all_array.drop("# img", axis=1)

    # standardizing data for processing
    all_array = standardizer(all_array)

    y = labels.get("in").to_numpy()
    x_train, x_test, y_train, y_test, picture_train, picture_test = train_test_split(all_array, y, picture_name,
                                                                                     test_size=0.4)

    if pca:
        transformer = IncrementalPCA()
        all_array = transformer.fit_transform(all_array)

    print("OCSVM")
    now = time()
    clf = OCSVM()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("OCSVM", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("Auto-encoder")
    now = time()
    clf = AutoEncoder(epochs=30)
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("Auto-encoder", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("HBOS")
    now = time()
    clf = HBOS()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("HBOS", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("SO_GAAL")
    now = time()
    clf = SO_GAAL()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("SO_GAAL", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("MO_GAAL")
    now = time()
    clf = MO_GAAL()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("MO_GAAL", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("MCD")
    now = time()
    clf = MCD()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("MCD", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("SOS")
    now = time()
    clf = SOS()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("SOS", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("IForest")
    now = time()
    clf = IForest()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("IFrorest", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("KNN")
    now = time()
    clf = KNN()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("KNN", all_array.shape, temp, data_set_name, time() - now, scores_train))

    print("PCA")
    now = time()
    clf = PCA()
    clf.fit(x_train)
    test_scores = clf.decision_function(x_test)
    temp = print_score(picture_test, test_scores, y_test)
    train_scores = clf.decision_function(x_train)
    scores_train = print_score(picture_train, train_scores, y_train)
    output_table.append(("PCA", all_array.shape, temp, data_set_name, time() - now, scores_train))
Example #8
def main():
    parser = argparse.ArgumentParser(description='baseline')
    register_data_args(parser)
    parser.add_argument("--mode",
                        type=str,
                        default='A',
                        choices=['A', 'AX', 'X'],
                        help="dropout probability")
    parser.add_argument("--seed",
                        type=int,
                        default=-1,
                        help="random seed, -1 means dont fix seed")
    parser.add_argument(
        "--emb-method",
        type=str,
        default='DeepWalk',
        help="embedding methods: DeepWalk, Node2Vec, LINE, SDNE, Struc2Vec")
    parser.add_argument("--ad-method",
                        type=str,
                        default='OCSVM',
                        help="embedding methods: PCA,OCSVM,IF,AE")
    args = parser.parse_args()

    if args.seed != -1:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    logging.basicConfig(
        filename="./log/baseline.log",
        filemode="a",
        format="%(asctime)s-%(name)s-%(levelname)s-%(message)s",
        level=logging.INFO)
    logger = logging.getLogger('baseline')

    datadict = emb_dataloader(args)

    dur1 = 0.0  # embedding time; stays zero when raw features are used directly
    if args.mode == 'X':
        data = datadict['features']
        #print('X shape',data.shape)
    else:
        t0 = time.time()
        embeddings = embedding(args, datadict)
        dur1 = time.time() - t0

        if args.mode == 'A':
            data = embeddings
            #print('A shape',data.shape)
        if args.mode == 'AX':
            data = np.concatenate((embeddings, datadict['features']), axis=1)
            #print('AX shape',data.shape)

    logger.debug(f'data shape: {data.shape}')

    if args.ad_method == 'OCSVM':
        clf = OCSVM(contamination=0.1)
    if args.ad_method == 'IF':
        clf = IForest(n_estimators=100,
                      contamination=0.1,
                      n_jobs=-1,
                      behaviour="new")
    if args.ad_method == 'PCA':
        clf = PCA(contamination=0.1)
    if args.ad_method == 'AE':
        clf = AutoEncoder(contamination=0.1)

    t1 = time.time()
    clf.fit(data[datadict['train_mask']])
    dur2 = time.time() - t1

    print('training time:', dur1 + dur2)

    logger.info('\n')
    logger.info('\n')
    logger.info(
        f'Parameters dataset:{args.dataset} datamode:{args.mode} ad-method:{args.ad_method} emb-method:{args.emb_method}'
    )
    logger.info('-------------Evaluating Validation Results--------------')

    t2 = time.time()
    y_pred_val = clf.predict(data[datadict['val_mask']])
    y_score_val = clf.decision_function(data[datadict['val_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_val,
                                                            y_score_val,
                                                            val=True)
    dur3 = time.time() - t2
    print('infer time:', dur3)

    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )

    logger.info('-------------Evaluating Test Results--------------')
    y_pred_test = clf.predict(data[datadict['test_mask']])
    y_score_test = clf.decision_function(data[datadict['test_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_test,
                                                            y_score_test,
                                                            val=False)
    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )