def print_accuracy(train_arr, test_arr, trader_id):
    """Fit a one-class SVM per series and print train/test scores for a trader.

    For each paired (train, test) series, a fresh OCSVM is trained on the
    training column.  The training line prints ``100 - percent flagged as
    outlier``; the test line prints the raw flagged percentage.

    Parameters
    ----------
    train_arr : list of sequences
        Training series, one per feature/stream.
    test_arr : list of sequences
        Test series, parallel to ``train_arr``.
    trader_id :
        Identifier echoed in the printed report.
    """
    # Nothing to report when either collection is empty.
    if len(train_arr) == 0 or len(test_arr) == 0:
        return
    for idx in range(len(train_arr)):
        n_train = len(train_arr[idx])
        n_test = len(test_arr[idx])
        # Skip series with no observations on either side.
        if n_train == 0 or n_test == 0:
            continue
        # Reshape each 1-D series into a single-feature column matrix.
        train_col = np.array([train_arr[idx]]).T
        test_col = np.array([test_arr[idx]]).T
        model = OCSVM()
        model.fit(train_col)
        flagged = model.predict(train_col)
        print("TRAINING ACCURACY for TRADER", trader_id, ":",
              100 - (sum(flagged) * 100 / n_train))
        # NOTE(review): this prints the flagged percentage itself, matching
        # the original behaviour.
        flagged = model.predict(test_col)
        print("TESTING ACCURACY: ", sum(flagged) * 100 / n_test)
def detect_outliers(lst):
    """Detect outliers in a list of numpy arrays.

    Parameters
    ----------
    lst : List
        List of 1-D numpy arrays, one sample each.

    Returns
    -------
    inlier_idx : List
        Indices into ``lst`` of the inliers.
    outlier_idx : List
        Indices into ``lst`` of the outliers.
    """
    # BUG FIX: an empty input previously crashed — ``clf.fit`` cannot fit on
    # no data and the percentage log below divides by ``len(lst)``.
    if len(lst) == 0:
        return [], []
    clf = OCSVM(verbose=0)
    clf.fit(lst)
    inlier_idx = []
    outlier_idx = []
    for index, data in enumerate(lst):
        y = clf.predict(data.reshape(1, -1))
        if y:  # y == 1 marks an outlier
            logger.debug('Found outlier: {0}'.format(index))
            outlier_idx.append(index)
        else:
            inlier_idx.append(index)
    logger.info('{:.0%} are outliers'.format(len(outlier_idx) / len(lst)))
    return inlier_idx, outlier_idx
class TestOCSVM(unittest.TestCase):
    """Unit tests for the OCSVM detector fitted on synthetic data."""

    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_parameters(self):
        # Every fitted attribute must exist and be populated.
        fitted_attrs = ['decision_scores_', 'labels_', 'threshold_',
                        '_mu', '_sigma', 'support_', 'support_vectors_',
                        'dual_coef_', 'intercept_']
        for attr in fitted_attrs:
            assert (hasattr(self.clf, attr) and
                    getattr(self.clf, attr) is not None)
        # 'coef_' is only available for the linear kernel, so it is not
        # checked here.

    def test_train_scores(self):
        # One decision score per training sample.
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        # Unknown probability-conversion methods must be rejected.
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        # Unknown scoring names raise NotImplementedError.
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)
        # assert the rank ordering follows the decision scores
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores),
                        atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
        # assert the rank ordering follows the decision scores
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores),
                        atol=3.5)
        # normalized ranks lie in [0, 1]
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
n_test=n_test, n_features=2, contamination=contamination, random_state=42) # train one_class_svm detector clf_name = 'OneClassSVM' clf = OCSVM() clf.fit(X_train) # get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) y_test_scores = clf.decision_function(X_test) # outlier scores # evaluate and print the results print("\nOn Training Data:") evaluate_print(clf_name, y_train, y_train_scores) print("\nOn Test Data:") evaluate_print(clf_name, y_test, y_test_scores) # visualize the results visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
# from pyod.models.mcd import MCD

# Fit two outlier detectors on the same training set:
#   clf1 — PCA with feature standardisation, assuming 20% contamination;
#   clf2 — a polynomial-kernel one-class SVM with the same contamination.
# NOTE(review): `train_set`, `test_set` and `pred_train_set` come from
# earlier in the file (outside this fragment); presumably feature rows plus
# 0/1 ground-truth labels — verify against the surrounding code.
clf1=PCA(standardization = True,contamination=0.2)
# clf1 = MCD(assume_centered = True)
clf2=OCSVM(kernel = 'poly',nu = 0.25,degree =2,contamination =0.2)
# clf2 = OCSVM(kernel = 'linear',nu =0.02)

clf1.fit(train_set)
clf2.fit(train_set)

# Binary predictions (1 = outlier) from both detectors on train and test data.
y_pred_train_pca=clf1.predict(train_set)
y_pred_test_pca=clf1.predict(test_set)
y_pred_train_ocsvm=clf2.predict(train_set)
y_pred_test_ocsvm=clf2.predict(test_set)
print(clf1.explained_variance_)
# print(y_pred_test_pca,y_pred_test_ocsvm)

# Count, per detector, how many predicted outliers (label 1) match the
# ground truth on the training set.
train_pca_correct=0
train_ocsvm_correct=0
print("TRAIN SET")
for i in range(len(pred_train_set)):
    # print("Actual:",pred_train_set[i],"PCA",y_pred_train_pca[i],"OCSVM",y_pred_train_ocsvm[i])
    if pred_train_set[i]==y_pred_train_pca[i] and pred_train_set[i]==1:
        train_pca_correct+=1
    if pred_train_set[i]==y_pred_train_ocsvm[i] and y_pred_train_ocsvm[i]==1:
        train_ocsvm_correct+=1

# Counters for the test set (tallied later in the file).
test_pca_correct=0
test_ocsvm_correct=0
class TestOCSVM(unittest.TestCase):
    """Unit tests for the OCSVM detector fitted on synthetic data."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # FIX: the nose-style ``assert_true`` helper is deprecated/removed;
        # use plain ``assert`` like the sibling test class in this file.
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and
                self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and
                self.clf._sigma is not None)
        assert (hasattr(self.clf, 'support_') and
                self.clf.support_ is not None)
        assert (hasattr(self.clf, 'support_vectors_') and
                self.clf.support_vectors_ is not None)
        assert (hasattr(self.clf, 'dual_coef_') and
                self.clf.dual_coef_ is not None)
        assert (hasattr(self.clf, 'intercept_') and
                self.clf.intercept_ is not None)
        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        # One decision score per training sample.
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        # Unknown probability-conversion methods must be rejected.
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        # Unknown scoring names raise NotImplementedError.
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)
        # assert the rank ordering follows the decision scores
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
        # assert the rank ordering follows the decision scores
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        # normalized ranks lie in [0, 1]
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
class TestOCSVM(unittest.TestCase):
    """Unit tests for the OCSVM detector fitted on synthetic data."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination)

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # BUG FIX: the original wrapped each check in
        # ``if not hasattr(...): self.assertRaises(AttributeError, '<msg>')``.
        # assertRaises tries to *call* its second argument, so passing a
        # message string raised TypeError instead of reporting the missing
        # attribute — and when the attribute existed, nothing was asserted
        # at all.  Assert the fitted attributes directly instead.
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)
        assert (hasattr(self.clf, 'support_') and
                self.clf.support_ is not None)
        assert (hasattr(self.clf, 'support_vectors_') and
                self.clf.support_vectors_ is not None)
        assert (hasattr(self.clf, 'dual_coef_') and
                self.clf.dual_coef_ is not None)
        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')
        assert (hasattr(self.clf, 'intercept_') and
                self.clf.intercept_ is not None)

    def test_train_scores(self):
        # One decision score per training sample.
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        # Unknown probability-conversion methods must be rejected.
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
# Per-trader anomaly detection: fit three one-class SVMs (volume-only,
# price-only, joint volume+price) on one trader's transactions and collect
# the timestamps each model flags as malicious.
# NOTE(review): this fragment references ``i``, ``d_transaction``,
# ``d_attack`` and ``data_vol`` defined outside the visible chunk — it
# appears to sit inside a loop over trader ids.  Confirm that ``data_vol``
# is re-initialised per trader just above this block, otherwise volumes
# accumulate across traders.
data_price = []
data_vol_price = []
mal_t_stamps1 = []   # timestamps flagged by the volume-only model
mal_t_stamps2 = []   # timestamps flagged by the price-only model
mal_t_stamps12 = []  # timestamps flagged by the joint model
clf1 = OCSVM()
clf2 = OCSVM()
clf12 = OCSVM()
# Each transaction j is indexed as: j[0] timestamp key, j[1] price, j[2] volume
# (inferred from the append targets — verify against the loader).
for j in d_transaction[i]:
    data_vol.append(j[2])
    data_price.append(j[1])
    data_vol_price.append([j[2], j[1]])
clf1.fit(np.array([data_vol]).T)     # volume-only model
clf2.fit(np.array([data_price]).T)   # price-only model
clf12.fit(np.array(data_vol_price))  # joint (volume, price) model
for j in d_transaction[i]:
    p1 = clf1.predict(np.array(j[2]).reshape(1, -1))
    # BUG FIX: clf2 was fitted on prices (j[1]) but originally predicted on
    # the volume j[2]; feed it the price instead.
    p2 = clf2.predict(np.array(j[1]).reshape(1, -1))
    p3 = clf12.predict(np.array([j[2], j[1]]).T.reshape(1, -1))
    if p1 == 1:
        mal_t_stamps1.append(j[0])
    if p2 == 1:
        mal_t_stamps2.append(j[0])
    if p3 == 1:
        mal_t_stamps12.append(j[0])
# Report how many flagged timestamps coincide with the known attack set.
s = set(d_attack[i])
print("TRADER", i, "VOL", len(s & set(mal_t_stamps1)), "OUT OF",
      len(mal_t_stamps1), "PRICE", len(s & set(mal_t_stamps2)), "OUT OF",
      len(mal_t_stamps2), "VOL AND PRICE", len(s & set(mal_t_stamps12)),
      "OUT OF", len(mal_t_stamps12))
y = data['s'] #划分测试集和训练集 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33) #使用pyod中的OCSVM算法拟合数据 clf_name = 'OCSVM' clf = OCSVM() clf.fit(X_train) #预测得到由0和1组成的数组,1表示离群点,0表示飞离群点 y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores,The outlier scores of the training data. #预测样本是不是离群点,返回0和1 的数组 y_test_pred = clf.predict(X_test) y_test_scores = clf.decision_function( X_test) # outlier scores,The anomaly score of the input samples. #使用sklearn中的roc_auc_score方法得到auc值,即roc曲线下面的面积 try: sumAuc_train += sklearn.metrics.roc_auc_score(y_train, y_train_scores, average='macro') sumAuc_test += sklearn.metrics.roc_auc_score(y_test, y_test_scores, average='macro') #s=precision_score(y_train, y_train_scores, average='macro') i += 1 print(sumAuc_train, sumAuc_test) except ValueError:
def main():
    """Run an embedding + outlier-detection baseline and log val/test metrics.

    Parses CLI arguments, optionally embeds the graph, fits the chosen
    anomaly detector (OCSVM / IForest / PCA / AutoEncoder) on the training
    mask, then evaluates on the validation and test masks.
    """
    parser = argparse.ArgumentParser(description='baseline')
    register_data_args(parser)
    parser.add_argument("--mode", type=str, default='A',
                        choices=['A', 'AX', 'X'],
                        help="dropout probability")
    parser.add_argument("--seed", type=int, default=-1,
                        help="random seed, -1 means dont fix seed")
    parser.add_argument(
        "--emb-method", type=str, default='DeepWalk',
        help="embedding methods: DeepWalk, Node2Vec, LINE, SDNE, Struc2Vec")
    parser.add_argument("--ad-method", type=str, default='OCSVM',
                        help="embedding methods: PCA,OCSVM,IF,AE")
    args = parser.parse_args()

    # Fix all RNGs when a seed is supplied.
    if args.seed != -1:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    logging.basicConfig(
        filename="./log/baseline.log",
        filemode="a",
        format="%(asctime)s-%(name)s-%(levelname)s-%(message)s",
        level=logging.INFO)
    logger = logging.getLogger('baseline')

    datadict = emb_dataloader(args)

    # BUG FIX: dur1 was only assigned on the embedding path, so running with
    # --mode X crashed with NameError at the training-time print below.
    dur1 = 0.0
    if args.mode == 'X':
        # Raw node features only.
        data = datadict['features']
    else:
        t0 = time.time()
        embeddings = embedding(args, datadict)
        dur1 = time.time() - t0
        if args.mode == 'A':
            # Embeddings only.
            data = embeddings
        if args.mode == 'AX':
            # Embeddings concatenated with raw features.
            data = np.concatenate((embeddings, datadict['features']), axis=1)
    logger.debug(f'data shape: {data.shape}')

    # Select the anomaly detector.
    if args.ad_method == 'OCSVM':
        clf = OCSVM(contamination=0.1)
    if args.ad_method == 'IF':
        clf = IForest(n_estimators=100, contamination=0.1, n_jobs=-1,
                      behaviour="new")
    if args.ad_method == 'PCA':
        clf = PCA(contamination=0.1)
    if args.ad_method == 'AE':
        clf = AutoEncoder(contamination=0.1)

    t1 = time.time()
    clf.fit(data[datadict['train_mask']])
    dur2 = time.time() - t1
    # Typo fixed: "traininig" -> "training".
    print('training time:', dur1 + dur2)

    logger.info('\n')
    logger.info('\n')
    logger.info(
        f'Parameters dataset:{args.dataset} datamode:{args.mode} ad-method:{args.ad_method} emb-method:{args.emb_method}'
    )
    logger.info('-------------Evaluating Validation Results--------------')
    t2 = time.time()
    y_pred_val = clf.predict(data[datadict['val_mask']])
    y_score_val = clf.decision_function(data[datadict['val_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_val,
                                                            y_score_val,
                                                            val=True)
    dur3 = time.time() - t2
    print('infer time:', dur3)
    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )
    logger.info('-------------Evaluating Test Results--------------')
    y_pred_test = clf.predict(data[datadict['test_mask']])
    y_score_test = clf.decision_function(data[datadict['test_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_test,
                                                            y_score_test,
                                                            val=False)
    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )