def lof_pyod_once(X_nor, X_test, y_test, n_neighbors, contamination=0.05):
    """Fit a PyOD LOF detector on normal data and evaluate it on a test set.

    Parameters
    ----------
    X_nor : pandas.DataFrame
        Training data assumed to contain only normal samples (``.values`` is
        taken, so a DataFrame-like object is expected -- TODO confirm callers).
    X_test : array-like
        Test samples to score.
    y_test : array-like
        Ground-truth binary labels for ``X_test`` (1 = outlier).
    n_neighbors : int
        Number of neighbors used by LOF.
    contamination : float, optional
        Expected proportion of outliers; fixes the decision threshold.

    Returns
    -------
    tuple
        ``(tpr, fpr, auc, scoreTable)`` where ``scoreTable`` holds the raw
        anomaly scores for ``X_test`` with NaNs replaced by finite numbers.
    """
    lof = LOF(n_neighbors=n_neighbors, contamination=contamination)
    X_train = X_nor.astype(float).values.copy()
    lof.fit(X_train)  # the decision threshold is determined during fit

    y_pred = lof.predict(X_test)
    # Raw anomaly scores; NaNs would break roc_auc_score below, so
    # sanitize them up front.
    scoreTable = np.nan_to_num(lof.decision_function(X_test), copy=True)

    # Confusion matrix -> true/false positive rates.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = tp / (tp + fn)
    fpr = fp / (tn + fp)

    # AUC is computed from the continuous scores, not the hard labels.
    auc = roc_auc_score(y_test, scoreTable)
    return tpr, fpr, auc, scoreTable
def obj_func_LOF(params):
    """Objective function for Bayesian hyper-parameter optimization of LOF.

    ``params`` is an indexable sequence holding, in order:
    outlier fraction, number of neighbors, neighbor-search algorithm and
    leaf size. The training arrays are reloaded from ``X_train.npy`` /
    ``Y_train.npy`` on every call. Relies on the module-level helpers
    ``Rprecision_f`` and ``objVal_f`` and on the ``glb_verbose`` flag.
    """
    contamination_rate = params[0]
    k_neighbors = params[1]
    algo_name = params[2]
    leaf_sz = params[3]

    # Load the data set into the function workspace.
    labels = np.load('Y_train.npy')
    features = np.load('X_train.npy')

    # Build and fit the detector with the candidate hyper-parameters.
    detector = LOF(n_neighbors=k_neighbors, algorithm=algo_name,
                   leaf_size=leaf_sz, contamination=contamination_rate)
    detector.fit(features)

    # Negate the raw anomaly scores so that larger means more normal,
    # matching what Rprecision_f expects.
    raw_scores = -1 * detector.decision_function(features)

    r_precision = Rprecision_f(labels, raw_scores)
    if glb_verbose:
        print('R Precision : ', r_precision)

    # Hard outlier/inlier labels feed into the final objective value.
    predicted = detector.predict(features)
    return objVal_f(r_precision, predicted, labels)
n_features=2, contamination=contamination, random_state=42) # train LOF detector clf_name = 'LOF' clf = LOF() clf.fit(X_train) # get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) y_test_scores = clf.decision_function(X_test) # outlier scores # evaluate and print the results print("\nOn Training Data:") evaluate_print(clf_name, y_train, y_train_scores) print("\nOn Test Data:") evaluate_print(clf_name, y_test, y_test_scores) # visualize the results visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred, y_test_pred,
class TestLOF(unittest.TestCase):
    """Tests for the PyOD LOF detector: fitted attributes, predictions,
    probabilities, scoring helpers and rank prediction."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6  # minimum acceptable ROC-AUC on synthetic data
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = LOF(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        # The detector must satisfy the scikit-learn estimator contract.
        check_estimator(self.clf)

    def test_parameters(self):
        # Every attribute produced by fit() must exist and be non-None.
        assert_true(hasattr(self.clf, 'decision_scores_') and
                    self.clf.decision_scores_ is not None)
        assert_true(hasattr(self.clf, 'labels_') and
                    self.clf.labels_ is not None)
        assert_true(hasattr(self.clf, 'threshold_') and
                    self.clf.threshold_ is not None)
        assert_true(hasattr(self.clf, '_mu') and
                    self.clf._mu is not None)
        assert_true(hasattr(self.clf, '_sigma') and
                    self.clf._sigma is not None)
        assert_true(hasattr(self.clf, 'n_neighbors_') and
                    self.clf.n_neighbors_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        # Fixed: removed a duplicated leftover debug print of pred_ranks.
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
class TestLOF(unittest.TestCase):
    """Tests for the PyOD LOF detector, including confidence-aware
    prediction and model cloning."""

    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8  # minimum acceptable ROC-AUC on synthetic data
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = LOF(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        # Every attribute produced by fit() must exist and be non-None.
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and
                self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and
                self.clf._sigma is not None)
        assert (hasattr(self.clf, 'n_neighbors_') and
                self.clf.n_neighbors_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_prediction_labels_confidence(self):
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)
        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_prediction_proba_linear_confidence(self):
        pred_proba, confidence = self.clf.predict_proba(
            self.X_test, method='linear', return_confidence=True)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)
        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def test_model_clone(self):
        # Fixed: the clone was created but never validated. A clone must
        # carry identical hyper-parameters while being a distinct object.
        clone_clf = clone(self.clf)
        assert clone_clf is not self.clf
        assert clone_clf.get_params() == self.clf.get_params()

    def tearDown(self):
        pass
train_test_split(X, y, test_size=0.4, random_state=42) contamination = y.sum() / len(y) base_estimators = get_estimators_small(contamination) model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True, contamination=contamination, approx_flag_global=True) model.fit(X_train) # fit all models with X model.approximate(X_train) # conduct model approximation if it is enabled predicted_labels = model.predict(X_test) # predict labels predicted_scores = model.decision_function(X_test) # predict scores predicted_probs = model.predict_proba(X_test) # predict scores ########################################################################### # compared with other approaches evaluate_print('majority vote', y_test, majority_vote(predicted_labels)) evaluate_print('average', y_test, average(predicted_scores)) evaluate_print('maximization', y_test, maximization(predicted_scores)) clf = LOF() clf.fit(X_train) evaluate_print('LOF', y_test, clf.decision_function(X_test)) clf = IForest() clf.fit(X_train) evaluate_print('IForest', y_test, clf.decision_function(X_test))
class SolverAECIFAR():
    """Runner that fits a PyOD LOF detector on VGG features of CIFAR-10
    and writes the resulting AUC to disk.

    Despite the class name (AE = autoencoder) and the unused
    ``learning_rate`` / ``hidden_dim`` / ``max_epochs`` settings, the only
    model actually trained here is LOF -- presumably this class was adapted
    from an autoencoder solver.
    """

    def __init__(self, data_name, hidden_dim=256, seed=0, learning_rate=3e-4,
                 normal_class=0, anomaly_ratio=0.1, batch_size=128,
                 concentrated=0, training_ratio=0.8, SN=1, Trim=1, L=1.5,
                 max_epochs=100):
        # Seed every RNG so the dataset split (and any GPU work) is repeatable.
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.L = L
        # NOTE(review): if concentrated is neither exactly 1.0 nor 0.0,
        # full_data_name is never bound and the .format() below raises
        # UnboundLocalError -- confirm callers only pass 0 or 1.
        if concentrated == 1.0:
            full_data_name = 'CIFAR10_Concentrated'
        elif concentrated == 0.0:
            full_data_name = 'CIFAR10'
        self.result_path = "./results/{}_{}_{}/0.0/LOF/{}/".format(
            full_data_name, normal_class, anomaly_ratio, seed
        )
        data_path = "./data/" + data_name + ".npy"
        self.learning_rate = learning_rate
        self.SN = SN
        self.Trim = Trim
        # self.dataset = RealGraphDataset(data_path, missing_ratio=0, radius=2)
        self.dataset = CIFARVGGDataset(data_path,
                                       normal_class=normal_class,
                                       anomaly_ratio=anomaly_ratio,
                                       concentrated=concentrated)
        self.seed = seed
        self.hidden_dim = hidden_dim
        self.max_epochs = max_epochs
        self.data_path = data_path
        self.data_anomaly_ratio = self.dataset.__anomalyratio__()
        self.batch_size = batch_size
        self.input_dim = self.dataset.__dim__()
        # Fraction of normal samples (note: attribute name keeps the
        # original "normaly" spelling for compatibility).
        self.data_normaly_ratio = 1 - self.data_anomaly_ratio
        n_sample = self.dataset.__len__()
        self.n_train = int(n_sample * training_ratio)
        self.n_test = n_sample - self.n_train
        print('|data dimension: {}|data noise ratio:{}'.format(
            self.dataset.__dim__(), self.data_anomaly_ratio))
        # Random train/test split; both Subsets share the same underlying
        # dataset object.
        self.training_data, self.testing_data = data.random_split(
            dataset=self.dataset, lengths=[
                self.n_train,
                self.n_test
            ])
        self.ae = None
        self.discriminator = None
        self.model = None

    def train(self):
        """Fit an LOF detector with default hyper-parameters.

        NOTE(review): ``self.training_data.dataset`` is the FULL dataset,
        not the training subset, so the model is fit on all samples --
        confirm whether this train/test overlap is intentional.
        """
        self.model = LOF()
        self.model.fit(self.training_data.dataset.x)

    def test(self):
        """Score the data, print the ROC-AUC and persist it to disk.

        NOTE(review): like train(), this scores the FULL dataset via
        ``self.testing_data.dataset`` rather than the held-out subset.
        """
        y_test_scores = self.model.decision_function(
            self.testing_data.dataset.x)
        auc = roc_auc_score(self.testing_data.dataset.y, y_test_scores)
        from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score
        print("AUC:{:0.4f}".format(auc))

        os.makedirs(self.result_path, exist_ok=True)
        # All metric slots are filled with the AUC value on purpose, so
        # the result file shares its schema with the other solvers.
        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": auc,
                "precision": auc,
                "recall": auc,
                "f1": auc,
                "auc": auc,
            },
        )  # for consistency
        print("result save to {}".format(self.result_path))
class TestLOF(unittest.TestCase):
    """Tests for the PyOD LOF detector (evaluation-helper variant)."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6  # minimum acceptable ROC-AUC on synthetic data
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination)

        self.clf = LOF(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        # The detector must satisfy the scikit-learn estimator contract.
        check_estimator(self.clf)

    def test_parameters(self):
        # Fixed: the original called self.assertRaises(AttributeError, '<msg>')
        # with a string where a callable is expected, so a missing attribute
        # could never be reported as a clean failure; it also checked
        # hasattr(..., 'n_neighbors') while reading 'n_neighbors_'. Assert
        # the fitted attributes directly instead.
        self.assertIsNotNone(getattr(self.clf, 'decision_scores_', None),
                             'decision_scores_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'labels_', None),
                             'labels_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'threshold_', None),
                             'threshold_ is not set')
        self.assertIsNotNone(
            getattr(self.clf, 'negative_outlier_factor_', None),
            'negative_outlier_factor_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'n_neighbors_', None),
                             'n_neighbors_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
dataset, subdataset, "all", ) x_train = data_dict["train"] x_test = data_dict["test"] x_test_labels = data_dict["test_labels"] # data preprocessing for MSCRED start = time.time() od = LOF(n_neighbors=n_neighbors, leaf_size=leaf_size, p=p) od.fit(x_train) # get outlier scores anomaly_score = od.decision_function(x_test) anomaly_label = x_test_labels end = time.time() time = end - start # Make evaluation evaluate_all(anomaly_score, anomaly_label) salience = compute_salience(anomaly_score, anomaly_label) print('time') print(' ', time) print('salience') print(' ', salience)
# Build a synthetic two-feature benchmark with a known outlier fraction.
X_train, y_train, X_test, y_test = generate_data(
    n_train=n_train, n_test=n_test, n_features=2,
    contamination=contamination, random_state=42)

# Fit an LOF detector with default hyper-parameters on the training split.
clf_name = 'LOF'
clf = LOF()
clf.fit(X_train)

# Training-split results are exposed as fitted attributes:
# binary labels (0 = inlier, 1 = outlier) and raw outlier scores.
y_train_pred, y_train_scores = clf.labels_, clf.decision_scores_

# Score the held-out split: hard labels plus raw outlier scores.
y_test_pred = clf.predict(X_test)
y_test_scores = clf.decision_function(X_test)

# Print the evaluation summary for both splits.
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)

# Plot ground truth against predictions for both splits.
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
          y_test_pred, show_figure=True, save_figure=False)
class TestLOF(unittest.TestCase):
    """Tests for the PyOD LOF detector (scoring and rank-prediction
    variant)."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6  # minimum acceptable ROC-AUC on synthetic data
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = LOF(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        # The detector must satisfy the scikit-learn estimator contract.
        check_estimator(self.clf)

    def test_parameters(self):
        # Fixed: the original called self.assertRaises(AttributeError, '<msg>')
        # with a string where a callable is expected, so a missing attribute
        # could never be reported as a clean failure; it also checked
        # hasattr(..., 'n_neighbors') while reading 'n_neighbors_'. Assert
        # the fitted attributes directly instead.
        self.assertIsNotNone(getattr(self.clf, 'decision_scores_', None),
                             'decision_scores_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'labels_', None),
                             'labels_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'threshold_', None),
                             'threshold_ is not set')
        self.assertIsNotNone(
            getattr(self.clf, 'negative_outlier_factor_', None),
            'negative_outlier_factor_ is not set')
        self.assertIsNotNone(getattr(self.clf, 'n_neighbors_', None),
                             'n_neighbors_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the rank order matches the score order
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
clf_cblof = CBLOF() # clf_lscp = LSCP([clf_knn, clf_pca, clf_mcd ]) # clf_ae = AutoEncoder(epochs=50) clf_mcd.fit(encodings_train) clf_pca.fit(encodings_train) clf_knn.fit(encodings_train) clf_lof.fit(encodings_train) clf_cblof.fit(encodings_train) # clf_lscp.fit(encodings_train) # clf_ae.fit(encodings_train) anomaly_scores_mcd = clf_mcd.decision_function(encodings_train) anomaly_scores_pca = clf_pca.decision_function(encodings_train) anomaly_scores_knn = clf_knn.decision_function(encodings_train) anomaly_scores_lof = clf_lof.decision_function(encodings_train) anomaly_scores_cblof = clf_cblof.decision_function(encodings_train) # anomaly_scores_lscp = clf_lscp.decision_function(encodings_train) # anomaly_scores_ae = clf_ae.predict_proba(encodings_train) # y_test_scores = [] # for x,_ in test_loader: # encodings_test = encoder(torch.Tensor(x).to(device)) # probs = clf.predict_proba(encodings_test.detach().cpu().numpy()) # y_test_scores.extend(probs[:,0]) # y_test_scores = np.array(y_test_scores) y_ind_1 = np.argwhere(y_window.reshape(-1, ) == 1) y_ind_3 = np.argwhere(y_window.reshape(-1, ) == 3) for i, anomaly_scores in enumerate([