def detect_outliers(lst):
    """Detect outliers in a list of numpy arrays.

    Parameters
    ----------
    lst : List
        Samples to screen; each element must support ``.reshape(1, -1)``.

    Returns
    -------
    inlier_idx : List
        Indices into ``lst`` of the inliers.
    outlier_idx : List
        Indices into ``lst`` of the outliers.
    """
    detector = OCSVM(verbose=0)
    detector.fit(lst)

    inlier_idx, outlier_idx = [], []
    for idx, sample in enumerate(lst):
        # OCSVM.predict returns 1 for outliers, 0 for inliers.
        if detector.predict(sample.reshape(1, -1)):
            logger.debug('Found outlier: {0}'.format(idx))
            outlier_idx.append(idx)
        else:
            inlier_idx.append(idx)

    logger.info('{:.0%} are outliers'.format(len(outlier_idx) / len(lst)))
    return inlier_idx, outlier_idx
def train_model(X, Y, contamination, name, from_scratch=True):
    """Train a pyod outlier detector (or reload a saved one) and persist it.

    Parameters
    ----------
    X : array-like
        Training features.
    Y : array-like
        Training labels; only used by the supervised 'xgbod' model.
    contamination : float
        Expected proportion of outliers, forwarded to the detector.
    name : str
        One of 'ocsvm', 'iforest', 'lof', 'knn', 'xgbod'; also used as the
        pickle file stem.
    from_scratch : bool, optional
        When True, fit and save a new model; otherwise load the saved one.

    Returns
    -------
    The fitted (or loaded) detector.

    Raises
    ------
    ValueError
        If ``name`` is not a recognized model name.
    """
    model_dir = './model'
    # exist_ok avoids the race between an exists() check and mkdir().
    os.makedirs(model_dir, exist_ok=True)
    file_name = name + '.pkl'
    if from_scratch:
        # BUG FIX: an unrecognized name previously fell through every branch
        # and crashed with UnboundLocalError at save(); fail fast instead.
        constructors = {
            'ocsvm': OCSVM,
            'iforest': IForest,
            'lof': LOF,
            'knn': KNN,
            'xgbod': XGBOD,
        }
        if name not in constructors:
            raise ValueError('Unknown model name: {}'.format(name))
        model = constructors[name](contamination=contamination)
        if name == 'xgbod':
            model.fit(X, Y)  # XGBOD is semi-supervised and needs labels
        else:
            model.fit(X)
        save(model, model_dir, file_name)
    else:
        model = load(model_dir, file_name)
    return model
def getOutlierOCSVM(dataset):
    '''
    @brief Function that executes OCSVM algorithm on the dataset and obtains
    the labels of the dataset indicating which instance is an inlier (0) or
    outlier (1)
    @param dataset Dataset on which to try the algorithm
    @return It returns a list of labels 0 means inlier, 1 means outlier
    '''
    # Fit the detector; pyod exposes the training labels on labels_.
    detector = OCSVM()
    detector.fit(dataset)
    return detector.labels_
def print_accuracy(train_arr, test_arr, trader_id):
    """Fit one OCSVM per series and print train/test accuracy for a trader.

    Accuracy here is 100 minus the percentage of samples flagged as outliers
    (predict == 1), i.e. the percentage classified as inliers.
    """
    if len(train_arr) == 0 or len(test_arr) == 0:
        return
    for idx, train_series in enumerate(train_arr):
        test_series = test_arr[idx]
        n_train = len(train_series)
        n_test = len(test_series)
        if n_train == 0 or n_test == 0:
            continue
        # Reshape each 1-D series into an (n, 1) column for the detector.
        train_data = np.array([train_series]).T
        test_data = np.array([test_series]).T
        detector = OCSVM()
        detector.fit(train_data)
        train_pred = detector.predict(train_data)
        print("TRAINING ACCURACY for TRADER", trader_id, ":",
              100 - (sum(train_pred) * 100 / n_train))
        test_pred = detector.predict(test_data)
        print("TESTING ACCURACY: ", sum(test_pred) * 100 / n_test)
class TestOCSVM(unittest.TestCase):
    """Unit tests for the pyod OCSVM detector (ROC floor 0.8)."""

    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)
        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_parameters(self):
        # Every fitted attribute must exist and be populated.
        for attr in ('decision_scores_', 'labels_', 'threshold_', '_mu',
                     '_sigma', 'support_', 'support_vectors_', 'dual_coef_',
                     'intercept_'):
            assert hasattr(self.clf, attr) and \
                getattr(self.clf, attr) is not None
        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        scores = self.clf.decision_function(self.X_test)
        # one score per test sample
        assert_equal(scores.shape[0], self.X_test.shape[0])
        # detector must beat the ROC floor
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)
        # rank order must mirror score order
        assert_allclose(rankdata(ranks), rankdata(scores), atol=3.5)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)
        # rank order must mirror score order
        assert_allclose(rankdata(ranks), rankdata(scores), atol=3.5)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        pass
contamination = 0.1  # percentage of outliers
n_train = 200  # number of training points
n_test = 100  # number of testing points

# Generate sample data
X_train, X_test, y_train, y_test = generate_data(
    n_train=n_train,
    n_test=n_test,
    n_features=2,
    contamination=contamination,
    random_state=42)

# train one_class_svm detector
clf_name = 'OneClassSVM'
clf = OCSVM()
clf.fit(X_train)

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
pred_test_set.append(1) # print(len(train_set),len(test_set)) import numpy as np train_set=np.array(train_set) test_set=np.array(test_set) from pyod.models.ocsvm import OCSVM from pyod.models.pca import PCA # from pyod.models.mcd import MCD clf1=PCA(standardization = True,contamination=0.2) # clf1 = MCD(assume_centered = True) clf2=OCSVM(kernel = 'poly',nu = 0.25,degree =2,contamination =0.2) # clf2 = OCSVM(kernel = 'linear',nu =0.02) clf1.fit(train_set) clf2.fit(train_set) y_pred_train_pca=clf1.predict(train_set) y_pred_test_pca=clf1.predict(test_set) y_pred_train_ocsvm=clf2.predict(train_set) y_pred_test_ocsvm=clf2.predict(test_set) print(clf1.explained_variance_) # print(y_pred_test_pca,y_pred_test_ocsvm) train_pca_correct=0 train_ocsvm_correct=0 print("TRAIN SET") for i in range(len(pred_train_set)): # print("Actual:",pred_train_set[i],"PCA",y_pred_train_pca[i],"OCSVM",y_pred_train_ocsvm[i]) if pred_train_set[i]==y_pred_train_pca[i] and pred_train_set[i]==1: train_pca_correct+=1
# NOTE(review): `random_state=42)` closes a call that begins above this chunk
# (presumably the data generation/loading call); confirm in the full file.
        random_state=42)

# load pretrained models
prepare_trained_model()

# recommended models
selected_models = select_model(X_train, n_selection=100)
print("Showing the top recommended models...")
for i, model in enumerate(selected_models):
    print(i, model)
print()

# Spot-check a few recommended models by average precision on the training
# labels; pyod's fit(...).decision_scores_ are the raw outlier scores.
model_1 = LODA(n_bins=5, n_random_cuts=100)
print(
    "1st model Average Precision",
    average_precision_score(y_train, model_1.fit(X_train).decision_scores_))

model_10 = LODA(n_bins=5, n_random_cuts=20)
print(
    "10th model Average Precision",
    average_precision_score(y_train, model_10.fit(X_train).decision_scores_))

model_50 = OCSVM(kernel='sigmoid', nu=0.6)
print(
    "50th model Average Precision",
    average_precision_score(y_train, model_50.fit(X_train).decision_scores_))
class SolverVAECIFAR():
    """OCSVM baseline solver over CIFAR10 VGG features.

    NOTE(review): despite the VAE-style name and hyper-parameters
    (hidden_dim, learning_rate, max_epochs, SN, Trim, L), train() only fits a
    pyod OCSVM — most constructor arguments are stored but never used here;
    presumably kept for interface parity with sibling solvers.
    """

    def __init__(self, data_name, hidden_dim=256, seed=0, learning_rate=3e-4,
                 normal_class=0, anomaly_ratio=0.1, batch_size=128,
                 concentrated=0, training_ratio=0.8, SN=1, Trim=1, L=1.5,
                 max_epochs=100):
        # Fix all RNG seeds for reproducibility.
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.L = L
        # NOTE(review): a `concentrated` value other than 0/1 leaves
        # full_data_name unset and raises NameError two lines below — confirm
        # callers only ever pass 0 or 1.
        if concentrated == 1.0:
            full_data_name = 'CIFAR10_Concentrated'
        elif concentrated == 0.0:
            full_data_name = 'CIFAR10'
        self.result_path = "./results/{}_{}/0.0/OCSVM/{}/".format(
            full_data_name, normal_class, seed)
        data_path = "./data/" + data_name + ".npy"
        self.learning_rate = learning_rate
        self.SN = SN
        self.Trim = Trim
        # self.dataset = RealGraphDataset(data_path, missing_ratio=0, radius=2)
        self.dataset = CIFARVGGDataset(data_path,
                                       normal_class=normal_class,
                                       anomaly_ratio=anomaly_ratio,
                                       concentrated=concentrated)
        self.seed = seed
        self.hidden_dim = hidden_dim
        self.max_epochs = max_epochs
        self.data_path = data_path
        self.data_anomaly_ratio = self.dataset.__anomalyratio__()
        self.batch_size = batch_size
        self.input_dim = self.dataset.__dim__()
        self.data_normaly_ratio = 1 - self.data_anomaly_ratio
        n_sample = self.dataset.__len__()
        # Random train/test split sized by training_ratio.
        self.n_train = int(n_sample * training_ratio)
        self.n_test = n_sample - self.n_train
        print('|data dimension: {}|data noise ratio:{}'.format(
            self.dataset.__dim__(), self.data_anomaly_ratio))
        self.training_data, self.testing_data = data.random_split(
            dataset=self.dataset, lengths=[self.n_train, self.n_test])
        self.ae = None
        self.discriminator = None
        self.model = None

    def train(self):
        """Fit a one-class SVM on the dataset features."""
        self.model = OCSVM()
        # NOTE(review): .dataset.x reaches through the Subset to the FULL
        # dataset's features, not just the random_split training subset —
        # confirm this is intended.
        self.model.fit(self.training_data.dataset.x)

    def test(self):
        """Score the features, compute ROC-AUC, and persist the metrics."""
        y_test_scores = self.model.decision_function(
            self.testing_data.dataset.x)
        auc = roc_auc_score(self.testing_data.dataset.y, y_test_scores)
        from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score
        print("AUC:{:0.4f}".format(auc))
        os.makedirs(self.result_path,
                    exist_ok=True)
        np.save(
            self.result_path + "result.npy",
            {
                # Every slot is filled with AUC purely to keep the result
                # schema consistent with the other solvers' outputs.
                "accuracy": auc,
                "precision": auc,
                "recall": auc,
                "f1": auc,
                "auc": auc,
            },
        )  # for consistency
        print("result save to {}".format(self.result_path))
class TestOCSVM(unittest.TestCase):
    """Unit tests for the pyod OCSVM detector (ROC floor 0.6)."""

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)
        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # Every fitted attribute must exist and be populated.
        for attr in ('decision_scores_', 'labels_', 'threshold_', '_mu',
                     '_sigma', 'support_', 'support_vectors_', 'dual_coef_',
                     'intercept_'):
            assert_true(hasattr(self.clf, attr) and
                        getattr(self.clf, attr) is not None)
        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        scores = self.clf.decision_function(self.X_test)
        # one score per test sample
        assert_equal(scores.shape[0], self.X_test.shape[0])
        # detector must beat the ROC floor
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)
        # rank order must mirror score order
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)
        # rank order must mirror score order
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        pass
class TestOCSVM(unittest.TestCase):
    """Unit tests for the pyod OCSVM detector.

    BUG FIX in test_parameters: the original called
    ``self.assertRaises(AttributeError, '<message string>')`` when a fitted
    attribute was missing. ``assertRaises`` treats its second argument as a
    callable, so that path would raise ``TypeError`` (calling a string)
    instead of failing the test with the message. ``self.fail(msg)`` reports
    the missing attribute cleanly.
    """

    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination)
        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # Every fitted attribute must exist and be populated; fail (not
        # TypeError) with a descriptive message otherwise.
        for attr in ('decision_scores_', 'labels_', 'threshold_', 'support_',
                     'support_vectors_', 'dual_coef_', 'intercept_'):
            if not hasattr(self.clf, attr) or getattr(self.clf, attr) is None:
                self.fail('{0} is not set'.format(attr))
        # only available for linear kernel
        # if not hasattr(self.clf, 'coef_') or self.clf.coef_ is None:
        #     self.assertRaises(AttributeError, 'coef_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores),
                       self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
# clf12.fit(np.array([data_vol,data_price]).T) # print("TRADER",i,true1,count1,true1/count1,true2,count2,true2/count2,true3,count3,true3/count3) data_vol = [] data_price = [] data_vol_price = [] mal_t_stamps1 = [] mal_t_stamps2 = [] mal_t_stamps12 = [] clf1 = OCSVM() clf2 = OCSVM() clf12 = OCSVM() for j in d_transaction[i]: data_vol.append(j[2]) data_price.append(j[1]) data_vol_price.append([j[2], j[1]]) clf1.fit(np.array([data_vol]).T) clf2.fit(np.array([data_price]).T) clf12.fit(np.array(data_vol_price)) for j in d_transaction[i]: p1 = clf1.predict(np.array(j[2]).reshape(1, -1)) p2 = clf2.predict(np.array(j[2]).reshape(1, -1)) p3 = clf12.predict(np.array([j[2], j[1]]).T.reshape(1, -1)) if p1 == 1: mal_t_stamps1.append(j[0]) if p2 == 1: mal_t_stamps2.append(j[0]) if p3 == 1: mal_t_stamps12.append(j[0]) s = set(d_attack[i]) print("TRADER", i, "VOL", len(s & set(mal_t_stamps1)), "OUT OF", len(mal_t_stamps1), "PRICE", len(s & set(mal_t_stamps2)), "OUT OF",
def run_all_models(all_array, labels, pca, data_set_name):
    """Run a battery of pyod outlier detectors and record their scores.

    Parameters
    ----------
    all_array : pandas.DataFrame
        Feature table; must contain a "# img" column with picture names.
    labels : pandas.DataFrame
        Must contain an "in" column of ground-truth labels.
    pca : bool
        If True, additionally fit an IncrementalPCA transform on the
        standardized features. NOTE(review): the transformed array is not
        used for training (matches the original behavior) — confirm intent.
    data_set_name : str
        Tag recorded in each output_table row.

    Side effects: prints per-model progress/scores and appends one row per
    model to the module-level ``output_table``.
    """
    picture_name = all_array.get("# img", 1)
    # was .drop("# img", 1): positional axis arguments were removed in
    # pandas 2.0 — use the explicit columns= keyword.
    all_array = all_array.drop(columns="# img")
    # standardizing data for processing
    all_array = standardizer(all_array)
    y = labels.get("in").to_numpy()
    x_train, x_test, y_train, y_test, picture_train, picture_test = \
        train_test_split(all_array, y, picture_name, test_size=0.4)
    if pca:
        transformer = IncrementalPCA()
        all_array = transformer.fit_transform(all_array)

    def _run_one(display_name, table_name, make_clf):
        # Fit one detector, score both splits, and append a result row.
        # The constructor runs inside the timed region, as in the original.
        print(display_name)
        now = time()
        clf = make_clf()
        clf.fit(x_train)
        test_scores = clf.decision_function(x_test)
        temp = print_score(picture_test, test_scores, y_test)
        train_scores = clf.decision_function(x_train)
        scores_train = print_score(picture_train, train_scores, y_train)
        output_table.append((table_name, all_array.shape, temp,
                             data_set_name, time() - now, scores_train))

    # (printed label, output_table label, constructor) for every detector.
    # This replaces ten copy-pasted fit/score/append stanzas.
    detectors = [
        ("OCSVM", "OCSVM", OCSVM),
        ("Auto-encoder", "Auto-encoder", lambda: AutoEncoder(epochs=30)),
        ("HBOS", "HBOS", HBOS),
        ("SO_GAAL", "SO_GAAL", SO_GAAL),
        ("MO_GAAL", "MO_GAAL", MO_GAAL),
        ("MCD", "MCD", MCD),
        ("SOS", "SOS", SOS),
        ("IForest", "IForest", IForest),  # table name was misspelled "IFrorest"
        ("KNN", "KNN", KNN),
        ("PCA", "PCA", PCA),
    ]
    for display_name, table_name, make_clf in detectors:
        _run_one(display_name, table_name, make_clf)
def main():
    """Run a graph-embedding + outlier-detection baseline.

    Builds the feature matrix per --mode (A: embeddings, X: raw features,
    AX: both concatenated), fits the detector chosen by --ad-method, and
    logs AUC/AP/f1/acc/precision/recall on the validation and test masks.
    """
    parser = argparse.ArgumentParser(description='baseline')
    register_data_args(parser)
    parser.add_argument("--mode", type=str, default='A',
                        choices=['A', 'AX', 'X'],
                        help="dropout probability")
    parser.add_argument("--seed", type=int, default=-1,
                        help="random seed, -1 means dont fix seed")
    parser.add_argument(
        "--emb-method", type=str, default='DeepWalk',
        help="embedding methods: DeepWalk, Node2Vec, LINE, SDNE, Struc2Vec")
    # BUG FIX: constrain --ad-method; an unrecognized value previously left
    # `clf` unbound and crashed with NameError at clf.fit() below.
    parser.add_argument("--ad-method", type=str, default='OCSVM',
                        choices=['PCA', 'OCSVM', 'IF', 'AE'],
                        help="embedding methods: PCA,OCSVM,IF,AE")
    args = parser.parse_args()

    if args.seed != -1:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    logging.basicConfig(
        filename="./log/baseline.log",
        filemode="a",
        format="%(asctime)s-%(name)s-%(levelname)s-%(message)s",
        level=logging.INFO)
    logger = logging.getLogger('baseline')

    datadict = emb_dataloader(args)

    # BUG FIX: dur1 was only assigned on the embedding branch, so --mode X
    # crashed with NameError at the training-time print below.
    dur1 = 0.0
    if args.mode == 'X':
        data = datadict['features']
        #print('X shape',data.shape)
    else:
        t0 = time.time()
        embeddings = embedding(args, datadict)
        dur1 = time.time() - t0
        if args.mode == 'A':
            data = embeddings
            #print('A shape',data.shape)
        if args.mode == 'AX':
            data = np.concatenate((embeddings, datadict['features']), axis=1)
            #print('AX shape',data.shape)
    logger.debug(f'data shape: {data.shape}')

    if args.ad_method == 'OCSVM':
        clf = OCSVM(contamination=0.1)
    if args.ad_method == 'IF':
        clf = IForest(n_estimators=100, contamination=0.1, n_jobs=-1,
                      behaviour="new")
    if args.ad_method == 'PCA':
        clf = PCA(contamination=0.1)
    if args.ad_method == 'AE':
        clf = AutoEncoder(contamination=0.1)

    t1 = time.time()
    clf.fit(data[datadict['train_mask']])
    dur2 = time.time() - t1
    print('training time:', dur1 + dur2)  # typo fix: was 'traininig'

    logger.info('\n')
    logger.info('\n')
    logger.info(
        f'Parameters dataset:{args.dataset} datamode:{args.mode} ad-method:{args.ad_method} emb-method:{args.emb_method}'
    )
    logger.info('-------------Evaluating Validation Results--------------')
    t2 = time.time()
    y_pred_val = clf.predict(data[datadict['val_mask']])
    y_score_val = clf.decision_function(data[datadict['val_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_val,
                                                            y_score_val,
                                                            val=True)
    dur3 = time.time() - t2
    print('infer time:', dur3)
    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )
    logger.info('-------------Evaluating Test Results--------------')
    y_pred_test = clf.predict(data[datadict['test_mask']])
    y_score_test = clf.decision_function(data[datadict['test_mask']])
    auc, ap, f1, acc, precision, recall = baseline_evaluate(datadict,
                                                            y_pred_test,
                                                            y_score_test,
                                                            val=False)
    logger.info(f'AUC:{round(auc,4)},AP:{round(ap,4)}')
    logger.info(
        f'f1:{round(f1,4)},acc:{round(acc,4)},pre:{round(precision,4)},recall:{round(recall,4)}'
    )