def setUp(self):
    """Populate the instance with train/test data from ``generate_data``.

    Requests 100 training points and 50 test points with a contamination
    of 0.05.  ``generate_data`` is unpacked into six values here; the
    third and sixth are discarded, the other four are stored on the
    instance as ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    """
    generated = generate_data(n_train=100, n_test=50, contamination=0.05)

    # Exactly six values are expected; the two placeholders are not kept.
    train_x, train_y, _, test_x, test_y, _ = generated

    self.X_train = train_x
    self.y_train = train_y
    self.X_test = test_x
    self.y_test = test_y
# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to import sys and sys.path.append("..")
import sys

sys.path.append("..")

from pyod.models.knn import Knn
from pyod.models.combination import aom, moa
from pyod.utils.load_data import generate_data
from pyod.utils.utility import precision_n_scores
from pyod.utils.utility import standardizer

# NOTE(review): ``train_test_split`` is called below but is not imported in
# the visible part of this script -- presumably it comes from scikit-learn
# earlier in the file; confirm before running.

if __name__ == "__main__":
    n_clf = 20  # number of base detectors
    ite = 10  # number of iterations

    # load data (only a training set is requested)
    X, y, _ = generate_data(contamination=0.05, train_only=True)

    # lists for storing roc information, one list per combination method
    roc_mean = []
    roc_max = []
    roc_aom = []
    roc_moa = []

    # lists with the same four suffixes for the "prn" results
    # (presumably precision@n, given the precision_n_scores import -- confirm)
    prn_mean = []
    prn_max = []
    prn_aom = []
    prn_moa = []

    for t in range(ite):
        # fresh split on every iteration: 40% of the samples go to the test set
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4)
# temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to import sys and sys.path.append("..") import sys sys.path.append("..") from pyod.models.knn import Knn from pyod.utils.load_data import generate_data from pyod.utils.utility import precision_n_scores if __name__ == "__main__": contamination = 0.1 # percentage of outliers n_train = 1000 # number of training points n_test = 500 # number of testing points X_train, y_train, c_train, X_test, y_test, c_test = generate_data( n_train=n_train, n_test=n_test, contamination=contamination) # train a k-NN detector (default parameters, k=10) clf = Knn() clf.fit(X_train) # get the prediction label and scores on the training data y_train_pred = clf.y_pred y_train_score = clf.decision_scores # get the prediction on the test data y_test_pred = clf.predict(X_test) # outlier label (0 or 1) y_test_score = clf.decision_function(X_test) # outlier scores print('Train ROC:{roc}, precision@n:{prn}'.format( roc=roc_auc_score(y_train, y_train_score),