def test_moa_static_n_buckets(self):
    # A static split of self.scores into 5 buckets is expected to be
    # rejected with ValueError (presumably because the number of score
    # columns is not divisible by 5 -- confirm against the fixture).
    options = dict(method='static',
                   bootstrap_estimators=False,
                   random_state=42)
    with assert_raises(ValueError):
        moa(self.scores, 5, **options)
def test_moa_dynamic_repeat(self):
    # Dynamic bucketing with bootstrapped estimators; only the shape of
    # the combined score vector is checked here.
    expected_shape = (4,)
    options = {
        'method': 'dynamic',
        'bootstrap_estimators': True,
        'random_state': 42,
    }
    combined = moa(self.scores, 3, **options)
    assert_equal(combined.shape, expected_shape)
def test_moa_dynamic_repeat(self):
    # Dynamic bucketing where estimators may be drawn with replacement.
    # The keyword is `bootstrap_estimators`, matching every other moa()
    # call in this file; the previous `replace=True` spelling is not a
    # keyword those calls use.
    # NOTE(review): confirm the installed PyOD version exposes
    # `bootstrap_estimators` rather than the older `replace`.
    score = moa(self.scores, 3, method='dynamic',
                bootstrap_estimators=True, random_state=42)
    # Only the shape of the combined score vector is checked.
    assert_equal(score.shape, (4,))
def define_combination_methods(normalizer_name, k, norm_results):
    """Combine normalized scores with the four combination functions.

    The returned dict maps a label of the form
    ``'<normalizer_name> <Method>_<k>'`` to the output of ``average``,
    ``maximization``, ``aom`` and ``moa`` applied to ``norm_results``.
    ``aom`` and ``moa`` additionally receive ``int(k / 2)`` as their
    second positional argument (presumably the bucket count -- confirm
    against their signatures).
    """

    def label(method_name):
        # Same text as: normalizer_name + ' <Method>_' + str(k)
        return normalizer_name + ' ' + method_name + '_' + str(k)

    combination_methods = {}
    # Calls stay in the original order (average, maximization, aom, moa).
    combination_methods[label('Average')] = average(norm_results)
    combination_methods[label('Maximization')] = maximization(norm_results)
    half_k = int(k / 2)
    combination_methods[label('Aom')] = aom(norm_results, half_k)
    combination_methods[label('Moa')] = moa(norm_results, half_k)
    return combination_methods
def majority_get():
    """Combine the posted score matrix with ``moa`` and return it as JSON.

    Request JSON fields:
        "Data": nested list of scores; it is transposed before being
            handed to ``moa``.
        "n_buckets" (optional): bucket count forwarded to ``moa``;
            defaults to 1 when absent.

    Returns:
        On success, a JSON body ``{"data": [...], "message": "OK"}``.
        On any failure, a JSON body ``{"message": <error text>}`` together
        with HTTP status 400.
    """
    try:
        payload = request.json
        array = np.array(payload["Data"])

        n_buckets = 1
        if "n_buckets" in payload:
            # Read the field that was actually checked for. The previous
            # code read payload['Weights'] and wrapped it in an array,
            # which is not a usable bucket count.
            n_buckets = int(payload["n_buckets"])

        result = moa(np.transpose(array), n_buckets=n_buckets)
        return jsonify({"data": result.tolist(), "message": "OK"})
    except Exception as e:
        # Endpoint boundary: report every failure to the client as a 400.
        return jsonify({"message": str(e)}), 400
def _combine(self, scores):
    """
    Delegate score combination to PyOD's ``moa`` ensembler.

    The bucket count, bucketing method and bootstrap flag are taken from
    this instance's ``n_buckets``, ``method`` and ``bootstrap_estimators``
    attributes.

    Args:
        scores: np.float array of shape (num_anomaly_detectors, )
            List of scores from multiple anomaly detectors.

    Returns:
        float: Resulting anomaly score.
    """
    moa_options = {
        'n_buckets': self.n_buckets,
        'method': self.method,
        'bootstrap_estimators': self.bootstrap_estimators,
    }
    return moa(scores, **moa_options)
def test_moa_static_norepeat(self):
    # Static bucketing without bootstrapping must match a hand-computed
    # "max of bucket averages" built from the same shuffled column order.
    combined = moa(self.scores, 3, method='static',
                   bootstrap_estimators=False, random_state=42)
    assert_equal(combined.shape, (4,))

    # Same seed as the moa() call above; 6 column indices, 2 per bucket.
    column_order = shuffle(list(range(6)), random_state=42)

    bucket_means = np.zeros([4, 3])
    for bucket in range(3):
        members = column_order[2 * bucket:2 * bucket + 2]
        bucket_means[:, bucket] = np.mean(self.scores[:, members], axis=1)

    expected = np.max(bucket_means, axis=1)
    assert_array_equal(combined, expected)
def test_moa_static_norepeat(self):
    # Compare moa's static, non-bootstrapped output with a manual
    # reconstruction: average each pair of shuffled columns, then take
    # the row-wise maximum over the three bucket averages.
    result = moa(self.scores, 3, method='static',
                 bootstrap_estimators=False, random_state=42)
    assert_equal(result.shape, (4,))

    permuted = shuffle(list(range(0, 6, 1)), random_state=42)
    per_bucket = np.zeros([4, 3])
    # (start, stop) slice bounds of each bucket inside `permuted`.
    for col, (start, stop) in enumerate([(0, 2), (2, 4), (4, 6)]):
        per_bucket[:, col] = np.mean(
            self.scores[:, permuted[start:stop]], axis=1)

    by_hand = np.max(per_bucket, axis=1)
    assert_array_equal(result, by_hand)
roc_max.append(roc_auc_score(y_test, comb_by_max)) prn_max.append(precision_n_scores(y_test, comb_by_max)) print('ite', t + 1, 'comb by max,', 'ROC:', roc_auc_score(y_test, comb_by_max), 'precision@n:', precision_n_scores(y_test, comb_by_max)) # combination by aom comb_by_aom = aom(test_scores_norm, 5, 20) roc_aom.append(roc_auc_score(y_test, comb_by_aom)) prn_aom.append(precision_n_scores(y_test, comb_by_aom)) print('ite', t + 1, 'comb by aom,', 'ROC:', roc_auc_score(y_test, comb_by_aom), 'precision@n:', precision_n_scores(y_test, comb_by_aom)) # combination by moa comb_by_moa = moa(test_scores_norm, 5, 20) roc_moa.append(roc_auc_score(y_test, comb_by_moa)) prn_moa.append(precision_n_scores(y_test, comb_by_moa)) print('ite', t + 1, 'comb by moa,', 'ROC:', roc_auc_score(y_test, comb_by_moa), 'precision@n:', precision_n_scores(y_test, comb_by_moa)) print() ########################################################################## print('summary of {ite} iterations'.format(ite=ite)) print('comb by mean, ROC: {roc}, precision@n: {prn}'.format( roc=np.mean(roc_mean), prn=np.mean(prn_mean))) print('comb by max, ROC: {roc}, precision@n: {prn}'.format( roc=np.mean(roc_max), prn=np.mean(prn_max))) print('comb by aom, ROC: {roc}, precision@n: {prn}'.format(
# Score matrices with X_train.shape[0] / X_test.shape[0] rows and one
# column per detector (n_clf columns).
train_scores = np.zeros([X_train.shape[0], n_clf])
test_scores = np.zeros([X_test.shape[0], n_clf])

# Fit one kNN detector per neighbour count in k_list and store its
# training and test scores in column i.
for i in range(n_clf):
    k = k_list[i]

    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination.
train_scores_norm, test_scores_norm = standardizer(train_scores,
                                                   test_scores)

# Combination by average.
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max.
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by AOM, using 5 buckets.
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by MOA, using 5 buckets.
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)
prn_df.to_csv('results/final_prn.csv', index=False)

# Export results for comparison between the algorithms and the proposed
# technique. The column list is shared by the ROC and precision@n tables:
# the first 10 entries are the individual detectors, the last one is the
# 'Lasso Moa_6' combination.
comparison_cols = [
    'Angle-based Outlier Detector (ABOD)',
    'Cluster-based Local Outlier Factor',
    'Feature Bagging',
    'Histogram-base Outlier Detection (HBOS)',
    'Isolation Forest',
    'K Nearest Neighbors (KNN)',
    'Local Outlier Factor (LOF)',
    'Minimum Covariance Determinant (MCD)',
    'One-class SVM (OCSVM)',
    'Principal Component Analysis (PCA)',
    'Lasso Moa_6',
]

# .copy() so that adding the 'Moa' column below does not write into a
# slice of roc_df / prn_df.
roc_comparison_df = roc_df[comparison_cols].copy()
prn_comparison_df = prn_df[comparison_cols].copy()

# Combine only the first 10 columns (everything except 'Lasso Moa_6')
# with moa, passing 5 as its second positional argument.
roc_comparison_df['Moa'] = moa(roc_comparison_df.iloc[:, :10], 5)
prn_comparison_df['Moa'] = moa(prn_comparison_df.iloc[:, :10], 5)

roc_comparison_df.to_csv('results/roc_comparison.csv', index=False)
prn_comparison_df.to_csv('results/prn_comparison.csv', index=False)

# Export results for comparison between different choices of feature
# selection: every column whose name contains 'Moa'.
roc_fs_cols = [col for col in roc_df.columns if 'Moa' in col]
prn_fs_cols = [col for col in prn_df.columns if 'Moa' in col]

roc_comparison_2_df = roc_df[roc_fs_cols]
# Take the precision@n columns from prn_df. The previous code selected
# them from roc_df, so prn_comparison_2.csv contained ROC values.
prn_comparison_2_df = prn_df[prn_fs_cols]

roc_comparison_2_df.to_csv('results/roc_comparison_2.csv', index=False)
prn_comparison_2_df.to_csv('results/prn_comparison_2.csv', index=False)
print('Combining {n_clf} kNN detectors'.format(n_clf=n_clf))

# Fit one kNN detector per neighbour count in k_list and store its
# training and test scores in column i of the score matrices.
for i in range(n_clf):
    k = k_list[i]

    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination.
train_scores_norm, test_scores_norm = standardizer(train_scores,
                                                   test_scores)

# Combination by average.
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max.
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by AOM, using 5 buckets.
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by MOA, using 5 buckets.
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)
def test_moa_dynamic_repeat(self):
    # Dynamic bucketing with bootstrapped estimators; the assertion only
    # covers the shape of the combined score vector.
    moa_kwargs = dict(method='dynamic',
                      bootstrap_estimators=True,
                      random_state=42)
    result = moa(self.scores, 3, **moa_kwargs)
    assert_equal(result.shape, (4,))
def test_moa_static_n_buckets(self):
    # Requesting 5 static buckets for self.scores must raise ValueError.
    moa_kwargs = {
        'method': 'static',
        'bootstrap_estimators': False,
        'random_state': 42,
    }
    with assert_raises(ValueError):
        moa(self.scores, 5, **moa_kwargs)