def fit(self, X, contamination=0.01):
    """Fit the detector ensemble on ``X`` and score the training data.

    ``X`` is passed through ``standardizer`` and then ``minmaxizer``; the
    scaler objects they return are kept on the instance. Every detector in
    ``self.detectors`` is trained on the scaled data, and the per-detector
    training scores and thresholds are standardized and combined with
    ``average``.

    Args:
        X: pd.DataFrame; its index is reused for ``self.decision_scores``.
        contamination: forwarded to the ``AutoEncoder`` ``contamination``
            argument.

    Sets the attributes ``detectors``, ``data_norm_scalar``,
    ``data_unif_scalar``, ``score_scalar``, ``decision_scores``,
    ``threshold`` and ``label``.
    """
    self.detectors = {
        "auto_encoder": AutoEncoder(
            epochs=256,
            validation_size=0,
            preprocessing=False,
            verbose=0,
            contamination=contamination,
        ),
    }

    # Data pre-processing: standardize first, then min-max scale.
    standardized, self.data_norm_scalar = standardizer(X, keep_scalar=True)
    scaled, self.data_unif_scalar = minmaxizer(standardized, keep_scalar=True)

    # One column per detector for the scores, one cell per detector for
    # the thresholds.
    score_matrix = np.zeros([X.shape[0], len(self.detectors)])
    threshold_row = np.zeros([1, len(self.detectors)])

    # Training
    for col, detector in enumerate(self.detectors.values()):
        detector.fit(scaled)
        score_matrix[:, col] = detector.decision_scores_
        threshold_row[:, col] = detector.threshold_

    # Training-set anomaly scores and threshold. The thresholds go through
    # the same score scaler so they stay comparable with the scores.
    normalized_scores, self.score_scalar = standardizer(
        score_matrix, keep_scalar=True
    )
    normalized_thresholds = self.score_scalar.transform(threshold_row)

    self.decision_scores = pd.DataFrame(
        average(normalized_scores), index=X.index
    )
    self.decision_scores.columns = ["score"]
    self.threshold = average(normalized_thresholds)[0]
    self.label = self.get_label(self.decision_scores)
def _combine(self, scores):
    """Combine detector scores with ``average``.

    Thin wrapper that forwards ``scores`` to ``average`` together with
    ``self.estimator_weights`` as the ``estimator_weights`` keyword.

    Args:
        scores: Scores from multiple anomaly detectors. The previous
            docstring described an np.float array of shape
            (num_anomaly_detectors, ); verify against the caller.

    Returns:
        The resulting anomaly score, exactly as returned by ``average``
        (previously documented as a float; confirm against ``average``).
    """
    weights = self.estimator_weights
    return average(scores, estimator_weights=weights)
def define_combination_methods(normalizer_name, k, norm_results):
    """Run the four score combiners and label each result.

    Args:
        normalizer_name: Prefix for every key in the returned dict.
        k: Appended to every key as ``_<k>``; ``int(k / 2)`` is passed as
            the second argument to ``aom`` and ``moa``.
        norm_results: Scores handed unchanged to ``average``,
            ``maximization``, ``aom`` and ``moa``.

    Returns:
        dict mapping ``'<normalizer_name> <Method>_<k>'`` to the output of
        the corresponding combiner, where Method is Average, Maximization,
        Aom or Moa.
    """
    k_label = str(k)
    half_k = int(k / 2)
    return {
        normalizer_name + ' Average_' + k_label: average(norm_results),
        normalizer_name + ' Maximization_' + k_label: maximization(norm_results),
        normalizer_name + ' Aom_' + k_label: aom(norm_results, half_k),
        normalizer_name + ' Moa_' + k_label: moa(norm_results, half_k),
    }
def majority_get():
    """Average the posted score rows, optionally weighting each row.

    Reads ``request.json['Data']``. Its ``values`` entry is converted to an
    array and transposed before being passed to ``average``; the optional
    ``weights`` entry supplies one weight per row of ``values``. When
    ``weights`` is absent every row gets weight 1.

    Returns:
        A JSON response ``{"data": [...], "message": "OK"}`` on success, or
        a ``({"message": <error text>}, 400)`` pair if anything raises.
    """
    try:
        data = request.json['Data']
        array = np.array(data['values'])
        if "weights" in data:
            weights = np.array([data['weights']])
        else:
            # Equal weights, shaped (1, n_rows) exactly like the
            # user-supplied branch above. The previous default was a flat
            # (n_rows,) vector; assuming `average` is PyOD's combiner, it
            # only accepts estimator_weights of shape (1, n_estimators),
            # so requests without "weights" always failed with a 400.
            weights = np.ones((1, len(array)), dtype=int)
        result = average(np.transpose(array), weights)
        return jsonify({"data": result.tolist(), "message": "OK"})
    except Exception as e:
        # API boundary: report any failure to the client as a 400.
        return jsonify({"message": str(e)}), 400
def decision_function(self, X):
    """Compute raw anomaly scores for ``X`` with the fitted detectors.

    Args:
        X: pd.DataFrame

    Return:
        anomaly_scores: pd.DataFrame with a single ``"score"`` column,
            indexed like ``X``.
    """
    # Data pre-processing: apply the two stored scalers in sequence.
    standardized = self.data_norm_scalar.transform(X)
    scaled = self.data_unif_scalar.transform(standardized)

    # One column of raw scores per detector.
    raw_scores = np.zeros([scaled.shape[0], len(self.detectors)])
    for col, detector in enumerate(self.detectors.values()):
        raw_scores[:, col] = detector.decision_function(scaled)

    # Standardize with the stored score scaler, then combine the columns.
    normalized_scores = self.score_scalar.transform(raw_scores)
    anomaly_scores = pd.DataFrame(average(normalized_scores), index=X.index)
    anomaly_scores.columns = ["score"]
    return anomaly_scores
# Score matrices: one row per row of X_train / X_test, one column per
# detector.
train_scores = np.zeros([X_train.shape[0], n_clf])
test_scores = np.zeros([X_test.shape[0], n_clf])

# Fit n_clf KNN detectors, taking n_neighbors for detector i from k_list[i],
# and store each detector's train and test scores in column i.
for i in range(n_clf):
    k = k_list[i]

    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination.
train_scores_norm, test_scores_norm = standardizer(train_scores, test_scores)

# Each combiner below is applied to the normalized test scores and its
# result is passed to evaluate_print together with y_test.

# Combination by average
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by AOM, with n_buckets=5
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by MOA, with n_buckets=5
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)
def test_weighted_average(self):
    """``average`` with ``self.weights`` must give [1.75, 3.75, 5.75]."""
    combined = average(self.scores, self.weights)
    expected = np.array([1.75, 3.75, 5.75])
    assert_allclose(combined, expected)
def test_average(self):
    """Unweighted ``average`` of ``self.scores`` must give [1.5, 3.5, 5.5]."""
    combined = average(self.scores)
    expected = np.array([1.5, 3.5, 5.5])
    assert_allclose(combined, expected)
print('Combining {n_clf} kNN detectors'.format(n_clf=n_clf))

# Fit n_clf KNN detectors, taking n_neighbors for detector i from k_list[i],
# and store each detector's train and test scores in column i of the score
# matrices (train_scores / test_scores are allocated before this fragment).
for i in range(n_clf):
    k = k_list[i]

    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination.
train_scores_norm, test_scores_norm = standardizer(train_scores, test_scores)

# Each combiner below is applied to the normalized test scores and its
# result is passed to evaluate_print together with y_test.

# Combination by average
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by AOM, with n_buckets=5
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by MOA, with n_buckets=5
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)
def test_weighted_average(self):
    """Check the weighted combination of ``self.scores``.

    ``average`` is called with ``self.weights`` and the result is compared
    against [1.75, 3.75, 5.75] using ``assert_allclose``.
    """
    weighted = average(self.scores, self.weights)
    target = np.array([1.75, 3.75, 5.75])
    assert_allclose(weighted, target)
def test_average(self):
    """Check the unweighted combination of ``self.scores``.

    ``average`` is called without weights and the result is compared
    against [1.5, 3.5, 5.5] using ``assert_allclose``.
    """
    unweighted = average(self.scores)
    target = np.array([1.5, 3.5, 5.5])
    assert_allclose(unweighted, target)