class ensemble(abstract_occ_model):
    """One-class model that averages the outputs of a SUOD detector ensemble.

    Three KNN detectors (100, 25 and 5 neighbours) and one IForest are wrapped
    in a single ``SUOD`` instance.  The score of a sample is the mean, taken
    along axis 1, of ``SUOD.predict_proba``.

    :param nu: value passed as ``contamination`` to every base estimator.
    """

    def __init__(self, nu=0.1):
        self.base_estimators = [
            # OCSVM(contamination=nu),
            KNN(n_neighbors=100, contamination=nu),
            KNN(n_neighbors=25, contamination=nu),
            KNN(n_neighbors=5, contamination=nu),
            IForest(contamination=nu),
        ]
        self.model = SUOD(base_estimators=self.base_estimators,
                          rp_flag_global=False,
                          bps_flag=True,
                          approx_flag_global=False)
        # Most recently computed scores, and the input object they belong to.
        self.scores = None
        self._scores_input = None

    def fit(self, X):
        """Fit the SUOD model on ``X`` and run its approximation step.

        Cached scores are dropped because they were produced by the
        previously fitted model.
        """
        self.scores = None
        self._scores_input = None
        self.model.fit(X)
        self.model.approximate(X)

    def predict(self, X):
        """Score ``X`` and threshold the scores at 0.5.

        :return: array holding 1 where the score is >= 0.5 and -1 where it is
            < 0.5.  A NaN score satisfies neither comparison and is passed
            through unchanged.
        """
        self._refresh_scores(X)
        return np.where(self.scores >= 0.5, 1,
                        np.where(self.scores < 0.5, -1, self.scores))

    def score_samples(self, X):
        """Return the ensemble scores for ``X``.

        Scores cached by a previous ``predict``/``score_samples`` call are
        reused only when ``X`` is the very same object that was scored;
        any other input is scored afresh, so stale scores belonging to a
        different data set are never returned.
        """
        cache_is_valid = (isinstance(self.scores, np.ndarray)
                          and X is getattr(self, '_scores_input', None))
        if not cache_is_valid:
            self._refresh_scores(X)
        return self.scores

    def compute_score(self, X):
        """Return the mean along axis 1 of ``self.model.predict_proba(X)``.

        NOTE(review): presumably axis 1 indexes the base estimators, which
        would make this the per-sample average -- confirm against SUOD.
        """
        mean_prob = np.mean(self.model.predict_proba(X), axis=1)
        return mean_prob

    def _refresh_scores(self, X):
        """Recompute the scores for ``X`` and remember which input they match."""
        self.scores = self.compute_score(X)
        # Identity (not equality) is stored: cheap to check, and it keeps a
        # reference to X alive until the next scoring call or refit.
        self._scores_input = X
train_test_split(X, y, test_size=0.4, random_state=42)
# NOTE(review): the result of the split above is not bound to any name in
# the visible code, yet X_train, X_test and y_test are used below --
# presumably the assignment target was lost; confirm against the full file.

# Fraction obtained by dividing the sum of y by its length; presumably the
# outlier ratio, assuming y holds 0/1 labels -- TODO confirm.
contamination = y.sum() / len(y)
base_estimators = get_estimators_small(contamination)
model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
             contamination=contamination, approx_flag_global=True)

model.fit(X_train)  # fit all models with X_train
model.approximate(X_train)  # conduct model approximation if it is enabled

predicted_labels = model.predict(X_test)  # predict labels
predicted_scores = model.decision_function(X_test)  # predict scores
# predict probabilities (not referenced again in the visible code)
predicted_probs = model.predict_proba(X_test)

###########################################################################
# Compare combinations of the SUOD outputs with two stand-alone detectors.
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))

# Stand-alone LOF with default arguments, fitted on the same training data.
clf = LOF()
clf.fit(X_train)
evaluate_print('LOF', y_test, clf.decision_function(X_test))

# Stand-alone IForest with default arguments, fitted on the same training data.
clf = IForest()
clf.fit(X_train)
evaluate_print('IForest', y_test, clf.decision_function(X_test))
class TestBASE(unittest.TestCase):
    """Smoke tests that exercise the public methods of a SUOD model."""

    def setUp(self):
        """Generate data and build the SUOD model used by every test."""
        self.n_train = 1000
        self.n_test = 500
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.random_state = 42

        data = generate_data(n_train=self.n_train,
                             n_test=self.n_test,
                             contamination=self.contamination,
                             random_state=self.random_state)
        self.X_train, self.y_train, self.X_test, self.y_test = data

        # Five LOF detectors with increasing neighbourhood size, followed
        # by HBOS, PCA and an LSCP built on two further LOF detectors.
        lof_detectors = [
            LOF(n_neighbors=k, contamination=self.contamination)
            for k in (5, 15, 25, 35, 45)
        ]
        self.base_estimators = lof_detectors + [
            HBOS(contamination=self.contamination),
            PCA(contamination=self.contamination),
            LSCP(
                detector_list=[
                    LOF(n_neighbors=5, contamination=self.contamination),
                    LOF(n_neighbors=15, contamination=self.contamination),
                ],
                random_state=self.random_state,
            ),
        ]

        # The cost-forecast files are looked up next to this test module.
        this_directory = os.path.abspath(os.path.dirname(__file__))
        self.cost_forecast_loc_fit_ = os.path.join(
            this_directory, 'bps_train.joblib')
        self.cost_forecast_loc_pred_ = os.path.join(
            this_directory, 'bps_prediction.joblib')

        self.model = SUOD(
            base_estimators=self.base_estimators,
            n_jobs=2,
            rp_flag_global=True,
            bps_flag=True,
            contamination=self.contamination,
            approx_flag_global=True,
            cost_forecast_loc_fit=self.cost_forecast_loc_fit_,
            cost_forecast_loc_pred=self.cost_forecast_loc_pred_,
        )

    def _fit_and_approximate(self):
        """Fit the model on the training data, then run approximation."""
        self.model.fit(self.X_train)
        self.model.approximate(self.X_train)

    def test_initialization(self):
        """Parameters can be read back and updated."""
        self.model.get_params()
        self.model.set_params(n_jobs=4)

    def test_fit(self):
        """
        Test that fitting on the training data runs
        :return:
        """
        self.model.fit(self.X_train)

    def test_approximate(self):
        """Approximation runs after fitting."""
        self._fit_and_approximate()

    def test_predict(self):
        """Label prediction runs on the test data."""
        self._fit_and_approximate()
        self.model.predict(self.X_test)

    def test_decision_function(self):
        """Score computation runs on the test data."""
        self._fit_and_approximate()
        self.model.decision_function(self.X_test)

    def test_predict_proba(self):
        """Probability prediction runs on the test data."""
        self._fit_and_approximate()
        self.model.predict_proba(self.X_test)