Example #1
0
class ensemble(abstract_occ_model):
    """One-class model that averages the outputs of a SUOD detector ensemble.

    The ensemble consists of three ``KNN`` detectors (100, 25 and 5
    neighbours) and one ``IForest``, all created with the same contamination
    rate and wrapped in a single ``SUOD`` model.

    The most recently computed scores are kept in ``self.scores``.  They are
    reused by :meth:`score_samples` only when it is called with the very same
    input object that produced them, so ``predict(X)`` followed by
    ``score_samples(X)`` costs a single scoring pass, while scoring a
    different input never returns stale values.
    """

    def __init__(self, nu=0.1):
        """Build the base detectors and the SUOD wrapper.

        :param nu: contamination rate passed to every base detector.
        """
        self.base_estimators = [
            # OCSVM(contamination=nu),  # disabled
            KNN(n_neighbors=100, contamination=nu),
            KNN(n_neighbors=25, contamination=nu),
            KNN(n_neighbors=5, contamination=nu),
            IForest(contamination=nu)
        ]
        self.model = SUOD(base_estimators=self.base_estimators,
                          rp_flag_global=False,
                          bps_flag=True,
                          approx_flag_global=False)
        # Last computed scores and the input object they belong to.
        self.scores = None
        self._scored_X = None

    def fit(self, X):
        """Fit the SUOD model on ``X`` and run its approximation step.

        :param X: training samples, forwarded unchanged to SUOD.
        """
        self.model.fit(X)
        self.model.approximate(X)
        # Scores produced by the previous fit no longer describe this model.
        self.scores = None
        self._scored_X = None

    def predict(self, X):
        """Threshold the mean score of ``X`` at 0.5.

        :param X: samples to label.
        :return: array holding ``1`` where the score is ``>= 0.5`` and ``-1``
            where it is ``< 0.5``; a score that satisfies neither comparison
            (i.e. NaN) is passed through unchanged.
        """
        scores = self.compute_score(X)
        self.scores = scores
        self._scored_X = X
        return np.where(scores >= 0.5, 1,
                        np.where(scores < 0.5, -1, scores))

    def score_samples(self, X):
        """Return the mean score of every sample in ``X``.

        Cached scores are reused only if they were computed for this exact
        ``X`` object; any other input is scored afresh.  (The check is by
        identity, so mutating ``X`` in place between calls is not detected.)

        :param X: samples to score.
        :return: the result of :meth:`compute_score` for ``X``.
        """
        cached_for_x = getattr(self, "_scored_X", None) is X
        if cached_for_x and isinstance(self.scores, np.ndarray):
            return self.scores
        self.scores = self.compute_score(X)
        self._scored_X = X
        return self.scores

    def compute_score(self, X):
        """Average ``SUOD.predict_proba(X)`` along axis 1.

        :param X: samples to score.
        :return: ``np.mean`` of the probability matrix over its second axis.
        """
        # NOTE(review): assumes predict_proba returns a 2-D array with one
        # row per sample -- confirm against the installed SUOD version.
        return np.mean(self.model.predict_proba(X), axis=1)
Example #2
0
        train_test_split(X, y, test_size=0.4, random_state=42)

    # Contamination is the sum of y divided by its length.
    # NOTE(review): presumably y is a 0/1 label vector with 1 marking
    # outliers, making this the outlier fraction -- confirm against the loader.
    contamination = y.sum() / len(y)
    base_estimators = get_estimators_small(contamination)

    # Wrap the base estimators in a single SUOD model.
    model = SUOD(base_estimators=base_estimators,
                 n_jobs=6,
                 bps_flag=True,
                 contamination=contamination,
                 approx_flag_global=True)

    model.fit(X_train)  # fit all base models on X_train
    model.approximate(X_train)  # conduct model approximation if it is enabled
    predicted_labels = model.predict(X_test)  # predict labels
    predicted_scores = model.decision_function(X_test)  # predict scores
    predicted_probs = model.predict_proba(X_test)  # predict probabilities (unused in the visible code below)

    ###########################################################################
    # compare the combined SUOD outputs with standalone detectors
    evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
    evaluate_print('average', y_test, average(predicted_scores))
    evaluate_print('maximization', y_test, maximization(predicted_scores))

    # Baseline: a single LOF detector with default parameters.
    clf = LOF()
    clf.fit(X_train)
    evaluate_print('LOF', y_test, clf.decision_function(X_test))

    # Baseline: a single IForest detector with default parameters.
    clf = IForest()
    clf.fit(X_train)
    evaluate_print('IForest', y_test, clf.decision_function(X_test))
Example #3
0
class TestBASE(unittest.TestCase):
    """Smoke tests for SUOD: each test only checks that the call chain runs."""

    def setUp(self):
        """Generate data and build a SUOD model over a mixed detector pool."""
        self.n_train = 1000
        self.n_test = 500
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.random_state = 42

        dataset = generate_data(n_train=self.n_train,
                                n_test=self.n_test,
                                contamination=self.contamination,
                                random_state=self.random_state)
        self.X_train, self.y_train, self.X_test, self.y_test = dataset

        rate = self.contamination
        # Five LOF detectors, then HBOS, PCA and an LSCP over two more LOFs.
        self.base_estimators = [
            *[LOF(n_neighbors=k, contamination=rate)
              for k in (5, 15, 25, 35, 45)],
            HBOS(contamination=rate),
            PCA(contamination=rate),
            LSCP(detector_list=[LOF(n_neighbors=k, contamination=rate)
                                for k in (5, 15)],
                 random_state=self.random_state),
        ]

        # Cost-forecast files are looked up next to this test module.
        here = os.path.abspath(os.path.dirname(__file__))
        self.cost_forecast_loc_fit_ = os.path.join(here, 'bps_train.joblib')
        self.cost_forecast_loc_pred_ = os.path.join(here,
                                                    'bps_prediction.joblib')

        self.model = SUOD(
            base_estimators=self.base_estimators,
            n_jobs=2,
            rp_flag_global=True,
            bps_flag=True,
            contamination=self.contamination,
            approx_flag_global=True,
            cost_forecast_loc_fit=self.cost_forecast_loc_fit_,
            cost_forecast_loc_pred=self.cost_forecast_loc_pred_,
        )

    def _fit_and_approximate(self):
        """Fit the model on the training data, approximate, and return it."""
        suod = self.model
        suod.fit(self.X_train)
        suod.approximate(self.X_train)
        return suod

    def test_initialization(self):
        """Parameters can be read and updated on an unfitted model."""
        self.model.get_params()
        self.model.set_params(n_jobs=4)

    def test_fit(self):
        """
        Fitting on the training data runs without raising.

        :return:
        """
        self.model.fit(self.X_train)

    def test_approximate(self):
        """Approximation runs after fitting."""
        self._fit_and_approximate()

    def test_predict(self):
        """Label prediction on the test data runs after fit + approximate."""
        self._fit_and_approximate().predict(self.X_test)

    def test_decision_function(self):
        """Raw scoring of the test data runs after fit + approximate."""
        self._fit_and_approximate().decision_function(self.X_test)

    def test_predict_proba(self):
        """Probability prediction on the test data runs after fit + approximate."""
        self._fit_and_approximate().predict_proba(self.X_test)