def train():
    """Fit a SUOD outlier-detection ensemble and record the run in MLflow.

    The function loads data through ``get_data``, fits a SUOD model made of
    LOF, PCA and KNN base detectors, collapses the per-detector predictions
    with ``vote``, scores the voted labels against the expected labels using
    ROC AUC, prints the score, and logs both the metric and the fitted model
    to the active MLflow run.
    """
    dataset = get_data(1000, 10, 100)
    contamination = 0.01
    with mlflow.start_run():
        base_estimators = [
            LOF(n_neighbors=5, contamination=contamination),
            LOF(n_neighbors=15, contamination=contamination),
            LOF(n_neighbors=25, contamination=contamination),
            PCA(contamination=contamination),
            KNN(n_neighbors=5, contamination=contamination),
            KNN(n_neighbors=15, contamination=contamination),
            KNN(n_neighbors=25, contamination=contamination),
        ]
        model = SUOD(
            base_estimators=base_estimators,
            n_jobs=6,
            rp_flag_global=True,  # global flag for random projection
            bps_flag=True,  # global flag for balanced parallel scheduling
            approx_flag_global=False,  # global flag for model approximation
            contamination=contamination)
        model.fit(dataset)
        # Conducts model approximation only if it is enabled; it is disabled
        # above via approx_flag_global=False.
        model.approximate(dataset)
        predicted_labels = model.predict(dataset)
        voted_labels = vote(predicted_labels)

        # NOTE(review): assumes get_data(1000, 10, 100) yields 1000 inliers
        # followed by 10 outliers, in that order -- confirm against get_data.
        true_labels = [0] * 1000 + [1] * 10

        # roc_auc_score takes (y_true, y_score): ground truth goes first.
        # The previous call had the two arguments swapped.
        auc_score = roc_auc_score(true_labels, voted_labels)
        print("The resulted area under the ROC curve score is {}".format(auc_score))
        mlflow.log_metric("auc_score", auc_score)
        mlflow.sklearn.log_model(model, "anomaly_model", conda_env="conda.yaml")
Example #2
0
                 bps_flag=True,
                 contamination=contamination,
                 approx_flag_global=True)

    start = time.time()
    model.fit(X_train)  # fit all models with X
    print('Fit time:', time.time() - start)
    print()

    start = time.time()
    model.approximate(X_train)  # conduct model approximation if it is enabled
    print('Approximation time:', time.time() - start)
    print()

    start = time.time()
    predicted_labels = model.predict(X_test)  # predict labels
    print('Predict time:', time.time() - start)
    print()

    start = time.time()
    predicted_scores = model.decision_function(X_test)  # predict scores
    print('Decision Function time:', time.time() - start)
    print()

    ##########################################################################
    # compare with no projection, no bps, and no approximation
    print("******************************************************************")
    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(
        n_estimators, n_jobs)
Example #3
0
        X = np.append(arr=X, values=features_pca, axis=0)
        X_num = X.shape[0]
        base_estimators = [LOF(), IForest(), OCSVM(kernel="rbf", gamma=0.001)]

        model = SUOD(
            base_estimators=base_estimators,
            n_jobs=2,  # number of workers(if -1 it use full core)
            rp_flag_global=True,  # global flag for random projection
            bps_flag=True,  # global flag for balanced parallel scheduling
            approx_flag_global=False,  # global flag for model approximation
            contamination=0.2)

        # X_train, X_test = train_test_split(X, test_size=0, random_state=123)\
        model.fit(X)
        model.approximate(X)
        predicted_labels = model.predict(X)

        sum_labels = np.sum(predicted_labels, axis=1) / 3
        sum_labels = np.where(sum_labels >= 0.5, -1,
                              1)  # -1 abnormal, 1 normal
        result_label = np.average(sum_labels)
        result_label = result_label.tolist()

        # Add outliers
        fig = plt.figure()
        colors = np.array(['r', 'b'])
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(X[:, 0],
                   X[:, 1],
                   X[:, 2],
                   color=colors[(sum_labels + 1) // 2])
Example #4
0
class TestBASE(unittest.TestCase):
    """Smoke tests for a SUOD model built from a mixed pool of detectors.

    Each test only checks that the exercised call completes without raising;
    no assertion is made on the returned values.
    """

    def setUp(self):
        """Create synthetic data, the base detectors and the SUOD model."""
        self.n_train = 1000
        self.n_test = 500
        self.contamination = 0.1
        # Stored on the fixture but not referenced by the tests visible here.
        self.roc_floor = 0.6
        self.random_state = 42

        # NOTE(review): the unpack order below must match the return order of
        # generate_data, which depends on the helper's version -- confirm.
        generated = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=self.random_state)
        self.X_train, self.y_train, self.X_test, self.y_test = generated

        # Five LOF detectors that differ only in neighbourhood size.
        lof_pool = [
            LOF(n_neighbors=k, contamination=self.contamination)
            for k in (5, 15, 25, 35, 45)
        ]
        self.base_estimators = lof_pool + [
            HBOS(contamination=self.contamination),
            PCA(contamination=self.contamination),
            LSCP(
                detector_list=[
                    LOF(n_neighbors=k, contamination=self.contamination)
                    for k in (5, 15)
                ],
                random_state=self.random_state),
        ]

        # Cost-forecast files are looked up next to this test module.
        here = os.path.abspath(os.path.dirname(__file__))
        self.cost_forecast_loc_fit_ = os.path.join(here, 'bps_train.joblib')
        self.cost_forecast_loc_pred_ = os.path.join(
            here, 'bps_prediction.joblib')

        self.model = SUOD(
            base_estimators=self.base_estimators,
            n_jobs=2,
            rp_flag_global=True,
            bps_flag=True,
            approx_flag_global=True,
            contamination=self.contamination,
            cost_forecast_loc_fit=self.cost_forecast_loc_fit_,
            cost_forecast_loc_pred=self.cost_forecast_loc_pred_)

    def _fit_and_approximate(self):
        """Fit the model on the training data, then run approximation."""
        self.model.fit(self.X_train)
        self.model.approximate(self.X_train)

    def test_initialization(self):
        """Parameters can be read back and ``n_jobs`` can be overridden."""
        self.model.get_params()
        self.model.set_params(n_jobs=4)

    def test_fit(self):
        """Fitting on the training data completes without raising."""
        self.model.fit(self.X_train)

    def test_approximate(self):
        """Approximation runs on a fitted model."""
        self._fit_and_approximate()

    def test_predict(self):
        """Label prediction on the test data runs after fit + approximate."""
        self._fit_and_approximate()
        self.model.predict(self.X_test)

    def test_decision_function(self):
        """Score prediction on the test data runs after fit + approximate."""
        self._fit_and_approximate()
        self.model.decision_function(self.X_test)
Example #5
0
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
        IForest(n_estimators=50, contamination=contamination),
        IForest(n_estimators=100, contamination=contamination),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)])
    ]
    
    
    model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True, 
                 contamination=contamination, approx_flag_global=False)

    model.fit(X)  # fit all models with X
    model.approximate(X)  # conduct model approximation if it is enabled
    predicted_labels = model.predict(X)  # predict labels on X; for demo purpose only
    predicted_scores = model.decision_function(X)  # predict scores on X; for demo purpose only

    # %%
    evaluate_print('majority vote', y, majority_vote(predicted_labels))
    evaluate_print('average', y, average(predicted_scores))
    evaluate_print('maximization', y, maximization(predicted_scores))

    clf = LOF()
    clf.fit(X)
    evaluate_print('LOF', y, clf.decision_scores_)

    clf = IForest()
    clf.fit(X)
    evaluate_print('IForest', y, clf.decision_scores_)