Code example #1
    def test_hbos(self):
        clf = HBOS(contamination=0.05)
        clf.fit(self.X_train)
        assert_equal(len(clf.decision_scores_), self.X_train.shape[0])

        pred_scores = clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        assert_equal(clf.predict(self.X_test).shape[0],
                     self.X_test.shape[0])
        assert_greater(roc_auc_score(self.y_test, pred_scores), 0.5)
Code example #2
File: spike_detection.py  Project: shaggy63/pltrading
def detect_anomaly(df):
    # Replace missing values so the detector can fit on the full series
    df = df.fillna(0)

    # HBOS expects a 2-D array: reshape the quote-asset volume to (n_samples, 1)
    y_values = df.total_traded_quote_asset_volume.values.reshape(-1, 1)

    clf = HBOS()
    clf.fit(y_values)
    df["label_qav"] = clf.predict(y_values)             # 0 = inlier, 1 = outlier
    df["score_qav"] = clf.decision_function(y_values)   # raw outlier scores, optionally .round(6)

    # Percentage change of volume and price for context alongside the scores
    df['change_qav'] = df.total_traded_quote_asset_volume.pct_change(periods=1) * 100
    df['change_price'] = df.last_price.pct_change(periods=1) * 100
    return df
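detect_anomaly above expects a DataFrame with total_traded_quote_asset_volume and last_price columns. Below is a minimal sketch of how it might be exercised; the data is synthetic and invented purely for illustration, and the function plus the HBOS import are assumed to be in scope.

import numpy as np
import pandas as pd

# build a toy volume series with two injected spikes (illustrative data only)
rng = np.random.default_rng(42)
volume = rng.lognormal(mean=10, sigma=0.2, size=500)
volume[[50, 300]] *= 20
df = pd.DataFrame({
    "total_traded_quote_asset_volume": volume,
    "last_price": 100 + rng.normal(0, 1, 500).cumsum(),
})

df = detect_anomaly(df)
# rows flagged as outliers on the quote-asset volume
print(df.loc[df["label_qav"] == 1, ["total_traded_quote_asset_volume", "score_qav"]])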
Code example #3
def train():
    """ Train the predictor on the data collected """
    start_time = time.time()
    device_uuid, date_time = request.args.get('deviceUUID'), request.args.get(
        'datetime')
    data_filename = get_data_filename(device_uuid, date_time)

    with open(data_filename, 'r') as f:
        rows = f.readlines()
    with open(config.awake_filename, 'rb') as f:
        awake_features = pickle.load(f)
    if len(rows) < config.min_train_data_size:
        return jsonify({
            "status": 1,
            "message": "Not enough training data! %d" % len(rows)
        })
    raw = np.zeros((len(rows), 3))
    for i in range(len(rows)):
        raw[i] = [int(val) for val in rows[i].strip().split(',')]
    norm = features.normalize(raw)
    temp_features = features.extract_multi_features(norm,
                                                    step=config.step_size,
                                                    x_len=config.sample_size)
    baseline_features = features.get_baseline_features(temp_features)
    norm_features = features.get_calibrated_features(temp_features,
                                                     baseline_features)
    X = np.concatenate((awake_features, norm_features), axis=0)
    X[:, 1] = np.abs(np.random.normal(0, 0.01, len(X)))
    app.logger.info(
        'Training classifier using %d feature sets, each containing %d features'
        % (X.shape[0], X.shape[1]))
    clf = HBOS(contamination=0.05)
    clf.fit(X)

    model_filename = get_model_filename(device_uuid, date_time)
    with open(model_filename, 'wb') as f:
        pickle.dump(clf, f)

    pred = clf.decision_function(X)
    baseline = {'features': baseline_features, 'hboss_base': np.min(pred)}

    baseline_filename = get_baseline_filename(device_uuid, date_time)
    with open(baseline_filename, 'wb') as f:
        pickle.dump(baseline, f)

    return jsonify({"status": 0, "time": (time.time() - start_time)})
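The endpoint above pickles both the trained HBOS detector and a baseline score for later use. A minimal companion sketch of a prediction step that reloads them is shown below; score_new_features is hypothetical, not part of the original project, and it assumes feature rows with the same dimensionality used at training time.

import pickle
import numpy as np

def score_new_features(model_filename, baseline_filename, new_features):
    # hypothetical helper: reload the pickled detector and baseline saved by train()
    with open(model_filename, 'rb') as f:
        clf = pickle.load(f)
    with open(baseline_filename, 'rb') as f:
        baseline = pickle.load(f)
    # raw HBOS scores, shifted by the minimum training score stored as 'hboss_base'
    scores = clf.decision_function(np.asarray(new_features))
    return scores - baseline['hboss_base']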
Code example #4
    def get_HBOS_scores(dataframe,
                        cols,
                        outliers_fraction=0.01,
                        standardize=True):
        '''Takes a df, a list of selected column names, and
        outliers_fraction (0.01 by default).

        Adds an 'outlier' column (0 = inlier, 1 = outlier) computed with HBOS
        and stores the result in CheckOutliers.df2.
        '''
        if standardize:
            #standardize selected variables
            minmax = MinMaxScaler(feature_range=(0, 1))
            dataframe[cols] = minmax.fit_transform(dataframe[cols])

        #Convert the selected columns to a numpy array in order to incorporate our algorithm
        arrays = []
        for col in cols:
            arrays.append(dataframe[col].values.reshape(-1, 1))
        X = np.concatenate(arrays, axis=1)

        #fit
        clf = HBOS(contamination=outliers_fraction)
        #clf = CBLOF(contamination=outliers_fraction,check_estimator=False, random_state=0)
        clf.fit(X)

        # predict raw anomaly score
        scores_pred = clf.decision_function(X) * -1

        # prediction of a datapoint category outlier or inlier
        y_pred = clf.predict(X)
        n_inliers = len(y_pred) - np.count_nonzero(y_pred)
        n_outliers = np.count_nonzero(y_pred == 1)

        CheckOutliers.df2 = dataframe
        CheckOutliers.df2['outlier'] = y_pred.tolist()

        print('OUTLIERS:', n_outliers, 'INLIERS:', n_inliers,
              'found with HBOS')
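The same scale-then-score pattern can be reproduced outside the class. A minimal standalone sketch follows; the column names and data are invented for illustration.

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pyod.models.hbos import HBOS

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "feature_a": rng.normal(0, 1, 300),
    "feature_b": rng.exponential(1.0, 300),
})
cols = ["feature_a", "feature_b"]

# scale the selected columns to [0, 1] before fitting, as in the snippet above
df[cols] = MinMaxScaler(feature_range=(0, 1)).fit_transform(df[cols])

clf = HBOS(contamination=0.01)
clf.fit(df[cols].values)
df["outlier"] = clf.predict(df[cols].values)  # 0 = inlier, 1 = outlier
print(df["outlier"].value_counts())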
Code example #5
File: test_hbos.py  Project: John-Almardeny/pyod
class TestHBOS(unittest.TestCase):
    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        assert (hasattr(self.clf, 'decision_scores_')
                and self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_')
                and self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and self.clf._sigma is not None)
        assert (hasattr(self.clf, 'hist_') and self.clf.hist_ is not None)
        assert (hasattr(self.clf, 'bin_edges_')
                and self.clf.bin_edges_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def test_model_clone(self):
        clone_clf = clone(self.clf)

    def tearDown(self):
        pass
Code example #6
File: hbos_example.py  Project: zhuyansen/Pyod
    n_test = 500

    X_train, y_train, c_train, X_test, y_test, c_test = generate_data(
        n_train=n_train, n_test=n_test, contamination=contamination)

    # train a HBOS detector (default version)
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction on the training data
    y_train_pred = clf.y_pred
    y_train_score = clf.decision_scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)
    y_test_score = clf.decision_function(X_test)

    print('Train ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_train, y_train_score),
        prn=precision_n_scores(y_train, y_train_score)))

    print('Test ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_test, y_test_score),
        prn=precision_n_scores(y_test, y_test_score)))

    #######################################################################
    # Visualizations
    # initialize the log directory if it does not exist
    pathlib.Path('example_figs').mkdir(parents=True, exist_ok=True)

    # plot the results
Code example #7
File: test_hbos.py  Project: flaviassantos/pyod
class TestHBOS(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        assert_true(hasattr(self.clf, 'decision_scores_') and
                    self.clf.decision_scores_ is not None)
        assert_true(hasattr(self.clf, 'labels_') and
                    self.clf.labels_ is not None)
        assert_true(hasattr(self.clf, 'threshold_') and
                    self.clf.threshold_ is not None)
        assert_true(hasattr(self.clf, '_mu') and
                    self.clf._mu is not None)
        assert_true(hasattr(self.clf, '_sigma') and
                    self.clf._sigma is not None)
        assert_true(hasattr(self.clf, 'hist_') and
                    self.clf.hist_ is not None)
        assert_true(hasattr(self.clf, 'bin_edges_') and
                    self.clf.bin_edges_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
Code example #8
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train HBOS detector
    clf_name = 'HBOS'
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name,
              X_train,
              y_train,
              X_test,
              y_test,
              y_train_pred,
              y_test_pred,
Code example #9
def detect_anomaly(df, type):
    # Default to HBOS; switch to Isolation Forest when requested
    clf = HBOS()
    if type == "forest":
        clf = IForest()

    # Score the closing price (reshaped to a single-feature column)
    y_values = df.close.values.reshape(-1, 1)
    clf.fit(y_values)
    df["label_close"] = clf.predict(y_values)             # 0 = inlier, 1 = outlier
    df["score_close"] = clf.decision_function(y_values)   # .round(6)

    # Score the traded volume with the same detector
    y_values = df.volume.values.reshape(-1, 1)
    clf.fit(y_values)
    df["label_volume"] = clf.predict(y_values)
    df["score_volume"] = clf.decision_function(y_values)  # .round(4)

    # x_values = df.index.values.reshape(df.index.values.shape[0],1)
    # y_values = df.close.values.reshape(df.close.values.shape[0],1)
    # clf = KNN()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_close_knn"] = clf.predict(y_values)
    # df["score_close_knn"] = clf.decision_function(y_values)#.round(6)

    # y_values = df.volume.values.reshape(df.volume.values.shape[0],1)
    # clf = KNN()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_volume_knn"] = clf.predict(y_values)
    # df["score_volume_knn"] = clf.decision_function(y_values)#.round(4)

    # x_values = df.index.values.reshape(df.index.values.shape[0],1)
    # y_values = df.close.values.reshape(df.close.values.shape[0],1)
    # clf = PCA()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_close_pca"] = clf.predict(y_values)
    # df["score_close_pca"] = clf.decision_function(y_values)#.round(6)

    # y_values = df.volume.values.reshape(df.volume.values.shape[0],1)
    # clf = PCA()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_volume_pca"] = clf.predict(y_values)
    # df["score_volume_pca"] = clf.decision_function(y_values)#.round(4)

    # x_values = df.index.values.reshape(df.index.values.shape[0],1)
    # y_values = df.close.values.reshape(df.close.values.shape[0],1)
    # clf = IForest()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_close_iforest"] = clf.predict(y_values)
    # df["score_close_iforest"] = clf.decision_function(y_values)#.round(6)

    # y_values = df.volume.values.reshape(df.volume.values.shape[0],1)
    # clf = IForest()
    # clf.fit(y_values)
    # clf.predict(y_values)
    # df["label_volume_iforest"] = clf.predict(y_values)
    # df["score_volume_iforest"] = clf.decision_function(y_values)#.round(4)

    return df
Code example #10
File: test_hbos.py  Project: deltat99/Pyod
class TestHBOS(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination)

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # each fitted attribute must be present and set after fit()
        if not hasattr(self.clf,
                       'decision_scores_') or self.clf.decision_scores_ is None:
            self.fail('decision_scores_ is not set')
        if not hasattr(self.clf, 'labels_') or self.clf.labels_ is None:
            self.fail('labels_ is not set')
        if not hasattr(self.clf, 'threshold_') or self.clf.threshold_ is None:
            self.fail('threshold_ is not set')
        if not hasattr(self.clf, '_mu') or self.clf._mu is None:
            self.fail('_mu is not set')
        if not hasattr(self.clf, '_sigma') or self.clf._sigma is None:
            self.fail('_sigma is not set')
        if not hasattr(self.clf, 'hist_') or self.clf.hist_ is None:
            self.fail('hist_ is not set')
        if not hasattr(self.clf, 'bin_edges_') or self.clf.bin_edges_ is None:
            self.fail('bin_edges_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
Code example #11
File: hbos_example.py  Project: flaviassantos/pyod
    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train HBOS detector
    clf_name = 'HBOS'
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True, save_figure=False)
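The test files above also exercise predict_proba and the fitted threshold_. A short sketch of those calls, continuing from the detector fitted in the example above (clf and X_test as defined there):

# outlier probabilities in [0, 1]; 'linear' rescales the raw scores,
# 'unify' applies score unification instead
proba_linear = clf.predict_proba(X_test, method='linear')
proba_unify = clf.predict_proba(X_test, method='unify')
print("linear probability range:", proba_linear.min(), proba_linear.max())
print("unified probability range:", proba_unify.min(), proba_unify.max())

# samples whose training score exceeds threshold_ are the ones flagged in labels_
print("decision threshold:", clf.threshold_)
print("outliers flagged on the training set:", int(clf.labels_.sum()))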