Ejemplos de HBOS.predict en Python, ejemplos de pyod.models.hbos.HBOS.predict en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: spike_detection.py Proyecto: shaggy63/pltrading

def detect_anomaly(df):
    """Flag anomalous traded-volume rows with an HBOS detector.

    Missing values are replaced with 0 (on a copy returned by ``fillna``),
    then an HBOS model is fitted on the ``total_traded_quote_asset_volume``
    column and applied back to the same rows.

    :param df: DataFrame with ``total_traded_quote_asset_volume`` and
        ``last_price`` columns
    :return: the NaN-filled DataFrame with four added columns:
        ``label_qav`` (detector label), ``score_qav`` (raw detector score),
        ``change_qav`` and ``change_price`` (row-over-row percentage change
        of volume and price)
    """
    df = df.fillna(0)
    clf = HBOS()

    # The detector is fed the volume as a single-feature column: (n_rows, 1).
    volume = df.total_traded_quote_asset_volume.values.reshape(-1, 1)
    clf.fit(volume)

    # One predict call is enough; the result of a second call was discarded.
    df["label_qav"] = clf.predict(volume)
    df["score_qav"] = clf.decision_function(volume)

    df['change_qav'] = df.total_traded_quote_asset_volume.pct_change(periods=1) * 100
    df['change_price'] = df.last_price.pct_change(periods=1) * 100
    return df

Ejemplo n.º 2

0

Mostrar archivo

    def test_hbos(self):
        """Fit HBOS on the training split and sanity-check its outputs.

        Checks that there is one training score per training row, one test
        score and one test label per test row, and that the test ROC AUC is
        better than chance (0.5).
        """
        detector = HBOS(contamination=0.05)
        detector.fit(self.X_train)
        assert_equal(len(detector.decision_scores), self.X_train.shape[0])

        test_scores = detector.decision_function(self.X_test)
        assert_equal(test_scores.shape[0], self.X_test.shape[0])

        test_labels = detector.predict(self.X_test)
        assert_equal(test_labels.shape[0], self.X_test.shape[0])

        auc_value = roc_auc_score(self.y_test, test_scores)
        assert_greater(auc_value, 0.5)

Ejemplo n.º 3

0

Mostrar archivo

def extract_is_outlier(df: pd.DataFrame,
                       col: str,
                       pbar=None,
                       verbose: bool = True,
                       model=None,
                       outliers_fraction: float = 0.05,
                       replace_with=None) -> pd.DataFrame:
    """
    Add a ``<col>_isoutlier`` flag column computed by an outlier detector.

    The detector is fitted on, and applied to, the finite values of ``col``
    only; rows holding NaN or +/-inf keep the flag 0. The input frame is not
    modified: all work happens on a deep copy.

    :param df: the data
    :param col: name of the column to analyse
    :param pbar: progress bar exposing ``set_description``; when None the
        status messages go through ``print_c`` instead
    :param verbose: forwarded to ``print_c`` when no progress bar is given
    :param model: detector exposing ``fit`` and ``predict``; defaults to
        ``HBOS(contamination=outliers_fraction)``
    :param outliers_fraction: contamination used for the default detector
    :param replace_with: when not None, value written into ``col`` for every
        row flagged as an outlier
    :return: a copy of ``df`` with the extra ``<col>_isoutlier`` column
    """
    df = df.copy(deep=True)
    flag_col = col + '_' + 'isoutlier'

    def _report(msg):
        # Route a status message to the progress bar if there is one.
        if pbar is None:
            print_c(verbose, msg)
        else:
            pbar.set_description(msg)

    _report("Trying to find outliers in " + str(col))

    if model is None:
        model = HBOS(contamination=outliers_fraction)

    X = df[col].astype(np.float32)
    # isfinite is False for NaN, +inf and -inf, i.e. exactly the rows the
    # detector cannot be given.
    mask = np.isfinite(X)
    features = X[mask].to_frame()
    model.fit(features)
    preds = model.predict(features)

    df[flag_col] = 0
    df.loc[mask, flag_col] = preds

    if replace_with is not None:
        _report("Replacing outliers in " + str(col) + " with " + str(
            replace_with))
        df.loc[df[flag_col] == 1, col] = replace_with

    return df

Ejemplo n.º 4

0

Mostrar archivo

    def get_HBOS_scores(dataframe,
                        cols,
                        outliers_fraction=0.01,
                        standardize=True):
        '''Fit HBOS on the selected columns and label every row.

        Args:
            dataframe: DataFrame holding the data. NOTE: it is modified in
                place (scaled columns when ``standardize`` is true, plus a
                new 'outlier' column).
            cols: list of selected column names used as features.
            outliers_fraction: passed to HBOS as ``contamination``
                (default 0.01).
            standardize: when true, min-max scale ``cols`` to [0, 1] first.

        Side effects:
            Stores the labelled frame on ``CheckOutliers.df2`` and prints the
            outlier / inlier counts. The lines visible here do not return a
            value.

        NOTE(review): there is no ``self``/``cls`` parameter; presumably this
        is used as a static method of ``CheckOutliers`` - confirm.
        '''
        if standardize:
            # scale the selected variables into the [0, 1] range
            minmax = MinMaxScaler(feature_range=(0, 1))
            dataframe[cols] = minmax.fit_transform(dataframe[cols])

        # Build the feature matrix: one (n_rows, 1) column per selected
        # variable, concatenated side by side.
        arrays = []
        for row in cols:
            row = dataframe[row].values.reshape(-1, 1)
            arrays.append(row)
        X = np.concatenate((arrays), axis=1)

        # fit the detector
        clf = HBOS(contamination=outliers_fraction)
        #clf = CBLOF(contamination=outliers_fraction,check_estimator=False, random_state=0)
        clf.fit(X)

        # raw anomaly score with its sign flipped; not used in the lines
        # visible here
        scores_pred = clf.decision_function(X) * -1

        # per-row label; zero entries are counted as inliers and entries
        # equal to 1 as outliers
        y_pred = clf.predict(X)
        n_inliers = len(y_pred) - np.count_nonzero(y_pred)
        n_outliers = np.count_nonzero(y_pred == 1)

        # publish the (mutated) input frame, with labels, on the class
        CheckOutliers.df2 = dataframe
        CheckOutliers.df2['outlier'] = y_pred.tolist()

        print('OUTLIERS:', n_outliers, 'INLIERS:', n_inliers,
              'found with HBOS')

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_hbos.py Proyecto: John-Almardeny/pyod

class TestHBOS(unittest.TestCase):
    """Unit tests for the HBOS detector on seeded synthetic data."""

    def setUp(self):
        """Create a seeded train/test split and fit one HBOS detector."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8

        data = generate_data(n_train=self.n_train,
                             n_test=self.n_test,
                             contamination=self.contamination,
                             random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = data

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Every fitted attribute must exist and be non-None."""
        fitted_attrs = ('decision_scores_', 'labels_', 'threshold_', '_mu',
                        '_sigma', 'hist_', 'bin_edges_')
        for attr in fitted_attrs:
            assert getattr(self.clf, attr, None) is not None

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_scores = len(self.clf.decision_scores_)
        assert_equal(n_scores, self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and reach the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # one score per test sample
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # ranking quality must not fall below the configured floor
        auc_value = roc_auc_score(self.y_test, scores)
        assert auc_value >= self.roc_floor

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test)
        assert proba.min() >= 0
        assert proba.max() <= 1

    def test_prediction_proba_linear(self):
        """'linear' probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert proba.min() >= 0
        assert proba.max() <= 1

    def test_prediction_proba_unify(self):
        """'unify' probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert proba.min() >= 0
        assert proba.max() <= 1

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ValueError."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """fit_predict returns one label per training sample."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """fit_predict_score accepts known metrics and rejects others."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for metric in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring=metric)
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        """Ranks follow the score ordering and stay within bounds."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)

        # the ordering of ranks must match the ordering of scores
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score ordering and stay below 1.01."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # the ordering of ranks must match the ordering of scores
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def test_model_clone(self):
        """Cloning the fitted detector must not raise."""
        clone(self.clf)

    def tearDown(self):
        """Nothing to clean up."""
        pass

Ejemplo n.º 6

0

Mostrar archivo

Archivo: hbos_example.py Proyecto: zhuyansen/Pyod

    # sizes of the generated training and test sets
    n_train = 1000
    n_test = 500

    # generate_data is unpacked into six values here; ``contamination`` is
    # defined outside this excerpt
    X_train, y_train, c_train, X_test, y_test, c_test = generate_data(
        n_train=n_train, n_test=n_test, contamination=contamination)

    # train a HBOS detector (default version)
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction on the training data, read from attributes of the
    # fitted detector
    # NOTE(review): these names carry no trailing underscore; presumably an
    # older pyod API - confirm against the installed version
    y_train_pred = clf.y_pred
    y_train_score = clf.decision_scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)
    y_test_score = clf.decision_function(X_test)

    # report ROC AUC and precision@n on the training data
    print('Train ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_train, y_train_score),
        prn=precision_n_scores(y_train, y_train_score)))

    # report the same two metrics on the test data
    print('Test ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_test, y_test_score),
        prn=precision_n_scores(y_test, y_test_score)))

    #######################################################################
    # Visualizations
    # create the 'example_figs' directory if it does not exist
    pathlib.Path('example_figs').mkdir(parents=True, exist_ok=True)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: test_hbos.py Proyecto: flaviassantos/pyod

class TestHBOS(unittest.TestCase):
    """Unit tests for the HBOS detector on seeded synthetic data."""

    def setUp(self):
        """Create a seeded train/test split and fit one HBOS detector."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6

        data = generate_data(n_train=self.n_train,
                             n_test=self.n_test,
                             contamination=self.contamination,
                             random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = data

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run the estimator checks against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every fitted attribute must exist and be non-None."""
        fitted_attrs = ('decision_scores_', 'labels_', 'threshold_', '_mu',
                        '_sigma', 'hist_', 'bin_edges_')
        for attr in fitted_attrs:
            assert_true(getattr(self.clf, attr, None) is not None)

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_scores = len(self.clf.decision_scores_)
        assert_equal(n_scores, self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and exceed the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # one score per test sample
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # ranking quality must be strictly above the configured floor
        auc_value = roc_auc_score(self.y_test, scores)
        assert_greater(auc_value, self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_linear(self):
        """'linear' probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        """'unify' probabilities stay within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ValueError."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """fit_predict returns one label per training sample."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """fit_predict_score accepts known metrics and rejects others."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for metric in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=metric)
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        """Ranks follow the score ordering and stay within bounds."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)

        # the ordering of ranks must match the ordering of scores
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score ordering and stay below 1.01."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # the ordering of ranks must match the ordering of scores
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        """Nothing to clean up."""
        pass

Ejemplo n.º 8

0

Mostrar archivo

                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train HBOS detector
    clf_name = 'HBOS'
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name,
              X_train,
              y_train,
              X_test,
              y_test,
              y_train_pred,

Ejemplo n.º 9

0

Mostrar archivo

def detect_anomaly(df, type):
    """Label anomalies in the ``close`` and ``volume`` columns of ``df``.

    The detector is fitted separately on each column (as a single-feature
    input) and applied back to the same rows. ``df`` is modified in place
    and also returned.

    :param df: DataFrame with ``close`` and ``volume`` columns
    :param type: detector selector; ``"forest"`` uses IForest, any other
        value uses HBOS. (The name shadows the builtin ``type``; it is kept
        because it is part of the call interface.)
    :return: ``df`` with added ``label_close``, ``score_close``,
        ``label_volume`` and ``score_volume`` columns
    """
    # Build only the detector that is actually going to be used.
    clf = IForest() if type == "forest" else HBOS()

    for column in ("close", "volume"):
        # single-feature input: shape (n_rows, 1)
        values = df[column].values.reshape(-1, 1)
        clf.fit(values)
        # One predict call per column; the result of a second call was
        # previously computed and thrown away.
        df["label_" + column] = clf.predict(values)
        df["score_" + column] = clf.decision_function(values)

    return df

Ejemplo n.º 10

0

Mostrar archivo

Archivo: test_hbos.py Proyecto: deltat99/Pyod

class TestHBOS(unittest.TestCase):
    """Unit tests for the HBOS detector on synthetic data."""

    def setUp(self):
        """Generate a train/test split and fit one HBOS detector."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination)

        self.clf = HBOS(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run the estimator checks against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every fitted attribute must exist and be non-None.

        A missing or None attribute is reported as a test failure carrying
        the '<name> is not set' message. (Passing that message to
        ``assertRaises`` as if it were a callable produced an unrelated
        TypeError instead.)
        """
        fitted_attrs = ('decision_scores_', 'labels_', 'threshold_', '_mu',
                        '_sigma', 'hist_', 'bin_edges_')
        for attr in fitted_attrs:
            self.assertIsNotNone(getattr(self.clf, attr, None),
                                 attr + ' is not set')

    def test_train_scores(self):
        """There is one training score per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and exceed the ROC floor."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities stay within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        """'linear' probabilities stay within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        """'unify' probabilities stay within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ValueError."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """fit_predict returns one label per training sample."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        """fit_predict_evaluate runs on the test split without raising."""
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        """Nothing to clean up."""
        pass

Ejemplo n.º 11

0

Mostrar archivo

Archivo: outlierdetection.py Proyecto: mahtabbigverdi/Data-Cleaning-for-image-based-profiling-Enhancement

  # Tag every row with the plate file it came from, then keep only the rows
  # of the selected drugs and only the selected columns.
  data['plate'] = f
  data = data[data['Metadata_broad_sample'].isin(drugs)]
  data = data[data.columns.intersection(selected_cols)]


  # Set the metadata columns aside so that only the remaining columns are
  # passed to the detector.
  b = data['Metadata_broad_sample']
  w = data['Metadata_Well']
  p =  data['plate']
  del data['Metadata_broad_sample']
  del data['Metadata_Well']
  del data['plate']
  
  outliers_fraction = 0.01
  clf = HBOS (contamination= outliers_fraction)
  clf.fit(data)
  y_pred = clf.predict(data)

  # X starts with a fresh 0..n-1 index, while b / w / p still carry the
  # index left over from the row filter above. Assigning the Series directly
  # would align on index labels and mispair rows (or insert NaN), so the
  # metadata is attached positionally via .values.
  X = pd.DataFrame()
  X['outlier'] = y_pred.tolist()
  X['Metadata_broad_sample'] = b.values
  X['Metadata_Well'] = w.values
  X['plate'] = p.values
  X.to_csv('outlier_without_regress/'+f)


  #target = y_pred.tolist()
  #tsne = TSNE(n_components= 2, verbose=1, perplexity=40, n_iter=2000)
  #tsne_results = tsne.fit_transform(data)
  #fig = plt.figure()
  #ax = fig.add_subplot(111, projection='3d')
  #ax.scatter(tsne_results[:,0], tsne_results[:,1],tsne_results[:,2], cmap = "coolwarm", edgecolor = "None" , c = target)
  #plt.scatter(tsne_results[:,0],tsne_results[:,1], c=target,

Ejemplo n.º 12

0

Mostrar archivo

Archivo: hbos_example.py Proyecto: flaviassantos/pyod

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train HBOS detector
    clf_name = 'HBOS'
    clf = HBOS()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True, save_figure=False)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: detection.py Proyecto: wyb9975/anomaly_detection

 # Encode the ground truth as 1 (anomaly) / 0 (nominal).
 # NOTE(review): the column presumably stays object-typed after these
 # assignments; confirm roc_curve accepts it or cast to int.
 df.loc[df['ground.truth'] == 'anomaly', 'ground.truth'] = 1
 df.loc[df['ground.truth'] == 'nominal', 'ground.truth'] = 0
 y = df['ground.truth'].values.reshape(-1)
 # Scale the seven feature columns in place with ``scaler`` (defined outside
 # this excerpt).
 df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']] = scaler.fit_transform(
     df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']])
 # Build the feature matrix: one (n_rows, 1) column per feature, joined
 # side by side.
 x1 = df['V1'].values.reshape(-1, 1)
 x2 = df['V2'].values.reshape(-1, 1)
 x3 = df['V3'].values.reshape(-1, 1)
 x4 = df['V4'].values.reshape(-1, 1)
 x5 = df['V5'].values.reshape(-1, 1)
 x6 = df['V6'].values.reshape(-1, 1)
 x7 = df['V7'].values.reshape(-1, 1)
 x = np.concatenate((x1, x2, x3, x4, x5, x6, x7), axis=1)
 # Fit HBOS and label the same rows it was fitted on.
 hbos = HBOS(contamination=outliers_fraction)
 hbos.fit(x)
 y_pred = hbos.predict(x)
 # NOTE(review): the ROC curve is built from the predicted labels, not from
 # decision_function scores - confirm this is intended.
 fpr, tpr, threshold = roc_curve(y, y_pred)  # compute the false and true positive rates
 roc_auc = auc(fpr, tpr)  # compute the AUC value
 lw = 2
 # ``fig`` and ``i`` come from outside this excerpt; the subplot goes into
 # slot i of a 3x3 grid.
 ax = fig.add_subplot(3, 3, i)
 plt.plot(fpr,
          tpr,
          color='darkorange',
          lw=lw,
          label='ROC curve (area = %0.3f)' % roc_auc)  # curve with FPR on the x-axis and TPR on the y-axis
 # dashed diagonal drawn for reference
 plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
 plt.xlim([0.0, 1.0])
 plt.ylim([0.0, 1.05])
 plt.xlabel('False Positive Rate')
 plt.ylabel('True Positive Rate')
 plt.title('AUC')

Ejemplo n.º 14

0

Mostrar archivo

Archivo: model_pyod.py Proyecto: andrewm4894/ndmldev

def do_pyod(model, colnames, arr_baseline, arr_highlight):
    """Score every entry of the column map with a PyOD detector.

    For each entry a detector is fitted on the baseline rows and applied to
    the highlight rows. The entry's score is the average of the mean
    predicted probability (column 1 of ``predict_proba``) and the mean
    predicted label. If fitting or predicting fails, ``PyODDefaultModel`` is
    used instead.

    :param model: dict of settings; reads ``n_lags`` (default 0),
        ``model_level`` (default ``'dim'``) and ``type`` (default ``'hbos'``)
    :param colnames: column names, passed to ``init_counters`` and
        ``get_col_map``
    :param arr_baseline: 2-D array of baseline data, indexed by column
    :param arr_highlight: 2-D array of highlight data, indexed by column
    :return: dict mapping chart name to a list of
        ``{dimension: {'score': score}}`` entries
    """

    # init some counters
    n_charts, n_dims, n_bad_data, fit_success, fit_default, fit_fail = init_counters(
        colnames)

    # dict to collect results into
    results = {}

    n_lags = model.get('n_lags', 0)
    model_level = model.get('model_level', 'dim')
    model_type = model.get('type', 'hbos')

    # model init
    clf = pyod_init(model_type)

    # get map of cols to loop over
    col_map = get_col_map(colnames, model_level)

    # build each model
    for colname in col_map:

        # keys look like 'chart|dimension'; without a '|' the dimension is '*'
        chart = colname.split('|')[0]
        dimension = colname.split('|')[1] if '|' in colname else '*'
        arr_baseline_dim = arr_baseline[:, col_map[colname]]
        arr_highlight_dim = arr_highlight[:, col_map[colname]]

        # check for bad data (no check is implemented: always False)
        bad_data = False

        # skip if bad data
        if bad_data:

            n_bad_data += 1
            log.info(f'... skipping {colname} due to bad data')

        else:

            if n_lags > 0:
                arr_baseline_dim = add_lags(arr_baseline_dim, n_lags=n_lags)
                arr_highlight_dim = add_lags(arr_highlight_dim, n_lags=n_lags)

            # remove any nan rows
            arr_baseline_dim = arr_baseline_dim[~np.isnan(arr_baseline_dim).
                                                any(axis=1)]
            arr_highlight_dim = arr_highlight_dim[~np.isnan(arr_highlight_dim).
                                                  any(axis=1)]

            log.debug(f'... chart = {chart}')
            log.debug(f'... dimension = {dimension}')
            log.debug(f'... arr_baseline_dim.shape = {arr_baseline_dim.shape}')
            log.debug(
                f'... arr_highlight_dim.shape = {arr_highlight_dim.shape}')
            log.debug(f'... arr_baseline_dim = {arr_baseline_dim}')
            log.debug(f'... arr_highlight_dim = {arr_highlight_dim}')

            # The auto encoder is rebuilt per entry because it is given the
            # feature count. The type defaults to a plain string, so it is
            # matched as a string; the one-element list form is still
            # accepted for backward compatibility.
            if model_type in ('auto_encoder', ['auto_encoder']):
                clf = pyod_init(model_type,
                                n_features=arr_baseline_dim.shape[1])

            clf, result = try_fit(clf, colname, arr_baseline_dim,
                                  PyODDefaultModel)
            fit_success += 1 if result == 'success' else 0
            fit_default += 1 if result == 'default' else 0

            # try predictions and if they fail use default model
            try:
                preds = clf.predict(arr_highlight_dim)
                probs = clf.predict_proba(arr_highlight_dim)[:, 1]
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still propagate.
                log.info(
                    f'... predictions failed for {colname}, using default model'
                )
                fit_success -= 1
                fit_default += 1
                clf = PyODDefaultModel()
                clf.fit(arr_baseline_dim)
                preds = clf.predict(arr_highlight_dim)
                probs = clf.predict_proba(arr_highlight_dim)[:, 1]

            log.debug(f'... preds.shape = {preds.shape}')
            log.debug(f'... preds = {preds}')
            log.debug(f'... probs.shape = {probs.shape}')
            log.debug(f'... probs = {probs}')

            # save results, grouped by chart
            score = (np.mean(probs) + np.mean(preds)) / 2
            results.setdefault(chart, []).append({dimension: {'score': score}})

    # log some summary stats
    log.info(
        summary_info(n_charts, n_dims, n_bad_data, fit_success, fit_fail,
                     fit_default, model_level))

    return results