def main():
    """Demo: train a PyOD AutoEncoder on synthetic data and inspect test-set anomaly scores."""
    plt.close('all')
    matplotlib.use('Qt5Agg')  # override PyCharm pro's scientific view
    create_links()
    warnings.showwarning = silence_warnings

    contamination = 0.1  # percentage of outliers
    n_train = 500        # number of training points
    n_test = 500         # number of testing points
    n_features = 25      # number of features

    X_test, y_test, X_train, y_train = _generate_random_data(
        contamination, n_features, n_test, n_train)
    # X_test, y_test, X_train, y_train = ?
    _plot_using_pca(X_train, y_train)

    # Bottleneck architecture: 25 -> 2 -> 2 -> 25.
    clf1 = AutoEncoder(hidden_neurons=[25, 2, 2, 25])
    clf1.fit(X_train)
    y_train_scores = clf1.decision_scores_

    # Predict the anomaly scores; higher score == more anomalous.
    y_test_scores = pd.Series(clf1.decision_function(X_test))

    # Plot anomaly scores.
    plt.hist(y_test_scores, bins='auto')
    plt.title("Histogram for Model Clf1 Anomaly Scores")
    plt.show()

    # Manually chosen cut point separating inliers (0) from outliers (1).
    manual_score_thres = 4
    df_test = X_test.copy()
    df_test['score'] = y_test_scores
    # assign cluster=0 to samples with low anomaly score, cluster=1 to high.
    df_test['cluster'] = np.where(df_test['score'] < manual_score_thres, 0, 1)
    df_test['cluster'].value_counts()
    df_test.groupby('cluster').mean()
    print(df_test)
def ele_outliers(num):
    """Run 10-fold CV: fit an AutoEncoder on mice flows only, flag elephants as outliers.

    Parameters
    ----------
    num : int
        Dataset index forwarded to ``load_data``.

    NOTE(review): relies on module-level globals ``ALL_DATA_TYPE``,
    ``ALL_TRAIN_TYPE``, ``epochs`` and ``conta`` — confirm they are defined.
    """
    dataSetType = ALL_DATA_TYPE[0]
    trainType = ALL_TRAIN_TYPE[1]
    X, yc = load_data(dataSetType, trainType, num)

    # 10 fold validation
    KF = KFold(n_splits=10, shuffle=True, random_state=10)
    report_list = []
    for train_index, test_index in KF.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = yc[train_index], yc[test_index]

        # Train only on mice (label 0); elephants (label 1) should surface
        # as outliers at predict time. (Removed the unused X_train_ele split.)
        X_train_mice = X_train[y_train == 0]

        clf_name = 'AutoEncoder'
        clf = AutoEncoder(hidden_neurons=[256, 64, 20, 64, 256], epochs=epochs,
                          contamination=conta, random_state=10, verbose=0)
        clf.fit(X_train_mice)

        y_pred_test = clf.predict(X_test)              # 0 = inlier, 1 = outlier
        y_pred_scores = clf.decision_function(X_test)  # raw outlier scores

        c_matrix = confusion_matrix(y_test, y_pred_test)
        print(c_matrix)
        temp_report = classification_report(y_test, y_pred_test, output_dict=True)
        report_list.append(temp_report)
        print(classification_report(y_test, y_pred_test, output_dict=False))
        # evaluate_print(clf_name, y_pred_test, y_pred_scores)

    final_report = get_avg_report(report_list)
    print("final report", final_report)
class AutoEncoderODD(abstract_occ_model):
    """PyOD AutoEncoder wrapped behind the one-class-classifier interface.

    Prediction labels are translated from PyOD's convention
    (0 = inlier, 1 = outlier) to the OCC convention (1 = inlier, -1 = outlier).
    """

    def __init__(self, hidden_neurons, nu, epochs, batch_size=32,
                 output_activation='sigmoid'):
        self.model = AutoEncoder(hidden_neurons=hidden_neurons,
                                 contamination=nu,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 validation_size=0,
                                 output_activation=output_activation)

    def fit(self, X):
        """Fit the underlying autoencoder on X."""
        self.model.fit(X)

    def predict(self, X):
        """Predict labels for X, remapped to 1 (inlier) / -1 (outlier)."""
        raw = self.model.predict(X)
        remapped = np.where(raw == 1.0, -1, raw)
        return np.where(raw == 0.0, 1, remapped)

    def score_samples(self, X):
        """Return normality scores: higher means more normal (negated outlier score)."""
        return -self.model.decision_function(X)
# ---------------------------------------------------------------------------
# Example: train PyOD's AutoEncoder on synthetic data and evaluate it.
# NOTE(review): `contamination` must already be defined above this chunk.
# ---------------------------------------------------------------------------
n_train = 20000   # number of training points
n_test = 2000     # number of testing points
n_features = 300  # number of features

# Generate sample data
X_train, y_train, X_test, y_test = generate_data(
    n_train=n_train, n_test=n_test, n_features=n_features,
    contamination=contamination, random_state=42)

# train AutoEncoder detector
clf_name = 'AutoEncoder'
clf = AutoEncoder(epochs=30, contamination=contamination)
clf.fit(X_train)

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_             # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf.predict(X_test)              # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
# Drop identifier columns so only numeric features are fed to the model.
test_x = test.drop(columns=['user_id', 'index'])

# Sanity checks for NaNs / infs in the training features.
np.any(np.isnan(train_x))
np.all(np.isfinite(train_x))

# BUG FIX: the scaler must be fitted on the training data only and then
# applied to the test data; fitting a second StandardScaler on the test set
# gives the two sets inconsistent scales (train/test leakage).
scaler = StandardScaler().fit(train_x.dropna())
train_norm = scaler.transform(train_x.dropna())
test_norm = scaler.transform(test_x.dropna())

clf1 = AutoEncoder(hidden_neurons=[25, 2, 2, 25])
clf1.fit(train_norm)
y_train_scores = clf1.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf1.predict(test_norm)              # outlier labels (0 or 1)
y_test_scores = clf1.decision_function(test_norm)  # outlier scores
y_test_pred = pd.Series(y_test_pred)
y_test_scores = pd.Series(y_test_scores)
y_test_pred.value_counts()
y_test_scores.describe()

plt.hist(y_test_scores, bins='auto')
plt.title("Histogram for Model Clf1 Anomaly Scores")
plt.xlim(-1, 2)
plt.show()

df_test = test_x.copy()
# NOTE(review): assumes test.dropna() and test_x.dropna() drop the same rows —
# verify, otherwise the user_id alignment is off.
df_test.insert(loc=0, column="user_id", value=test.dropna()['user_id'])
class TestAutoEncoder(unittest.TestCase):
    """Unit tests for pyod's AutoEncoder detector."""

    def setUp(self):
        self.n_train = 6000
        self.n_test = 1000
        self.n_features = 300
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            n_features=self.n_features, contamination=self.contamination,
            random_state=42)
        self.clf = AutoEncoder(epochs=5, contamination=self.contamination)
        self.clf.fit(self.X_train)

    def _check_proba(self, proba):
        # Probabilities must lie in [0, 1].
        assert proba.min() >= 0
        assert proba.max() <= 1

    def test_parameters(self):
        # Every fitted attribute must exist and be populated.
        for attr in ('decision_scores_', 'labels_', 'threshold_',
                     '_mu', '_sigma', 'model_'):
            assert hasattr(self.clf, attr) and getattr(self.clf, attr) is not None

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(scores.shape[0], self.X_test.shape[0])
        # check performance
        assert roc_auc_score(self.y_test, scores) >= self.roc_floor

    def test_prediction_labels(self):
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        self._check_proba(self.clf.predict_proba(self.X_test))

    def test_prediction_proba_linear(self):
        self._check_proba(self.clf.predict_proba(self.X_test, method='linear'))

    def test_prediction_proba_unify(self):
        self._check_proba(self.clf.predict_proba(self.X_test, method='unify'))

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_prediction_labels_confidence(self):
        labels, confidence = self.clf.predict(self.X_test,
                                              return_confidence=True)
        assert_equal(labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        assert confidence.min() >= 0
        assert confidence.max() <= 1

    def test_prediction_proba_linear_confidence(self):
        proba, confidence = self.clf.predict_proba(self.X_test,
                                                   method='linear',
                                                   return_confidence=True)
        self._check_proba(proba)
        assert_equal(confidence.shape, self.y_test.shape)
        assert confidence.min() >= 0
        assert confidence.max() <= 1

    def test_fit_predict(self):
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_model_clone(self):
        # for deep models this may not apply; just check cloning runs.
        clone_clf = clone(self.clf)

    def tearDown(self):
        pass
plt.show()

# Step 1: Build three autoencoders with increasingly deep bottlenecks.
clf1 = AutoEncoder(hidden_neurons=[25, 2, 2, 25])
clf1.fit(X_train)

clf2 = AutoEncoder(hidden_neurons=[25, 10, 2, 10, 25])
clf2.fit(X_train)

clf3 = AutoEncoder(hidden_neurons=[25, 15, 10, 2, 10, 15, 25])
clf3.fit(X_train)

# Predict the anomaly scores.
# NOTE(review): scores come from clf1 although the histogram title says
# "Clf3" — confirm which model is actually intended.
y_test_scores = pd.Series(clf1.decision_function(X_test))

# Step 2: Determine the cut point from the score histogram.
import matplotlib.pyplot as plt
plt.hist(y_test_scores, bins='auto')
plt.title("Histogram with Model Clf3 Anomaly Scores")
plt.show()

df_test = X_test.copy()
df_test['score'] = y_test_scores
# Scores below 4 are treated as inliers (cluster 0), the rest as outliers.
df_test['cluster'] = np.where(df_test['score'] < 4, 0, 1)
df_test['cluster'].value_counts()

# Step 3: Get the summary statistics by cluster
df_test.groupby('cluster').mean()
def ele_outliers(num):
    """10-fold CV elephant-flow detection: fit an AutoEncoder on mice flows
    and report how well elephants are flagged as outliers.

    Parameters
    ----------
    num : int
        Dataset index (only used by the commented-out file-name templates).

    NOTE(review): relies on module-level globals ``thres``, ``epochs`` and
    ``conta`` — the ``sys.argv`` read for ``thres`` is commented out, so it
    must be defined elsewhere.
    """
    # num = 10
    # fileName1 = "/data/sym/one-class-svm/data/mean_of_five/dec-feature/caida-A-50W-5-{}.csv".format(num)
    # fileName2 = "/data/sym/one-class-svm/data/mean_of_five/bin-feature/caida-A-50W-5-{}.csv".format(num)
    # fileName1 = "/data/sym/one-class-svm/data/mean_of_five/dec-feature/univ1-50W-{0}-{1}.csv".format(5, num)
    # fileName2 = "/data/sym/one-class-svm/data/mean_of_five/bin-feature/univ1-50W-{0}-{1}.csv".format(5, num)
    fileName1 = "data/dec-test.csv"   # decimal features + flow sizes
    fileName2 = "data/bin-test.csv"   # binary features
    df = pd.read_csv(fileName1)
    dfb = pd.read_csv(fileName2)

    # Convert to a matrix; binary features stored as the strings '0'/'1'
    # are recoded to -1/+1.
    X = dfb.values
    X[X == '0'] = -1
    X[X == '1'] = 1

    # Binarize the size target: flows above `thres` are elephants (1).
    yr = df['flowSize']
    # thres = int(sys.argv[1])
    yc = yr.copy(deep=True)
    yc[yr <= thres] = 0
    yc[yr > thres] = 1
    print("original mice count: ", sum(yc == 0))
    print("original elephant count: ", sum(yc == 1))

    # 10 fold validation
    KF = KFold(n_splits=10, shuffle=True, random_state=10)
    report_list = []
    for train_index, test_index in KF.split(X):
        X_train, X_test = X[train_index], X[test_index]
        # NOTE(review): positional indexing of the Series assumes yc keeps the
        # default RangeIndex — true for a fresh read_csv.
        y_train, y_test = yc[train_index], yc[test_index]

        # Fit only on mice (label 0); elephants should surface as outliers.
        # (Removed the unused X_train_ele split.)
        X_train_mice = X_train[y_train == 0]

        clf_name = 'AutoEncoder'
        clf = AutoEncoder(hidden_neurons=[256, 64, 20, 64, 256], epochs=epochs,
                          contamination=conta, random_state=10, verbose=0)
        clf.fit(X_train_mice)

        y_pred_test = clf.predict(X_test)              # 0 = mouse, 1 = elephant
        y_pred_scores = clf.decision_function(X_test)  # raw outlier scores

        c_matrix = confusion_matrix(y_test, y_pred_test)
        print(c_matrix)
        temp_report = classification_report(y_test, y_pred_test, output_dict=True)
        report_list.append(temp_report)
        print(classification_report(y_test, y_pred_test, output_dict=False))
        # evaluate_print(clf_name, y_pred_test, y_pred_scores)

    final_report = get_avg_report(report_list)
    print("final report", final_report)
# Scale the training view (nnData is assumed to be defined above this chunk).
dropCleanScale = StandardScaler().fit_transform(nnData)
dropCleanScale = pd.DataFrame(dropCleanScale)

# Load and scale the test view.
nnDataTest = pd.read_excel("nnViewDataTest.xlsx")
nnDataTest = nnDataTest.drop(['date_time'], axis=1)
dropCleanScaleTest = StandardScaler().fit_transform(nnDataTest)
dropCleanScaleTest = pd.DataFrame(dropCleanScaleTest)

# Two autoencoders with different bottleneck depths.
clf1 = AutoEncoder(hidden_neurons=[14, 2, 2, 14])
clf1.fit(dropCleanScale)
y_train_scores1 = clf1.decision_scores_

clf2 = AutoEncoder(hidden_neurons=[14, 10, 2, 10, 14])
clf2.fit(dropCleanScale)
y_train_scores2 = clf2.decision_scores_

y_test1 = clf1.decision_function(dropCleanScaleTest)
y_test2 = clf2.decision_function(dropCleanScaleTest)

## plotting the Remaining lifetime score
plt.hist(y_test1, bins='auto', color='green')
plt.hist(y_test2, bins='auto', color='blue')
plt.title("Histogram for Model Clf1 Anomaly Scores")
plt.show()

# BUG FIX: df_test was previously a copy of the 1-D training-score array
# (y_train_scores2); assigning df_test['score'] to a numpy array raises.
# Build it from the scored test-feature DataFrame instead, so the scores
# align row-for-row with the data they were computed on.
df_test = dropCleanScaleTest.copy()
df_test['score'] = y_test2
df_test['cluster'] = np.where(df_test['score'] < 4, 0, 1)
df_test['cluster'].value_counts()
df_test.groupby('cluster').mean()
x_train = data_dict["train"]
x_test = data_dict["test"]
x_test_labels = data_dict["test_labels"]

start = time.time()

# data preprocessing for MSCRED
od = AutoEncoder(
    hidden_neurons=hidden_neurons,
    batch_size=batch_size,
    epochs=epochs,
    l2_regularizer=l2_regularizer,
    verbose=1,
)
od.fit(x_train)

# get outlier scores
anomaly_score = od.decision_function(x_test)
anomaly_label = x_test_labels
end = time.time()
# BUG FIX: the elapsed time used to be stored in a variable named `time`,
# shadowing the `time` module and breaking any later time.time() call.
elapsed = end - start

evaluate_all(anomaly_score, anomaly_label)
salience = compute_salience(anomaly_score, anomaly_label)
print('time')
print(' ', elapsed)
print('salience')
print(' ', salience)
class TestAutoEncoder(unittest.TestCase):
    """Unit tests for pyod's AutoEncoder detector (legacy assert_* helpers)."""

    def setUp(self):
        self.n_train = 6000
        self.n_test = 1000
        self.n_features = 300
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            n_features=self.n_features, contamination=self.contamination,
            random_state=42)
        self.clf = AutoEncoder(epochs=5, contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        # TODO: fix estimator check for AutoEncoder
        # check_estimator(self.clf)
        pass

    def test_parameters(self):
        # Every fitted attribute must exist and be populated.
        for attr in ('decision_scores_', 'labels_', 'threshold_',
                     '_mu', '_sigma', 'model_'):
            assert_true(hasattr(self.clf, attr) and
                        getattr(self.clf, attr) is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        scores = self.clf.decision_function(self.X_test)
        # check score shapes
        assert_equal(scores.shape[0], self.X_test.shape[0])
        # check performance
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_linear(self):
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def tearDown(self):
        pass
import matplotlib.pyplot as plt

# Optional scatter plot of the training data:
# plt.scatter(X_train[0], X_train[1], c=y_train, alpha=0.8)
# plt.title('Scatter plot')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.show()

# Autoencoder detector (bottleneck 25 -> 2 -> 2 -> 25).
clf1 = AutoEncoder(hidden_neurons=[25, 2, 2, 25])
clf1.fit(X_train)

# Get the outlier scores for the train data.
y_train_scores = clf1.decision_scores_

# Predict the anomaly scores on the test data.
y_test_scores = pd.Series(clf1.decision_function(X_test))  # outlier scores

# Plot it!
import matplotlib.pyplot as plt
# plt.hist(y_test_scores, bins='auto')
# plt.title("Histogram for Model Clf1 Anomaly Scores")
# plt.show()

#
df_test = X_test.copy()
df_test['score'] = y_test_scores
# Scores below 4 form cluster 0 (inliers); the rest cluster 1 (outliers).
df_test['cluster'] = np.where(df_test['score'] < 4, 0, 1)
df_test['cluster'].value_counts()
df_test.groupby('cluster').mean()