Example #1
import time

import numpy as np
from scipy import stats
from pyod.models.iforest import IForest


def dorc(preprocessedData, random_state, outliers_fraction=0.1):

    t0 = time.time()
    clf = IForest(contamination=outliers_fraction,
                  random_state=random_state,
                  n_jobs=-1)
    clf.fit(preprocessedData)
    scores = clf.decision_function(preprocessedData)

    # Apply IQR-based criteria to identify rare cells for further downstream analysis.
    q3 = np.percentile(scores, 75)
    iqr = stats.iqr(scores)
    th = q3 + (1.5 * iqr)

    # Select indexes that satisfy IQR-based thresholding criteria.
    indIqr = np.where(scores >= th)[0]
    print('shape of selected cells : {}'.format(indIqr.shape))

    # Create a file with binary predictions
    predictions = np.zeros(preprocessedData.shape[0])
    predictions[indIqr] = 1  # Replace predictions for rare cells with '1'.

    t1 = time.time()
    duration = round(t1 - t0, ndigits=4)
    print("Total running DoRC time is :" + str(duration) + " s")

    return predictions, scores, duration
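A quick way to exercise dorc on synthetic data (the matrix below is an assumption for illustration, not from the original):

import numpy as np

X = np.random.RandomState(0).randn(500, 20)  # hypothetical preprocessed matrix
predictions, scores, duration = dorc(X, random_state=0)
print(int(predictions.sum()), 'cells flagged as rare')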
Example #2
import numpy as np
from pyod.models.iforest import IForest


def do_iforest(x, n_estimators=100, max_samples=512):
    clf = IForest(behaviour="new",
                  n_estimators=n_estimators,
                  max_samples=max_samples,
                  random_state=None)
    y_pred = clf.fit_predict(x)
    scores = clf.decision_function(x)
    index = np.where(y_pred == 1)[0]
    return clf, scores, index
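A sketch of calling do_iforest (the data is hypothetical; also note that, depending on your pyod/scikit-learn version, the behaviour argument above may be unnecessary or unsupported):

import numpy as np

x = np.random.RandomState(1).randn(1000, 8)  # hypothetical feature matrix
clf, scores, index = do_iforest(x)
print(len(index), 'points labeled as outliers')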
Example #3
class IForestPyOD(BaseAlgorithm):
    name = "iForest_pyod"

    def __init__(self, t=100, psi=256):
        self.iforest = IForest(max_samples=psi, n_estimators=t,
                               behaviour="new", contamination=0.1)

    def fit(self, X):
        self.iforest.fit(X)

    def predict(self, X):
        # NOTE: returns raw anomaly scores rather than binary labels.
        return self.iforest.decision_function(X)
Example #4
    def detect(self, X, y=None):
        """
        :param X: DataFrame
        :param y: np.array (ignored)
        :return: outlier scores
        """
        rng = np.random.RandomState(42)
        # Build and fit the detector on the training samples.
        n_estimators = 200  # number of trees in the forest
        outliers_fraction = 0.5  # expected fraction of anomalous samples
        clf = IForest(max_samples='auto', random_state=rng,
                      contamination=outliers_fraction, n_estimators=n_estimators)
        clf.fit(X)
        scores = clf.decision_function(X)
        return scores
Example #5
def main():
    dataset, label = pre_data()
    from numpy import nan as NA
    from sklearn.impute import SimpleImputer
    imputer = SimpleImputer(missing_values=NA, strategy="mean")
    dataset = imputer.fit_transform(dataset)
    x_train, x_test, y_train, y_label = train_test_split(dataset,
                                                         label,
                                                         test_size=0.3,
                                                         random_state=44)
    for i in range(3):
        clf_name = 'IForest'
        clf = IForest()
        clf.fit(x_train)

        # get the prediction label and outlier scores of the training data
        y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
        y_train_scores = clf.decision_scores_  # raw outlier scores
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        from sklearn.metrics import recall_score
        print(accuracy_score(y_train, y_train_pred))
        print(precision_score(y_train, y_train_pred))
        print(recall_score(y_train, y_train_pred))
        # get the prediction on the test data
        y_test_pred = clf.predict(x_test)  # outlier labels (0 or 1)
        y_test_scores = clf.decision_function(x_test)  # outlier scores

        # evaluate and print the results
        print("\nOn Training Data:")
        evaluate_print(clf_name, y_train, y_train_scores)
        print("\nOn Test Data:")
        evaluate_print(clf_name, y_label, y_test_scores)
        print(accuracy_score(y_label, y_test_pred))
        print(precision_score(y_label, y_test_pred))
        print(recall_score(y_label, y_test_pred))
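If the file is run as a script, a standard entry-point guard (an addition; pre_data and evaluate_print are assumed to be defined elsewhere in the module) would be:

if __name__ == '__main__':
    main()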
Example #6
    def get_IF_scores(dataframe,
                      cols,
                      outliers_fraction=0.01,
                      standardize=True):
        '''Takes a df, a list of selected column names, and an
        outliers_fraction (default 0.01).

        Stores the df, with an added binary 'outlier' column
        (1 = outlier, 0 = inlier), on CheckOutliers.df3.
        '''
        if standardize:
            # Scale the selected variables to the [0, 1] range.
            minmax = MinMaxScaler(feature_range=(0, 1))
            dataframe[cols] = minmax.fit_transform(dataframe[cols])

        # Convert the selected columns to a numpy array for the detector.
        X = dataframe[cols].to_numpy()

        # Fit the Isolation Forest detector.
        clf = IForest(contamination=outliers_fraction, random_state=0)
        clf.fit(X)

        # Raw anomaly scores; the sign is flipped so that lower values
        # mark outliers, matching sklearn's convention.
        scores_pred = clf.decision_function(X) * -1

        # Classify each data point as outlier (1) or inlier (0).
        y_pred = clf.predict(X)
        n_inliers = len(y_pred) - np.count_nonzero(y_pred)
        n_outliers = np.count_nonzero(y_pred == 1)

        CheckOutliers.df3 = dataframe
        CheckOutliers.df3['outlier'] = y_pred.tolist()

        print('OUTLIERS:', n_outliers, 'INLIERS:', n_inliers,
              'found with Isolation Forest')
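A hypothetical call, assuming get_IF_scores is exposed as a static helper on the CheckOutliers class it references (column names below are illustrative only):

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': np.random.rand(100), 'b': np.random.rand(100)})
CheckOutliers.get_IF_scores(df, cols=['a', 'b'], outliers_fraction=0.05)
print(CheckOutliers.df3['outlier'].value_counts())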
Example #7
    X_train, y_train, X_test, y_test = generate_data(
        n_train=n_train, n_test=n_test, contamination=contamination)

    # train IForest detector
    clf_name = 'IForest'
    clf = IForest()
    clf.fit(X_train)

    # get the prediction label and decision_scores_ on the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name,
              X_train,
              y_train,
              X_test,
              y_test,
              y_train_pred,
              y_test_pred,
              show_figure=True,
              save_figure=False)
Example #8
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

    # Fit the data with pyod's IForest detector
    clf_name = 'IForest'
    clf = IForest()
    clf.fit(X_train)

    # Prediction yields an array of 0s and 1s: 1 marks an outlier, 0 a non-outlier
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores,The outlier scores of the training data.

    # Predict whether each test sample is an outlier; returns an array of 0s and 1s
    y_test_pred = clf.predict(X_test)

    y_test_scores = clf.decision_function(
        X_test)  # outlier scores,The anomaly score of the input samples.
    # Use sklearn's roc_auc_score to compute the AUC, i.e. the area under the ROC curve
    try:
        sumAuc_train += sklearn.metrics.roc_auc_score(y_train,
                                                      y_train_scores,
                                                      average='macro')
        sumAuc_test += sklearn.metrics.roc_auc_score(y_test,
                                                     y_test_scores,
                                                     average='macro')
        #s=precision_score(y_train, y_train_scores, average='macro')
        i += 1
        print(sumAuc_train, sumAuc_test)
    except ValueError:
        pass

    # Obtain the ROC value and the precision (prn)
Example #9
class TestIForest(unittest.TestCase):
    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = IForest(contamination=self.contamination, random_state=42)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)
        assert (hasattr(self.clf, '_mu') and
                self.clf._mu is not None)
        assert (hasattr(self.clf, '_sigma') and
                self.clf._sigma is not None)
        assert (hasattr(self.clf, 'estimators_') and
                self.clf.estimators_ is not None)
        assert (hasattr(self.clf, 'estimators_samples_') and
                self.clf.estimators_samples_ is not None)
        assert (hasattr(self.clf, 'max_samples_') and
                self.clf.max_samples_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def test_model_clone(self):
        clone_clf = clone(self.clf)

    def tearDown(self):
        pass
Example #10
# Training
clf1 = IForest(random_state=42)  # default contamination = 0.1
clf1.fit(X_train1)

# Setting the threshold using the contamination parameter
dec_scores = clf1.decision_scores_
dec_scores_sorted = sorted(dec_scores, reverse=True)
a = round(len(X_train1) * clf1.contamination)
print(a)

anomalies = dec_scores_sorted[:a]
threshold = anomalies[-1]

# Score the validation data
y_valid_scores = clf1.decision_function(X_valid1)
y_valid_scores = pd.Series(y_valid_scores)

valid_SrcIP = np.load('preprocessing1_valid_srcIP.npy', allow_pickle=True)

# A score above the threshold marks an outlier; anything else is an inlier.
valid_outliers = []
y_pred_valid = []
for idx in range(len(y_valid_scores)):
    if y_valid_scores[idx] > threshold:
        valid_outliers.append((valid_SrcIP[idx], y_valid_scores[idx]))
        y_pred_valid.append(1.0)
    else:
        y_pred_valid.append(0.0)
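The labeling loop above can also be vectorized, and pyod detectors expose a fitted threshold_ attribute derived from the contamination setting, which should land close to the manually derived cutoff. A small sketch (not part of the original example):

import numpy as np

# Vectorized equivalent of the loop above.
y_pred_valid = (y_valid_scores.to_numpy() > threshold).astype(float)

# pyod computes a comparable cutoff during fit(); the two should roughly agree.
print(threshold, clf1.threshold_)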
Example #11
    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train IForest detector
    clf_name = 'IForest'
    clf = IForest()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True, save_figure=False)
Example #12
class IF(IForest):
    def __init__(self,
                 n_estimators=100,
                 max_samples='auto',
                 contamination=0.1,
                 random_state=42,
                 verbose=1):
        """Isolation Forest (IF)

        Parameters
        ----------
        n_estimators : int, optional (default=100)
            The number of base estimators in the ensemble.

        max_samples : int or float, optional (default="auto")
            The number of samples to draw from X to train each base estimator.

        contamination : float in (0., 0.5), optional (default=0.1)
            The amount of contamination of the data set, i.e., the proportion of outliers in the data set.
            Used when fitting to define the threshold on the decision function.

        verbose : int, optional (default=1)
            Controls how much information is printed during fitting: the
            higher the value, the more verbose the output.

        random_state : int, optional (default=42)
            Seed used for reproducible results.

        """
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.contamination = contamination
        self.verbose = verbose
        self.random_state = random_state

    def fit(self, X_train, y_train=None):
        """Fit the model. y is ignored in unsupervised methods.

        Parameters
        ----------
        X_train : numpy array of shape (n_samples, n_features)
            The input samples.

        y_train : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            The fitted estimator.
        """
        self.model_ = IForest(
            n_estimators=self.n_estimators,
            max_samples=self.max_samples,
            contamination=self.contamination,
            max_features=1.,
            bootstrap=False,
            n_jobs=-1,
            behaviour='deprecated',  # no longer used as of sklearn 0.24
            random_state=self.random_state,
            verbose=self.verbose)

        self.model_.fit(X=X_train)

        return self

    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        return self.model_.decision_function(X)

    def predict_proba(self, X):
        raise NotImplementedError
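A minimal usage sketch for this wrapper (the data below is hypothetical, and a pyod version that still accepts the behaviour argument is assumed):

import numpy as np

rng = np.random.RandomState(0)
X_train = rng.randn(200, 5)  # hypothetical training matrix

detector = IF(n_estimators=50, verbose=0)
detector.fit(X_train)
scores = detector.decision_function(X_train)  # larger score = more anomalous
print(scores[:5])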
Example #13
def train_model(request):
    global clf
    if request.method == 'POST':
        try:
            json_data = json.loads(request.body)
            print(json_data)
            file = json_data['file']
            data = pd.read_csv(file)
            data = data.fillna(0)

            # Clean 'Birth year': drop zero placeholders, strip slashes, cast to int.
            s = data["Birth year"]
            data["Birth year"] = s[s != 0].str.replace("/", "").astype(int)
            data = data.fillna(0)

            # Clean 'Uid': remove spaces and cast to float.
            data['Uid'] = data['Uid'].astype(str).str.replace(' ',
                                                              '').astype(float)

            X1 = data['Birth year'].values.reshape(-1, 1)
            X2 = data['Uid'].values.reshape(-1, 1)

            X = np.concatenate((X1, X2), axis=1)
            outliers_fraction = 0.01
            xx, yy = np.meshgrid(np.linspace(0, 1, 100),
                                 np.linspace(0, 1, 100))
            clf = IForest(contamination=outliers_fraction, random_state=0)
            clf.fit(X)
            # predict raw anomaly score
            scores_pred = clf.decision_function(X) * -1

            # prediction of a datapoint category outlier or inlier
            y_pred = clf.predict(X)
            n_inliers = len(y_pred) - np.count_nonzero(y_pred)
            n_outliers = np.count_nonzero(y_pred == 1)
            plt.figure(figsize=(8, 8))
            # data1 references the same dataframe (not an independent copy)
            data1 = data
            data['outlier'] = y_pred.tolist()

            # Uid - inlier feature 1, Birth year - inlier feature 2
            inliers_Uid = np.array(data['Uid'][data['outlier'] == 0]).reshape(
                -1, 1)
            inliers_Birth_year = np.array(
                data['Birth year'][data['outlier'] == 0]).reshape(-1, 1)

            # Uid - outlier feature 1, Birth year - outlier feature 2
            outliers_Uid = data1['Uid'][data1['outlier'] == 1].values.reshape(
                -1, 1)
            outliers_Birth_year = data1['Birth year'][data1['outlier'] ==
                                                      1].values.reshape(-1, 1)

            print('OUTLIERS: ', n_outliers, 'INLIERS: ', n_inliers)

            output = {'OUTLIERS ': n_outliers, 'INLIERS ': n_inliers}

            return JsonResponse(output)
        except Exception as e:
            return JsonResponse({'error': str(e)}, safe=False)
Example #14
class TestIForest(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = IForest(contamination=self.contamination, random_state=42)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        assert_true(hasattr(self.clf, 'decision_scores_') and
                    self.clf.decision_scores_ is not None)
        assert_true(hasattr(self.clf, 'labels_') and
                    self.clf.labels_ is not None)
        assert_true(hasattr(self.clf, 'threshold_') and
                    self.clf.threshold_ is not None)
        assert_true(hasattr(self.clf, '_mu') and
                    self.clf._mu is not None)
        assert_true(hasattr(self.clf, '_sigma') and
                    self.clf._sigma is not None)
        assert_true(hasattr(self.clf, 'estimators_') and
                    self.clf.estimators_ is not None)
        assert_true(hasattr(self.clf, 'estimators_samples_') and
                    self.clf.estimators_samples_ is not None)
        assert_true(hasattr(self.clf, 'max_samples_') and
                    self.clf.max_samples_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
Example #15
class TestIForest(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination)

        self.clf = IForest(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        # Fail the test outright if any fitted attribute is missing or unset
        # (the original misused assertRaises here).
        for attr in ('decision_scores_', 'labels_', 'threshold_',
                     'estimators_', 'estimators_samples_', 'max_samples_'):
            if not hasattr(self.clf, attr) or getattr(self.clf, attr) is None:
                self.fail('{} is not set'.format(attr))

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
Example #16
    data_dict = load_dataset(
        dataset,
        subdataset,
        "all",
    )

    x_train = data_dict["train"]
    x_test = data_dict["test"]
    x_test_labels = data_dict["test_labels"]

    start = time.time()

    od = IForest(n_estimators=n_estimators)

    od.fit(x_train)

    anomaly_score = od.decision_function(x_test)

    anomaly_label = x_test_labels

    end = time.time()

    elapsed = end - start  # avoid shadowing the `time` module

    # Make evaluation
    evaluate_all(anomaly_score, anomaly_label)
    salience = compute_salience(anomaly_score, anomaly_label)
    print('time')
    print('   ', elapsed)
    print('salience')
    print('   ', salience)
    print('salience')
    print('   ', salience)