Exemple #1
0
    def fit(self, X):
        """Calibrate a depth regression on bootstrap samples, then fit on X.

        For each exponent ``i`` in ``[self.lower_bound, self.higher_bound)``
        a bootstrap sample of ``2**i`` rows is drawn from ``X`` (with
        replacement), an ``IsolationForest`` is fitted on it with a very
        large ``max_depth``, and the left edge of the most populated
        unit-width bin of the averaged depths is recorded. A no-intercept
        ``LinearRegression`` of that value against ``i`` is stored in
        ``self.reg``. Finally a forest fitted on all of ``X`` with
        ``self.max_depth`` is stored in ``self.clf``.

        Side effect: reseeds NumPy's global random generator with
        ``self.random_state``.

        NOTE(review): ``fit`` is called with a ``max_depth`` keyword and
        ``_compute_actual_depth_leaf`` is used, so this presumably relies on
        a project-specific IsolationForest -- confirm.

        Args:
            X: Training data. Must support ``shape[0]``, ``len()`` and
                indexing by an integer index array (presumably a 2-D
                numpy array -- confirm against callers).

        Returns:
            This instance, to allow call chaining.
        """
        # The bootstrap draws below come from the global generator.
        np.random.seed(self.random_state)
        self.n_sample = X.shape[0]

        exponents = []
        modal_depths = []
        for exponent in np.arange(self.lower_bound, self.higher_bound):
            n_draws = 2**exponent
            row_idx = np.random.choice(self.n_sample, n_draws, replace=True)
            subset = X[row_idx]

            forest = IsolationForest(
                random_state=self.random_state,
                max_samples=n_draws,
                contamination='auto',
            )
            forest = forest.fit(subset, max_depth=100000000)

            # First element of the helper's result, averaged over axis 0.
            leaf_depths = forest._compute_actual_depth_leaf(subset)[0]
            mean_depths = np.mean(leaf_depths, axis=0)

            # Unit-width integer bins spanning the observed depth range.
            lowest = int(mean_depths.min())
            highest = int(mean_depths.max() + 2)
            counts, edges = np.histogram(mean_depths,
                                         bins=np.arange(lowest, highest))
            # The uniform +1 offset leaves the position of the maximum as is.
            counts = counts + 1
            left_edges = edges[:-1]
            peak = np.argmax(counts)

            exponents.append([exponent])
            modal_depths.append(left_edges[peak])

        regressor = LinearRegression(fit_intercept=False)
        self.reg = regressor.fit(exponents, modal_depths)

        full_forest = IsolationForest(
            random_state=self.random_state,
            max_samples=len(X),
            contamination='auto',
        )
        self.clf = full_forest.fit(X, max_depth=self.max_depth)

        return self
 def __init__(self, n_estimators=100, max_samples=256):
     """Create the wrapped isolation forest and the initial detector state.

     Args:
         n_estimators: Forwarded to ``IsolationForest`` as ``n_estimators``.
         max_samples: Forwarded to ``IsolationForest`` as ``max_samples``.
     """
     # Passed by keyword: scikit-learn's IsolationForest accepts its
     # constructor arguments as keyword-only, so the positional form raises
     # TypeError there.
     # NOTE(review): assumes this is sklearn's class or one with the same
     # parameter names -- confirm against the file's imports.
     self.model = IsolationForest(n_estimators=n_estimators,
                                  max_samples=max_samples)
     self.threshold = 0.6  ## Recommended threshold in IForest paper.
     # Starts out False; nothing in this block sets it to True.
     self.trainedStatus = False