Beispiel #1
0
class RC30(ClassifierMixin, BaseEstimator):
    """Random forest whose positive-class scores are recalibrated on a hold-out split.

    ``fit`` splits the training data 70/30, fits a ``RandomForestClassifier``
    on the larger part and fits a calibrator on the forest's second
    ``predict_proba`` column for the smaller part.

    Parameters
    ----------
    n_estimators, max_depth, min_samples_split, min_samples_leaf
        Forwarded unchanged to ``RandomForestClassifier``.
    ctype : {"isotonic", "logistic"}
        Calibrator to use: ``IsotonicRegression`` clipped to [0, 1], or a
        ``LogisticRegression`` with ``C=1e20``.

    Notes
    -----
    The code always reads column 1 of ``predict_proba``, so it presumably
    targets binary problems with labels encoded as 0/1 -- confirm with callers.
    """

    _CTYPES = ("logistic", "isotonic")

    def __init__(self,
                 n_estimators=30,
                 max_depth=3,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 ctype="isotonic"):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.ctype = ctype

    def _check_ctype(self):
        """Raise ``ValueError`` unless ``self.ctype`` names a supported calibrator."""
        if self.ctype not in self._CTYPES:
            raise ValueError(
                f"ctype must be 'logistic' or 'isotonic', got {self.ctype!r}")

    def _positive_scores(self, X):
        """Return the forest's column-1 scores in the shape the calibrator expects."""
        proba = self.model.predict_proba(X)
        if self.ctype == "logistic":
            # LogisticRegression needs a 2-D (n, 1) feature matrix.
            return proba[:, [1]]
        # IsotonicRegression takes a 1-D vector.
        return proba[:, 1]

    def fit(self, X, y):
        """Fit the forest on 70% of the data and the calibrator on the rest.

        Raises
        ------
        ValueError
            If ``ctype`` is not ``"logistic"`` or ``"isotonic"``.

        Returns
        -------
        self
        """
        X, y = check_X_y(X, y)
        # Validated here rather than in __init__ so set_params() changes are
        # also covered; previously an unknown ctype produced an estimator
        # without a calibrator whose predict_proba returned None.
        self._check_ctype()

        self.model = RandomForestClassifier(
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf)
        if self.ctype == "logistic":
            self.calibrator = LogisticRegression(C=1e20, solver="lbfgs")
        else:
            self.calibrator = IsotonicRegression(y_min=0, y_max=1,
                                                 out_of_bounds="clip")

        # X0/y0 train the forest; X1/y1 are held out for calibration only.
        X0, X1, y0, y1 = train_test_split(X, y, test_size=0.3)
        self.model.fit(X0, y0)
        self.calibrator.fit(self._positive_scores(X1), y1)

        self.is_fitted_ = True
        return self

    def predict_proba(self, X):
        """Return calibrated probabilities as an ``(n_samples, 2)`` array.

        For ``ctype="logistic"`` this is the calibrator's own ``predict_proba``
        output; for ``ctype="isotonic"`` column 1 is the calibrated score and
        column 0 its complement.

        Raises
        ------
        ValueError
            If ``ctype`` is not ``"logistic"`` or ``"isotonic"``.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')
        self._check_ctype()

        scores = self._positive_scores(X)
        if self.ctype == "logistic":
            return self.calibrator.predict_proba(scores)

        out = np.zeros((X.shape[0], 2))
        out[:, 1] = self.calibrator.predict(scores)
        out[:, 0] = 1 - out[:, 1]
        return out

    def predict(self, X):
        """Return, per sample, the column index with the larger calibrated probability.

        This equals the class label only when labels are encoded as 0/1.
        """
        return np.argmax(self.predict_proba(X), axis=1)
Beispiel #2
0
class CaliForest(ClassifierMixin, BaseEstimator):
    """Bagged tree ensemble calibrated on its own out-of-bag predictions.

    Each tree is fitted on a bootstrap resample of the training rows. The
    mean out-of-bag score of every row is then used, together with a
    per-row weight, to fit a calibrator that maps the averaged tree score to
    a probability.

    Parameters
    ----------
    n_estimators : int
        Number of trees.
    criterion, max_depth, min_samples_split, min_samples_leaf
        Forwarded unchanged to each ``Tree``.
    ctype : {"isotonic", "logistic"}
        Calibrator type (``Iso`` clipped to [0, 1], or ``LR``).
    alpha0, beta0 : float
        Offsets in the calibration weight
        ``(alpha0 + k/2) / (beta0 + var * k/2)``, where ``k`` is the number
        of out-of-bag predictions for a row and ``var`` their variance.
        NOTE(review): this has the form of a gamma-prior update on
        prediction precision -- confirm against the method's reference.

    Notes
    -----
    Column 1 of ``predict_proba`` is always taken as the positive class, so
    the code presumably expects binary labels encoded as 0/1 -- confirm.
    """

    _CTYPES = ("logistic", "isotonic")

    def __init__(self,
                 n_estimators=300,
                 criterion="gini",
                 max_depth=5,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 ctype="isotonic",
                 alpha0=100,
                 beta0=25):

        self.n_estimators = n_estimators
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.ctype = ctype
        self.alpha0 = alpha0
        self.beta0 = beta0

    def _check_ctype(self):
        """Raise ``ValueError`` unless ``self.ctype`` names a supported calibrator."""
        if self.ctype not in self._CTYPES:
            raise ValueError(
                f"ctype must be 'logistic' or 'isotonic', got {self.ctype!r}")

    def fit(self, X, y):
        """Fit the trees on bootstrap samples and the calibrator on OOB scores.

        Raises
        ------
        ValueError
            If ``ctype`` is not ``"logistic"`` or ``"isotonic"``.

        Returns
        -------
        self
        """
        X, y = check_X_y(X, y, accept_sparse=False)
        # Previously an unknown ctype left calibrator=None and predict_proba
        # silently returned P(class 1) = 0 for every row.
        self._check_ctype()

        # "sqrt" is what the deprecated "auto" alias meant for classification
        # trees; recent scikit-learn releases reject "auto" outright.
        # (Assumes Tree is scikit-learn's DecisionTreeClassifier -- the import
        # is not visible here.)
        self.estimators = [
            Tree(criterion=self.criterion,
                 max_depth=self.max_depth,
                 min_samples_split=self.min_samples_split,
                 min_samples_leaf=self.min_samples_leaf,
                 max_features="sqrt")
            for _ in range(self.n_estimators)
        ]

        if self.ctype == "logistic":
            # NOTE(review): penalty="none" was replaced by penalty=None in
            # newer scikit-learn; left unchanged because None is not accepted
            # by older releases -- pick one once the target version is known.
            self.calibrator = LR(penalty="none", solver="saga", max_iter=5000)
        else:
            self.calibrator = Iso(y_min=0, y_max=1, out_of_bounds="clip")

        n = X.shape[0]
        Y_oob = np.full((n, self.n_estimators), np.nan)  # NaN = row was in-bag
        n_oob = np.zeros(n)                              # OOB count per row
        IB = np.zeros((n, self.n_estimators), dtype=int)
        OOB = np.full((n, self.n_estimators), True)

        # Draw all bootstrap indices up front, in a separate pass, so the
        # sequence of draws from the global NumPy RNG stays as it was.
        for eid in range(self.n_estimators):
            IB[:, eid] = np.random.choice(n, n)
            OOB[IB[:, eid], eid] = False

        for eid, est in enumerate(self.estimators):
            ib_idx = IB[:, eid]
            oob_idx = OOB[:, eid]
            est.fit(X[ib_idx, :], y[ib_idx])
            if not oob_idx.any():
                # The bootstrap happened to cover every row (likely only for
                # very small n); predicting on zero rows would raise.
                continue
            Y_oob[oob_idx, eid] = est.predict_proba(X[oob_idx, :])[:, 1]
            n_oob[oob_idx] += 1

        # Keep rows with at least two OOB predictions so a variance exists.
        oob_idx = n_oob > 1
        Y_oob_ = Y_oob[oob_idx, :]
        n_oob_ = n_oob[oob_idx]
        z_hat = np.nanmean(Y_oob_, axis=1)
        z_true = y[oob_idx]

        # Rows whose OOB predictions agree (low variance) and are numerous
        # receive a larger calibration weight.
        beta = self.beta0 + np.nanvar(Y_oob_, axis=1) * n_oob_ / 2
        alpha = self.alpha0 + n_oob_ / 2
        z_weight = alpha / beta

        if self.ctype == "logistic":
            self.calibrator.fit(z_hat[:, np.newaxis], z_true, z_weight)
        else:
            self.calibrator.fit(z_hat, z_true, z_weight)
        self.is_fitted_ = True
        return self

    def predict_proba(self, X):
        """Return calibrated probabilities as an ``(n_samples, 2)`` array.

        Column 1 is the calibrated mean of the trees' column-1 scores and
        column 0 is its complement.

        Raises
        ------
        ValueError
            If ``ctype`` is not ``"logistic"`` or ``"isotonic"``.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')
        self._check_ctype()

        n = X.shape[0]
        z = np.zeros(n)
        for est in self.estimators:
            z += est.predict_proba(X)[:, 1]
        z /= len(self.estimators)

        y_mat = np.zeros((n, 2))
        if self.ctype == "logistic":
            y_mat[:, 1] = self.calibrator.predict_proba(z[:, np.newaxis])[:, 1]
        else:
            y_mat[:, 1] = self.calibrator.predict(z)

        y_mat[:, 0] = 1 - y_mat[:, 1]
        return y_mat

    def predict(self, X):
        """Return, per sample, the column index with the larger calibrated probability.

        This equals the class label only when labels are encoded as 0/1.
        """
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)