Ejemplo n.º 1
0
    def test_cls(self):
        """Compare PaloBoost and GradientBoostingClassifier by test-set AUC.

        Both models are trained with the same number of estimators, learning
        rate and maximum depth on the same train split of the data produced
        by ``make_hastie_11_2``; the test passes when PaloBoost's ROC AUC on
        the held-out split is strictly greater than the sklearn model's.
        """
        # Seed the global NumPy RNG before any data generation / splitting so
        # the run is repeatable (assumes the helpers below draw from the
        # global NumPy RNG -- TODO confirm).
        np.random.seed(1)
        n_samples = 10000
        test_size = 0.2
        n_est = 100
        max_depth = 7
        lr = 0.1

        X, y = make_hastie_11_2(n_samples)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size)

        # Identical hyper-parameters for both models keep the comparison fair.
        model_palo = PaloBoost(distribution="bernoulli",
                               n_estimators=n_est,
                               learning_rate=lr,
                               max_depth=max_depth)
        model_sklr = GradientBoostingClassifier(n_estimators=n_est,
                                                learning_rate=lr,
                                                max_depth=max_depth)

        model_palo.fit(X_train, y_train)
        # Column 1 of predict_proba is used as the score for roc_auc_score.
        proba_palo = model_palo.predict_proba(X_test)[:, 1]
        auc_palo = roc_auc_score(y_test, proba_palo)

        model_sklr.fit(X_train, y_train)
        proba_sklr = model_sklr.predict_proba(X_test)[:, 1]
        auc_sklr = roc_auc_score(y_test, proba_sklr)

        # assertGreater reports both AUC values when the comparison fails,
        # unlike assertTrue(a > b) which only says "False is not true".
        self.assertGreater(auc_palo, auc_sklr)
Ejemplo n.º 2
0
    def fit(self, X, y):
        """Fit ``self.n_paloboost`` PaloBoost models on random subsamples.

        Each round draws a boolean in-bag mask over the rows of ``X``
        (per row, or per block of ``self.block_size`` consecutive rows when
        a block size is set), trains one ``PaloBoost`` on the in-bag rows and
        stores it in ``self.estimators``. For a "bernoulli" model with
        ``self.calibrate`` set, an ``IsotonicRegression`` is additionally
        fitted on the estimator's predicted probabilities for the out-of-bag
        rows and stored in ``self.calibrators``.

        After fitting, ``self.feature_importances_`` holds the sum of the
        individual estimators' importances divided by ``self.n_paloboost``
        (or ``None`` when no estimator was trained).

        Parameters
        ----------
        X : array indexed as ``X[mask, :]`` with a boolean row mask.
        y : array indexed as ``y[mask]``; for "bernoulli" its sum is used as
            the positive-class count (assumes 0/1 labels -- TODO confirm).

        Returns
        -------
        None
            Also returned early, after logging an error, when a "bernoulli"
            target has fewer than three examples of either class.
        """
        np.random.seed(self.random_state)
        n = X.shape[0]

        # Reset all fitted state so a second call to fit() does not append to
        # calibrators or accumulate importances left over from a previous fit.
        self.estimators = []
        self.calibrators = []
        self.feature_importances_ = None

        is_bernoulli = self.distribution == "bernoulli"
        if is_bernoulli:
            n_pos = np.sum(y)
            if n_pos < 3 or n_pos > n - 3:
                logging.error(("the target (y) needs to have "
                               "at least three examples of each class"))
                return None

        importance_sum = None
        i = 0
        while i < self.n_paloboost:
            if self.block_size is not None:
                # Sample whole blocks of consecutive rows, then expand the
                # per-block decision to a per-row mask trimmed to n rows.
                n_block = int(n / self.block_size) + 1
                mask_block = (np.random.rand(n_block) < self.subsample0)
                mask = np.repeat(mask_block, self.block_size)[:n]
            else:
                mask = (np.random.rand(n) < self.subsample0)

            X_i, y_i = X[mask, :], y[mask]
            X_j, y_j = X[~mask, :], y[~mask]

            # Redraw the split when either side contains a single class.
            # NOTE(review): there is no retry cap, so a subsample rate that
            # can never yield both classes on both sides would loop forever.
            if (is_bernoulli
                    and (np.unique(y_i).shape[0] == 1
                         or np.unique(y_j).shape[0] == 1)):
                continue

            est = PaloBoost(distribution=self.distribution,
                            learning_rate=self.learning_rate,
                            max_depth=self.max_depth,
                            n_estimators=self.n_estimators,
                            subsample=self.subsample1,
                            subsample_splts=self.subsample2,
                            random_state=i * self.n_estimators)
            est.fit(X_i, y_i)
            self.estimators.append(est)

            # Accumulate into a private copy: aliasing the estimator's own
            # array and then using in-place += / /= would silently overwrite
            # that estimator's feature importances.
            if importance_sum is None:
                importance_sum = np.array(est.feature_importances_,
                                          dtype=float)
            else:
                importance_sum += est.feature_importances_

            if is_bernoulli and self.calibrate:
                # Calibrate on out-of-bag rows only.
                z_j = est.predict_proba(X_j)[:, 1]
                clb = IsotonicRegression(y_min=0,
                                         y_max=1,
                                         out_of_bounds="clip")
                clb.fit(z_j, y_j)
                self.calibrators.append(clb)
            i += 1

        if importance_sum is not None:
            self.feature_importances_ = importance_sum / self.n_paloboost