예제 #1
0
 def test_poisson(self):
     """Fit ``PoissonRegressor`` on ``self.X`` and require in-sample R^2 > 0.9.

     The target vector is whatever ``Poisson().fitted(self.eta)`` returns.
     """
     regressor = PoissonRegressor()
     target = Poisson().fitted(self.eta)
     regressor.fit(self.X, target)
     predicted = regressor.predict(self.X)

     # R^2 = 1 - (mean squared residual) / (mean squared deviation from the mean)
     residual = predicted - target
     mean_sq_residual = np.mean(residual**2)
     mean_sq_total = np.mean((target - np.mean(target))**2)
     rsq = 1 - mean_sq_residual / mean_sq_total
     assert_true(rsq > .9)
예제 #2
0
 def test_with_pipeline(self):
     """Run ``PoissonRegressor`` behind a ``PCA`` step inside a ``Pipeline``.

     The pipeline is fitted on ``self.X`` against
     ``Poisson().fitted(self.eta)`` and must reach an in-sample R^2 above 0.9.
     """
     steps = [('PCA', PCA()), ('Poisson', PoissonRegressor())]
     pipeline = Pipeline(steps)
     target = Poisson().fitted(self.eta)
     pipeline.fit(self.X, target)
     predicted = pipeline.predict(self.X)

     # R^2 = 1 - (mean squared residual) / (mean squared deviation from the mean)
     residual = predicted - target
     mean_sq_residual = np.mean(residual**2)
     mean_sq_total = np.mean((target - np.mean(target))**2)
     rsq = 1 - mean_sq_residual / mean_sq_total
     assert_greater(rsq, .9)
예제 #3
0
 def test_poisson_exposure(self):
     """Fit and predict with an ``exposure`` argument; require R^2 > 0.9.

     100 exposures are drawn from an exponential distribution (scale 10).
     Their log is added to ``self.eta`` before it is passed to
     ``Poisson().fitted`` to build the target, and the same exposures are
     handed to both ``fit`` and ``predict``.
     """
     regressor = PoissonRegressor()
     exposure = np.random.exponential(scale=10, size=100)
     linear_predictor = self.eta + np.log(exposure)
     target = Poisson().fitted(linear_predictor)
     regressor.fit(self.X, target, exposure=exposure)
     predicted = regressor.predict(self.X, exposure=exposure)

     # R^2 = 1 - (mean squared residual) / (mean squared deviation from the mean)
     residual = predicted - target
     mean_sq_residual = np.mean(residual**2)
     mean_sq_total = np.mean((target - np.mean(target))**2)
     rsq = 1 - mean_sq_residual / mean_sq_total
     assert_true(rsq > .9)
예제 #4
0
def fit_poisson(X, Y):
    """Fit a Poisson GLM whose response is ``Y`` summed along axis 1.

    :param X: the feature matrix, passed to ``GLM`` as the second argument
    :param Y: the label matrix; reduced with ``sum(Y, axis=1)`` before fitting
    :return: the object returned by ``GLM(...).fit()`` (per the original
        docstring, a statsmodels ``GLMResults`` instance)
    """
    # NOTE(review): ``sum`` is called with ``axis=``, so it is presumably
    # numpy's ``sum`` brought in by a file-level import rather than the
    # builtin -- confirm against the module's imports.
    response = sum(Y, axis=1)
    return GLM(response, X, family=Poisson()).fit()
예제 #5
0
 def test_with_pipeline(self):
     """Check a ``PCA`` -> ``PoissonRegressor`` pipeline's fit quality and repr.

     The pipeline is fitted on ``self.X`` against
     ``Poisson().fitted(self.eta)``; the in-sample R^2 must exceed 0.9 and
     ``str(model)`` must match the expected text exactly.
     """
     steps = [('PCA', PCA()), ('Poisson', PoissonRegressor())]
     pipeline = Pipeline(steps)
     target = Poisson().fitted(self.eta)
     pipeline.fit(self.X, target)
     predicted = pipeline.predict(self.X)

     # R^2 = 1 - (mean squared residual) / (mean squared deviation from the mean)
     residual = predicted - target
     mean_sq_residual = np.mean(residual**2)
     mean_sq_total = np.mean((target - np.mean(target))**2)
     rsq = 1 - mean_sq_residual / mean_sq_total
     assert_true(rsq > .9)

     # The expected text spans two lines; its embedded newline and leading
     # spaces are part of the comparison.
     expected_repr = '''Pipeline(PCA=PCA(copy=True, n_components=None, whiten=False), PCA__copy=True,
  PCA__n_components=None, PCA__whiten=False, Poisson=PoissonRegressor())'''
     assert_equal(str(pipeline), expected_repr)
예제 #6
0
 def setUpClass(cls):
     """Attach a ``PoissonWrapper`` and a ``Poisson`` instance to the class."""
     wrapper = PoissonWrapper()
     cls.forwarding_family = wrapper
     reference = Poisson()
     cls.family = reference
예제 #7
0
 def __init__(self, link=L.log):
     """Create the ``Poisson`` family stored on ``self.family``.

     Parameters
     ----------
     link
         Forwarded unchanged as ``Poisson(link=link)``; defaults to ``L.log``.
     """
     # statsmodels 0.8.0 still accepts a link as a constructor argument;
     # this should follow statsmodels if that ever changes.
     poisson_family = Poisson(link=link)
     self.family = poisson_family
예제 #8
0
    def fit(self, X, y):
        """Fit the model by MCMC and store coefficients and fit summaries.

        Parameters
        ----------
        X : numpy array
            The feature (or design) matrix. A constant column is prepended
            unless ``self.has_constant`` is true.
        y : numpy array
            The response variable.

        Returns
        -------
        self
            Updates internal attributes, such as `coef_` and `intercept_`.

        Notes
        -----
        When ``self.start`` is ``None`` it is filled in according to
        ``self.initialize_weights`` (``'sklearn'``, ``'ones'``, ``'random'``,
        anything else meaning zeros) and is left set on the instance.
        """
        t0 = dt()
        X = check_array(X,
                        force_all_finite='allow-nan',
                        estimator=self,
                        copy=True)
        if not self.has_constant:
            X = add_constant(X, prepend=True)

        # Only build starting values when the caller did not supply any.
        if self.start is None:
            if self.verbose:
                print_time("Initializing Coefficients...",
                           t0,
                           dt(),
                           backsn=True)
            # One starting value per design column plus the extra parameters.
            n_start = X.shape[1] + self.extra_params
            if self.initialize_weights == 'sklearn':
                # TODO: update with Weibull Regression starting values
                # Despite the option name, the starting values come from a
                # statsmodels Poisson GLM fit. Per the original author's
                # note, sklearn 0.23.2 could supply them instead via
                # glm_pois(alpha=1/C, fit_intercept=False, max_iter=1000).
                mod = glm_pois(y, X, family=Poisson()).fit()
                self.start = mod.params.reshape(-1, )
                if self.extra_params > 0:
                    # Extra (non-regression) parameters start at 1.
                    self.start = np.concatenate(
                        (self.start, np.repeat(1, self.extra_params)))
            elif self.initialize_weights == 'ones':
                self.start = np.ones(shape=n_start)
            elif self.initialize_weights == 'random':
                # ``size=`` is required: a bare positional argument is taken
                # as ``loc`` and would yield a single scalar draw.
                self.start = np.random.normal(size=n_start)
            else:
                self.start = np.zeros(shape=n_start)

        if self.verbose:
            print_time("Beginning MCMC...", t0, dt(), backsn=True)

        # ``l_scale`` is ``None`` when ``self.C`` is ``None``, else ``2 * C``.
        postArgs = {
            'X': X,
            'Y': y,
            'l_scale': None if self.C is None else 2 * self.C
        }

        algo_res = applyMCMC(st=self.start,
                             ni=self.niter,
                             lp=self.lpost,
                             algo=self.algo,
                             postArgs=postArgs,
                             algoOpts=self.algo_options,
                             sd=self.retry_sd,
                             max_tries=self.retry_max_tries)

        self.mcmc_params = algo_res['parameters']
        self.prev_vals = algo_res['prev_vals']

        self.coef_, self.intercept_, self.extra_params_sum_ = self._create_coefs(
            self.mcmc_params, self.param_summary, self.extra_params)
        self.n_iter_ = self.niter

        # Model summaries. ``predict`` is given X without its first column.
        # NOTE(review): presumably that column is the constant -- confirm for
        # the ``has_constant=True`` case, where X is used as supplied.
        weights = _check_sample_weight(None, X)
        y_pred = self.predict(X[:, 1:])
        y_mean = np.average(y, weights=weights)
        dev = np.sum(weights * (2 * (xlogy(y, y / y_pred) - y + y_pred)))
        dev_null = np.sum(weights * (2 * (xlogy(y, y / y_mean) - y + y_mean)))
        self.deviance_ = dev
        self.null_deviance_ = dev_null
        self.pearson_residuals_ = (y - y_pred) / np.sqrt(y_pred)
        self.pearson_chi2_ = np.sum(self.pearson_residuals_**2)
        self.model_d2_ = 1 - dev / dev_null
        self.df_model_ = X.shape[1] - 1
        self.df_residuals_ = X.shape[0] - X.shape[1]
        self.dispersion_scale_ = self.pearson_chi2_ / self.df_residuals_
        self.dispersion_scale_sqrt_ = np.sqrt(self.dispersion_scale_)

        return self
예제 #9
0
    def fit(self, X, y):
        """Fit the model by MCMC and store coefficients and fit summaries.

        Parameters
        ----------
        X : numpy array
            The feature (or design) matrix. A constant column is prepended
            unless ``self.has_constant`` is true.
        y : numpy array
            The response variable.

        Returns
        -------
        self
            Updates internal attributes, such as `coef_` and `intercept_`.

        Notes
        -----
        When ``self.start`` is ``None`` it is filled in according to
        ``self.initialize_weights`` (``'sklearn'``, ``'ones'``, ``'random'``,
        anything else meaning zeros) and is left set on the instance.
        """
        t0 = dt()
        X = check_array(X, force_all_finite='allow-nan', estimator=self,
                        copy=True)
        if not self.has_constant:
            X = add_constant(X, prepend=True)

        # Only build starting values when the caller did not supply any.
        if self.start is None:
            if self.verbose:
                print_time("Initializing Coefficients...", t0, dt(),
                           backsn=True)
            if self.initialize_weights == 'sklearn':
                penalty = 1 if self.C is None else self.C
                try:
                    # Use sklearn's PoissonRegressor when this sklearn has it.
                    from sklearn.linear_model import PoissonRegressor as sk_poisson
                    sk_fit = sk_poisson(alpha=1 / penalty,
                                        fit_intercept=False,
                                        max_iter=1000).fit(X, y)
                    self.start = sk_fit.coef_.reshape(-1, )
                except ImportError:
                    # Fall back to a statsmodels Poisson GLM fit.
                    print(
                        'Older sklearn, no PoissonRegressor. Using statsmodels instead'
                    )
                    sm_fit = glm_pois(y, X, family=Poisson()).fit()
                    self.start = sm_fit.params.reshape(-1, )
                if self.extra_params > 0:
                    # Extra (non-regression) parameters start at 1.
                    padding = np.repeat(1, self.extra_params)
                    self.start = np.concatenate((self.start, padding))
            else:
                # One starting value per design column plus extra parameters.
                n_start = X.shape[1] + self.extra_params
                if self.initialize_weights == 'ones':
                    self.start = np.ones(shape=n_start)
                elif self.initialize_weights == 'random':
                    self.start = np.random.normal(size=(n_start, ))
                else:
                    self.start = np.zeros(shape=n_start)

        if self.verbose:
            print_time("Beginning MCMC...", t0, dt(), backsn=True)

        # ``l_scale`` is ``None`` when ``self.C`` is ``None``, else ``2 * C``.
        l_scale = None if self.C is None else 2 * self.C
        postArgs = {'X': X, 'Y': y, 'l_scale': l_scale}

        mcmc_out = applyMCMC(st=self.start,
                             ni=self.niter,
                             lp=self.lpost,
                             algo=self.algo,
                             postArgs=postArgs,
                             algoOpts=self.algo_options,
                             sd=self.retry_sd,
                             max_tries=self.retry_max_tries)

        self.mcmc_params = mcmc_out['parameters']
        self.prev_vals = mcmc_out['prev_vals']

        self.coef_, self.intercept_, self.extra_params_sum_ = self._create_coefs(
            self.mcmc_params, self.param_summary, self.extra_params)
        self.n_iter_ = self.niter

        # Dispersion summaries: with over-dispersion enabled, delta is the
        # last entry of the extra-parameter summary.
        if self.over_dispersion:
            self.dispersion_delta_ = self.extra_params_sum_[-1]
            self.dispersion_estimation_ = 1 / (1 - self.dispersion_delta_)**2
        else:
            self.dispersion_delta_ = 0
            self.dispersion_estimation_ = None

        # Copy each deviance/dispersion summary from the helper's result onto
        # the estimator under the same name. X is passed without its first
        # column.
        summary = self._deviance_dispersion_update(X[:, 1:],
                                                   y,
                                                   sample_weight=None)
        for attr_name in ('deviance_', 'null_deviance_', 'pearson_residuals_',
                          'pearson_chi2_', 'model_d2_', 'df_model_',
                          'df_residuals_', 'df_total_', 'dispersion_scale_',
                          'dispersion_scale_sqrt_'):
            setattr(self, attr_name, summary[attr_name])

        return self