def test_poisson(self):
    """PoissonRegressor fitted on Poisson().fitted(self.eta) must reach R^2 > 0.9."""
    regressor = PoissonRegressor()
    target = Poisson().fitted(self.eta)
    regressor.fit(self.X, target)

    # In-sample residuals of the fitted regressor.
    residual = regressor.predict(self.X) - target

    # R^2 = 1 - (mean squared residual) / (variance of the target).
    mean_sq_residual = np.mean(residual**2)
    target_variance = np.mean((target - np.mean(target))**2)
    r_squared = 1 - mean_sq_residual / target_variance
    assert_true(r_squared > .9)
def test_with_pipeline(self):
    """PoissonRegressor must work as the last step of a Pipeline after PCA."""
    steps = [('PCA', PCA()), ('Poisson', PoissonRegressor())]
    pipeline = Pipeline(steps)
    target = Poisson().fitted(self.eta)
    pipeline.fit(self.X, target)

    # In-sample residuals of the fitted pipeline.
    residual = pipeline.predict(self.X) - target

    # R^2 = 1 - (mean squared residual) / (variance of the target).
    mean_sq_residual = np.mean(residual**2)
    target_variance = np.mean((target - np.mean(target))**2)
    r_squared = 1 - mean_sq_residual / target_variance
    assert_greater(r_squared, .9)
def test_poisson_exposure(self):
    """PoissonRegressor must reach R^2 > 0.9 when an exposure is supplied.

    The target is built from ``self.eta + log(exposure)`` and the same
    exposure vector is passed to both ``fit`` and ``predict``.
    """
    regressor = PoissonRegressor()
    # NOTE(review): size is hard-coded to 100 and the draw is unseeded;
    # presumably self.eta / self.X have 100 rows -- confirm against the fixture.
    exposure = np.random.exponential(scale=10, size=100)
    target = Poisson().fitted(self.eta + np.log(exposure))
    regressor.fit(self.X, target, exposure=exposure)

    # In-sample residuals, predicting with the same exposure.
    residual = regressor.predict(self.X, exposure=exposure) - target

    # R^2 = 1 - (mean squared residual) / (variance of the target).
    mean_sq_residual = np.mean(residual**2)
    target_variance = np.mean((target - np.mean(target))**2)
    r_squared = 1 - mean_sq_residual / target_variance
    assert_true(r_squared > .9)
def fit_poisson(X, Y):
    """
    Fits the Poisson regression model with the training data

    The response passed to the GLM is the sum of ``Y`` along axis 1.

    :param X: the feature matrix
    :param Y: the label matrix
    :return: the fitted Poisson model (instance of
        statsmodels.genmod.generalized_linear_model.GLMResults)
    """
    # Call NumPy's sum explicitly: the builtin sum() does not accept an
    # ``axis`` keyword, so the bare name only works when it has been shadowed
    # by a NumPy import elsewhere in the module.
    import numpy as np

    totals = np.sum(Y, axis=1)
    poisson_glm = GLM(totals, X, family=Poisson())
    return poisson_glm.fit()
def test_with_pipeline(self):
    """PoissonRegressor must fit inside a Pipeline and the pipeline repr must match."""
    steps = [('PCA', PCA()), ('Poisson', PoissonRegressor())]
    pipeline = Pipeline(steps)
    target = Poisson().fitted(self.eta)
    pipeline.fit(self.X, target)

    # In-sample residuals of the fitted pipeline.
    residual = pipeline.predict(self.X) - target

    # R^2 = 1 - (mean squared residual) / (variance of the target).
    mean_sq_residual = np.mean(residual**2)
    target_variance = np.mean((target - np.mean(target))**2)
    r_squared = 1 - mean_sq_residual / target_variance
    assert_true(r_squared > .9)

    # The string form of the pipeline is pinned exactly.
    expected_repr = '''Pipeline(PCA=PCA(copy=True, n_components=None, whiten=False), PCA__copy=True, PCA__n_components=None, PCA__whiten=False, Poisson=PoissonRegressor())'''
    assert_equal(str(pipeline), expected_repr)
def setUpClass(cls):
    """Attach a PoissonWrapper and a plain Poisson family to the test class."""
    wrapper = PoissonWrapper()
    cls.forwarding_family = wrapper
    reference = Poisson()
    cls.family = reference
def __init__(self, link=L.log):
    """Create the underlying Poisson family with the given link.

    For now statsmodels 0.8.0 still takes a link as an argument; this will
    follow the changes in statsmodels whenever they happen.
    """
    poisson_family = Poisson(link=link)
    self.family = poisson_family
def fit(self, X, y):
    """Fit the model

    Chooses starting values (unless ``self.start`` is already set), runs the
    MCMC sampler, stores the coefficient summaries and then computes
    deviance-based goodness-of-fit statistics on the training data.

    Parameters
    ----------
    X : numpy array
        The feature (or design) matrix.
    y : numpy array
        The response variable.

    Returns
    -------
    self
        Updates internal attributes, such as `coef_` and `intercept_`.
    """
    t0 = dt()
    X = check_array(X, force_all_finite='allow-nan', estimator=self, copy=True)
    if not self.has_constant:
        X = add_constant(X, prepend=True)

    if self.start is None:
        if self.verbose:
            print_time("Initializing Coefficients...", t0, dt(), backsn=True)
        # One starting value per design column plus any extra parameters.
        n_params = X.shape[1] + self.extra_params
        if self.initialize_weights == 'sklearn':
            # TODO: update with Weibull Regression starting values
            # Starting values come from a statsmodels GLM fit. With sklearn
            # >= 0.23.2 a PoissonRegressor(alpha=1/C, fit_intercept=False)
            # fit could be used here instead.
            mod = glm_pois(y, X, family=Poisson()).fit()
            self.start = mod.params.reshape(-1, )
            if self.extra_params > 0:
                # Extra parameters start at 1.
                self.start = np.concatenate(
                    (self.start, np.repeat(1, self.extra_params)))
        elif self.initialize_weights == 'ones':
            self.start = np.ones(shape=n_params)
        elif self.initialize_weights == 'random':
            # Draw one standard-normal value per parameter. The count must be
            # passed as ``size``; passed positionally it is taken as ``loc``
            # and a single scalar is returned.
            self.start = np.random.normal(size=(n_params, ))
        else:
            self.start = np.zeros(shape=n_params)

    if self.verbose:
        print_time("Beginning MCMC...", t0, dt(), backsn=True)
    postArgs = {
        'X': X,
        'Y': y,
        # Passed through as None when C is None, otherwise doubled.
        'l_scale': self.C if self.C is None else 2 * self.C
    }
    algo_res = applyMCMC(st=self.start,
                         ni=self.niter,
                         lp=self.lpost,
                         algo=self.algo,
                         postArgs=postArgs,
                         algoOpts=self.algo_options,
                         sd=self.retry_sd,
                         max_tries=self.retry_max_tries)
    self.mcmc_params = algo_res['parameters']
    self.prev_vals = algo_res['prev_vals']
    self.coef_, self.intercept_, self.extra_params_sum_ = self._create_coefs(
        self.mcmc_params, self.param_summary, self.extra_params)
    self.n_iter_ = self.niter

    # get model summaries
    weights = _check_sample_weight(None, X)
    # The first column of X is dropped before predicting.
    y_pred = self.predict(X[:, 1:])
    y_mean = np.average(y, weights=weights)
    dev = np.sum(weights * (2 * (xlogy(y, y / y_pred) - y + y_pred)))
    dev_null = np.sum(weights * (2 * (xlogy(y, y / y_mean) - y + y_mean)))
    self.deviance_ = dev
    self.null_deviance_ = dev_null
    self.pearson_residuals_ = (y - y_pred) / np.sqrt(y_pred)
    self.pearson_chi2_ = np.sum(self.pearson_residuals_**2)
    self.model_d2_ = 1 - dev / dev_null
    self.df_model_ = X.shape[1] - 1
    self.df_residuals_ = X.shape[0] - X.shape[1]
    self.dispersion_scale_ = self.pearson_chi2_ / self.df_residuals_
    self.dispersion_scale_sqrt_ = np.sqrt(self.dispersion_scale_)
    return self
def fit(self, X, y):
    """Fit the model

    Picks starting values when ``self.start`` is unset, runs the MCMC
    sampler, stores the coefficient summaries, and copies the statistics
    returned by ``_deviance_dispersion_update`` onto the estimator.

    Parameters
    ----------
    X : numpy array
        The feature (or design) matrix.
    y : numpy array
        The response variable.

    Returns
    -------
    self
        Updates internal attributes, such as `coef_` and `intercept_`.
    """
    started_at = dt()
    X = check_array(X, force_all_finite='allow-nan', estimator=self, copy=True)
    if not self.has_constant:
        X = add_constant(X, prepend=True)

    if self.start is None:
        if self.verbose:
            print_time("Initializing Coefficients...", started_at, dt(),
                       backsn=True)
        if self.initialize_weights == 'sklearn':
            penalty = 1 if self.C is None else self.C
            try:
                from sklearn.linear_model import PoissonRegressor as glm_pois_sk
                init_fit = glm_pois_sk(alpha=1 / penalty,
                                       fit_intercept=False,
                                       max_iter=1000).fit(X, y)
                self.start = init_fit.coef_.reshape(-1, )
            except ImportError:
                print(
                    'Older sklearn, no PoissonRegressor. Using statsmodels instead'
                )
                init_fit = glm_pois(y, X, family=Poisson()).fit()
                self.start = init_fit.params.reshape(-1, )
            if self.extra_params > 0:
                # Extra parameters start at 1.
                extra_start = np.repeat(1, self.extra_params)
                self.start = np.concatenate((self.start, extra_start))
        elif self.initialize_weights == 'ones':
            self.start = np.ones(shape=X.shape[1] + self.extra_params)
        elif self.initialize_weights == 'random':
            self.start = np.random.normal(
                size=(X.shape[1] + self.extra_params, ))
        else:
            self.start = np.zeros(shape=X.shape[1] + self.extra_params)

    if self.verbose:
        print_time("Beginning MCMC...", started_at, dt(), backsn=True)

    # l_scale is passed through as None when C is None, otherwise doubled.
    posterior_args = {
        'X': X,
        'Y': y,
        'l_scale': None if self.C is None else 2 * self.C
    }
    sampler_out = applyMCMC(st=self.start,
                            ni=self.niter,
                            lp=self.lpost,
                            algo=self.algo,
                            postArgs=posterior_args,
                            algoOpts=self.algo_options,
                            sd=self.retry_sd,
                            max_tries=self.retry_max_tries)
    self.mcmc_params = sampler_out['parameters']
    self.prev_vals = sampler_out['prev_vals']
    self.coef_, self.intercept_, self.extra_params_sum_ = self._create_coefs(
        self.mcmc_params, self.param_summary, self.extra_params)
    self.n_iter_ = self.niter

    # get model summaries
    if self.over_dispersion:
        # The last extra-parameter summary is taken as the dispersion delta.
        delta = self.extra_params_sum_[-1]
        self.dispersion_delta_ = delta
        self.dispersion_estimation_ = 1 / (1 - delta)**2
    else:
        self.dispersion_delta_ = 0
        self.dispersion_estimation_ = None

    # The first column of X is dropped before computing the summaries.
    summaries = self._deviance_dispersion_update(X[:, 1:], y,
                                                 sample_weight=None)
    summary_names = (
        'deviance_',
        'null_deviance_',
        'pearson_residuals_',
        'pearson_chi2_',
        'model_d2_',
        'df_model_',
        'df_residuals_',
        'df_total_',
        'dispersion_scale_',
        'dispersion_scale_sqrt_',
    )
    # Each summary is stored on the estimator under the same name as its key.
    for attr_name in summary_names:
        setattr(self, attr_name, summaries[attr_name])
    return self