def fit_poisson_simulation(arrivals_departures):
    """Fit separate Poisson regressions for arrivals and departures.

    Each model regresses its count column of *arrivals_departures* on the
    treatment-coded ``months``, ``hours`` and ``weekday_dummy`` columns.

    Returns
    -------
    list
        ``[arrivals_results, departures_results]``, the two fitted
        statsmodels results objects, in that order.
    """
    arrivals_endog, arrivals_design = patsy.dmatrices(
        "arrivals ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)",
        arrivals_departures,
        return_type='dataframe')
    departures_endog, departures_design = patsy.dmatrices(
        "departures ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)",
        arrivals_departures,
        return_type='dataframe')

    # Only the departures response is null-filled; arrivals are left as-is.
    departures_endog[pd.isnull(departures_endog)] = 0

    # disp=0 silences the optimizer's convergence printout.
    arrivals_fit = sm.Poisson(arrivals_endog, arrivals_design).fit(disp=0)
    departures_fit = sm.Poisson(departures_endog, departures_design).fit(disp=0)
    return [arrivals_fit, departures_fit]
def setupClass(cls):
    """Prepare an unregularized and an L1-regularized Poisson fit on RAND data.

    Stores ``kvars``, ``m``, ``res_unreg`` (fit on the first ``m`` columns)
    and ``res_reg`` (fit on all columns with a penalty on the rest) on *cls*.
    """
    cls.kvars = 10  # Number of variables
    cls.m = 7  # Number of unregularized parameters

    dataset = sm.datasets.randhie.load()
    nobs = len(dataset.exog)
    full_exog = sm.add_constant(
        dataset.exog.view(float).reshape(nobs, -1), prepend=True)

    # Unregularized reference fit using only the first ``m`` columns.
    reduced_model = sm.Poisson(dataset.endog, full_exog[:, :cls.m])
    cls.res_unreg = reduced_model.fit(method="newton", disp=False)

    # Zero penalty on the first ``m`` parameters and a very large penalty on
    # the remaining ones, which effectively drops those columns.
    penalty = 10 * len(dataset.endog) * np.ones(cls.kvars)
    penalty[:cls.m] = 0
    full_model = sm.Poisson(dataset.endog, full_exog)
    cls.res_reg = full_model.fit_regularized(
        method='l1', alpha=penalty, disp=False, acc=1e-10, maxiter=2000,
        trim_mode='auto')
def fit_poisson_using_goals(self, matches, team_name, scored):
    """Fit a statsmodels Poisson model of a team's goals on opponent rating.

    For every match the regressor row is ``[opponent_rating, 1]``, where the
    opponent rating is its resulting rating minus its rating change and the
    trailing ``1`` is the intercept (a0) term. The response is the goals
    *team_name* scored when *scored* is truthy, otherwise the goals it
    allowed. A match where *team_name* is not the home team is treated as an
    away match.

    Returns the fitted results object.
    """
    design_rows = []
    goal_counts = []
    for match in matches:
        if match.home_team == team_name:
            opponent_rating = (match.away_team_resulting_rating
                               - match.away_team_rating_change)
            goals_for = match.home_team_score
            goals_against = match.away_team_score
        else:
            opponent_rating = (match.home_team_resulting_rating
                               - match.home_team_rating_change)
            goals_for = match.away_team_score
            goals_against = match.home_team_score
        design_rows.append([opponent_rating, 1])  # 1 is the a0 term
        goal_counts.append(goals_for if scored else goals_against)

    model = sm.Poisson(goal_counts, design_rows)
    return model.fit(method=self.OPTIMIZATION_METHOD)
def main(args):
    """Fit an intercept-only Poisson model per subject and save the results.

    For each ``(subject, counts)`` pair yielded by
    ``utils.event_partition_generator(args, num_days=7)`` this records the
    mean, variance and length of the counts, the fitted intercept and the
    negative log-likelihood. A failed fit is recorded as NaN. The table is
    written to ``params_poisson_count.csv`` under ``args.working_dir``.
    """
    logger.info('==================================')
    logger.info('COUNT POISSON FIT')

    columns = [
        'id', 'lambda', 'count_mean', 'count_var', 'count_len',
        'loglikelihood'
    ]
    # Rows are collected first and the frame is built once at the end; this
    # avoids writing through ``df[col].values[i]``, which depends on pandas
    # handing back a writable view of the column.
    rows = []
    for subj, counts in utils.event_partition_generator(args, num_days=7):
        row = {
            'id': subj,
            'count_mean': np.mean(counts),
            'count_var': np.var(counts),
            'count_len': len(counts),
        }
        try:
            res = sm.Poisson(counts, np.ones_like(counts)).fit(disp=0)
            # NOTE(review): statsmodels' Poisson uses a log link, so this
            # intercept is presumably log(rate), not the rate; confirm that
            # consumers of the 'lambda' column expect that.
            lambda_param = res.params[0]
            loglikelihood = -res.llf  # stored as negative log-likelihood
        except Exception:
            # Deliberately broad: one subject failing to fit must not abort
            # the whole run.
            logger.info('Could not fit Poisson on %s', subj)
            lambda_param, loglikelihood = np.nan, np.nan
        row['lambda'] = lambda_param
        row['loglikelihood'] = loglikelihood
        rows.append(row)

    df_poisson_params = pd.DataFrame(rows, columns=columns)
    df_poisson_params.to_csv(
        os.path.join(args.working_dir, 'params_poisson_count.csv'),
        index=False)
def tiny_poisson(l):
    """Fit a Poisson model on *l* and score its capped 99% quantile prediction.

    Each item of *l* is a row whose first element is the response and whose
    remaining elements are the regressors.

    Returns
    -------
    list
        ``[fitted_model, capped_quantiles, rmse]`` on success; the value of
        ``return_zeros(ytr, reason)`` when the response is all zeros, the fit
        hits a singular matrix, or an assertion fails; ``[]`` when a
        ``ValueError`` or a non-singular ``LinAlgError`` is ignored.
    """
    xtr = np.array([item[1:] for item in l])
    ytr = np.array([item[0] for item in l]).reshape(-1, 1)
    poi_res = []
    try:
        if np.count_nonzero(ytr) > 0:
            # Nelder-Mead is used because it works without inverting a
            # possibly singular matrix.
            poi_mod = sm.Poisson(ytr, xtr).fit(
                method="nm", maxiter=10000, disp=0, maxfun=10000)
            poi_mean_pred = poi_mod.predict(xtr)
            poi_ppf_obs = stats.poisson.ppf(q=0.99, mu=poi_mean_pred)
            poi_ppf_obs[poi_ppf_obs > 150] = 150  # cap the quantiles at 150
            poi_rmse_tr = np.sqrt(mean_squared_error(ytr, poi_ppf_obs))
            poi_res = [poi_mod, poi_ppf_obs, poi_rmse_tr]
        else:
            poi_res = return_zeros(ytr, "AllZeros")
    except np.linalg.LinAlgError as e:
        if 'Singular matrix' in str(e):
            # Record the percentage of zero responses for this failure.
            nzeros = len(ytr) - np.count_nonzero(ytr)
            prop = round((100 * nzeros) / len(ytr), 2)
            poi_prop_err_singmat.append(prop)
            # Previously assigned to an unused ``nb_res``, so the fallback
            # was computed and then lost.
            poi_res = return_zeros(ytr, "Singular")
        # Any other LinAlgError is ignored and ``[]`` is returned.
    except AssertionError:
        poi_res = return_zeros(ytr, "Assert")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
    return poi_res
def setup(self):
    """Fit a fresh L1-regularized Poisson model and store it on ``self.results``.

    The fit is redone for each test because tests change the results object.
    """
    design = self.exog
    np.random.seed(987689)
    rate = np.exp(design.sum(1) - design.mean())
    counts = np.random.poisson(rate)
    self.results = sm.Poisson(counts, design).fit_regularized(
        method='l1', disp=0, alpha=10)
def core(X, Y, Z=None):
    '''Fit a Poisson regression of log(Y / Z) on X and summarize the fit.

    Parameters
    ----------
    X : the explanatory variables (a constant column is appended here).
    Y : the observed values to be predicted.
    Z : the base; may be None or a Series with the same length as Y.
        When None, a base of 1 is used for every observation.

    Returns
    -------
    dict
        Three DataFrames: the likelihood-ratio / fit summary, the
        coefficient table, and the actual versus predicted values.
    '''
    X = sm.add_constant(X, prepend=False)
    X = X.rename(columns={'const': '截距'})
    if Z is None:
        # Build the unit base on Y's own index. A default RangeIndex would
        # misalign with any other index on Y and turn ``Y / Z`` into NaN.
        Z = pd.Series(1, index=Y.index)
    # Divide by the base, then take the log.
    y = Y / Z
    y_log = np.log(y)
    # Build the model.
    # NOTE(review): the model is fitted to the log of the rate rather than to
    # counts with an exposure term; confirm this is the intended
    # specification.
    poisson_mod = sm.Poisson(y_log, X)
    res = poisson_mod.fit(method="bfgs")
    y_pre = res.predict(X)
    Y_predict = np.exp(y_pre) * Z
    Y_predict.name = '预测值'
    df_predict_result = Y.to_frame(name='实际值').join(Y_predict)
    # Model description: parse the HTML summary tables into DataFrames.
    tables = res.summary().tables
    df_list = [pd.read_html(StringIO(t.as_html()))[0] for t in tables]
    # The second table is the coefficient table; its first row becomes the
    # header and its first column the index.
    dfinfo1 = df_list[1].fillna('Variables').set_index(0)
    dfinfo1 = dfinfo1.T.set_index('Variables').T
    dfinfo1.index.name = '项'
    dfinfo1.columns.name = '参数类型'
    dfinfo1.columns = ['回归系数', '标准误差', 'Z值', 'p值', '95%CI(下限)', '95%CI(上限)']
    dfinfo1['or值'] = np.exp(res.params)
    dfinfo1 = dfinfo1.round(3)
    R_Squared = r2_score(y_log, y_pre)
    tb2 = {
        'BIC': res.bic,
        'AIC': res.aic,
        'df': res.df_model,
        'p': res.llr_pvalue,
        '似然比卡方值': res.llr,
        'R²': R_Squared,
        'Pseud_R²': res.prsquared
    }
    dfinfo2 = pd.DataFrame([tb2]).round(3)
    dfinfo2 = dfinfo2.set_index('似然比卡方值')
    r = {
        '模型似然比检验和效果汇总': dfinfo2,
        'Poisson回归分析结果汇总': dfinfo1,
        '实际值与预测值': df_predict_result
    }
    return r
def fit_poisson(snowflake_connection, cfg, station, include_rebalance=False,
                time_interval='1H'):
    """Fit Poisson regressions of a station's arrivals and departures.

    Parameters
    ----------
    snowflake_connection, cfg
        Passed through to ``stations.GetStationData``.
    station
        Mapping with ``"station_id"``, ``"latitude"`` and ``"longitude"``.
    include_rebalance : bool
        Forwarded to ``rebalance_station_poisson_data``.
    time_interval : str
        Forwarded to ``rebalance_station_poisson_data``.

    Returns
    -------
    list
        ``[arrivals_results, departures_results]``.
    """
    # Use the correct delta data
    station_updates = stations.GetStationData(
        snowflake_connection, cfg, station["station_id"],
        station["latitude"], station["longitude"])
    # Honour the caller's flag; it was previously hard-coded to False here,
    # which made the parameter a no-op.
    arrivals_departures = rebalance_station_poisson_data(
        station_updates, station["station_id"], time_interval,
        include_rebalance=include_rebalance)

    # Create design matrix for months, hours, and weekday vs. weekend.
    # A raw "month" column cannot go into the model because it has no notion
    # of what "June" is. Instead each month gets its own indicator column and
    # each row is coded by the month it falls in. Ditto for hours and
    # weekday (=1).
    y_arr, X_arr = patsy.dmatrices(
        "arrivals ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)",
        arrivals_departures,
        return_type='dataframe')
    y_dep, X_dep = patsy.dmatrices(
        "departures ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)",
        arrivals_departures,
        return_type='dataframe')
    # Only the departures response is null-filled.
    y_dep[pd.isnull(y_dep)] = 0

    # Fit Poisson models for arrivals and departures (disp=0: no printout).
    arr_poisson_results = sm.Poisson(y_arr, X_arr).fit(disp=0)
    dep_poisson_results = sm.Poisson(y_dep, X_dep).fit(disp=0)
    return [arr_poisson_results, dep_poisson_results]
def setup(self):
    """Fit a fresh BFGS Poisson model and store it on ``self.results``.

    The fit is redone for each test because tests change the results object.
    """
    design = self.exog
    np.random.seed(987689)
    counts = np.random.poisson(np.exp(design.sum(1) - design.mean()))
    # Starting near the solution lets BFGS converge faster.
    initial = np.array([0.75334818, 0.99425553, 1.00494724, 1.00247112])
    self.results = sm.Poisson(counts, design).fit(
        start_params=initial, method='bfgs', disp=0)
def test_poisson_newton():
    """GH 24: Newton sometimes does not work well.

    Starting from all-minus-one parameters, the Newton fit is expected to
    report non-convergence.
    """
    nobs = 10000
    np.random.seed(987689)
    exog = sm.add_constant(np.random.randn(nobs, 3), prepend=True)
    endog = np.random.poisson(np.exp(exog.sum(1)))
    fit = sm.Poisson(endog, exog).fit(
        start_params=-np.ones(4), method='newton', disp=0)
    assert_(not fit.mle_retvals['converged'])
def train_glm_sm_unconstrained(xtrain, ytrain, tmodel):
    """Fit an unconstrained linear, logistic or Poisson model.

    Parameters
    ----------
    xtrain, ytrain
        Regressors and response, passed to the statsmodels model as
        ``(ytrain, xtrain)``.
    tmodel : str
        ``"linear"``/``"lin"``, ``"logistic"``/``"log"`` or
        ``"poisson"``/``"poi"``.

    Returns
    -------
    tuple
        ``(model, params)``: the model instance and the fitted parameters.

    Raises
    ------
    ValueError
        If *tmodel* is not one of the supported names. This previously
        surfaced as an unbound-local error on ``model``.
    """
    # Initialize the model.
    if tmodel in ("linear", "lin"):
        model = sm.OLS(ytrain, xtrain)
    elif tmodel in ("logistic", "log"):
        model = sm.Logit(ytrain, xtrain)
    elif tmodel in ("poisson", "poi"):
        model = sm.Poisson(ytrain, xtrain)
    else:
        raise ValueError(
            "unknown tmodel %r; expected 'linear'/'lin', 'logistic'/'log' "
            "or 'poisson'/'poi'" % (tmodel,))
    result = model.fit(disp=0)
    return model, result.params
def test_pd_offset_exposure(self):
    """Smoke test: fits run with pandas ``offset`` and ``exposure`` inputs.

    Fits a Poisson model with a Series offset, then a zero-inflated Poisson
    model with a Series exposure for each inflation link. Nothing is
    asserted; the test passes if no fit raises.
    """
    endog = pd.DataFrame({'F': [0.0, 0.0, 0.0, 0.0, 1.0]})
    exog = pd.DataFrame({
        'I': [1.0, 1.0, 1.0, 1.0, 1.0],
        'C': [0.0, 1.0, 0.0, 1.0, 0.0],
    })
    exposure = pd.Series([1., 1, 1, 2, 1])
    offset = pd.Series([1, 1, 1, 2, 1])

    sm.Poisson(endog=endog, exog=exog, offset=offset).fit()

    for link in ('logit', 'probit'):
        zip_model = sm.ZeroInflatedPoisson(
            endog=endog, exog=exog["I"], exposure=exposure, inflation=link)
        zip_model.fit()
def test_poi_nb_zip_zinb_tiny_subset(meta, m):
    """Compare four count models on an 800-row random subset of *m*.

    The first column of *m* is the response and the rest are regressors.
    Fits Poisson, negative binomial, zero-inflated Poisson and zero-inflated
    negative binomial models on a 60% training split and prints each model's
    test RMSE. *meta* is not used. Returns nothing.
    """
    # Column names after the first four; only the commented-out summary call
    # below uses them.
    exog_names = r"rowid;latitude;longitude;target;dbuiltup;dforest;drecreation;dbrr;dwrl;dwrn;dwrr;dcamping;dcaravan;dcross;dgolf;dheem;dhaven;dsafari;dwater;attr;dbath;lu;lc;maxmeanhaz;maxstdhaz".split(";")[4:]

    np.random.seed(2)
    # NOTE(review): ``high`` is exclusive, so the last row of m can never be
    # drawn; confirm whether ``high=len(m)`` was intended.
    randint = np.random.randint(0, high=len(m)-1, size=800)
    msel = m[randint, :]
    Y = msel[:, 0]
    X = msel[:, 1:]
    # Ynz, Xnz = trim_value(Y, X, 0)
    print("Msel shape: ", msel.shape)

    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=42)
    print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

    # The separators below were bare ``print`` names, which do nothing in
    # Python 3; they now emit the intended blank line.
    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    # Score the 95% quantile of the predicted distribution, not its mean.
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))
    # print(np.unique(poi_ppf_obs, return_counts=True))
    print("RMSE Poisson: ", poi_rmse)
    # print(poi_mod.summary(yname='tickbites', xname=exog_names))

    print()
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))
    # print(np.unique(nb_pred, return_counts=True))
    print("RMSE Negative Binomial: ", nb_rmse)

    print()
    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zip_mean_pred = zip_mod.predict(
        xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))
    print("RMSE Zero-Inflated Poisson", zip_rmse)

    print()
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
def setup(self):
    """Fit a fresh BFGS Poisson model and set default predict keywords.

    The fit is redone for each test because tests change the results
    object. Stores ``self.results`` and ``self.predict_kwds``.
    """
    design = self.exog
    np.random.seed(987689)
    counts = np.random.poisson(np.exp(design.sum(1) - design.mean()))
    # exposure/offset are deliberately not passed to the constructor; the
    # original note here reads "bug with default".
    poisson_model = sm.Poisson(counts, design)
    # Starting near the solution lets BFGS converge faster.
    initial = np.array([0.75334818, 0.99425553, 1.00494724, 1.00247112])
    self.results = poisson_model.fit(
        start_params=initial, method='bfgs', disp=0)
    # TODO: temporary, fixed in master
    self.predict_kwds = {'exposure': 1, 'offset': 0}
def regression(df, a, b, c, d, distribution):
    """Estimate vaccine efficacy (VE) and its interval from a regression.

    Approach follows
    https://timeseriesreasoning.com/contents/estimation-of-vaccine-efficacy-using-logistic-regression/

    * Patsy carves the X and y matrices out of ``INFECTED ~ VACCINATED``.
    * The model named by *distribution* is built and fitted.
    * The result line is written with ``stl.write``.

    Args:
        df: data containing the ``INFECTED`` and ``VACCINATED`` columns.
        a: sick vax (not used in the computation).
        b: sick unvax.
        c: total vax (not used in the computation).
        d: total unvax.
        distribution: ``"logit"``, ``"poisson"`` or ``"neg_bin"``.

    Returns:
        None.

    Raises:
        ValueError: if *distribution* is not one of the supported names.
            This previously surfaced as an unbound-local error on ``model``.
    """
    p_sick_unvax = b / d

    if distribution not in ("logit", "poisson", "neg_bin"):
        raise ValueError(
            "unknown distribution %r; expected 'logit', 'poisson' or "
            "'neg_bin'" % (distribution,))

    # Form the regression equation.
    expr = 'INFECTED ~ VACCINATED'
    # Use Patsy to carve out the X and y matrices.
    y_train, X_train = dmatrices(expr, df, return_type='dataframe')

    # Build the model. ``disp`` belongs to fit(), not to the constructor,
    # where statsmodels reports it as an unknown keyword.
    if distribution == "logit":
        model = sm.Logit(endog=y_train, exog=X_train)
    elif distribution == "poisson":
        model = sm.Poisson(endog=y_train, exog=X_train)
    else:
        model = sm.NegativeBinomial(endog=y_train, exog=X_train)
    results = model.fit(disp=False)
    params = results.params

    # Position 1 is the coefficient after the intercept (presumably
    # VACCINATED; verify against the design matrix). ``.iloc`` is used
    # because the Series is label-indexed and integer ``[1]`` relied on a
    # deprecated positional fallback.
    VE = VE_(params.iloc[1], p_sick_unvax)

    # Confidence intervals: column 0 is the lower and column 1 the upper
    # coefficient bound.
    conf = results.conf_int()
    # NOTE(review): the lower coefficient bound is named ``high``,
    # presumably because VE_ decreases as the coefficient grows; confirm.
    high, low = conf[0].iloc[1], conf[1].iloc[1]
    prsquared = results.prsquared
    VE_low, VE_high = VE_(low, p_sick_unvax), VE_(high, p_sick_unvax)
    stl.write(
        f"VE Regression {distribution} : {VE} % [{VE_low} , {VE_high}] | pseudo-R2 = {prsquared}"
    )
def tiny_poisson(l):
    """Fit a Poisson model on *l* and return it with in-sample predictions.

    Each item of *l* is a row whose first element is the response and whose
    remaining elements are the regressors.

    Returns ``[model, mean_prediction, quantile_95]``. Entries not yet
    computed when a ``LinAlgError`` is swallowed stay ``None``.
    """
    poi_mod = mean_pred = ppf_obs = None
    xtr = np.array([row[1:] for row in l])
    ytr = np.array([row[0] for row in l]).reshape(-1, 1)
    try:
        poi_mod = sm.Poisson(ytr, xtr).fit()
        # In-sample prediction; a new x could be used instead.
        mean_pred = poi_mod.predict(xtr)
        # Survival function at 1 and pmf at 2 are computed but not returned.
        sf_obs = stats.poisson.sf(2 - 1, mean_pred)
        pmf_obs = stats.poisson.pmf(2, mean_pred)
        ppf_obs = stats.poisson.ppf(q=0.95, mu=mean_pred)
    except np.linalg.LinAlgError as err:
        # Every LinAlgError is swallowed; only the singular case is reported.
        if 'Singular matrix' in str(err):
            print("Ignored a singular matrix.")
    return [poi_mod, mean_pred, ppf_obs]
def poisson_reg(train_df, test_df):
    """Fit a Poisson regression of ``total_cases`` and print in-sample error.

    Prints the first ten rows of the response, the design matrix, the
    predictions and the absolute errors, then the mean absolute error.
    *test_df* is not used. Returns nothing.

    The caller's *train_df* is no longer modified. The previous in-place
    ``drop`` removed ``total_cases`` from the caller's frame, so a second
    call on the same frame raised ``KeyError``.
    """
    y = train_df.total_cases
    features = add_constant(train_df.drop('total_cases', axis=1))
    print(y.head(10))
    print(features.head(10))
    poisson_model = sm.Poisson(y, features).fit()
    preds = poisson_model.predict(features)
    diff = abs(preds - y)
    print(preds.head(10))
    print(diff.head(10))
    print(np.mean(diff))
def fit(dataframe, target, city, station):
    '''Train the Poisson process to predict bikes or spaces.

    Every column of *dataframe* other than ``bikes`` and ``spaces`` enters
    the model as a treatment-coded categorical term. *city* and *station*
    are not used here.

    Returns ``exp(sum(parameters))`` of the fitted model.
    '''
    features = [
        column for column in dataframe.columns
        if column not in ('bikes', 'spaces')
    ]
    # Create a GLM style formula (target ~ features). ``Treatment`` has to
    # sit inside the C(...) call; the earlier 'C({}), Treatment' template
    # put it outside the parentheses and produced a malformed formula.
    terms = ['C({}, Treatment)'.format(feature) for feature in features]
    formula = '{0} ~ {1}'.format(target, ' + '.join(terms))
    y, X = dmatrices(formula, dataframe, return_type='dataframe')
    model = sm.Poisson(y, X)
    parameters = model.fit(disp=0).params
    # NOTE(review): summing every coefficient assumes all indicator terms
    # are active at once; confirm this is the intended rate.
    estimatedLambda = np.exp(np.sum(parameters))
    return estimatedLambda
def test_poisson_predict():
    """GH 175: Poisson predict works without offset and exposure.

    Also checks that explicit ``offset``/``exposure`` values scale the
    prediction as expected.
    """
    data = sm.datasets.randhie.load()
    exog = sm.add_constant(data.exog, prepend=True)
    res = sm.Poisson(data.endog, exog).fit(method='newton', disp=0)

    baseline = res.predict()
    assert_almost_equal(baseline, res.predict(exog))

    # Extra options: neutral values reproduce the baseline ...
    assert_almost_equal(baseline, res.predict(exog, offset=0, exposure=1))
    # ... and doubling via exposure or via a log(2) offset doubles it.
    assert_almost_equal(2 * baseline,
                        res.predict(exog, offset=0, exposure=2))
    assert_almost_equal(2 * baseline,
                        res.predict(exog, offset=np.log(2), exposure=1))
def SPPoisson(context):
    """Fit a Poisson model using the data and options in ``context.args``.

    Reads ``inputData``, ``featureColumns``, ``labelColumn``, ``missing``
    and ``method`` from the args and returns the fitted results object.
    """
    # Get the relevant data from the context.
    args = context.args
    # args.inputData is the data sent by the previous node.
    frame = args.inputData
    exog = frame[args.featureColumns].values
    endog = frame[args.labelColumn].values
    poisson_model = sm.Poisson(endog, exog, missing=args.missing)
    return poisson_model.fit(method=args.method)
def tiny_poisson(l):
    """Fit a Newton Poisson model on *l* and score its 95% quantile.

    Each item of *l* is a row whose first element is the response and whose
    remaining elements are the regressors.

    Returns ``[model, quantile_95, rmse]``. When a ``LinAlgError`` or
    ``ValueError`` is swallowed, entries not yet computed keep their
    defaults (``None``, ``None``, ``0``).
    """
    print("\t\tRunning Poisson")
    fitted, quantiles, rmse = None, None, 0
    xtr = np.array([row[1:] for row in l])
    ytr = np.array([row[0] for row in l]).reshape(-1, 1)
    try:
        fitted = sm.Poisson(ytr, xtr).fit(method="newton", maxiter=50, disp=0)
        mean_pred = fitted.predict(xtr)
        # 95% quantile of the in-sample predicted distribution.
        quantiles = stats.poisson.ppf(q=0.95, mu=mean_pred)
        rmse = np.sqrt(mean_squared_error(ytr, quantiles))
    except np.linalg.LinAlgError as err:
        # Every LinAlgError is swallowed; only the singular case is reported.
        if 'Singular matrix' in str(err):
            print("\t\t\tIgnored a singular matrix.")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
    return [fitted, quantiles, rmse]
def train_glm_sm(xtrain, ytrain, tmodel, constraints=None):
    """Fit a linear, logistic or Poisson model, optionally constrained.

    Parameters
    ----------
    xtrain, ytrain
        Regressors and response, passed to statsmodels as
        ``(ytrain, xtrain)``.
    tmodel : str
        ``"linear"``/``"lin"``, ``"logistic"``/``"log"`` or
        ``"poisson"``/``"poi"``.
    constraints : optional
        When given, a GLM of the matching family is fitted with
        ``fit_constrained(constraints)``. Otherwise OLS / Logit / Poisson is
        fitted directly.

    Returns
    -------
    tuple
        ``(model, params)``: the model instance and the fitted parameters.

    Raises
    ------
    ValueError
        If *tmodel* is not one of the supported names. This previously
        surfaced as an unbound-local error on ``model``.
    """
    if tmodel in ("linear", "lin"):
        kind = "linear"
    elif tmodel in ("logistic", "log"):
        kind = "logistic"
    elif tmodel in ("poisson", "poi"):
        kind = "poisson"
    else:
        raise ValueError(
            "unknown tmodel %r; expected 'linear'/'lin', 'logistic'/'log' "
            "or 'poisson'/'poi'" % (tmodel,))

    # Identity test rather than ``!= None``: comparing an array-valued
    # constraint with ``!=`` is elementwise and has no single truth value.
    if constraints is not None:
        if kind == "linear":
            family = sm.families.Gaussian()
        elif kind == "logistic":
            family = sm.families.Binomial()
        else:
            family = sm.families.Poisson()
        model = sm.GLM(ytrain, xtrain, family=family)
        result = model.fit_constrained(constraints)
    elif kind == "linear":
        model = sm.OLS(ytrain, xtrain)
        result = model.fit(disp=0, skip_hessian=True)
    elif kind == "logistic":
        model = sm.Logit(ytrain, xtrain)
        result = model.fit(disp=0, method="newton", skip_hessian=True)
    else:
        model = sm.Poisson(ytrain, xtrain)
        result = model.fit(disp=0, method="newton", skip_hessian=True)
    return model, result.params
def fit_model(papers_an, tm, comps_n):
    """Fit a Poisson model of citation counts, optionally with PCA features.

    Parameters
    ----------
    papers_an
        Frame with ``citation_count``, ``year``, ``is_comp``, ``num_auth``
        and, when ``comps_n`` is non-zero, ``article_id``.
    tm
        Frame with an ``article_id`` column. Every column after the first is
        fed to PCA (assumes ``article_id`` is the first column; TODO
        confirm).
    comps_n : int
        Number of principal components to add as regressors. ``0`` uses the
        three base regressors only.

    Returns
    -------
    The result of ``Poisson.fit_regularized(cov_type="HC1")``.
    """
    base_columns = ["year", "is_comp", "num_auth"]
    if comps_n == 0:
        reg_data = papers_an.copy()
        exog_columns = base_columns
    else:
        complete_rows = tm.iloc[:, 1:].dropna()
        pca = PCA(n_components=comps_n)
        # Keep the index of the rows that survived dropna so the article ids
        # assigned below line up with the components. A default RangeIndex
        # would shift ids whenever a row was dropped or tm had another index.
        tm_pca = pd.DataFrame(
            pca.fit_transform(complete_rows),
            index=complete_rows.index).assign(article_id=tm['article_id'])
        tm_pca.columns = [str(x) for x in tm_pca]
        reg_data = papers_an.merge(tm_pca, on='article_id')
        component_columns = tm_pca.drop(
            axis=1, labels=['article_id']).columns.tolist()
        exog_columns = base_columns + component_columns

    endog = reg_data["citation_count"].astype(float)
    exog = add_constant(reg_data[exog_columns]).astype(float)
    # NOTE(review): no ``alpha`` is passed, so the penalty is whatever
    # fit_regularized defaults to; confirm a regularized fit is intended.
    return sm.Poisson(endog=endog, exog=exog).fit_regularized(cov_type="HC1")
def test_poi_nb_zip_zinb_raw_data(meta, m):
    """Compare four count models on the raw data in *m*.

    The first column of *m* is the response and the rest are regressors.
    Fits Poisson, negative binomial, zero-inflated Poisson and zero-inflated
    negative binomial models on a 60% training split and prints each model's
    test RMSE. *meta* is not used. Returns nothing.

    Two defects in the earlier version are fixed. ``nb_rmse`` was printed
    but never computed. A duplicated ZINB block scored predictions for
    ``xtest`` against ``ytrain``, whose lengths differ.
    """
    Y = m[:, 0]
    X = m[:, 1:]
    # NOTE(review): the trimmed arrays are not used below; the call is kept
    # in case trim_value validates its input. Confirm and remove if not.
    Ynz, Xnz = trim_value(Y, X, 0)

    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=77)
    print("Training with: ", xtrain.shape, ytrain.shape)
    print("Testing with: ", xtest.shape, ytest.shape)

    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    # Score the 95% quantile of the predicted distribution, not its mean.
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zip_mean_pred = zip_mod.predict(
        xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

    print()
    print("RMSE Poisson: ", poi_rmse)
    print("RMSE Negative Binomial: ", nb_rmse)
    print("RMSE Zero-Inflated Poisson", zip_rmse)
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
# binary outcome model
m_logit = sm.Logit(y, X).fit()  # option: Probit
print(m_logit.summary2())  # estimation summary
y_pred = m_logit.predict(X)  # fitted/predicted values
# Classify at the 0.5 probability threshold.
print(confusion_matrix(y, (y_pred > .5).astype(int)))

# nominal data models (not tested)
y = df.y_nominal  # DV
mn_logit = sm.MNLogit(y, X).fit()
print(mn_logit.summary2())  # estimation summary
y_pred = mn_logit.predict(X)  # fitted/predicted values
# MNLogit predicts one probability per category for each row, so a 0.5
# threshold yields an indicator matrix that confusion_matrix cannot compare
# with the label vector. Pick the most probable category instead.
# NOTE(review): assumes the probability columns follow the sorted unique
# values of y; TODO confirm.
import numpy as np  # local import keeps this fragment self-contained
y_class = np.unique(y)[np.asarray(y_pred).argmax(axis=1)]
print(confusion_matrix(y, y_class))

# count data models (w/ exposure!)
y = df.y_count  # DV
m_poiss = sm.Poisson(
    y, X, exposure=df['x_timespan'].values).fit()
print(m_poiss.summary2())
m_NB2 = sm.NegativeBinomial(
    y, X, loglike_method='nb2', exposure=df['x_timespan'].values).fit()
print(m_NB2.summary2())
m_NB1 = sm.NegativeBinomial(
    y, X, loglike_method='nb1', exposure=df['x_timespan'].values).fit()
print(m_NB1.summary2())
m_NBP = sm.NegativeBinomialP(
    y, X, exposure=df['x_timespan'].values).fit()
print(m_NBP.summary2())
#endregion
mlogit_res = mlogit_mod.fit()
print(mlogit_res.params)

# ## Poisson
#
# Load the Rand data. Note that this example is similar to Cameron and
# Trivedi's `Microeconometrics` Table 20.5, but it is slightly different
# because of minor changes in the data.

rand_data = sm.datasets.randhie.load()
# The constant is appended as the last column (prepend=False).
rand_exog = sm.add_constant(rand_data.exog, prepend=False)

# Fit Poisson model:

poisson_mod = sm.Poisson(rand_data.endog, rand_exog)
poisson_res = poisson_mod.fit(method="newton")
print(poisson_res.summary())

# ## Negative Binomial
#
# The negative binomial model gives slightly different results.

mod_nbin = sm.NegativeBinomial(rand_data.endog, rand_exog)
res_nbin = mod_nbin.fit(disp=False)
print(res_nbin.summary())

# ## Alternative solvers
#
# The default method for fitting discrete data MLE models is Newton-
# Raphson. You can use other solvers by using the ``method`` argument:
# Python 2 script fragment (print statements). It prints numerical
# derivatives of ``loglike`` and ``score`` at ``test_params`` for comparison.
# The 'cs' / 'fd' / 'sm' labels presumably mean complex-step,
# finite-difference and the model's own result; confirm against the
# definitions earlier in the file.
print 'cs', numdiff.approx_fprime_cs(test_params, loglike)
print 'sm', hess(test_params)
print 'fd', numdiff.approx_fprime1(test_params, score, epsilon)
print 'cs', numdiff.approx_fprime_cs(test_params, score)

# The finite-difference Hessian call is disabled; the string literal below
# records the traceback it produced and is not used at runtime.
#print 'fd', numdiff.approx_hess(test_params, loglike, epsilon) #TODO: bug
'''
Traceback (most recent call last):
  File "C:\Josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\regression\test_numdiff.py", line 74, in <module>
    print 'fd', numdiff.approx_hess(test_params, loglike, epsilon)
  File "C:\Josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\sandbox\regression\numdiff.py", line 118, in approx_hess
    xh = x + h
TypeError: can only concatenate list (not "float") to list
'''
# Complex-step Hessian of loglike, then its difference from hess() as
# reported by maxabs.
hesscs = numdiff.approx_hess_cs(test_params, loglike)
print 'cs', hesscs
print maxabs(hess(test_params), hesscs)

# Multinomial logit on the anes96 data: log-transform column 0 (offset by
# 0.1), keep columns 0, 2 and 5-7, and add a constant.
data = sm.datasets.anes96.load()
exog = data.exog
exog[:, 0] = np.log(exog[:, 0] + .1)
exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
exog = sm.add_constant(exog)
res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)

# Poisson on the randhie data: view exog as a plain (nobs, k) float array
# and add a constant.
datap = sm.datasets.randhie.load()
nobs = len(datap.endog)
exogp = sm.add_constant(datap.exog.view(float).reshape(nobs, -1))
modp = sm.Poisson(datap.endog, exogp)
resp = modp.fit(method='newton', disp=0)
# In[16]: np.shape(data_test) # In[17]: # scatter plots for conditional relationships g = sns.FacetGrid(new_data, row="sex", col="age", margin_titles=True) g.map(plt.scatter, "gdp_per_capita ($)2", "suicides_no", edgecolor="w") # ### Simple Poisson Regression # In[18]: model1 = sm.Poisson(endog=new_data['suicides_no'], exog=sm.add_constant( new_data[['sex', 'age', 'gdp_per_capita ($)2']])) res1 = model1.fit() # In[19]: poisson1 = sm.GLM(new_data['suicides_no'], sm.add_constant( new_data[['sex', 'age', 'gdp_per_capita ($)2']]), family=sm.families.Poisson()).fit() print(poisson1.summary()) # In[20]: #compute MSPE y_pred1 = res1.predict(
# Python 2 script fragment (print statements). It round-trips a fitted
# results object through pickle and then repeats the setup for a Poisson
# model.
import pickle
fname = 'try_shrink%d_ols.pickle' % shrinkit
# NOTE(review): the file is opened in text mode ('w' / 'r'), which works
# under Python 2; Python 3 pickling needs binary mode.
fh = open(fname, 'w')
pickle.dump(results._results, fh)  #pickling wrapper doesn't work
fh.close()
fh = open(fname, 'r')
results2 = pickle.load(fh)
fh.close()
# Predict from the unpickled results, and from its model using the original
# parameters.
print results2.predict(xf)
print results2.model.predict(results.params, xf)

# Poisson case: draw counts from the design matrix x and fit with BFGS.
y_count = np.random.poisson(np.exp(x.sum(1) - x.mean()))
model = sm.Poisson(
    y_count,
    x)  #, exposure=np.ones(nobs), offset=np.zeros(nobs)) #bug with default
results = model.fit(method='bfgs')
results.summary()

# exposure and offset are passed explicitly here (see the "bug with default"
# note above).
print results.model.predict(results.params, xf, exposure=1, offset=0)

if shrinkit:
    # Remove the data held by the results before pickling.
    results.remove_data()
else:
    #work around pickling bug
    results.mle_settings['callback'] = None

import pickle
from scipy import stats from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split X = np.random.randint(99, size=(800, 21)) Y = np.random.randint(2, size=(800, 1)) xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=0.60, random_state=42) print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape) print("Model: Poisson") poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50) poi_mean_pred = poi_mod.predict(xtest) poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred) poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs)) print("Model: Neg. Binomial") nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None, method='newton', maxiter=50) nb_pred = nb_mod.predict(xtest) nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred)) print(np.ones(len(xtest)).shape) print("Model: Zero Inflated Poisson") zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton",