def run_regressions_3(data=None, endog=None, exog=None, options=0, clusterfirm=0):
    """Fit one panel regression per data set and return the fitted results.

    Parameters
    ----------
    data : iterable, optional
        Data sets to regress on, one model per element. Each element is
        indexed with ``elem[endog]`` / ``elem[exog]`` and, for
        ``clusterfirm == 0``, must expose a ``gvkey`` attribute.
        ``None`` (the default) means "no data sets".
    endog : optional
        Column selector for the dependent variable.
    exog : optional
        Column selector for the regressors.
    options : {0, 1, 2}
        0 - ``PanelOLS`` with entity and time effects;
        1 - ``PanelOLS`` with time effects only;
        2 - ``PooledOLS``.
    clusterfirm : {0, 1, 2}
        0 - clustered covariance with ``elem.gvkey`` as the clusters;
        1 - clustered covariance with ``cluster_entity=True``;
        2 - plain ``fit()`` with the estimator's default covariance.

    Returns
    -------
    list
        One fitted result per element of ``data``, in order.

    Raises
    ------
    ValueError
        If ``options`` or ``clusterfirm`` is not one of 0, 1, 2.
    """
    # None sentinels instead of shared mutable ``[]`` defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    # Fail early and explicitly: previously an unknown ``options`` ended in an
    # UnboundLocalError and an unknown ``clusterfirm`` silently produced an
    # empty result list.
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got %r" % (options,))
    if clusterfirm not in (0, 1, 2):
        raise ValueError("clusterfirm must be 0, 1 or 2, got %r" % (clusterfirm,))

    results = []
    print(endog)
    for elem in data:
        if options == 0:
            mod = PanelOLS(elem[endog], elem[exog],
                           entity_effects=True, time_effects=True)
        elif options == 1:
            mod = PanelOLS(elem[endog], elem[exog],
                           entity_effects=False, time_effects=True)
        else:
            # Diagnostic output kept from the original implementation.
            print(type(elem))
            mod = PooledOLS(elem[endog], elem[exog])

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results
def model_pooled(df):
    """Fit a pooled OLS of ``lknowledge_forward_cites`` and hand it to ``fit_write``.

    The frame is expected to carry a two-level index (both levels are
    re-cast below: level 0 to ``int``, level 1 to ``datetime64[ns]``) and the
    columns named in ``exog_vars`` plus ``patent_date`` and
    ``log(knowledge_forward_cites)``.

    Side effects: the columns ``age`` and ``agesq`` are added to, and ``t`` is
    converted to a categorical on, the frame passed in (these assignments
    happen before ``rename`` produces a new frame).

    Parameters
    ----------
    df : pandas.DataFrame
        Input panel.

    Returns
    -------
    None
        Results are only passed to ``fit_write``.
    """
    # Age in years. 365.2425 days is the mean Gregorian year that the
    # year-unit ``np.timedelta64(1, 'Y')`` stood for; recent pandas/numpy
    # versions refuse to divide by the ambiguous 'Y' unit, so a fixed-length
    # Timedelta is used instead.
    df["age"] = (df["t"] - df["patent_date"]) / pd.Timedelta(days=365.2425)
    df["agesq"] = np.square(df.age)
    df["t"] = pd.Categorical(df.t)

    # ``index=str`` stringifies the index labels; the dependent variable gets
    # a name that is usable as an attribute.
    df = df.rename(index=str, columns={
        "log(knowledge_forward_cites)": "lknowledge_forward_cites"
    })
    # Restore typed index levels after the string conversion above.
    df.index = df.index.set_levels([
        df.index.levels[0].astype(int),
        df.index.levels[1].astype('datetime64[ns]')
    ])

    exog_vars = [
        "t",
        "source",
        'log(patent_num_claims)',
        'log(avg_inventor_total_num_patents)',
        'log(patent_processing_time)',
        'one-hot_assignee_type_3',
        'one-hot_assignee_type_4',
        'one-hot_assignee_type_5',
        'one-hot_assignee_type_6',
        'one-hot_assignee_type_7',
        'one-hot_assignee_type_9',
        'age',
        'agesq',
    ]
    exog = add_constant(df[exog_vars])
    mod = PooledOLS(df.lknowledge_forward_cites, exog)

    # The heteroskedasticity-robust variant (cov_type='robust') is currently
    # disabled; only the clustered covariances are written.
    fit_write(mod, "entity", cov_type='clustered', cluster_entity=True)
    fit_write(mod, "entity-time", cov_type='clustered',
              cluster_entity=True, cluster_time=True)
def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit a single panel regression on ``data`` and return the fitted result.

    Parameters
    ----------
    data
        Data set; indexed with ``data[endog]`` / ``data[exog]`` and, for
        ``clusterfirm == 0``, must expose a ``gvkey`` attribute.
    endog
        Column selector for the dependent variable.
    exog
        Column selector for the regressors.
    options : {0, 1, 2}
        0 - ``PanelOLS`` with entity and time effects;
        1 - ``PanelOLS`` with time effects only;
        2 - ``PooledOLS``.
    clusterfirm : {0, 1, 2}
        0 - clustered covariance with ``data.gvkey`` as the clusters;
        1 - clustered covariance with ``cluster_entity=True``;
        2 - plain ``fit()`` with the estimator's default covariance.
    constant : {0, 1}
        1 - add a constant column via ``sm.add_constant``; 0 - use the
        regressors as they are.

    Returns
    -------
    The object returned by the model's ``fit`` call.

    Raises
    ------
    ValueError
        If ``options``, ``clusterfirm`` or ``constant`` has an unsupported
        value. (Previously this surfaced as an UnboundLocalError, or the raw
        column-name list was handed to the model.)
    """
    if constant not in (0, 1):
        raise ValueError("constant must be 0 or 1, got %r" % (constant,))
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got %r" % (options,))
    if clusterfirm not in (0, 1, 2):
        raise ValueError("clusterfirm must be 0, 1 or 2, got %r" % (clusterfirm,))

    regressors = sm.add_constant(data[exog]) if constant == 1 else data[exog]
    dependent = data[endog]

    if options == 2:
        mod = PooledOLS(dependent, regressors)
    else:
        # options 0 and 1 differ only in whether entity effects are included.
        mod = PanelOLS(dependent, regressors,
                       entity_effects=(options == 0), time_effects=True)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()
def task_factor_estimate_interactive_fixed_effects_model(produces):
    """
    Task for estimating factor numbers in interactive fixed effects model.
    We choose different penalty functions g1,g2,g3 with criteria PC and IC.

    For every (N, T) pair in ``all_N`` / ``all_T`` the data generating
    function is called ``nsims`` times. Each draw is estimated with
    ``InteractiveFixedEffect`` (started from pooled OLS coefficients), and
    ``FactorEstimator.r_hat`` is evaluated on the residual for the six
    criterion/penalty combinations. The per-case column means are written to
    ``produces`` as CSV, one row per case.

    Parameters
    ----------
    produces
        Target passed to ``DataFrame.to_csv``.
    """
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 100, 100, 10, 20, 50]
    all_T = [10, 20, 50, 100, 100, 100, 100]
    dgp_func = dgp_interactive_fixed_effects_model_with_common_and_time_invariant
    tolerance = 0.0001
    beta_true = {"beta1": 1, "beta2": 3, "mu": 5, "gamma": 2, "delta": 4}
    r0 = 8

    # Loop invariants: regressor columns of the pooled start-value regression
    # and the (criterion, penalty) pairs, in the original evaluation order.
    regressor_names = ["x" + str(k) for k in range(1, 6)]
    criteria = [("PC", 1), ("PC", 2), ("PC", 3), ("IC", 1), ("IC", 2), ("IC", 3)]

    # DataFrame.append no longer exists in pandas 2.x; collect the per-case
    # rows and concatenate once at the end.
    case_means = []
    np.random.seed(123)
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(
            index=range(nsims),
            columns=["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"],
        )
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            X, Y, panel_df = dgp_func(T, N, **beta_true)

            # Pooled OLS coefficients serve as start values.
            start_value_estimator = PooledOLS(panel_df.y, panel_df[regressor_names])
            start_value_result = start_value_estimator.fit()
            interactive_start_value = start_value_result.params.tolist()

            interactive_estimator = InteractiveFixedEffect(Y, X)
            beta_hat, _, _, _ = interactive_estimator.fit(
                r0, interactive_start_value, tolerance)

            residual = Y - (X.T.dot(beta_hat)).T
            factor_estimator = FactorEstimator(residual)
            for criterion, penalty in criteria:
                df_sim.loc[i, criterion + str(penalty)] = factor_estimator.r_hat(
                    rmax, criterion, penalty)
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)

    df_factor_estimate = pd.concat(case_means).reset_index(drop=True)
    df_factor_estimate.to_csv(produces, index=False)
# Exploratory look at ``jtrain``. The bare expressions below only display
# something in an interactive session; in a script their values are discarded.
jtrain.head()
jtrain[jtrain['grant'] == 1].count()

# Formula-interface OLS of scrap on d88, d89, grant and grant_1.
scrap_panel_pool = smf.ols('scrap~d88+d89+grant+grant_1', data=jtrain).fit()
scrap_panel_pool.summary()

# NOTE: this binds a second name to the same frame, it is not a copy.
jtrain2 = jtrain
jtrain2[:5]

## Define the ID and Time column for Panel Regression
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))

exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
exog = sm.add_constant(jtrain2[exog_vars])
# ``grant0`` is built here but not used in the visible part of the script.
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)

## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)

## Model Fixed Effects -- Entity and Time Effects - True
# Rebinds ``model_fe`` / ``fe_res``; the entity-only results above are
# no longer reachable after this point.
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True, time_effects=True)
fe_res = model_fe.fit()
print(fe_res)

## Random Effects Model
# Only constructed here; no fit() call is visible in this part of the script.
model_re = RandomEffects(jtrain2.lscrap, exog)
#                     'dTrVo_pd',
#                     'vVol_pd',
#                     'dTrVa_pd',])
#corrMatt = corrMat.corr()

# Correlation matrix of the seven measures, computed separately for every
# key in ``ttic`` and stored under that key.
corrMattt = {}
for char in ttic:
    corrMat = pd.concat([
        esRol_pd[char],
        arES_pd[char],
        adRe_pd[char],
        dMaCa_pd[char],
        dTrVo_pd[char],
        vVol_pd[char],
        dTrVa_pd[char]
    ], axis=1, keys=[
        'esRol_pd',
        'arES_pd',
        'adRe_pd',
        'dMaCa_pd',
        'dTrVo_pd',
        'vVol_pd',
        'dTrVa_pd',
    ])
    corrMatt = corrMat.corr()
    corrMattt[char] = corrMatt

#Pooled regression:
#https://bashtage.github.io/linearmodels/doc/panel/examples/examples.html
# Original author's note (translated): "There is a problem here... I do not
# know how to do pooled OLS..."
# The regressors must sit side by side as columns (axis=1), one row per
# observation of the dependent variable; the default axis stacked the three
# series end to end instead.
# NOTE(review): PooledOLS presumably also needs the dependent and the
# regressors to share an (entity, time) MultiIndex - confirm how the *_pd
# objects are indexed.
pReg = pd.concat(
    [dMaCa_pd['AY'], dTrVo_pd['AY'], vVol_pd['AY']],
    axis=1,
    keys=['dMaCa_pd', 'dTrVo_pd', 'vVol_pd'],
)
pRegg = PooledOLS(esRol_pd['AY'], pReg)
pooled_res = pRegg.fit()
# Scratch comparison of several panel estimators on ``data.lwage``.
dependent = data.lwage

# Two-way fixed effects without 'exper', fitted with two covariance types.
exog = sm.add_constant(data[['expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')

# Entity effects only, covariance clustered by entity.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)

# Random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')

from linearmodels.panel.results import compare

# Pooled OLS with 'year' added as a categorical regressor. ``.copy()`` keeps
# the column assignment below from touching ``data``.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')
print(compare([res,res2, res3, res4, res5]))

print(data.columns)

from linearmodels.panel.data import PanelData
import numpy as np
import pandas as pd
from statsmodels.datasets import grunfeld
from linearmodels.panel.model import RandomEffects
from linearmodels.tests.panel._utility import generate_data

# NOTE(review): absolute, machine-specific Windows path; ``data`` is rebound
# to the simulated panel from here on.
data = pd.read_stata(r'C:\git\linearmodels\linearmodels\tests\panel\results\simulated-panel.dta')
# Build one frame of changes between each pair of consecutive entries of
# ``yearlist``, then regress the growth rate on the columns named in EXOG.
# The per-period frames are collected in a list and concatenated once,
# instead of re-concatenating onto a growing (initially empty) frame in
# every iteration.
change_frames = []
for index, start in enumerate(yearlist[:-1]):
    end = yearlist[index + 1]
    ter_start = ternarydata[ternarydata.YEAR == start]
    ter_end = ternarydata[ternarydata.YEAR == end]

    # Inner merge: keep countries present in both years. Columns from the
    # start year get the ``_x`` suffix, columns from the end year ``_y``.
    termeta = ter_start.merge(ter_end, on='Country Code', how="inner")
    termeta = termeta.dropna()

    termeta['nm_change'] = termeta['NM_y'] - termeta['NM_x']
    termeta['shm_change'] = termeta['SHM_y'] - termeta['SHM_x']
    termeta['ne_change'] = termeta['NE_y'] - termeta['NE_x']
    termeta['net_change'] = termeta['ne_change'] - termeta['nm_change']
    termeta['growth_rate'] = (termeta['Income_y'] -
                              termeta['Income_x']) / (termeta['Income_x'])
    # The period is identified by its position in ``yearlist``, not by the year.
    termeta['date'] = index

    temp_df = termeta[[
        'Country Code', 'date', 'nm_change', 'shm_change', 'ne_change',
        'net_change', 'Income_x', 'Income_y', 'growth_rate'
    ]]
    change_frames.append(temp_df)

change_df = pd.concat(change_frames)
change_df = change_df.sort_values(by=['Country Code', 'date'])
change_df = change_df.set_index(['Country Code', 'date'])
# Base-10 logarithm of the start-of-period income.
change_df['log_income'] = np.log10(change_df['Income_x'])

# EXOG is a comma-separated list of column names.
exog_vars = EXOG.split(",")
exog = sm.add_constant(change_df[exog_vars])
mod = PooledOLS(change_df.growth_rate, exog)
fe_res = mod.fit()

with open(OUTPUT, 'w') as fh:
    fh.write(fe_res.summary.as_text())
# print(data1) d = pd.Categorical(data1['Date']) data1 = data1.set_index(['ID', 'Date']) data1['Date'] = d # print(data1) exog_vars = [ 'Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate', 'Date' ] a = ['Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate'] print(data1[a]) exog = sm.add_constant(data1[exog_vars]) exog1 = sm.add_constant(data1[a]) mod = PanelOLS(data1['Accelerator pedal position'], exog, entity_effects=True, time_effects=False) mod1 = PooledOLS(data1['Accelerator pedal position'], exog1) mod2 = RandomEffects(data1['Accelerator pedal position'], exog1) mod3 = BetweenOLS(data1['Accelerator pedal position'], exog1) res = mod.fit() pooled_res = mod1.fit() re_res = mod2.fit() be_res = mod3.fit() print(res) print(compare({'Pooled': pooled_res, 'RE': re_res, 'BE': be_res})) if __name__ == '__main__': pass
'/media/guolewen/intraday_data/needs/day_stock_computed_data/full_data_with_volg.csv' ) Date = pd.Categorical(df.Date) Ticker = pd.Categorical(df.Ticker) df = df.set_index(["Ticker", "Date"]) df["Date1"] = Date df["Ticker1"] = Ticker # Variable Constructions df['lognprints'] = np.log(df['NPRINTs']) w = (df['YENVOL'] / 100) * (df['volatility']) df['logwdbw'] = np.log(w / (601000 * 0.016)) # drop -inf when volatility equals to 0 df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)] # Pooled OLS exog = sm.add_constant(df['logwdbw']) mod = PooledOLS(df.lognprints, exog) pooled_res = mod.fit() print(pooled_res) """ # fixed effects with time dummy exog = sm.add_constant(df[['logwdbw','Date1']]) mod = PanelOLS(df.lognprints, exog, entity_effects=True) fe_res1 = mod.fit() print(fe_res1) """ # fixed effects with ticker dummy exog = sm.add_constant(df[['logwdbw', "Ticker1"]]) mod = PanelOLS(df.lognprints, exog, time_effects=True) fe_res3 = mod.fit() print(fe_res3)
# gerate simulation data X, Y, panel_df = dgp_func(T_N_sim.loc[case, "T"], T_N_sim.loc[case, "N"]) p = X.shape[0] # within model require no collinear variable combinations no_collinear_x_var = ["x" + str(i + 1) for i in range(min(p, 3))] # run estimator for starting value for interactive estimator if interactive_start_value_effect == "twoways": start_value_estimator = PanelOLS( panel_df.y, panel_df[no_collinear_x_var], entity_effects=True, time_effects=True, ) else: start_value_estimator = PooledOLS( panel_df.y, panel_df[["x" + str(i) for i in range(1, p + 1)]] ) start_value_result = start_value_estimator.fit() interactive_start_value = [ *start_value_result.params.tolist(), *np.zeros(p - len(start_value_result.params)), ] # run interactive fixed effect estimator interactive_estimator = InteractiveFixedEffect(Y, X) ( beta_hat_interactive, beta_hat_list, f_hat, lambda_hat, ) = interactive_estimator.fit(r, interactive_start_value, tolerance) # run within estimator with the same data