def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit a single panel regression of ``data[endog]`` on ``data[exog]``.

    Args:
        data: Frame indexed with ``data[...]``; ``data.gvkey`` is read when
            ``clusterfirm == 0``.
        endog: Column label of the dependent variable.
        exog: Column label(s) of the regressors.
        options: Model selector -- 0: ``PanelOLS`` with entity and time
            effects, 1: ``PanelOLS`` with time effects only, 2: ``PooledOLS``.
        clusterfirm: Covariance selector -- 0: clustered on ``data.gvkey``,
            1: clustered by entity, 2: plain ``fit()``.
        constant: 1 adds a constant with ``sm.add_constant``; 0 uses the
            selected columns unchanged. Any other value leaves ``exog``
            exactly as it was passed in.

    Returns:
        The results object returned by ``mod.fit``.

    Raises:
        ValueError: If ``options`` or ``clusterfirm`` is not 0, 1 or 2.
    """
    # Validate first: the old independent ``if`` chains fell through to an
    # unbound local (``mod`` / ``results``) for any unknown selector.
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got {!r}".format(options))
    if clusterfirm not in (0, 1, 2):
        raise ValueError(
            "clusterfirm must be 0, 1 or 2, got {!r}".format(clusterfirm))

    if constant == 1:
        exog = sm.add_constant(data[exog])
    elif constant == 0:
        exog = data[exog]

    if options == 0:
        mod = PanelOLS(data[endog], exog, entity_effects=True,
                       time_effects=True)
    elif options == 1:
        mod = PanelOLS(data[endog], exog, entity_effects=False,
                       time_effects=True)
    else:
        mod = PooledOLS(data[endog], exog)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()
def run_regressions_3(data=None, endog=None, exog=None, options=0,
                      clusterfirm=0):
    """Fit the same specification on every frame in ``data``.

    Args:
        data: Iterable of frames; each is indexed with ``elem[endog]`` and
            ``elem[exog]`` and ``elem.gvkey`` is read when
            ``clusterfirm == 0``. Defaults to an empty list.
        endog: Column label of the dependent variable (default ``[]``).
        exog: Column label(s) of the regressors (default ``[]``).
        options: 0: ``PanelOLS`` with entity and time effects, 1: ``PanelOLS``
            with time effects only, 2: ``PooledOLS``.
        clusterfirm: 0: clustered on ``elem.gvkey``, 1: clustered by entity,
            2: plain ``fit()``.

    Returns:
        List of fitted results, one per frame, in input order.

    Raises:
        ValueError: If ``options`` or ``clusterfirm`` is not 0, 1 or 2.
    """
    # ``None`` sentinels replace the shared mutable ``[]`` defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    print(endog)

    # An unknown ``clusterfirm`` used to return an empty list silently and an
    # unknown ``options`` hit an unbound ``mod``; fail loudly instead.
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got {!r}".format(options))
    if clusterfirm not in (0, 1, 2):
        raise ValueError(
            "clusterfirm must be 0, 1 or 2, got {!r}".format(clusterfirm))

    results = []
    for elem in data:
        if options == 0:
            mod = PanelOLS(elem[endog], elem[exog], entity_effects=True,
                           time_effects=True)
        elif options == 1:
            mod = PanelOLS(elem[endog], elem[exog], entity_effects=False,
                           time_effects=True)
        else:
            mod = PooledOLS(elem[endog], elem[exog])

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results
def _fit_each_dependent(frame, dependents, regressors, entity_effects):
    """Fit one time-effects ``PanelOLS`` per dependent column, clustered on ``frame.gvkey``."""
    fitted = []
    for column in dependents:
        mod = PanelOLS(frame[column], frame[regressors],
                       entity_effects=entity_effects, time_effects=True)
        fitted.append(mod.fit(cov_type='clustered', clusters=frame.gvkey))
    return fitted


def run_regressions(dataa, datab, endog1, endog2, exog1, exog2, options=0):
    """Run two batches of panel regressions and return all results.

    Every column in ``endog1`` is regressed on ``dataa[exog1]``, then every
    column in ``endog2`` on ``datab[exog2]``.

    Args:
        dataa: Frame for the first batch (``dataa.gvkey`` provides clusters).
        datab: Frame for the second batch (``datab.gvkey`` provides clusters).
        endog1: Dependent column labels for the first batch.
        endog2: Dependent column labels for the second batch.
        exog1: Regressor column label(s) for the first batch.
        exog2: Regressor column label(s) for the second batch.
        options: 0 for entity and time effects, 1 for time effects only.

    Returns:
        List of fitted results: first-batch results followed by second-batch
        results, each in the order of its ``endog`` list.

    Raises:
        ValueError: If ``options`` is neither 0 nor 1.
    """
    print(endog1)
    # Any other value previously reached ``mod.fit`` with ``mod`` unbound.
    if options not in (0, 1):
        raise ValueError("options must be 0 or 1, got {!r}".format(options))
    entity_effects = options == 0

    results = _fit_each_dependent(dataa, endog1, exog1, entity_effects)
    results.extend(_fit_each_dependent(datab, endog2, exog2, entity_effects))
    return results
def balancing_tests_cohort_results(df, exog):
    """Regress four cohort characteristics on ``exog`` and tabulate the fits.

    Each outcome column of ``df`` is fitted with a two-way (entity and time)
    fixed-effects ``PanelOLS`` that drops singletons, with standard errors
    clustered on ``df.id_e``.

    Args:
        df: Frame providing the columns ``adult``, ``below_median_age_restr``,
            ``sex_ratio``, ``have_adults_patch`` and ``id_e``.
        exog: Regressors, passed to ``PanelOLS`` unchanged.

    Returns:
        The object returned by ``compare(..., stars=True)`` for the four fits,
        keyed by their table labels.
    """
    outcome_columns = (
        ('Size of cohort', 'adult'),
        ('Below median age', 'below_median_age_restr'),
        ('Sex ratio', 'sex_ratio'),
        ('Have families', 'have_adults_patch'),
    )
    fitted = {}
    for label, column in outcome_columns:
        model = PanelOLS(df[column],
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # ``singletons`` is a model-construction option and is already set
        # above; it is not one of the documented covariance options of
        # ``fit`` (clusters / cluster_entity / cluster_time), so it is no
        # longer forwarded there.
        fitted[label] = model.fit(cov_type='clustered', clusters=df.id_e)
    return compare(fitted, stars=True)
def balancing_tests_cantonal_results(df, exog):
    """Regress three asylum-seeker (AS) share measures on ``exog``.

    These are the conditional results. Each outcome is multiplied by 100 and
    fitted with a two-way (entity and time) fixed-effects ``PanelOLS`` that
    drops singletons, with standard errors clustered on ``df.id_e``.

    Args:
        df: Frame providing the columns ``share_AS_between``,
            ``share_AS_within``, ``sex_ratio_AS_ntc`` and ``id_e``.
        exog: Regressors, passed to ``PanelOLS`` unchanged.

    Returns:
        The object returned by ``compare(..., stars=True)`` for the three
        fits, keyed by their table labels.
    """
    outcome_columns = (
        ('Between countries', 'share_AS_between'),
        ('Within countries', 'share_AS_within'),
        ('Sex ratio', 'sex_ratio_AS_ntc'),
    )
    fitted = {}
    for label, column in outcome_columns:
        model = PanelOLS(df[column] * 100,
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # ``singletons`` is a model-construction option and is already set
        # above; it is not one of the documented covariance options of
        # ``fit`` (clusters / cluster_entity / cluster_time), so it is no
        # longer forwarded there.
        fitted[label] = model.fit(cov_type='clustered', clusters=df.id_e)
    return compare(fitted, stars=True)
def preprocessing_regression(self):
    """Impute, log-transform and fit a time-effects panel regression.

    Side effect: the first nine columns of ``self.df`` are overwritten with
    their mean-imputed values.

    Returns:
        The ``PanelOLS`` results of ``mezun`` regressed on columns 1 through 8
        of the indexed frame, with time effects and entity-clustered
        standard errors.
    """
    # Fill missing values in the first nine columns with the column mean.
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    self.df.iloc[:, :9] = imputer.fit_transform(self.df.iloc[:, :9])

    # Work on an explicit copy: assigning columns onto an ``iloc`` slice is
    # chained assignment (SettingWithCopyWarning) and its effect on
    # ``self.df`` depends on the pandas version.
    data = self.df.iloc[:, :10].copy()

    # Natural log of the variables that have outliers.
    # NOTE(review): assumes positional columns 2, 3 and 4 are ``mezun``,
    # ``yogunluk`` and ``dogum`` and are strictly positive -- confirm.
    data['mezun'] = np.log(self.df.iloc[:, 2])
    data['yogunluk'] = np.log(self.df.iloc[:, 3])
    data['dogum'] = np.log(self.df.iloc[:, 4])

    # Entity/time MultiIndex puts the data into panel form.
    data = data.set_index(['iller', 'yil'])

    # Time effects capture the time component of the relation between the
    # regressand and the regressors.
    mod = PanelOLS(data.mezun, data.iloc[:, 1:9], time_effects=True)
    res = mod.fit(cov_type='clustered', cluster_entity=True)
    return res
def run_regressions_2(data, endog=None, exog=None, options=0):
    """Regress every column in ``endog`` on ``data[exog]``.

    Args:
        data: Frame indexed with ``data[...]``; ``data.gvkey`` provides the
            clusters.
        endog: Dependent column labels (default: empty list).
        exog: Regressor column label(s) (default: empty list).
        options: 0 for entity and time effects, 1 for time effects only.

    Returns:
        List of fitted results, one per entry of ``endog``, in order.

    Raises:
        ValueError: If ``options`` is neither 0 nor 1.
    """
    # ``None`` sentinels replace the shared mutable ``[]`` defaults.
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    print(endog)
    if options not in (0, 1):
        raise ValueError("options must be 0 or 1, got {!r}".format(options))

    results = []
    for elem in endog:
        # The previous version looped over ``endog`` a second time and used
        # those columns as regressors (keeping only the last model), never
        # touched ``exog``, and referenced an undefined ``e`` when
        # ``options == 1``. The regressors are the ``exog`` columns.
        mod = PanelOLS(data[elem], data[exog],
                       entity_effects=(options == 0), time_effects=True)
        results.append(mod.fit(cov_type='clustered', clusters=data.gvkey))
    return results
def panel_regression(y, xs, years, country, list_x, prev=0, show=False,
                     save=True, path="", diff=False, constant=False,
                     entity_effects=False):
    """Fit a panel (or first-difference) regression and report its fit.

    The working data set comes from ``bdf.filter_origin_country_dataset``;
    the dependent variable is its ``y`` column and the regressors are the
    ``list_x`` columns.

    Args:
        y, xs, years, country, prev: Forwarded to
            ``bdf.filter_origin_country_dataset``; ``xs`` must have a
            MultiIndex (its first level is passed along as a list).
        list_x: Regressor column labels.
        show: When equal to ``True``, plot real vs. predicted values.
        save, path: Forwarded to the plotting helper.
        diff: When equal to ``False`` a ``PanelOLS`` is fitted, otherwise a
            ``FirstDifferenceOLS``.
        constant: When equal to ``False`` no constant is added.
        entity_effects: Forwarded to ``PanelOLS``.

    Returns:
        Tuple ``(params, fitted_values)`` taken from the fitted results.
    """
    first_level = xs.index.levels[0].tolist()
    data = bdf.filter_origin_country_dataset(y, country, years, first_level,
                                             xs, prev)

    # Flags are compared with ``==`` rather than tested for truthiness, so a
    # value such as ``None`` (not equal to ``False``) takes the "else" path.
    regressors = data[list_x]
    if not (constant == False):
        regressors = sm.add_constant(regressors)

    if not (diff == False):
        model = FirstDifferenceOLS(data.y, regressors)
    else:
        model = PanelOLS(data.y, regressors, entity_effects=entity_effects)
    fit = model.fit()

    n_columns = len(xs.columns.tolist())
    evaluation(data, fit.fitted_values, constant, n_columns)

    if show == True:
        pmf.plot_real_VS_prediction(y, fit.fitted_values, xs, years, country,
                                    45, "Regression model", save, path)

    return (fit.params, fit.fitted_values)
import sys
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

# Command line: <input csv> <output text file>.
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

# Load the data and index it by (Code, date) to obtain a panel.
change_df = pd.read_csv(DATA_FILE).set_index(["Code", "date"])

exog_vars = [
    "Income_t0_log",
    "nm_change",
    "shm_change",
    "ne_change",
    "sum_adv_t0",
]
exog = sm.add_constant(change_df[exog_vars])

# No entity or time effects are requested for this model.
mod = PanelOLS(change_df.growth_rate, exog)
fe_res = mod.fit()

# Persist the plain-text summary table.
summary_text = fe_res.summary.as_text()
with open(OUTPUT_FILE, "w") as f:
    f.write(summary_text)
# Preview of the first rows (only has a visible effect as the last expression
# of a notebook cell).
jtrain2[:5]

## Define the ID and Time column for Panel Regression
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))

exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
exog = sm.add_constant(jtrain2[exog_vars])
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)

## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)

## Model Fixed Effects -- Entity and Time Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True, time_effects=True)
fe_res = model_fe.fit()
print(fe_res)

## Random Effects Model
model_re = RandomEffects(jtrain2.lscrap, exog)
re_res = model_re.fit()
# Print the random-effects fit; this used to re-print ``fe_res``.
print(re_res)

#################################################
## Regress scrap~grant
## Model Pooled OLS
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE6M, temp[['activeWeight6MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE6M, temp[['activeWeight12MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE12M, temp[['activeWeight6MSquared']], entity_effects = True, time_effects = True) #mod = PanelOLS(temp.UE12M, temp[['activeWeight12MSquared']], entity_effects = True, time_effects = True) # ============================================================================= # Specify standard error clustering # There are two options: cluster standard errors by # firm (cluster_entity) or by time (cluster_time) # ============================================================================= res = mod.fit(cov_type = 'clustered', cluster_entity = True) #res = mod.fit(cov_type = 'clustered', cluster_time = True) # ============================================================================= # Print the regression result # ============================================================================= print(res) del temp
# CLO has much more positive holding period return than corporate bonds # In[41]: #Part B # 1. OLS without fixed effect hpr_OLS = smf.ols(formula='lnhpr ~ clo+tmkt_rf+tsmb+thml+tterm+tdef+hp', data=ps5) # I use panel data to regression holding period return on common risk factors (tmkt_rf,tsmb,thml,tterm,and tdef) and # holding period. CLO is an indicator which is 1 if bond is CLO. If CLO is significant and positive, CLO has higher # return than corporate bond. res = hpr_OLS.fit() print(res.summary()) # The significant positive coefficient for CLO shows that CLO has higher excess return than corporate bond # In[59]: # 2. OLS with firm fixed effect startyear = pd.Categorical(ps5.startyear) ps5 = ps5.set_index(['entity_name', 'startyear']) # In[67]: exog_vars = ['clo', 'tmkt_rf', 'tsmb', 'thml', 'tterm', 'tdef', 'hp'] exog = sm.add_constant(ps5[exog_vars]) mod = PanelOLS(ps5.lnhpr, exog, entity_effects=True) res = mod.fit() print(res) # After adding firm fixed effect, the coefficient of CLO is still significant positive and at similiar magnititude. # The argument that CLO has higher excess return than corporate return is valid.
# Regressors are columns 1-4 of ``calc_mat``, one per row of ``x``.
x = np.stack([calc_mat[:, 1], calc_mat[:, 2], calc_mat[:, 3], calc_mat[:, 4]])
ones = np.ones(len(x[0]))
# Build the design matrix by prepending one regressor at a time. The ones
# column is stacked in explicitly here, so ``add_constant`` (whose default is
# to skip when a constant column is already present) adds nothing further.
X = sm.add_constant(np.column_stack((x[0], ones)))
for elem in x[1:]:
    X = sm.add_constant(np.column_stack((elem, X)))
res = sm.OLS(y, X).fit()
print(res.summary())

# FE model regression
# (this heading used to be a bare identifier, which raises NameError)

# Company code = integer file-name stem (text before the first dot).
company_codes = [int(each_file.split('.')[0]) for each_file in file_list]

# One observation per company, all in 2019; sized from the data instead of a
# hard-coded 50.
time = [2019] * len(company_codes)
df = pd.DataFrame({
    'TDA': x[0],
    'CR5': x[1],
    'SIZE': x[2],
    'ROE': x[3],
    'REWARD': y,
    'YEAR': time,
    'CODE': company_codes
})
df.to_stata('Stock/res.dta')
df = df.set_index(['CODE', 'YEAR'])

# Must match the columns created above: the frame has 'CR5', and the former
# 'LDA' entry raised KeyError.
exog_vars = ['TDA', 'CR5', 'SIZE', 'ROE']
exog = sm.add_constant(df[exog_vars])
# NOTE(review): every entity has a single year here, so entity effects may
# absorb all variation -- confirm this specification is intended.
model = PanelOLS(df['REWARD'], exog, entity_effects=True)
fe = model.fit()
print(fe)
import numpy as np
import linearmodels as lm

# Turn off the package-level missing-value warning flag before fitting.
lm.WARN_ON_MISSING = False
from linearmodels import utility

# Exercise the missing-value warning helper on a small boolean mask.
utility.missing_warning(np.array([True, True, False]))
from linearmodels.panel import PanelOLS, RandomEffects, PooledOLS
from linearmodels.datasets import wage_panel
import statsmodels.api as sm

# Load the wage panel and index it by (nr, year).
data = wage_panel.load()
data = data.set_index(['nr','year'])
dependent = data.lwage

# Entity and time effects, fitted with two covariance estimators.
exog = sm.add_constant(data[['expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')

# Entity effects only, standard errors clustered by entity.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)

# Random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')
from linearmodels.panel.results import compare

# Pooled OLS with the year added as a categorical regressor; the regressors
# are copied so the new column is not written into a slice of `data`.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')

# Side-by-side comparison of all five fits.
print(compare([res,res2, res3, res4, res5]))
print(data.columns)
# Variance inflation factor from the R-squared of the regression in `res`.
# NOTE(review): `VIF`, `y` and `res` are defined before this excerpt; this
# line presumably belongs to a loop that starts above -- confirm indentation.
VIF[y] = 1 / (1 - res.rsquared)
with open('../result/VIF.txt', 'w') as f:
    print(VIF, file=f)

# Pooled regression
# NOTE(review): no constant is added here, unlike the panel models below --
# confirm this is intended.
x = data[["MV", "RM", "BM", "ROE", "Inv"]]
y = data["Ret"]
results = sm.OLS(y, x).fit()
with open('../result/pooled_reg.txt', 'w') as f:
    print(results.summary(), file=f)

# Fixed-effects regression (entity = Stkcd, time = Time)
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Stkcd', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
# NOTE(review): 'clustered' is requested without clusters / cluster_entity /
# cluster_time -- confirm which clustering is intended.
res = mod.fit(cov_type='clustered')
with open('../result/fixed_effects.txt', 'w') as f:
    print(res, file=f)

# Regression controlling for industry (entity = Industry, time = Time)
# The data are re-read because `data` was re-indexed above.
data = pd.read_csv("../data/data_all.csv")
data['Time'] = pd.to_datetime(data['Time'])
# NOTE(review): (Industry, Time) may not identify rows uniquely if several
# stocks share an industry -- confirm the panel index is valid.
data = data.set_index(['Industry', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res = mod.fit(cov_type='clustered')
with open('../result/industry_control.txt', 'w') as f:
    print(res, file=f)
# Regressors for the GDP-per-capita model.
independent_vars = [
    'Gross fixed capital formation (% of GDP)',
    'Gross domestic savings (% of GDP)', 'Population growth (annual %)',
    'FDI, net inflows (% of GDP)', 'Aid/Gdp_sqr', 'Aid/Gdp', 'ln_ODA',
    'wopen', 'Trade'
]
mod = PanelOLS(df['ln_gdp_pc'],
               df[independent_vars],
               entity_effects=True,
               time_effects=True
               )  # entity_effects and time_effects can each be switched on or off
res = mod.fit(
    cov_type='clustered',
    cluster_entity=True)  # cov_type selects the covariance estimator
# cov_type can be 'unadjusted' / 'homoskedastic', 'robust' / 'heteroskedastic',
# or 'clustered' (one- or two-way clustering).
print(res)

# <h4> ODA - Official development assistance
# Three significant independent variables: Aid/GDP, Aid/GDP^2, ln_ODA </h4>

# <h2><center>DIAGNOSTIC ANALYSIS</center></h2>

# <h3> 1. Unit root test </h3>
#
# The Augmented Dickey-Fuller test is a type of statistical test called a unit root test.
# The null hypothesis of the test is that the time series can be represented by a unit root, i.e. that it is not stationary (has some time-dependent structure). The alternative hypothesis (rejecting the null hypothesis) is that the time series is stationary.
#
p = X.shape[0] # within model require no collinear variable combinations no_collinear_x_var = ["x" + str(i + 1) for i in range(min(p, 3))] # run estimator for starting value for interactive estimator if interactive_start_value_effect == "twoways": start_value_estimator = PanelOLS( panel_df.y, panel_df[no_collinear_x_var], entity_effects=True, time_effects=True, ) else: start_value_estimator = PooledOLS( panel_df.y, panel_df[["x" + str(i) for i in range(1, p + 1)]] ) start_value_result = start_value_estimator.fit() interactive_start_value = [ *start_value_result.params.tolist(), *np.zeros(p - len(start_value_result.params)), ] # run interactive fixed effect estimator interactive_estimator = InteractiveFixedEffect(Y, X) ( beta_hat_interactive, beta_hat_list, f_hat, lambda_hat, ) = interactive_estimator.fit(r, interactive_start_value, tolerance) # run within estimator with the same data if within_effect == interactive_start_value_effect: within_result = start_value_result
#Preparing the variables temp = temp.set_index(['crsp_portno', 'slided_caldt']) temp = temp[temp.AFP != -np.inf] temp = temp[temp.AFP != np.inf] # Please comment / uncomment the following if you want to choose for the dependent variable # (1) Carhart Alpha #mod = PanelOLS(temp.CarhartAlpha, temp.AFP, time_effects = True) # (2) Fama-French Alpha mod = PanelOLS(temp.FFAlpha, temp.AFP, time_effects=True) # Please comment / uncomment the following if you want to choose for the standard errors # (1) to be clustered by fund #res = mod.fit(cov_type = 'clustered', cluster_entity = True) # (2) to be clustered by time res = mod.fit(cov_type='clustered', cluster_time=True) #Print regression result print(res) del temp #%% # ============================================================================= # Part 2: Panel Data Regression with multiple independent variables: # - AFP # - Fund characteristics: log(TNA), log(Age), expense ratio, turnover ratio # - First order lag of dependent variable # # ============================================================================= from linearmodels.panel import PanelOLS
def panel_regression_training_test(y, xs, years_training, years_test, country,
                                   list_x, prev=0, show=False, save=True,
                                   path="", diff=False, constant=False,
                                   entity_effects=False):
    """Fit on the training years and predict the test years.

    The working data set comes from ``bdf.filter_origin_country_dataset`` for
    ``years_training + years_test``. The model is fitted on the training
    slice only; test predictions are the regressors multiplied by the
    estimated coefficients and summed per row.

    Args:
        y, xs, country, prev: Forwarded to
            ``bdf.filter_origin_country_dataset``; ``xs`` must have a
            MultiIndex (its first level is passed along as a list).
        years_training: Values of the second index level used for fitting.
        years_test: Values of the second index level used for prediction.
        list_x: Regressor column labels.
        show: When equal to ``True``, plot real vs. predicted values.
        save, path: Forwarded to the plotting helper.
        diff: When equal to ``False`` a ``PanelOLS`` is fitted, otherwise a
            ``FirstDifferenceOLS``.
        constant: When equal to ``False`` no constant is added.
        entity_effects: Forwarded to ``PanelOLS``.

    Returns:
        Tuple ``(params, fitted_values_)``: the estimated coefficients and a
        frame holding the test-period predictions stacked with the training
        fitted values, sorted by index.
    """
    # Local import: the file's pandas alias is not visible from this block.
    import pandas as pd

    years = years_training + years_test
    data = bdf.filter_origin_country_dataset(y, country, years,
                                             xs.index.levels[0].tolist(), xs,
                                             prev)
    data_tr = data.loc[(slice(None), years_training), :]
    data_te = data.loc[(slice(None), years_test), :]

    if constant == False:
        exog_tr = data_tr[list_x]
        exog_te = data_te[list_x]
    else:
        exog_tr = sm.add_constant(data_tr[list_x])
        exog_te = sm.add_constant(data_te[list_x])

    if diff == False:
        mod = PanelOLS(data_tr.y, exog_tr, entity_effects=entity_effects)
    else:
        mod = FirstDifferenceOLS(data_tr.y, exog_tr)
    res_tr = mod.fit()

    # Per-regressor contributions for the test rows, then their row sum.
    # NOTE(review): only the slope coefficients are applied here; any
    # estimated entity effects are not added to the test predictions.
    fitted_values_te = res_tr.params.values * exog_te
    fitted_values_te["fitted_values"] = fitted_values_te.sum(axis=1)

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
    # the two frames the same way.
    fitted_values_ = pd.concat([fitted_values_te, res_tr.fitted_values])
    fitted_values_ = fitted_values_.sort_index()

    if show == True:
        # Forward the caller's ``path``; it used to be replaced by "".
        pmf.plot_real_VS_prediction(y, fitted_values_, xs, years, country, 45,
                                    "Regression model", save=save, path=path)

    print("-------------- Trainin-Test Results --------------")
    evaluation(data.loc[(slice(None), years_test), ], fitted_values_te,
               constant, len(xs.columns.tolist()))
    return (res_tr.params, fitted_values_)
def _fit_women_column(frame, index_cols, regressors):
    """Fit one two-way fixed-effects model of the violent-crime rate, clustered on ``id_e``."""
    panel = frame.set_index(index_cols)
    exog = sm.add_constant(panel[regressors])
    model = PanelOLS(panel.crime_rate_all_violent_p30,
                     exog,
                     entity_effects=True,
                     time_effects=True,
                     drop_absorbed=True,
                     singletons=False)
    # The documented keyword is ``clusters`` (not ``cluster``). ``singletons``
    # is a constructor option already set above, so it is not passed to fit.
    return model.fit(cov_type='clustered', clusters=panel["id_e"])


def baseline_results_women(df):
    """Fit the four baseline columns on the female subsample over 18.

    Rows with ``sex == "F"`` are kept and rows with ``age <= 18`` are removed.
    Four two-way fixed-effects models of ``crime_rate_all_violent_p30`` are
    then fitted: on the full subsample and on the rows where
    ``allmk_periode``, ``all_periode`` and ``mk_periode`` equal 1. Standard
    errors are clustered on each sample's own ``id_e`` column.

    Args:
        df: Frame with the columns referenced above plus the exposure
            regressors and the index columns ``id_e_t`` and ``id_a``.

    Returns:
        The object returned by ``compare(..., stars=True)`` with the fits
        labelled 'Full', 'CC and MK', 'CC' and 'MK'.
    """
    women = df.groupby('sex').get_group("F")
    women_over_18 = women[~(women['age'] <= 18)]

    # NOTE(review): the first column indexes by (id_e_t, id_a) while the other
    # three use (id_a, id_e_t); kept as found -- confirm this is intended.
    res_women = _fit_women_column(
        women_over_18, ["id_e_t", "id_a"],
        ["kid012_all"] + ["all_exp_{}".format(k) for k in range(13, 71)])

    res2_women = _fit_women_column(
        women_over_18[women_over_18['allmk_periode'] == 1],
        ["id_a", "id_e_t"],
        ["kid012_all"] + ["all_exp_{}".format(k) for k in range(13, 72)])

    res3_women = _fit_women_column(
        women_over_18[women_over_18['all_periode'] == 1],
        ["id_a", "id_e_t"],
        ["kid012"] + ["exp_all_{}".format(k) for k in range(13, 72)])

    ##Table 5 column 4 women
    ##had to delete nr. 71-86
    # Clusters now come from this column's own sample; they used to be taken
    # from the 'CC and MK' subsample, whose rows do not match this model.
    res4_women = _fit_women_column(
        women_over_18[women_over_18['mk_periode'] == 1],
        ["id_a", "id_e_t"],
        ["MK_kid012"] + ["exp_mk_{}".format(k) for k in range(13, 71)])

    return (compare(
        {
            'Full': res_women,
            'CC and MK': res2_women,
            'CC': res3_women,
            'MK': res4_women
        },
        stars=True))
def crime_by_type(df):
    """Fit the baseline specification separately for four crime-rate outcomes.

    ``df`` is indexed by ``(id_e_t, id_a)``. Each outcome column is regressed
    on a constant, ``kid012_all`` and ``all_exp_13`` ... ``all_exp_86`` with
    entity and time effects (absorbed regressors dropped, singletons kept),
    clustering standard errors on ``id_e``.

    Args:
        df: Frame with the index columns, the regressors, ``id_e`` and the
            four ``crime_rate_*_p30`` outcome columns.

    Returns:
        The object returned by ``compare(..., stars=True)`` with the fits
        labelled 'violent', 'freedom', 'sexual' and 'property'.
    """
    mi_data = df.set_index(["id_e_t", "id_a"])
    exog_vars = ["kid012_all"] + [
        "all_exp_{}".format(k) for k in range(13, 87)
    ]
    exog_baseline_type = sm.add_constant(mi_data[exog_vars])

    outcome_columns = (
        ('violent', 'crime_rate_violent_p30'),
        ('freedom', 'crime_rate_freedom_p30'),
        ('sexual', 'crime_rate_sexual_p30'),
        ('property', 'crime_rate_property_p30'),
    )
    fitted = {}
    for label, column in outcome_columns:
        model = PanelOLS(mi_data[column],
                         exog_baseline_type,
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=True)
        # The documented keyword for user-supplied clusters is ``clusters``;
        # the former ``cluster=`` spelling is not a recognised option.
        fitted[label] = model.fit(cov_type='clustered',
                                  clusters=mi_data["id_e"])
    return (compare(fitted, stars=True))
def _fit_baseline_column(frame, index_cols, regressors):
    """Fit one two-way fixed-effects model of the violent-crime rate, clustered on ``id_e``."""
    panel = frame.set_index(index_cols)
    exog = sm.add_constant(panel[regressors])
    model = PanelOLS(panel.crime_rate_all_violent_p30,
                     exog,
                     entity_effects=True,
                     time_effects=True,
                     singletons=False)
    # ``singletons`` is a constructor option already set above; it is not a
    # documented covariance option, so it is not repeated in ``fit``.
    return model.fit(cov_type='clustered', clusters=panel.id_e)


def baseline_results(df):
    """Fit the four baseline columns and tabulate them side by side.

    Four two-way fixed-effects models of ``crime_rate_all_violent_p30`` are
    fitted: on all rows of ``df`` and on the rows where ``allmk_periode``,
    ``all_periode`` and ``mk_periode`` equal 1. Standard errors are clustered
    on each sample's ``id_e`` column.

    Args:
        df: Frame with the columns referenced above plus the exposure
            regressors and the index columns ``id_e_t`` and ``id_a``.

    Returns:
        The object returned by ``compare(..., stars=True)`` with the fits
        labelled 'Full', 'CC and MK', 'CC' and 'MK'.
    """
    ##first column of baseline
    # NOTE(review): this column indexes by (id_e_t, id_a) while the other
    # three use (id_a, id_e_t); kept as found -- confirm this is intended.
    res = _fit_baseline_column(
        df, ["id_e_t", "id_a"],
        ["kid012_all"] + ["all_exp_{}".format(k) for k in range(13, 87)])

    ##second column of baseline results
    res2 = _fit_baseline_column(
        df[(df['allmk_periode'] == 1)], ["id_a", "id_e_t"],
        ["kid012_all"] + ["all_exp_{}".format(k) for k in range(13, 87)])

    ##third column of baseline results
    # ``kid012`` takes over the name ``kid012_all`` so the row label lines up
    # with the first two columns in the comparison table.
    third_sample = df[(df['all_periode'] == 1)]
    third_sample = third_sample.drop(['kid012_all'], axis=1)
    third_sample = third_sample.rename(columns={"kid012": "kid012_all"})
    res3 = _fit_baseline_column(
        third_sample, ["id_a", "id_e_t"],
        ["kid012_all"] + ["exp_all_{}".format(k) for k in range(13, 87)])

    ##4th column
    res4 = _fit_baseline_column(
        df[(df['mk_periode'] == 1)], ["id_a", "id_e_t"],
        ["MK_kid012"] + ["exp_mk_{}".format(k) for k in range(13, 84)])

    ##presentation
    return (compare({
        'Full': res,
        'CC and MK': res2,
        'CC': res3,
        'MK': res4
    },
                    stars=True))