def placebo_reg(df_placebo):
    """Run the placebo first- and second-stage regressions used for Table 6.

    Each of the three lead treatments (``nonOECDshare_diff_tplus1`` to
    ``..._tplus3``) is regressed on the exogenous controls plus the three
    lead instruments (``flyktingandel_tplus1`` to ``..._tplus3``).  The
    second stage is a 2SLS regression of ``soc_bidr_diff`` on the three lead
    treatments.  All standard errors are clustered on ``kommun``.

    As a side effect, ``get_table6`` is called with the data and the four
    fitted models to create the LaTeX table fragments.

    Args:
        df_placebo: data set handed to ``mt.reg`` / ``mt.ivreg``
            (presumably a pandas DataFrame holding the columns named above).

    Returns:
        tuple: ``(reg_placebo1, reg_placebo2, reg_placebo3, pvalue)`` where
        ``pvalue`` is element ``[1]`` of the joint ``Ftest`` on the three
        placebo treatments in the second stage.
    """
    # exogenous controls shared by the first and the second stage
    exog = [
        'const', 'mean_hvac_share', 'mean_unempl', 'munempldiff',
        'mskattekrdiff', 'mbefdiff', 'ekbistandpc_diff', 'dsmastad',
        'dstorstad', 'dsv_maj_1', 'dseatsmp_1', 'dseatsop_1',
    ]
    instruments = [
        'flyktingandel_tplus1', 'flyktingandel_tplus2', 'flyktingandel_tplus3',
    ]
    treatments = [
        'nonOECDshare_diff_tplus1', 'nonOECDshare_diff_tplus2',
        'nonOECDshare_diff_tplus3',
    ]
    # first-stage regressors: controls followed by the instruments
    regr = exog + instruments

    # first stage: one regression per lead treatment
    reg_placebo1, reg_placebo2, reg_placebo3 = [
        mt.reg(df_placebo, y_name=treatment, x_name=regr, cluster='kommun')
        for treatment in treatments
    ]

    # second stage
    iv_placebo = mt.ivreg(
        df_placebo,
        y_name='soc_bidr_diff',
        x_name=treatments,
        z_name=instruments,
        w_name=exog,
        iv_method='2sls',
        cluster='kommun',
    )

    # auxiliary function that creates the LaTeX table fragments for Table 6
    get_table6(df_placebo, reg_placebo1, reg_placebo2, reg_placebo3, iv_placebo)

    # joint test of the three placebo treatments in the second stage;
    # only the p-value is handed back
    joint_test = iv_placebo.Ftest(list(treatments))
    pvalue = joint_test[1]

    return reg_placebo1, reg_placebo2, reg_placebo3, pvalue
def main_reg(df):
    """Run the main first- and second-stage regressions used for Table 2.

    The first stage regresses ``nonOECDshare_diff`` on the exogenous controls
    plus the instrument ``flyktingandel``; the second stage is a 2SLS
    regression of ``soc_bidr_diff`` on ``nonOECDshare_diff``.  Both stages
    are estimated twice: with standard errors clustered on ``kommun`` and
    with standard errors clustered on ``countykod``.

    As a side effect, ``get_table2`` is called with the data and the four
    fitted models to create the LaTeX table fragments.

    Args:
        df: data set handed to ``mt.reg`` / ``mt.ivreg`` (presumably a pandas
            DataFrame holding the columns named above).

    Returns:
        tuple: ``(reg1, iv1, reg2, iv2, F_stat1, F_stat2)`` where the two
        F statistics are element ``[0]`` of ``Ftest('flyktingandel')`` on the
        respective first stage.
    """
    # exogenous controls shared by the first and the second stage
    exog_base = [
        'const', 'mean_hvac_share', 'mean_unempl', 'munempldiff',
        'mskattekrdiff', 'mbefdiff', 'ekbistandpc_diff', 'dsmastad',
        'dstorstad', 'dsv_maj_1', 'dseatsmp_1', 'dseatsop_1',
        'panel8891', 'panel9194',
    ]
    # first-stage regressors: controls followed by the instrument
    regressors_base = exog_base + ['flyktingandel']

    def _both_stages(cluster_var):
        # first stage, then second stage, with the requested clustering
        first = mt.reg(
            df,
            y_name='nonOECDshare_diff',
            x_name=regressors_base,
            cluster=cluster_var,
        )
        second = mt.ivreg(
            df,
            y_name='soc_bidr_diff',
            x_name='nonOECDshare_diff',
            z_name='flyktingandel',
            w_name=exog_base,
            iv_method='2sls',
            cluster=cluster_var,
        )
        return first, second

    # standard errors clustered at municipality level
    reg1, iv1 = _both_stages('kommun')
    # standard errors clustered at county level
    reg2, iv2 = _both_stages('countykod')

    # auxiliary function that creates the LaTeX table fragments for Table 2
    get_table2(df, reg1, iv1, reg2, iv2)

    # F-test for relevance of the instrument; keep the statistic only
    F_stat1 = reg1.Ftest('flyktingandel')[0]
    F_stat2 = reg2.Ftest('flyktingandel')[0]

    return reg1, iv1, reg2, iv2, F_stat1, F_stat2
def reg1(year, data, ss, control):
    """Regress ``lis_share`` on ``ss`` and the controls for a single year.

    The regression is estimated once on the rows whose ``year`` column equals
    ``year``, with ``wkr`` fixed effects and standard errors clustered on
    ``wkr``.  (The earlier version looped over every row of ``data["year"]``
    and re-estimated the identical model for each matching row.)

    Args:
        year: value compared against ``data["year"]`` to select the sample.
        data: frame holding at least the ``year``, ``lis_share``, ``wkr`` and
            ``ss`` columns.
        ss: name of the regressor of interest.
        control: object concatenated with ``data[[ss]]`` to form the
            regressor specification (presumably a DataFrame of control
            columns -- confirm against the callers).

    Returns:
        tuple: ``(beta, se, r2, N, Ftest)`` -- coefficient and standard error
        of ``ss``, the R squared, the number of observations and the result
        of ``Ftest(ss)``.

    Raises:
        ValueError: if no row of ``data`` has the requested ``year``.
    """
    in_year = data["year"] == year
    if not in_year.any():
        raise ValueError("no observations with year == {!r}".format(year))

    # NOTE(review): the regressor specification is passed as a concatenated
    # frame rather than a list of names; this presumably relies on mt.reg
    # reading the names from the frame's columns -- confirm.
    reg = mt.reg(data.loc[in_year],                    # DataFrame to use
                 'lis_share',                          # Outcome
                 pd.concat([data[[ss]], control]),     # Indep. Variables
                 fe_name='wkr',                        # Fixed-effects
                 cluster='wkr')                        # Cluster
    return reg.beta[ss], reg.se[ss], reg.r2, reg.N, reg.Ftest(ss)
def setup_class(cls):
    """Fit the cluster-robust OLS fixture on the bundled ``auto.dta`` data.

    ``price`` is regressed on ``mpg`` and ``length`` with an added constant
    and errors clustered on ``gear_ratio``.  The fit is stored in
    ``cls.result`` and the ``ols_cluster`` reference values in
    ``cls.expected``.

    NOTE(review): the reference values presumably come from the matching
    Stata run on ``sysuse auto`` (price on mpg and length, clustered on
    gear_ratio) -- confirm against the fixture module.
    """
    # data/auto.dta lives next to this test module
    here = path.dirname(path.relpath(__file__))
    autodata = pd.read_stata(path.join(here, 'data', 'auto.dta'))

    outcome = 'price'
    regressors = ['mpg', 'length']
    cls.result = reg(autodata, outcome, regressors,
                     cluster='gear_ratio', addcons=True)
    cls.expected = ols_cluster
def reg4(year, data, ss, control):
    """Regress ``dNazi_share`` on ``ss`` and the controls for a single year.

    The regression is estimated once on the rows whose ``year`` column equals
    ``year``, with ``wkr`` fixed effects and standard errors clustered on
    ``wkr``.  (The earlier version looped over every row of ``data["year"]``
    and re-estimated the identical model for each matching row.)

    Args:
        year: value compared against ``data["year"]`` to select the sample.
        data: frame holding at least the ``year``, ``dNazi_share``, ``wkr``
            and ``ss`` columns.
        ss: name of the regressor of interest.
        control: object concatenated with ``data[[ss]]`` to form the
            regressor specification (presumably a DataFrame of control
            columns -- confirm against the callers).

    Returns:
        tuple: ``(beta, se, N, r2)`` -- coefficient and standard error of
        ``ss``, the number of observations and the R squared.

    Raises:
        ValueError: if no row of ``data`` has the requested ``year``.
    """
    in_year = data["year"] == year
    if not in_year.any():
        raise ValueError("no observations with year == {!r}".format(year))

    # NOTE(review): the regressor specification is passed as a concatenated
    # frame rather than a list of names; this presumably relies on mt.reg
    # reading the names from the frame's columns -- confirm.
    reg = mt.reg(data.loc[in_year],                    # DataFrame to use
                 "dNazi_share",                        # Outcome
                 pd.concat([data[[ss]], control]),     # Indep. Variables
                 fe_name="wkr",                        # Fixed-effects
                 cluster='wkr')                        # Cluster
    return reg.beta[ss], reg.se[ss], reg.N, reg.r2
def setup_class(cls):
    """Fit the plain OLS fixture on the bundled ``auto.dta`` data.

    After ``cls.init(cls)`` the ``'vce'`` precision is set to 6.  ``price``
    is then regressed on ``mpg`` and ``length`` with an added constant; the
    fit is stored in ``cls.result`` and the ``ols_std`` reference values in
    ``cls.expected``.

    NOTE(review): the reference values presumably come from the matching
    Stata run on ``sysuse auto`` (price on mpg and length) -- confirm
    against the fixture module.
    """
    cls.init(cls)
    cls.precision['vce'] = 6

    # data/auto.dta lives next to this test module
    here = path.dirname(path.relpath(__file__))
    autodata = pd.read_stata(path.join(here, 'data', 'auto.dta'))

    outcome = ['price']
    regressors = ['mpg', 'length']
    cls.result = reg(autodata, outcome, regressors, addcons=True)
    cls.expected = ols_std
def ghat_of_x(y, x, x0, h, degree, kernel):
    """Return the kernel-weighted polynomial fit evaluated at ``x0``.

    Weights are ``kernel_func(x - x0, h, kernel)`` and the design matrix is
    ``_make_X(x, x0, degree)``, whose columns are labelled ``cons``, ``x1``,
    ..., ``x<degree>``.  ``y`` is regressed on those columns with the kernel
    weights passed as ``awt_name``; the coefficient on ``cons`` is returned.

    Returns ``np.nan`` without fitting when ``_sparse_data(K, degree)`` is
    truthy.
    """
    weights = kernel_func(x - x0, h, kernel)
    if _sparse_data(weights, degree):
        return np.nan

    design = _make_X(x, x0, degree)
    x_name = ['cons']
    x_name += [f'x{power}' for power in range(1, degree + 1)]

    # one frame with the outcome, the design columns and the weights
    stacked = np.hstack((y.reshape(-1, 1), design, weights.reshape(-1, 1)))
    df = pd.DataFrame(stacked, columns=['y'] + x_name + ['k'])

    res = reg(df, 'y', x_name, awt_name='k')
    # plot_this(y, x, K, X, res)    # XXX tmp, diagnostic
    return res.beta['cons']
def reg2(year, data, ss, control):
    """Regress the outcome ``ss`` on ``control`` for a single year.

    The regression is estimated once on the rows whose ``year`` column equals
    ``year``, with ``wkr`` fixed effects and standard errors clustered on
    ``wkr``.  (The earlier version looped over every row of ``data["year"]``
    and re-estimated the identical model for each matching row.)

    Args:
        year: value compared against ``data["year"]`` to select the sample.
        data: frame holding at least the ``year`` and ``wkr`` columns plus
            the outcome and the regressors.
        ss: name of the outcome variable.
        control: regressor specification passed to ``mt.reg``; it has to
            cover ``bigd``, ``altitude`` and the three variable groups
            tested below, since their coefficients are read from the fit.

    Returns:
        tuple: coefficient and standard error of ``bigd``, coefficient and
        standard error of ``altitude``, the ``Ftest`` results for the
        location, socio-economic and electoral variable groups, the
        R squared and the number of observations (in that order).

    Raises:
        ValueError: if no row of ``data`` has the requested ``year``.
    """
    # variable groups that are tested jointly
    locat = ["pops", "pops_2", "pops_3", "pops_4", "pops_5",
             "bigd", "city", "altitude"]
    socioe = ["c25juden_share", "c25kath_share", "c25arbei_share",
              "c25anges_share", "war_per1000", "in_welfare_per1000",
              "sozialrentner_per1000", "logtaxprop"]
    elect = ["turnout24", "vote24", "nsfb24", "spd24", "z24"]

    in_year = data["year"] == year
    if not in_year.any():
        raise ValueError("no observations with year == {!r}".format(year))

    reg = mt.reg(data.loc[in_year],     # DataFrame to use
                 ss,                    # Outcome
                 control,               # Indep. Variables
                 fe_name="wkr",         # Fixed-effects
                 cluster='wkr')         # Cluster
    return (reg.beta["bigd"], reg.se["bigd"],
            reg.beta["altitude"], reg.se["altitude"],
            reg.Ftest(locat), reg.Ftest(socioe), reg.Ftest(elect),
            reg.r2, reg.N)
def sens1_reg(df, df_sens1):
    """Run the first sensitivity regressions used for Table 7.

    Both stages are estimated on ``df_sens1`` with standard errors clustered
    on ``kommun``.  The big-city dummy ``dstorstad`` is left out: the big
    city counties were excluded from this sample, so the dummy would be zero
    everywhere and the regressor matrix could not be inverted.

    As a side effect, ``get_table7`` is called with ``df`` and the two
    fitted models to create the LaTeX table fragments.

    Args:
        df: data set forwarded to ``get_table7``.
        df_sens1: data set the two regressions are estimated on.

    Returns:
        tuple: ``(reg_sens1, iv_sens1)`` -- first-stage and 2SLS results.
    """
    # exogenous controls shared by both stages (no 'dstorstad', see above)
    exog_sens1 = [
        'const', 'mean_hvac_share', 'mean_unempl', 'munempldiff',
        'mskattekrdiff', 'mbefdiff', 'ekbistandpc_diff', 'dsmastad',
        'dsv_maj_1', 'dseatsmp_1', 'dseatsop_1', 'panel8891', 'panel9194',
    ]
    # first-stage regressors: controls followed by the instrument
    regressors_sens1 = exog_sens1 + ['flyktingandel']

    # first stage
    reg_sens1 = mt.reg(
        df_sens1,
        y_name='nonOECDshare_diff',
        x_name=regressors_sens1,
        cluster='kommun',
    )

    # second stage
    iv_sens1 = mt.ivreg(
        df_sens1,
        y_name='soc_bidr_diff',
        x_name='nonOECDshare_diff',
        z_name='flyktingandel',
        w_name=exog_sens1,
        iv_method='2sls',
        cluster='kommun',
    )

    # auxiliary function that creates the LaTeX table fragments for Table 7
    get_table7(df, reg_sens1, iv_sens1)

    return reg_sens1, iv_sens1
def regress(dependent_variable, dataframe, degree):
    """Regress one outcome on the covariates for a given polynomial degree.

    Args:
        dependent_variable: name of the dependent variable; it is formatted
            into a string before being handed to ``mt.reg``.
        dataframe (pd.DataFrame): the data to estimate on (full sample,
            narrow window or wide window).
        degree (int): polynomial degree forwarded to ``get_covariates``.

    Returns:
        The ``mt.reg`` result, with standard errors clustered on ``score``.
    """
    outcome = f"{dependent_variable}"
    covariates = get_covariates(degree)
    return mt.reg(dataframe, outcome, covariates, cluster="score")
def first_stage(self, exogenous_varname, endogenous_varname, IV_varname):
    '''Print first-stage diagnostics for every endogenous variable.

    Each endogenous variable is regressed on the exogenous variables plus
    the instruments, using a copy of ``self.products`` sorted by
    ``market_id`` and ``product_id``.  The joint F-test on the instruments
    and the regression result are printed.

    All three arguments must be lists of column names.  Nothing is returned.
    '''
    import econtools.metrics as mt

    # work on a sorted copy of the product data
    products = self.products.sort_values(['market_id', 'product_id']).copy()
    regressors = exogenous_varname + IV_varname

    # A disabled alternative first partialled the exogenous variables out of
    # both the endogenous variables and the instruments ("*_ddot" columns)
    # and then regressed residual on residual; the direct regression below
    # is used instead.
    for target in endogenous_varname:
        fit = mt.reg(products, target, regressors)
        instrument_F = fit.Ftest(IV_varname)

        report = (
            '#=======================================================',
            'Endogenous var: {} '.format(target),
            ' IV: {} '.format(IV_varname),
            ' ',
            'Ftest = {}'.format(instrument_F),
            ' ',
            'regression:',
            fit,
            ' ',
        )
        for entry in report:
            print(entry)

    return
def sens2_reg(df, df_sens2):
    """Run the second sensitivity regressions used for Table 8.

    Both stages are estimated on ``df_sens2`` with standard errors clustered
    on ``kommun``.  The panel dummy ``panel9194`` is left out: observations
    of the 1991/94 panel were excluded from this sample, so the dummy would
    be zero everywhere and the regressor matrix could not be inverted.

    As a side effect, ``get_table8`` is called with ``df`` and the two
    fitted models to create the LaTeX table fragments.

    Args:
        df: data set forwarded to ``get_table8``.
        df_sens2: data set the two regressions are estimated on.

    Returns:
        tuple: ``(reg_sens2, iv_sens2, F_stat)`` where ``F_stat`` is element
        ``[0]`` of ``Ftest('flyktingandel')`` on the first stage.
    """
    # exogenous controls shared by both stages (no 'panel9194', see above)
    exog_sens2 = [
        'const', 'mean_hvac_share', 'mean_unempl', 'munempldiff',
        'mskattekrdiff', 'mbefdiff', 'ekbistandpc_diff', 'dsmastad',
        'dstorstad', 'dsv_maj_1', 'dseatsmp_1', 'dseatsop_1', 'panel8891',
    ]
    # first-stage regressors: controls followed by the instrument
    regressors_sens2 = exog_sens2 + ['flyktingandel']

    # first stage
    reg_sens2 = mt.reg(
        df_sens2,
        y_name='nonOECDshare_diff',
        x_name=regressors_sens2,
        cluster='kommun',
    )

    # second stage
    iv_sens2 = mt.ivreg(
        df_sens2,
        y_name='soc_bidr_diff',
        x_name='nonOECDshare_diff',
        z_name='flyktingandel',
        w_name=exog_sens2,
        iv_method='2sls',
        cluster='kommun',
    )

    # auxiliary function that creates the LaTeX table fragments for Table 8
    get_table8(df, reg_sens2, iv_sens2)

    # F-test for relevance of the instrument; keep the statistic only
    F_stat = reg_sens2.Ftest('flyktingandel')[0]

    return reg_sens2, iv_sens2, F_stat
def _OLS_second_stage(self, df):
    """Regress ``log_PA_PB`` on ``const`` and ``NA_NB_diff``.

    Returns the ``mt.reg`` result estimated on ``df``.
    """
    outcome = 'log_PA_PB'
    regressors = ['const', 'NA_NB_diff']
    return mt.reg(df, outcome, regressors)
def _OLS_first_stage(self, df):
    """Regress ``log_PB_P0`` on ``const``, ``X_m``, ``N_B`` and ``N_A``.

    Returns the ``mt.reg`` result estimated on ``df``.
    """
    outcome = 'log_PB_P0'
    regressors = ['const', 'X_m', 'N_B', 'N_A']
    return mt.reg(df, outcome, regressors)
# Diagnostic output: the data before any filtering.
print(df.head())

# Keep complete rows without any zero entry, for years strictly between
# 1972 and 2017.
df = df.dropna()
df = df[(df != 0).all(axis=1)]
df = df[(df.year > 1972) & (df.year < 2017)]
print(df.head())

y = 'growth5'
X = ['Competitive_Rise_Country', 'Competitive_Decline_Country']
fe_var = 'country'

# Log GDP per capita five rows ahead, taken WITHIN each country: a plain
# shift over the whole frame would pair the last rows of one country with
# the first rows of the next one.
# NOTE(review): "five rows ahead" equals "five years ahead" only if the rows
# of each country are ordered by year without gaps -- confirm, since rows
# were dropped above.
df['leadGDP'] = np.log(
    df.groupby(fe_var)['gdppercapitaconstantlcunygdppcap'].shift(-5)
)
print(df.head())

# creating the growth5 variable: change in log GDP per capita over the lead
df['growth5'] = df.leadGDP - np.log(df.gdppercapitaconstantlcunygdppcap)
print(df.head())

results = mt.reg(
    df,               # DataFrame
    y,                # Dependent var (string)
    X,                # Independent var(s) (string or list of strings)
    fe_name=fe_var,   # Fixed-effects group
    addcons=True      # Adds a constant term
)
print(results)
import pandas as pd
import numpy as np
from econtools.metrics import reg

# Design matrix with a constant and two dummies that always sum to the
# constant, i.e. the three columns are perfectly collinear.
ones = np.array([1] * 10)
even_odd = np.array([0, 1] * 5)
odd_even = np.array([1, 0] * 5)
X = pd.DataFrame(np.column_stack([ones, even_odd, odd_even]))

# Outcome generated without noise from known coefficients.
beta = np.array([1, 2, 3])
y = X @ beta

data = pd.DataFrame(X)
data.columns = ['cons', 'x1', 'x2']
data['y'] = y

# This is fine
reg(data, 'y', ['cons', 'x1'])

# This should not be fine
reg(data, 'y', ['cons', 'x1', 'x2'])