# Estimation settings: the column used for clustering, the columns passed
# as fixed effects, and the matching `how` string handed to the estimator.
cluster_cols_var = 'msamd'
FE_cols_vars = ['fips', 'cert']
how = 'fips, cert'

# Output path template: first slot is the year, second the file extension.
file = 'Results/lpm_results_{}.{}'

#------------------------------------------------------------
# Run benchmark model: one regression per year
#------------------------------------------------------------

first_year, last_year = 2004, 2019

for year in range(first_year, last_year + 1):
    # Pull this year's rows into memory and renumber them from zero.
    df = (
        dd_main[dd_main.date == year]
        .compute(scheduler='threads')
        .reset_index(drop=True)
    )

    # Fit with the fixed-effects transformation switched on.
    results_benchmark = MultiDimensionalOLS().fit(
        df[y],
        df[x],
        cov_type=cov_type,
        cluster_cols=df[cluster_cols_var],
        transform_data=True,
        FE_cols=df[FE_cols_vars],
        how=how,
    )

    # Tabulate the estimates and write them out in both formats.
    df_results_benchmark = results_benchmark.to_dataframe()
    df_results_benchmark.to_excel(file.format(year, 'xlsx'))
    df_results_benchmark.to_csv(file.format(year, 'csv'))

    # Drop the per-year objects before the next iteration.
    del df, results_benchmark
# Output path templates (label, extension); the second is for the
# "_1019" variant of the results.
file = 'Robustness_checks/Distance_robust_benchmark_IV_{}.{}'
file_1019 = 'Robustness_checks/Distance_robust_benchmark_IV_{}_1019.{}'

#------------------------------------------------------------
# Perform 2SLS
#------------------------------------------------------------

#------------------------------------------------------------
#------------------------------------------------------------
# FULL SAMPLE
#------------------------------------------------------------

# First stage: regress the endogenous regressor on the instrument `z`
# together with the exogenous regressors.
first_stage_rhs = [z] + x_exo
results_fs = MultiDimensionalOLS().fit(
    df_trans[x_endo],
    df_trans[first_stage_rhs],
    cov_type=cov_type,
    cluster_cols=df_trans[cluster_cols_var],
)

## Tabulate the first-stage estimates
df_results_fs = results_fs.to_dataframe()

# Disabled partial F-test, kept as an inert string literal.
''' TURN ON IF MULTIPLE INSTRUMENTS
## Perform partial f-test
### First calculate results without instrument
results_fs_noinstr = MultiDimensionalOLS().fit(df_trans[x_endo], df_trans[x_exo],\
    cov_type = cov_type, cluster_cols = df_trans[cluster_cols_var])

### Do f-test
f_stat = ((results_fs_noinstr.rss - results_fs.rss) / 1) / results_fs.mse_resid
'''

## Write the first-stage table to csv
df_results_fs.to_csv(file.format('fs', 'csv'))
# Estimation settings: clustering column, fixed-effect columns and the
# matching `how` string handed to the estimator.
cluster_cols_var = 'msamd'
FE_cols_vars = ['fips', 'cert']
how = 'fips, cert'

# Output path templates: pooled run (extension only) and per-year runs
# (year, extension).
file_local = 'Results/ratespread_results_local.{}'
file_local_year = 'Results/ratespread_results_local_{}.{}'

#------------------------------------------------------------
# Run Model
#------------------------------------------------------------

## Local specification on the pooled data, with fixed effects
results_local = MultiDimensionalOLS().fit(
    df[y],
    df[x_local],
    cov_type=cov_type,
    cluster_cols=df[cluster_cols_var],
    transform_data=True,
    FE_cols=df[FE_cols_vars],
    how=how,
)

### Tabulate the estimates
df_results_local = results_local.to_dataframe()

### Write the table to excel and csv
df_results_local.to_excel(file_local.format('xlsx'))
df_results_local.to_csv(file_local.format('csv'))

# The fitted object is not needed once the table is saved.
del results_local

## 2018 and 2019 are handled separately
for year in range(2018, 2019 + 1):
    # Restrict the data to the current year
    df_year = df[df.date == year]
elif counter == 3: x = ls_num + x_vw elif counter == 4: x = ls_num + x_vw + x_z + ['intercept'] ## Set FE and how if counter in (0,1): FE_cols = FE_cols_vars_t h = how_t else: FE_cols = FE_cols_vars h = how # Run Model if counter == 4: # pooled OLS results = MultiDimensionalOLS().fit(data[y], data[x], cov_type = cov_type, cluster_cols = data[cluster_cols_var]) else: results = MultiDimensionalOLS().fit(data[y], data[x],\ cov_type = cov_type, cluster_cols = data[cluster_cols_var],\ transform_data = True, FE_cols = data[FE_cols], how = h) if counter == 2: results_split = copy.deepcopy(results) # Transform results to pd.DataFrame df_results = results.to_dataframe() # Add count for msamd en cert df_results['msamd'] = msamd df_results['cert'] = cert # Save to csv
# Output path templates (extension only): full sample and the "_1019"
# sub-sample.
file = 'Results/Distance_results_benchmark.{}'
file_1019 = 'Results/Distance_results_benchmark_1019.{}'

#------------------------------------------------------------
# Run Model
#------------------------------------------------------------

## Full sample
# Disabled fixed-effects variant, kept as an inert string literal.
'''
results_local = MultiDimensionalOLS().fit(df[y], df[x],\
    cov_type = cov_type, cluster_cols = df[cluster_cols_var],\
    transform_data = True, FE_cols = df[FE_cols_vars], how = how)
'''
# Fit on `df_trans` without asking the estimator for a transformation.
results = MultiDimensionalOLS().fit(
    df_trans[y],
    df_trans[x],
    cov_type=cov_type,
    cluster_cols=df_trans[cluster_cols_var],
)

### Tabulate the estimates
df_results = results.to_dataframe()

### Write the table to excel and csv
df_results.to_excel(file.format('xlsx'))
df_results.to_csv(file.format('csv'))

## > 2009: same specification on the `df_trans_1019` sub-sample
results = MultiDimensionalOLS().fit(
    df_trans_1019[y],
    df_trans_1019[x],
    cov_type=cov_type,
    cluster_cols=df_trans_1019[cluster_cols_var],
)

### Tabulate the estimates
df_results = results.to_dataframe()
how1819 = 'fips, cert'

# Output path templates (extension only), one per robustness check.
file1 = 'Robustness_checks/Distance_robust_cdd.{}'
file2 = 'Robustness_checks/Distance_robust_remote.{}'
file3 = 'Robustness_checks/Distance_robust_lssplit.{}'
file4 = 'Robustness_checks/Distance_robust_lsever.{}'
file5 = 'Robustness_checks/Distance_robust_loancosts.{}'

#------------------------------------------------------------
# Run Model
#------------------------------------------------------------

# Each dependent variable is paired with a regressor set and an output
# template; only the first four templates take part in this loop.
for y, x, file in zip(y_lst,
                      [x0, x0, x1, x2],
                      [file1, file2, file3, file4]):
    results = MultiDimensionalOLS().fit(
        df_trans[y],
        df_trans[x],
        cov_type=cov_type,
        cluster_cols=df_trans[cluster_cols_var],
    )

    ### Tabulate the estimates
    df_results = results.to_dataframe()

    ## Wald test, only for the run that writes to file3
    if file == file3:
        # Two restrictions on the first three coefficients:
        # b0 - b1 = 0 and b1 - b2 = 0.
        R = pd.DataFrame([[1, -1, 0],
                          [0, 1, -1]])
        h_beta = R @ pd.DataFrame(results.params[:3])

        # Covariance of the first three coefficients scaled by nobs; the
        # statistic below is scaled by nobs as well.
        C = results.nobs * results.cov.iloc[:3, :3]
        test_stat = results.nobs * h_beta.T @ np.linalg.inv(
            R @ C @ R.T) @ h_beta

        ## p-value from the chi-square survival function, with one degree
        ## of freedom per restriction (row of R)
        pval_wald = stats.chi2.sf(test_stat, R.shape[0])
# Number of distinct `cert` values in the data
cert = df.cert.nunique()

# Output path templates (extension only)
## OLS
file = 'Results/Benchmark_results.{}'
file_nc = 'Results/Benchmark_results_nc.{}'
file_nlen = 'Results/Benchmark_results_nlen.{}'

#------------------------------------------------------------
# Run benchmark Model
#------------------------------------------------------------

## Fixed-effects benchmark
## NOTE: remove msat-invariant variables
results_benchmark = MultiDimensionalOLS().fit(
    df[y],
    df[x],
    cov_type=cov_type,
    cluster_cols=df[cluster_cols_var],
    transform_data=True,
    FE_cols=df[FE_cols_vars],
    how=how,
)

## OLS benchmark
### No constant can be added, so centre every column on its mean first.
df_demean = df - df.mean()

### Fit on the centred data; the cluster labels come from the
### un-centred frame.
ols_benchmark = MultiDimensionalOLS().fit(
    df_demean[y],
    df_demean[x],
    cov_type=cov_type,
    cluster_cols=df[cluster_cols_var],
)

# Hausman test comparing the two sets of coefficients and covariances
haus_H, haus_pval, haus_dof = HausmanSpecificationTest(
    results_benchmark.params,
    ols_benchmark.params,
    results_benchmark.cov,
    ols_benchmark.cov,
)
file_fe_tcert_lsint_res = 'Results/Results_fe_tcert_lsint_res.{}'

## RE: MSAT, Lender
file_re_msatcert_ls_full = 'Results/Results_re_msatcert_ls_full.{}'
file_re_msatcert_ls_res = 'Results/Results_re_msatcert_ls_res.{}'
file_re_msatcert_int_res = 'Results/Results_re_msatcert_int_res.{}'
# The '.' before the placeholder is required: these templates are filled
# with a bare extension ('xlsx' / 'csv'), so without it the output path
# would have no file extension.
file_re_msatcert_lsint_res = 'Results/Results_re_msatcert_lsint_res.{}'

#------------------------------------------------------------
# 1) Pooled OLS -- loan sales + controls -- Full sample
#------------------------------------------------------------

# Fit on the full sample with an explicit intercept column and no
# fixed-effects transformation.
results_ols_ls_full = MultiDimensionalOLS().fit(
    df_full[y_var],
    df_full[x_ls_var + ['intercept']],
    cov_type=cov_type,
    cluster_cols=df_full[cluster_cols_var],
)

# Tabulate the estimates
df_results_ols_ls_full = results_ols_ls_full.to_dataframe()

# Add count for msamd and cert
# (presumably the number of distinct values in the full sample -- the
# two variables are defined outside this section; confirm there)
df_results_ols_ls_full['msamd'] = msamd_full
df_results_ols_ls_full['cert'] = cert_full

# Write the table to excel and csv
df_results_ols_ls_full.to_excel(file_ols_ls_full.format('xlsx'))
df_results_ols_ls_full.to_csv(file_ols_ls_full.format('csv'))

#------------------------------------------------------------
# Output path templates: first slot is the specification index, second
# the file extension.
## 1819
file_1819 = 'Robustness_checks/Benchmark_techinno_1819_{}.{}'
## 1319
file_1319 = 'Robustness_checks/Benchmark_techinno_1319_{}.{}'

#------------------------------------------------------------
# Run Model
#------------------------------------------------------------

# Run model 1819: one fixed-effects fit per regressor list in x1819,
# each combined with the shared regressors in x_rest.
for i, x in enumerate(x1819):
    results_1819 = MultiDimensionalOLS().fit(
        df1819[y],
        df1819[x + x_rest],
        cov_type=cov_type1819,
        cluster_cols=df1819[cluster_cols_var1819],
        transform_data=True,
        FE_cols=df1819[FE_cols_vars1819],
        how=how1819,
    )

    ## Transform to pandas df
    df_results_1819 = results_1819.to_dataframe()

    ## Save to excel and csv, tagged with the specification index
    df_results_1819.to_excel(file_1819.format(i, 'xlsx'))
    df_results_1819.to_csv(file_1819.format(i, 'csv'))

# Run model 1319: same loop on the 1319 data, fitted without the
# fixed-effects transformation arguments.
for i, x in enumerate(x1319):
    results_1319 = MultiDimensionalOLS().fit(
        df1319[y],
        df1319[x + x_rest],
        cov_type=cov_type1319,
        cluster_cols=df1319[cluster_cols_var1319],
    )

    ## Transform to pandas df