def main(): state = pd.read_spss('/Users/kellenbullock/Desktop/Geographic Analysis II/Data/5303_EX_A.sav') Counties = state.query("Scale == 'Counties'") Schools = state.query("Scale == 'Schools'") Tracts = state.query("Scale == 'Tracts'") assigned_var_c = Counties[['Pct_Black', 'Pct_Two_Plus', 'Pct_SNAP', 'Pct_FIRE_I', 'Pct_Poverty', 'Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] assigned_var_s = Schools[['Pct_Black', 'Pct_Two_Plus', 'Pct_SNAP', 'Pct_FIRE_I', 'Pct_Poverty', 'Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] assigned_var_t = Tracts[['Pct_Black', 'Pct_Two_Plus', 'Pct_SNAP', 'Pct_FIRE_I', 'Pct_Poverty', 'Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] assigned_var_t = assigned_var_t.reset_index() assigned_var_s = assigned_var_s.reset_index() assigned_var_s = assigned_var_s.drop(columns=['index']) assigned_var_t = assigned_var_t.drop(columns=['index']) # New Variables var_c = Counties[['Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] var_s = Schools[['Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] var_t = Tracts[['Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] var_t = var_t.reset_index() var_s = var_s.reset_index() var_t = var_t.drop(columns=['index']) var_s = var_s.drop(columns=['index']) # 1. #EDA.figures(var_c, 'Counties') #EDA.figures(var_t, 'Tracts') #EDA.figures(var_s, 'Schools') print('=====County=======') #EDA.Descrptives(Counties, var_c) print('======== Tracts =========') #EDA.Descrptives(Tracts, var_t) print('====== School Districts =======') #EDA.Descrptives(Schools, var_s) # 1 part b: #test_trans(assigned_var_c, 'Pct_Black') print('************') #test_trans(assigned_var_c, 'Pct_Hispanic') # 2, # This will make a pearson's r correlation matrix at the County scale # These do not work in the spyder IDE. Please see the jupyter notebook for outputs. corr = assigned_var_c.corr() corr.style.background_gradient(cmap='coolwarm').set_precision(3) EDA.df_to_pdf(corr, 'Matrix_1') ''' Easy way to order Correlations but without signs: correlations = assigned_var_c.corr().abs() stack = correlations.unstack() stack_order = s.sort_values(kind='quicksort') ''' # 2. a. 8 strongest Person's correaltions. There was no easy way to do this I had to pull everythong by hand. corr_table = { 'Variables': ['Pct_Poverty / Pct_SNAP', 'Pct_Unemp / Pct_SNAP', 'Pct_White / Pct_Unemp', 'Pct_White / Pct_SNAP', 'Pct_Unemp / Pct_Poverty', 'Pct_White / Pct_Two_Plus', 'Pct_White / Pct_Poverty', 'Pct_Fire_I / Pct_BlueCollar_O'], 'Correlation': [0.757, 0.722, - 0.672, - 0.645, 0.605, - 0.599, - 0.593, - 0.574] } all_scale = { 'Variables': ['Pct_Poverty / Pct_SNAP', 'Pct_Unemp / Pct_SNAP', 'Pct_White / Pct_Unemp', 'Pct_White / Pct_SNAP', 'Pct_Unemp / Pct_Poverty', 'Pct_White / Pct_Two_Plus', 'Pct_White / Pct_Poverty', 'Pct_Fire_I / Pct_BlueCollar_O'], 'County': [0.757, 0.722, - 0.672, - 0.645, 0.605, - 0.599, - 0.593, - 0.574], 'Tract': [0.773, 0.679, -0.474, -0.564, 0.628, -0.296, -0.495, -0.584], 'School District': [0.732, 0.590, -0.440, -0.515, 0.494, -0.569, -0.442, -0.380] } pearson_r_table = pd.DataFrame(corr_table) pearson_r_table.to_excel('County Correlation.xlsx') EDA.df_to_pdf(pearson_r_table, 'Decending_corr') all_scale = pd.DataFrame(all_scale) all_scale.to_excel('Scale_Correlation.xlsx') EDA.df_to_pdf(all_scale, 'Scale_Correlation') # 3. assigned_c_repub = Counties[['Pct_Repub', 'Pct_Black', 'Pct_Two_Plus', 'Pct_SNAP', 'Pct_FIRE_I', 'Pct_Poverty', 'Pct_Unemp', 'Med_HomeValue', 'Pct_White', 'Pct_BlueCollar_O', 'Pct_Hispanic']] assigned_c_repub = assigned_c_repub.reset_index() assigned_c_repub = assigned_c_repub.drop(columns=['index']) assigned_c_repub['Pct_Black'] = Transformations.log_trans(assigned_c_repub, 'Pct_Black') assigned_c_repub['Pct_Hispanic'] = Transformations.log_trans(assigned_c_repub, 'Pct_Hispanic') corr2 = assigned_c_repub.corr() corr2.style.background_gradient(cmap='coolwarm').set_precision(3) EDA.df_to_pdf(corr2, 'Matrix_2') # c. chosen = corr2[['Pct_Repub', 'Pct_Poverty', 'Pct_Unemp', 'Pct_White', 'Pct_SNAP']] x = ['Pct_Repub', 'Pct_Poverty', 'Pct_Unemp', 'Pct_White', 'Pct_SNAP'] y = 'Pct_Repub' corr_scatter(chosen, x, y) # c. 4 scatter plots with y axis being pct_republican chosen = assigned_c_repub[['Pct_Repub', 'Pct_Poverty', 'Pct_Unemp', 'Pct_White', 'Pct_SNAP']] # Running partial correlation 4 times: Partial_Corr.partial_corr(chosen[['Pct_Repub', 'Pct_Poverty']]) Partial_Corr.partial_corr(chosen[['Pct_Repub', 'Pct_White']]) Partial_Corr.partial_corr(chosen[['Pct_Repub', 'Pct_SNAP']]) Partial_Corr.partial_corr(chosen[['Pct_Repub', 'Pct_Unemp']]) # 5. bivar_regres(chosen['Pct_Repub'], chosen['Pct_Poverty']) # Model 1 bivar_regres(chosen['Pct_Repub'], chosen['Pct_SNAP']) # Model 2 bivar_regres(chosen['Pct_Repub'], chosen['Pct_White']) # Model 3 bivar_regres(chosen['Pct_Repub'], chosen['Pct_Unemp']) # Model 4