# NHGIS race-table column codes: Black voting-age population and total VAP.
bvap = 'H74004'
vap = 'H74001'

# 1) Build the NHGIS GISJOIN key from the block's FIPS components
#    ('G' + state + zero-padded county, tract, block) so census blocks can
#    be joined to the NHGIS race table on 'GISJOIN'.
blocks['GISJOIN'] = 'G' + blocks['STATEFP10'] + blocks['COUNTYFP10'].apply(
    lambda x: x.zfill(4)) + blocks['TRACTCE10'].apply(
        lambda x: x.zfill(7)) + blocks['BLOCKCE'].apply(lambda x: x.zfill(4))
blocks_w_race = blocks.merge(race.loc[:, ['GISJOIN', bvap, vap]], on='GISJOIN')
# Normalize the cryptic NHGIS codes to readable column names and cast to int.
blocks_w_race = blocks_w_race.rename(columns={bvap: 'BVAP', vap: 'VAP'})
blocks_w_race = blocks_w_race.astype({'BVAP': int, 'VAP': int})

# 2) Load precinct data with election results, merge them with census block data.
#    aggregate(...)[1] returns the target (precinct) frame with BVAP/VAP summed
#    onto it; 'greatest_area' assigns each block to the precinct it most overlaps.
precincts = gpd.read_file(BH_precincts)
precincts_w_race = ai.aggregate(blocks_w_race,
                                precincts,
                                source_columns=['BVAP', 'VAP'],
                                method='greatest_area')[1]

# 3) Load House geographies, assign districts to precincts, filter.
#    aggregate(...)[0] returns the source (precinct) frame with the district
#    'NAME' carried down onto each precinct.
house = gpd.read_file(house_districts)
precincts_w_race_and_districts = ai.aggregate(precincts_w_race,
                                              house,
                                              target_columns=['NAME'])[0]

# Filter to only include precincts in affected and adjacent districts according to VPAP:
# https://www.vpap.org/visuals/visual/ruling-could-impact-1-3-house-districts/
affected = [63, 69, 70, 71, 74, 77, 80, 89, 90, 92, 95]
# NOTE(review): this chunk is truncated mid-list — the closing ']' (and
# possibly further district numbers) is not visible here.
adjacent = [
    27, 55, 61, 62, 64, 66, 68, 72, 73, 75, 76, 78, 79, 81, 83, 85, 91, 93,
    94, 96, 97, 100
# Missouri: label each 2010 census block with its block-group GEOID.
import areal_interpolation as ai
import geopandas as gpd
import maup
import matplotlib

# Blocks are the small units, block groups the large ones.
small = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/tabblock2010_29_pophu/tabblock2010_29_pophu.shp'
)
large = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/tl_2013_29_bg/tl_2013_29_bg.shp')
# Alternative large layer (precincts):
# large = gpd.read_file('/Users/hopecj/projects/gerryspam/MO/dat/mo_prec_labeled/mo_prec_labeled_nopop.shp')

# Put both layers in the same CRS before any overlay work.
small = small.to_crs(large.crs)

# `target_columns` is the column from `large` to carry down onto each block.
# Slow step — roughly an hour on the full state (12:31 pm -> 1:30 pm).
agg = ai.aggregate(small, large, target_columns=['GEOID'])[0]

# Trim to identifying + population columns and the geometry.
keep_cols = [
    'STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE', 'BLOCKID10',
    'PARTFLG', 'HOUSING10', 'POP10', 'GEOID', 'geometry',
]
agg = agg[keep_cols]
agg.to_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_bg/blocks_with_bg.shp'
)
# Michigan: attach county FIPS code and name to each precinct in `small`
# (loaded earlier in this script).
# nm = gpd.read_file('/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/mapping/NC/Shapefiles/precincts/2016/nc_2016.shp')
large = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/mapping/MI/Counties/Counties_v17a/Counties_v17a.shp'
)
# nm = gpd.read_file('/Users/hwheelen/Desktop/VA/VA_Leg_Enacted_BH.shp')

# Reproject the precinct layer into the county layer's CRS.
small = small.to_crs(large.crs)

# Carry the county identifiers down onto each precinct. Slow step.
agg = ai.aggregate(small, large, target_columns=['FIPSCODE', 'NAME'])[0]

# Keep only the columns needed downstream.
wanted = [
    'OBJECTID',
    'ShapeSTAre',
    'ShapeSTLen',
    'FIPSCODE',
    'NAME',
    'PRECINCTID',
    'ELECTIONYE',
    'COUNTYFIPS',
    'MCDFIPS',
    'WARD',
    'PRECINCT',
    'geometry',
]
agg = agg[wanted]
# Build the NHGIS GISJOIN key ('G' + state + zero-padded county/tract/block)
# so census blocks can be joined to the NHGIS race table.
blocks['GISJOIN'] = 'G' + blocks['STATEFP10'] + blocks['COUNTYFP10'].apply(
    lambda x: x.zfill(4)) + blocks['TRACTCE10'].apply(
        lambda x: x.zfill(7)) + blocks['BLOCKCE'].apply(lambda x: x.zfill(4))
blocks_w_race = blocks.merge(race.loc[:, ['GISJOIN', bvap, vap]], on='GISJOIN')
blocks_w_race = blocks_w_race.rename(columns={bvap: 'BVAP', vap: 'VAP'})
blocks_w_race = blocks_w_race.astype({'BVAP': int, 'VAP': int})

# For each district map: load it, normalize its district column to
# `common_colname`, keep only districts listed in `bh`, aggregate block-level
# BVAP/VAP onto the districts, and label affected/adjacent districts.
for mapname in maps:
    df = gpd.read_file(maps[mapname]['path'])
    df = df.rename(columns={maps[mapname]['district_colname']: common_colname})
    df[common_colname] = df[common_colname].astype(str)
    df = df[df[common_colname].isin(bh)]
    df = ai.aggregate(blocks_w_race,
                      df,
                      source_columns=['BVAP', 'VAP'],
                      method='greatest_area')[1]
    df['prop_BVAP_' + mapname] = df['BVAP'] / df['VAP']
    # FIX: the original line was missing the closing '})' on this rename call,
    # which made the whole script a syntax error.
    df = df.rename(columns={
        'BVAP': 'BVAP_' + mapname,
        'VAP': 'VAP_' + mapname
    })
    df.loc[df[common_colname].isin([str(i) for i in affected]),
           'status'] = affected_label
    df.loc[df[common_colname].isin([str(i) for i in adjacent]),
           'status'] = adjacent_label
    maps[mapname]['df'] = df

# Join the per-map tables into one wide frame keyed on the district column;
# the first map also contributes the 'status' label.
keys = list(maps)
mapname = keys[0]
df = maps[mapname]['df'][[common_colname, 'status'] +
                         [i + '_' + mapname
                          for i in ['BVAP', 'VAP', 'prop_BVAP']]]
for mapname in keys[1:]:
    df = df.merge(
        maps[mapname]['df'][[common_colname] +
                            [i + '_' + mapname
                             for i in ['BVAP', 'VAP', 'prop_BVAP']]],
        # NOTE(review): the source was truncated mid-call here; merging on the
        # shared district column is the evident intent — TODO confirm.
        on=common_colname)
import matplotlib

# Nassau County, NY: redistribute block-level population/race counts onto
# the 2018 general-election precincts.
small = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/demographics/Nassau_blocks_pop10.shp'
)
large = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/Matching Shapes and Elecs/second try/Nassau18Gen.shp'
)
large = large.to_crs(small.crs)

# Demographic count columns to carry up from blocks to precincts.
cols = [
    'tot', 'NHwhite', 'NHblack', 'hispanic', 'totVAP', 'WVAP', 'BVAP', 'HVAP'
]
small[cols] = small[cols].astype(float)
# nm = gpd.read_file('/Users/hwheelen/Desktop/VA/VA_Leg_Enacted_BH.shp')

# aggregate(...)[1] is the precinct frame with the block counts summed on.
aggregated = ai.aggregate(small, large, source_columns=cols)[1]

# Sanity check: interpolation should preserve the population total.
small_total = small['tot'].sum()
agg_total = aggregated['tot'].sum()
print('old tot', small_total)
print('new tot', agg_total)

aggregated.to_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/Precincts Elecs and Demographics/Nassau18GenDem.shp'
)
]  # NOTE(review): closes a `codelist` literal whose opening is outside this chunk.
#, '42', '45', '46', '47', '48', '49', '50', '51', '53', '54', '55', '56']
#codelist=[ '21','30','41','44']
#codelist=['42']
#36,48

# National congressional-district shapefile. For each state code, county
# subdivisions are assigned to a district and a per-district centroid index
# file is written.
df_cd = gpd.read_file('cd_us.shp')
for code in codelist:
    df_vtd = gpd.read_file(
        os.path.join('/home/zachary/Downloads/zips/tl_2013_' + code +
                     '_cousub.shp'))
    # Dummy column so aggregate() has a source column to work with.
    df_vtd['dummy'] = 1
    print("aggregating")
    # Assign each county subdivision its congressional district ('STATECD').
    (df_vtd, new_tar) = areal.aggregate(df_vtd, df_cd, ['dummy'], ['STATECD'])
    print("sorting")
    # NOTE(review): sort_values returns a new frame — this result is discarded,
    # so df_vtd is NOT actually sorted here.
    df_vtd.sort_values('STATECD')
    dists = list(set(df_vtd.loc[:, 'STATECD']))
    dists.sort()
    rWlist = []
    print("doing spatial indx")
    for d in dists:
        # Subset of units belonging to district `d`.
        m = df_vtd.loc[df_vtd.loc[:, 'STATECD'] == d]
        # NOTE(review): the file handle is never closed in the visible code.
        f = open('spatial_indexes/' + code + '_' + d + '_idx.txt', 'w')
        for ind, row in m.iterrows():
            print(ind)
            # Writes "GEOID,centroid_x,..."; the chunk is truncated
            # mid-expression here.
            f.write(row['GEOID'] + ',' + str(m.centroid[ind].x) + ',' +
# Pad the court-order map with any enacted-map districts it is missing, so
# both tables cover the same set of districts.
_sm = maps['SM_court_order']['df']
_enacted = maps['enacted']['df']
_missing = _enacted.loc[~_enacted[common_colname].isin(_sm[common_colname])]
maps['SM_court_order']['df'] = _sm.append(
    _missing, sort=True, ignore_index=True).sort_values(common_colname)
#%%
# get census block geography with BVAP and VAP data
precincts = gpd.read_file('Maps/VA Precincts/Precincts with CD/Elections/VA_Precincts_CD_and_Elections.shp')

# Election-result columns to aggregate onto each map's districts.
vote_cols = [
    'G18DHOR', 'G18DSEN', 'G18OHOR', 'G18OSEN', 'G18RHOR', 'G18RSEN',
    'G17DGOV', 'G17DLTG', 'G17DATG', 'G17DHOD', 'G17RGOV', 'G17RLTG',
    'G17RATG', 'G17RHOD', 'G17OHOD', 'G17OGOV', 'G16DPRS', 'G16RPRS',
    'G16OPRS', 'G16DHOR', 'G16RHOR', 'G16OHOR',
]
precincts[vote_cols] = precincts[vote_cols].astype(float)

# Split precinct vote totals across each map's districts by fractional area.
for mapname in maps:
    df = maps[mapname]['df']
    df['map'] = mapname
    df = ai.aggregate(precincts,
                      df,
                      source_columns=vote_cols,
                      method='fractional_area')[1]
    maps[mapname]['df'] = df

# Stack every map's district table into one long frame.
all = pd.concat([maps[mapname]['df'] for mapname in maps], sort=False)

# Friendly race labels -> column-name stems used in the vote columns.
races = {
    'Clinton16': 'G16.PRS',
    'HouseOfRep16Dem': 'G16.HOR',
    'Gov17_Northam': 'G17.GOV',
    'LtGov17_Fairfax': 'G17.LTG',
    'AtGen17_Herring': 'G17.ATG',
    'HouseOfDel17Dem': 'G17.HOD',
    'Sen18_Kaine': 'G18.SEN',
    'HouseOfRep18Dem': 'G18.HOR',
}
table = all.copy()
import sys

# Make the shared `General` utilities importable.
# Will probably need to change the relative path; set it to gerryspam/General.
sys.path.append('~/gerryspam/General')

import areal_interpolation as ai
import geopandas as gpd
import maup
import matplotlib

# Missouri blocks (with race/population) and the labeled precinct layer.
small = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_racepop/blocks_with_racepop.shp'
)
large = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/mo_prec_labeled/mo_prec_labeled_nopop.shp'
)

# Match CRSs before overlaying.
small = small.to_crs(large.crs)

# Label each block with its precinct ('loc_prec'). Slow step.
agg = ai.aggregate(small, large, target_columns=['loc_prec'])[0]
agg.to_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_prec/blocks_with_prec.shp'
)
def build_adj_mats(geo_cell_shp, districts, dist_col, name, plots=False):
    """Function which takes in a shapefile consisting of geo_cells and
    districts, computes an assignment of geo_cells to districts and builds
    (optionally) adjacency matrices and visualizations of the districts at
    the level of the geo_cells.

    Arguments
    ---------
    geo_cell_shp: a shapefile containing the geo_cells (vtds, towns, etc)
    districts: a shapefile containing the districts (congressional,
        legislative, etc)
    dist_col: string
        the name of the column in the district shapefile which contains the
        district names
    name: string
        the naming convention for the output files. suggestions include the
        state name or the FIPS code
    plots: boolean default False
        True if you want to generate plots, False if not. If you only want
        matrices, set this to False because it is by far the slowest part
        of the code

    Side effects: writes one '<name>_<district>_dist.txt' dense matrix per
    district, plus 'adj_mats/<name>_blocks.npz' (block-diagonal per-district
    adjacency) and 'adj_mats/<name>_full.npz' (full rook adjacency), and
    optionally three PNG graph renderings.
    """
    print("STARTING")
    df_geo = gpd.read_file(os.path.join(geo_cell_shp))
    df_dist = gpd.read_file(os.path.join(districts))
    # Dummy source column so areal.aggregate has something to aggregate.
    df_geo['dummy'] = 1
    print("AGGREGATING")
    # Assign each geo_cell the district label from `dist_col`.
    (df_geo, junk) = areal.aggregate(df_geo, df_dist, ['dummy'], [dist_col])
    # NOTE(review): sort_values returns a new frame; this result is discarded,
    # so df_geo is not actually sorted by district here.
    df_geo.sort_values(dist_col)
    if plots:
        # Centroids and three base maps at increasing figure sizes.
        geo_cent = df_geo.centroid
        cx = geo_cent.x
        cy = geo_cent.y
        basemap_s = df_geo.plot(color="white", edgecolor="lightgray")
        basemap_m = df_geo.plot(color="white",
                                edgecolor="lightgray",
                                figsize=(12, 12))
        basemap_l = df_geo.plot(color="white",
                                edgecolor="lightgray",
                                figsize=(30, 30))
    dists = list(set(df_geo.loc[:, dist_col]))
    dists.sort()
    rWlist = []
    print("doing CD dual graphs")
    for d in dists:
        # Cells belonging to district `d`; rook-contiguity weights on them.
        m = df_geo.loc[df_geo.loc[:, dist_col] == d]
        rW = ps.weights.Rook.from_dataframe(m)
        rWlist.append(rW.full()[0])
        w = rW
        # Dense 0/1 adjacency matrix for this district.
        np.savetxt(name + '_' + d + '_dist.txt',
                   rW.full()[0],
                   delimiter=',',
                   fmt='%d')
        if plots:
            c_x = m.centroid.x
            c_y = m.centroid.y
            # Cycle through the module-level `cols` color list — NOTE(review):
            # `cols` is not defined in this function; presumably a module
            # global — confirm.
            c = cols.pop(0)
            cols.append(c)
            # Draw an edge between every pair of rook neighbors on all three
            # base maps.
            for i, jj in w.neighbors.items():
                for j in jj:
                    basemap_s.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)
                    basemap_m.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)
                    basemap_l.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)
    if plots:
        basemap_s.axis('off')
        basemap_m.axis('off')
        basemap_l.axis('off')
        # NOTE(review): the savefig calls after the first plt.close() operate
        # on a closed/new figure, so the _m and _l PNGs are likely blank —
        # confirm intended behavior before relying on these outputs.
        plt.figure(1)
        plt.savefig(name + '_graph_s.png', bbox_inches='tight', pad_inches=0)
        plt.close()
        plt.savefig(name + '_graph_m.png', bbox_inches='tight', pad_inches=0)
        plt.close()
        plt.savefig(name + '_graph_l.png', bbox_inches='tight', pad_inches=0)
        plt.close()
    print("SAVING")
    print(rWlist)
    print(type(rWlist[0]))
    # Block-diagonal sparse matrix: one block per district, in sorted order.
    mat = scipy.sparse.block_diag(rWlist)
    scipy.sparse.save_npz('adj_mats/' + name + '_blocks', mat)
    # Full rook adjacency over all cells (ignores district boundaries).
    fullwts = ps.weights.Rook.from_dataframe(df_geo).full()[0]
    fullwts = fullwts.astype(int)
    fullwts = scipy.sparse.coo_matrix(fullwts)
    scipy.sparse.save_npz('adj_mats/' + name + '_full', fullwts)
# Start from a clean positional index before contiguity work.
df_prec = df_prec.reset_index(drop=True)

# Rook contiguity plus shared-perimeter bookkeeping.
df_prec = ht.get_shared_perims(df_prec)

# Precincts whose name begins with 'None' are unassigned; collect their
# indexes and merge them into neighbors.
precincts_to_merge = [
    i for i, _ in df_prec.iterrows()
    if str(df_prec.at[i, 'precinct']).split('_')[0] == 'None'
]
df_prec = ht.merge_geometries(df_prec, precincts_to_merge)

# Output paths for the block- and precinct-level shapefiles.
block_path = "/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/OpenPrecincts/States for site/Georgia/VR Shapefile/blocks/GA_VR_blocks.shp"
prec_path = "/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/OpenPrecincts/States for site/Georgia/VR Shapefile/GA_VR_Precincts.shp"

# Push precinct assignments down onto each census block.
df = ai.aggregate(df_shp_c, df_prec, target_columns=['precinct'],
                  spatial_index=False)[0]

df.to_file(block_path)
#df_prec.to_file(prec_path)
#ht.save_shapefile(df, block_path, ['neighbors'])
#ht.save_shapefile(df_prec, prec_path, ['neighbors'])
# Load 2010 NY census blocks and attach block-level population.
# FIX: the original path was '.2010 Census Block shapefiles/...' — missing the
# '/' of './', unlike every other relative path in this script.
blocks = gpd.read_file('./2010 Census Block shapefiles/NY/tl_2014_36_tabblock10.shp')
blocks.GEOID10 = blocks.GEOID10.astype(np.int64)
pop = pd.read_csv('./Blocks/NY2010pop.csv')
blocks = blocks.merge(pop, on=['GEOID10'])

# Load the community-of-interest (COI) polygons; use Flushing's CRS for all.
flushing = gpd.read_file('./COI shapefiles/Flushing/Flushing.shp')
proj_crs = flushing.crs
blocks = blocks.to_crs(proj_crs)
bayside = gpd.read_file('./COI shapefiles/Bayside/Bayside.shp')
bayside = bayside.to_crs(proj_crs)

# Make COIs out of blocks: label each block with its community, keep only the
# blocks inside each COI, and save block-level versions of the COIs.
f_blocks = ai.aggregate(blocks, flushing, target_columns=['Community'])[0]
f_blocks = f_blocks.loc[f_blocks.Community == 'Flushing']
f_blocks.to_file('./COI shapefiles/Flushing/FlushingBlocks.shp')
b_blocks = ai.aggregate(blocks, bayside, target_columns=['Community'])[0]
b_blocks = b_blocks.loc[b_blocks.Community == 'Bayside']
b_blocks.to_file('./COI shapefiles/Bayside/BaysideBlocks.shp')

######################################################
# Reload the saved block-level COIs (absolute paths from a later session).
flushing = gpd.read_file('/Users/hwheelen/Desktop/COI shapefiles/Flushing/FlushingBlocks.shp')
bayside = gpd.read_file('/Users/hwheelen/Desktop/COI shapefiles/Bayside/BaysideBlocks.shp')
flushing = flushing.to_crs(proj_crs)