bvap = 'H74004'
vap = 'H74001'

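# Build the NHGIS-style GISJOIN key ('G' + state FIPS + zero-padded county,
# tract, and block codes) to match the GISJOIN field in the race table
# (NHGIS convention; inferred from the merge below).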
blocks['GISJOIN'] = 'G' + blocks['STATEFP10'] + blocks['COUNTYFP10'].apply(
    lambda x: x.zfill(4)) + blocks['TRACTCE10'].apply(
        lambda x: x.zfill(7)) + blocks['BLOCKCE'].apply(lambda x: x.zfill(4))

blocks_w_race = blocks.merge(race.loc[:, ['GISJOIN', bvap, vap]], on='GISJOIN')
blocks_w_race = blocks_w_race.rename(columns={bvap: 'BVAP', vap: 'VAP'})
blocks_w_race = blocks_w_race.astype({'BVAP': int, 'VAP': int})

# 2) Load precinct data with election result, merge them with census block data
precincts = gpd.read_file(BH_precincts)

precincts_w_race = ai.aggregate(blocks_w_race,
                                precincts,
                                source_columns=['BVAP', 'VAP'],
                                method='greatest_area')[1]
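# Index convention inferred from how aggregate is used across these examples:
# [1] is the target (precinct) frame with the source columns summed in, and
# [0] is the source frame with the target's columns attached.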

# 3) Load House geographies, assign districts to precincts, filter
house = gpd.read_file(house_districts)

precincts_w_race_and_districts = ai.aggregate(precincts_w_race,
                                              house,
                                              target_columns=['NAME'])[0]

# filter to only include precincts in affected and adjacent districts according to VPAP
# https://www.vpap.org/visuals/visual/ruling-could-impact-1-3-house-districts/
affected = [63, 69, 70, 71, 74, 77, 80, 89, 90, 92, 95]
adjacent = [
    27, 55, 61, 62, 64, 66, 68, 72, 73, 75, 76, 78, 79, 81, 83, 85, 91, 93, 94,
    96, 97, 100
]
Example No. 2
import areal_interpolation as ai
import geopandas as gpd
import maup
import matplotlib

small = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/tabblock2010_29_pophu/tabblock2010_29_pophu.shp'
)
large = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/tl_2013_29_bg/tl_2013_29_bg.shp')
# large = gpd.read_file('/Users/hopecj/projects/gerryspam/MO/dat/mo_prec_labeled/mo_prec_labeled_nopop.shp')

small = small.to_crs(large.crs)

# target_columns should be name of disaggregation column from `large`
# start time: 12:31 pm
# end time: 1:30 pm
agg = ai.aggregate(small, large,
                   target_columns=['GEOID'])[0]  # this takes a while!

agg = agg[[
    'STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE', 'BLOCKID10', 'PARTFLG',
    'HOUSING10', 'POP10', 'GEOID', 'geometry'
]]

agg.to_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_bg/blocks_with_bg.shp'
)
Example No. 3
#nm = gpd.read_file('/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/mapping/NC/Shapefiles/precincts/2016/nc_2016.shp')
large = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/mapping/MI/Counties/Counties_v17a/Counties_v17a.shp'
)
#nm = gpd.read_file('/Users/hwheelen/Desktop/VA/VA_Leg_Enacted_BH.shp')

small = small.to_crs(large.crs)
# =============================================================================
# for col in cols:
#     small[col] = small[col].astype(int)
# =============================================================================

#small = agg.rename(columns = {'Dist_Name':'SD'})

agg = ai.aggregate(small, large,
                   target_columns=['FIPSCODE',
                                   'NAME'])[0]  # this takes a while
agg = agg[[
    'OBJECTID',
    'ShapeSTAre',
    'ShapeSTLen',
    'FIPSCODE',
    'NAME',
    'PRECINCTID',
    'ELECTIONYE',
    'COUNTYFIPS',
    'MCDFIPS',
    'WARD',
    'PRECINCT',
    'geometry',
]]
Example No. 4
blocks['GISJOIN'] = 'G' + blocks['STATEFP10'] + blocks['COUNTYFP10'].apply(
    lambda x: x.zfill(4)) + blocks['TRACTCE10'].apply(
        lambda x: x.zfill(7)) + blocks['BLOCKCE'].apply(lambda x: x.zfill(4))

blocks_w_race = blocks.merge(race.loc[:, ['GISJOIN', bvap, vap]], on='GISJOIN')
blocks_w_race = blocks_w_race.rename(columns={bvap: 'BVAP', vap: 'VAP'})
blocks_w_race = blocks_w_race.astype({'BVAP': int, 'VAP': int})

for mapname in maps:
    df = gpd.read_file(maps[mapname]['path'])
    
    df = df.rename(columns={maps[mapname]['district_colname']: common_colname})
        
    df[common_colname] = df[common_colname].astype(str)
    df = df[df[common_colname].isin(bh)]
    
    df = ai.aggregate(blocks_w_race, df, source_columns=['BVAP', 'VAP'], method='greatest_area')[1]

    df['prop_BVAP_' + mapname] = df['BVAP'] / df['VAP']
    df = df.rename(columns={'BVAP': 'BVAP_' + mapname,
                            'VAP': 'VAP_' + mapname})

    df.loc[df[common_colname].isin([str(i) for i in affected]), 'status'] = affected_label
    df.loc[df[common_colname].isin([str(i) for i in adjacent]), 'status'] = adjacent_label

    maps[mapname]['df'] = df

keys = list(maps)
mapname = keys[0]
df = maps[mapname]['df'][[common_colname, 'status'] + [i + '_' + mapname for i in ['BVAP', 'VAP', 'prop_BVAP']]]
for mapname in keys[1:]:
    df = df.merge(maps[mapname]['df'][[common_colname] + [i + '_' + mapname for i in ['BVAP', 'VAP', 'prop_BVAP']]],
                  on=common_colname)  # on=common_colname is an assumed join key
Example No. 5
import areal_interpolation as ai
import geopandas as gpd
import matplotlib

small = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/demographics/Nassau_blocks_pop10.shp'
)
large = gpd.read_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/Matching Shapes and Elecs/second try/Nassau18Gen.shp'
)

large = large.to_crs(small.crs)

cols = [
    'tot', 'NHwhite', 'NHblack', 'hispanic', 'totVAP', 'WVAP', 'BVAP', 'HVAP'
]

small[cols] = small[cols].astype(float)

#nm = gpd.read_file('/Users/hwheelen/Desktop/VA/VA_Leg_Enacted_BH.shp')

aggregated = ai.aggregate(small, large, source_columns=cols)[1]

#check that totals match
small_total = small['tot'].sum()
print('old tot', small_total)
agg_total = aggregated['tot'].sum()
print('new tot', agg_total)
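
# a minimal sanity check (the 1% tolerance is an assumption, not from the
# original): interpolation should preserve the countywide total up to small
# boundary mismatches between the two shapefiles
assert abs(agg_total - small_total) <= 0.01 * small_total, 'population total drifted'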

aggregated.to_file(
    '/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/States and local partners/New York/Nassau County/Precincts Elecs and Demographics/Nassau18GenDem.shp'
)
Example No. 6
]  #, '42', '45', '46', '47', '48', '49', '50', '51', '53', '54', '55', '56']

#codelist=[ '21','30','41','44']
#codelist=['42']
#36,48
df_cd = gpd.read_file('cd_us.shp')

for code in codelist:
    df_vtd = gpd.read_file(
        os.path.join('/home/zachary/Downloads/zips/tl_2013_' + code +
                     '_cousub.shp'))

    df_vtd['dummy'] = 1

    print("aggregating")
    (df_vtd, new_tar) = areal.aggregate(df_vtd, df_cd, ['dummy'], ['STATECD'])
    print("sorting")
    df_vtd = df_vtd.sort_values('STATECD')  # sort_values returns a copy; assign it back

    dists = list(set(df_vtd.loc[:, 'STATECD']))
    dists.sort()

    rWlist = []
    print("doing spatial indx")
    for d in dists:
        m = df_vtd.loc[df_vtd.loc[:, 'STATECD'] == d]
        f = open('spatial_indexes/' + code + '_' + d + '_idx.txt', 'w')
        for ind, row in m.iterrows():
            print(ind)

            f.write(row['GEOID'] + ',' + str(m.centroid[ind].x) + ',' +
                    str(m.centroid[ind].y) + '\n')  # y coordinate and newline are assumed completions
        f.close()  # close the per-district index file
Example No. 7
maps['SM_court_order']['df'] = maps['SM_court_order']['df'].append(
    maps['enacted']['df'].loc[~maps['enacted']['df'][common_colname].isin(
        maps['SM_court_order']['df'][common_colname])],
    sort=True,
    ignore_index=True).sort_values(common_colname)

#%%
# get census block geography with BVAP and VAP data
precincts = gpd.read_file('Maps/VA Precincts/Precincts with CD/Elections/VA_Precincts_CD_and_Elections.shp')

vote_cols = [
    'G18DHOR', 'G18DSEN', 'G18OHOR', 'G18OSEN', 'G18RHOR', 'G18RSEN',
    'G17DGOV', 'G17DLTG', 'G17DATG', 'G17DHOD', 'G17RGOV', 'G17RLTG',
    'G17RATG', 'G17RHOD', 'G17OHOD', 'G17OGOV', 'G16DPRS', 'G16RPRS',
    'G16OPRS', 'G16DHOR', 'G16RHOR', 'G16OHOR'
]
precincts[vote_cols] = precincts[vote_cols].astype(float)

for mapname in maps:
    df = maps[mapname]['df']
    df['map'] = mapname
    df = ai.aggregate(precincts, df, source_columns=vote_cols, method='fractional_area')[1]
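    # method names inferred from usage rather than from areal_interpolation's
    # docs: 'fractional_area' appears to split each precinct's vote totals
    # across overlapping districts by area share, whereas 'greatest_area'
    # (used in the earlier examples) assigns each source unit wholly to the
    # target it overlaps most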
    
    maps[mapname]['df'] = df

all_maps = pd.concat([maps[mapname]['df'] for mapname in maps], sort=False)  # avoid shadowing the builtin all()

races = {'Clinton16': 'G16.PRS',
         'HouseOfRep16Dem': 'G16.HOR',
         'Gov17_Northam': 'G17.GOV',
         'LtGov17_Fairfax': 'G17.LTG',
         'AtGen17_Herring': 'G17.ATG',
         'HouseOfDel17Dem': 'G17.HOD',
         'Sen18_Kaine': 'G18.SEN',
         'HouseOfRep18Dem': 'G18.HOR'}

table = all_maps.copy()
Example No. 8
import sys
import os

sys.path.append(os.path.expanduser(
    '~/gerryspam/General'
))  # '~' is not expanded automatically; point this at your gerryspam/General checkout
import areal_interpolation as ai
import geopandas as gpd
import maup
import matplotlib

small = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_racepop/blocks_with_racepop.shp'
)
large = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/mo_prec_labeled/mo_prec_labeled_nopop.shp'
)

small = small.to_crs(large.crs)

agg = ai.aggregate(small, large,
                   target_columns=['loc_prec'])[0]  # this takes a while

agg.to_file(
    '/Users/hopecj/projects/gerryspam/MO/dat/blocks_with_prec/blocks_with_prec.shp'
)
Example No. 9
def build_adj_mats(geo_cell_shp, districts, dist_col, name, plots=False):
    """Take a shapefile of geo_cells and a shapefile of districts, compute an
    assignment of geo_cells to districts, and build adjacency matrices and
    (optionally) visualizations of the districts at the geo_cell level.

    Arguments
    ---------
    geo_cell_shp: string
        path to a shapefile containing the geo_cells (VTDs, towns, etc.)
    districts: string
        path to a shapefile containing the districts (congressional,
        legislative, etc.)
    dist_col: string
        the name of the column in the district shapefile which contains the
        district names
    name: string
        the naming convention for the output files; suggestions include the
        state name or the FIPS code
    plots: boolean, default False
        True to generate plots, False otherwise. If you only want matrices,
        leave this False: plotting is by far the slowest part of the code.
    """

    print("STARTING")
    df_geo = gpd.read_file(os.path.join(geo_cell_shp))
    df_dist = gpd.read_file(os.path.join(districts))

    df_geo['dummy'] = 1
    print("AGGREGATING")
    (df_geo, junk) = areal.aggregate(df_geo, df_dist, ['dummy'], [dist_col])

    df_geo = df_geo.sort_values(dist_col)  # sort_values returns a copy; assign it back

    if plots:
        geo_cent = df_geo.centroid
        cx = geo_cent.x
        cy = geo_cent.y
        basemap_s = df_geo.plot(color="white", edgecolor="lightgray")
        basemap_m = df_geo.plot(color="white",
                                edgecolor="lightgray",
                                figsize=(12, 12))
        basemap_l = df_geo.plot(color="white",
                                edgecolor="lightgray",
                                figsize=(30, 30))

    dists = list(set(df_geo.loc[:, dist_col]))
    dists.sort()

    rWlist = []
    print("doing CD dual graphs")
    for d in dists:
        m = df_geo.loc[df_geo.loc[:, dist_col] == d]
        rW = ps.weights.Rook.from_dataframe(m)
        rWlist.append(rW.full()[0])
        w = rW
        np.savetxt(name + '_' + d + '_dist.txt',
                   rW.full()[0],
                   delimiter=',',
                   fmt='%d')

        if plots:
            c_x = m.centroid.x
            c_y = m.centroid.y

            c = cols.pop(0)
            cols.append(c)
            for i, jj in w.neighbors.items():

                for j in jj:
                    basemap_s.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)
                    basemap_m.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)
                    basemap_l.plot([c_x[i], c_x[j]], [c_y[i], c_y[j]],
                                   linestyle='-',
                                   linewidth=1,
                                   color=c)

    if plots:
        basemap_s.axis('off')
        basemap_m.axis('off')
        basemap_l.axis('off')

        # save each basemap's own figure; calling plt.savefig after plt.close
        # would write out a new, empty figure
        basemap_s.figure.savefig(name + '_graph_s.png',
                                 bbox_inches='tight',
                                 pad_inches=0)
        basemap_m.figure.savefig(name + '_graph_m.png',
                                 bbox_inches='tight',
                                 pad_inches=0)
        basemap_l.figure.savefig(name + '_graph_l.png',
                                 bbox_inches='tight',
                                 pad_inches=0)
        plt.close('all')
    print("SAVING")
    print(rWlist)
    print(type(rWlist[0]))
    mat = scipy.sparse.block_diag(rWlist)
    scipy.sparse.save_npz('adj_mats/' + name + '_blocks', mat)
    fullwts = ps.weights.Rook.from_dataframe(df_geo).full()[0]
    fullwts = fullwts.astype(int)
    fullwts = scipy.sparse.coo_matrix(fullwts)
    scipy.sparse.save_npz('adj_mats/' + name + '_full', fullwts)
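
# example invocation (the file names and 'MO' label are hypothetical):
# build_adj_mats('tl_2013_29_cousub.shp', 'cd_us.shp', 'STATECD', 'MO', plots=False)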
Example No. 10
# reset index
df_prec = df_prec.reset_index(drop=True)

# get rook contiguity and calculate shared perims
df_prec = ht.get_shared_perims(df_prec)

# get list of precinct indexes to merge
precincts_to_merge = []
for i, _ in df_prec.iterrows():
    if str(df_prec.at[i, 'precinct']).split('_')[0] == 'None':
        precincts_to_merge.append(i)

# merge geometries
df_prec = ht.merge_geometries(df_prec, precincts_to_merge)

# Save census block shapefile
block_path = "/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/OpenPrecincts/States for site/Georgia/VR Shapefile/blocks/GA_VR_blocks.shp"
prec_path = "/Volumes/GoogleDrive/Shared drives/princeton_gerrymandering_project/OpenPrecincts/States for site/Georgia/VR Shapefile/GA_VR_Precincts.shp"
# Save precinct assignments down to the block
df = ai.aggregate(df_shp_c,
                  df_prec,
                  target_columns=['precinct'],
                  spatial_index=False)[0]

# Save shapefiles
df.to_file(block_path)
#df_prec.to_file(prec_path)
#ht.save_shapefile(df, block_path, ['neighbors'])
#ht.save_shapefile(df_prec, prec_path, ['neighbors'])
Example No. 11
import numpy as np
import pandas as pd
import geopandas as gpd
import areal_interpolation as ai

blocks = gpd.read_file('./2010 Census Block shapefiles/NY/tl_2014_36_tabblock10.shp')
blocks.GEOID10 = blocks.GEOID10.astype(np.int64)

pop = pd.read_csv('./Blocks/NY2010pop.csv')
blocks = blocks.merge(pop, on=['GEOID10'])

#load in COIS
flushing = gpd.read_file('./COI shapefiles/Flushing/Flushing.shp')
proj_crs = flushing.crs
blocks = blocks.to_crs(proj_crs)

bayside = gpd.read_file('./COI shapefiles/Bayside/Bayside.shp')
bayside = bayside.to_crs(proj_crs)

#make COIs out of blocks
f_blocks = ai.aggregate(blocks, flushing, target_columns=['Community'])[0]
f_blocks = f_blocks.loc[f_blocks.Community == 'Flushing']
f_blocks.to_file('./COI shapefiles/Flushing/FlushingBlocks.shp')

b_blocks = ai.aggregate(blocks, bayside, target_columns=['Community'])[0]
b_blocks = b_blocks.loc[b_blocks.Community == 'Bayside']
b_blocks.to_file('./COI shapefiles/Bayside/BaysideBlocks.shp')
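
# quick visual check (a sketch; assumes matplotlib is installed): overlay the
# block approximation on the hand-drawn COI boundary
ax = bayside.boundary.plot(color='red')
b_blocks.plot(ax=ax, alpha=0.5)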


######################################################

#load in blocks
flushing = gpd.read_file('/Users/hwheelen/Desktop/COI shapefiles/Flushing/FlushingBlocks.shp')
bayside = gpd.read_file('/Users/hwheelen/Desktop/COI shapefiles/Bayside/BaysideBlocks.shp')

flushing = flushing.to_crs(proj_crs)