Example #1
def verify_maup(state_prec_gdf, state_report):
    """Return True if maup.assign succeeds on the state's precincts and counties."""
    state_county_df = census_us_county_gdf[
        census_us_county_gdf["STATEFP"] == state_report.fips
    ]
    # match their projections (necessary for maup.assign)
    if not state_prec_gdf.crs:
        state_prec_gdf = state_prec_gdf.set_crs("EPSG:4326")
    state_prec_gdf = state_prec_gdf.to_crs(state_county_df.crs)
    assert state_prec_gdf.crs == state_county_df.crs
    gdf = fix_buffer(state_prec_gdf)
    try:
        maup.assign(gdf, state_county_df)
        print("MAUP assign was successful")
        return True
    except Exception as error:
        print("Unable to use MAUP assign:\n\n", error)
        return False
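
# A minimal calling sketch for verify_maup, assuming census_us_county_gdf and
# fix_buffer are defined in the enclosing module as above; the shapefile path
# and the SimpleNamespace stand-in for state_report are hypothetical.
from types import SimpleNamespace

import geopandas as gpd

report = SimpleNamespace(fips="25")  # Massachusetts; only .fips is used
state_prec_gdf = gpd.read_file("path/to/state_precincts.shp")  # hypothetical path
if verify_maup(state_prec_gdf, report):
    print("Precincts can be assigned to counties.")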
Example #2
def test_assign_dispatches_to_without_area_and_with_area(
        four_square_grid, squares_some_neat_some_overlapping, crs):
    other = four_square_grid.set_index("ID")
    other.crs = crs
    print(squares_some_neat_some_overlapping.crs, other.crs)
    assignment = assign(squares_some_neat_some_overlapping, other)
    expected = pandas.Series(["a", "a", "b", "d", "b"],
                             index=squares_some_neat_some_overlapping.index)

    assert (expected == assignment).all()
Example #3
def test_example_case():
    # Loosely based on the test_example_case function in test_prorate.py
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["TOTPOP", "BVAP", "WVAP", "HISP"]
    assignment = assign(blocks, precincts)
    precincts[columns] = blocks[columns].groupby(assignment).sum()
    assert (precincts[columns] > 0).sum().sum() > len(precincts)
    for col in columns:  # may fail where the blocks do not neatly cover the precincts
        assert abs(precincts[col].sum() -
                   blocks[col].sum()) / blocks[col].sum() < 0.5
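
# A self-contained sketch of the assign-then-aggregate pattern used above,
# built on toy unit squares (all data below is illustrative).
import geopandas
import maup
from shapely.geometry import box

# Four unit blocks carrying population counts.
blocks = geopandas.GeoDataFrame(
    {"TOTPOP": [10, 20, 30, 40]},
    geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1),
              box(0, 1, 1, 2), box(1, 1, 2, 2)],
    crs="EPSG:3857",
)
# Two precincts, each exactly covering two blocks.
precincts = geopandas.GeoDataFrame(
    geometry=[box(0, 0, 2, 1), box(0, 1, 2, 2)], crs="EPSG:3857"
)
assignment = maup.assign(blocks, precincts)  # maps block index -> precinct index
precincts["TOTPOP"] = blocks["TOTPOP"].groupby(assignment).sum()
print(precincts["TOTPOP"])  # expected: 30 and 70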
Example #4
def test_crop_to():
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # Calculate without cropping
    pieces = maup.intersections(old_precincts, new_precincts, area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts[columns] = maup.prorate(pieces,
                                          old_precincts[columns],
                                          weights=weights)

    # Calculate with cropping
    old_precincts["geometries"] = maup.crop_to(old_precincts, new_precincts)
    new_precincts_cropped = new_precincts.copy()
    pieces = maup.intersections(old_precincts,
                                new_precincts_cropped,
                                area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts_cropped[columns] = maup.prorate(pieces,
                                                  old_precincts[columns],
                                                  weights=weights)

    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    diff_sum = 0
    for col in columns:
        diff = new_precincts_cropped[col].sum() - new_precincts[col].sum()
        assert diff >= 0

        diff_sum += diff

    # Ideally this would be strictly positive (which would mean fewer votes are
    # lost after cropping), but crop_to doesn't resolve the missing-votes errors yet.
    assert diff_sum >= 0
Example #5
def test_example_case():
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]
    # Include area_cutoff=0 to ignore any intersections with no area,
    # like boundary intersections, which we do not want to include in
    # our proration.
    pieces = intersections(old_precincts, new_precincts, area_cutoff=0)
    # Weight by prorated population from blocks
    weights = blocks["TOTPOP"].groupby(assign(blocks, pieces)).sum()
    # Use blocks to estimate population of each piece
    new_precincts[columns] = prorate(pieces,
                                     old_precincts[columns],
                                     weights=weights)
    assert (new_precincts[columns] > 0).sum().sum() > len(new_precincts) / 2
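
# A hedged note on the shapes above: per the maup README, `pieces` is indexed
# by (old_precinct, new_precinct) pairs and `weights` shares that index, so
# prorate() can split each old precinct's totals among its pieces. A cheap
# follow-up check is that proration roughly conserves the column totals
# (small losses are expected from boundary effects and area_cutoff filtering):
for col in columns:
    print(col, old_precincts[col].sum(), new_precincts[col].sum())
Example #6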
def prorate(target, source, targetcol, sourcecol, columns):
    """
    Prorates data from the source geometries down to the target geometries.

    :param target: Target geometries.
    :param source: Source geometries.
    :param targetcol: Column for target weights.
    :param sourcecol: Column for source weights.
    :param columns: Columns to prorate.
    :return: Geodataframe with prorated data.
    """
    assignment = maup.assign(target, source)
    weights = target[targetcol] / assignment.map(source[sourcecol])
    prorated = maup.prorate(assignment, source[columns], weights)
    target[columns] = prorated

    return target
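
# Hypothetical usage of the prorate() helper above: disaggregate county-level
# vote totals to precincts, weighting each precinct by its share of its
# county's population. The file paths and column names are illustrative.
import geopandas as gpd

precincts = gpd.read_file("precincts.shp")  # hypothetical path
counties = gpd.read_file("counties.shp")    # hypothetical path
precincts = prorate(
    target=precincts,
    source=counties,
    targetcol="TOTPOP",   # precinct population, the numerator weight
    sourcecol="TOTPOP",   # county population, the denominator weight
    columns=["SEN18D", "SEN18R"],
)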
Example #7
def assign_GEOID(state_prec_gdf, state_fips):
    """
    Return the GeoDataFrame with a 'GEOID' column indicating each precinct's county.

    :state_prec_gdf: (GeoDataFrame) with statewide precinct level election results
    :state_fips: (int) Federal Information Processing Standard state code

    returned GeoDataFrame's 'GEOID' column will conform to the GEOID spec:

    Elements of the GEOID column are 5 character strings. The first 2 characters
    are the StateFP code and the last 3 characters are the CountyFP code. e.g.

    Massachusetts' StateFP = '25'
    Essex County's CountyFP = '009'
    Essex County, Massachusetts' GEOID = '25009'

    If either code has fewer digits than are allocated, the string representation should
    be zero-padded from the left. e.g. Alaska (StateFP = 2) should be '02'.
    """
    state_fips_str = str(state_fips).zfill(2)
    state_county_df = census_us_county_gdf[
        census_us_county_gdf["STATEFP"] == state_fips_str
    ]
    # match their projections (necessary for maup.assign)
    if not state_prec_gdf.crs:
        state_prec_gdf = state_prec_gdf.set_crs("EPSG:4326")
    state_prec_gdf = state_prec_gdf.to_crs(state_county_df.crs)
    assert state_prec_gdf.crs == state_county_df.crs

    state_prec_gdf["maup_assignment"] = maup.assign(
        fix_buffer(state_prec_gdf), state_county_df
    )
    state_prec_gdf["GEOID"] = state_prec_gdf["maup_assignment"].map(
        lambda idx: state_fips_str + str(state_county_df.loc[idx]["COUNTYFP"]).zfill(3)
    )
    n_counties_observed = state_prec_gdf["GEOID"].nunique()
    n_counties_expected = state_county_df["GEOID"].nunique()
    assert n_counties_expected == n_counties_observed
    return state_prec_gdf
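
# A quick illustration of the zero-padding contract described in the docstring
# above (these FIPS values are real; the string arithmetic is the same as what
# assign_GEOID performs):
assert str(2).zfill(2) == "02"            # Alaska's StateFP
assert "25" + str(9).zfill(3) == "25009"  # Essex County, Massachusetts
Example #8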
def dissolve(source, join="CONGDIST", columns=None):
    """
    Dissolves source geography boundaries based on a column which identifies
    the smaller geography with the larger one.

    :param source: String or geodataframe; string is a filepath, geodataframe is source.
    :param join: String; column on which boundaries are joined; optional.
    :param columns: List; columns to sum when dissolving; optional.
    :return: Geodataframe with dissolved boundaries.
    """
    # Dissolve VTD geometries into congressional district ones.
    source = gpd.read_file(source) if isinstance(source, str) else source
    target = source[[join, "geometry"]].dissolve(by=join)

    # If columns are specified, we aggregate data from VTDs to whatever the
    # target is. If a file destination is provided, send the output to a
    # shapefile.
    if columns:
        assignment = maup.assign(source, target)
        target[columns] = source[columns].groupby(assignment).sum()

    return target
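
# Hypothetical usage of dissolve(): collapse VTDs into congressional districts
# and aggregate two vote columns along the way. The filepath and column names
# are illustrative.
districts = dissolve(
    "vtds.shp",  # hypothetical filepath
    join="CONGDIST",
    columns=["PRES16D", "PRES16R"],
)
districts.to_file("districts.shp")  # hypothetical output path
Example #9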
partnership = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/NJ/dat/partnership-2016/unzipped/extracted/precincts/compiled.shp'
)
partnership["loc_prec"] = partnership['COUNTYFP'] + ',' + partnership['NAMELSAD']
partnership['loc_prec'].nunique()
partnership[partnership.duplicated(['loc_prec'])]
partnership.shape
partnership = partnership.dissolve(
    by='loc_prec', as_index=False)  # dissolve precincts with the same name
partnership.rename(columns={"loc_prec": "id"}, inplace=True)
partnership = partnership[["id", "geometry"]]

# voter roll
vr = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/NJ/dat/Geocoded VR/NJ_CivisVRblocks.shp')

# give voter roll precinct labels
vr.crs
partnership.crs
partnership.crs = "epsg:2160"  # declare the source CRS before reprojecting
partnership = partnership.to_crs(vr.crs)
assert vr.crs == partnership.crs

partnership.to_file("out_partnership.shp")
vr.to_file("out_vr.shp")

# assign voter roll to precincts
assignment = maup.assign(vr, partnership)
assignment.isna().sum()
vr["prec_2019"] = assignment
#### PART 1: DISAGGREGATE ACS19 FROM BLOCK GROUPS TO 2010 BLOCKS
#######################################################################

# set acs cols - all demographic columns in block groups

bgs.dtypes
blocks.dtypes

bg_cols = list(bgs.columns)[12:37]

bgs[bg_cols] = bgs[bg_cols].astype(float)

bgs.dtypes

# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)

# prorate ACS columns by 2010 pop
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[bg_cols], weights)
blocks[bg_cols] = prorated

#### TESTING

# check total population at block level and block group level
blocks['tot19'].sum()  #12792129
bgs['tot19'].sum()  #12791530
blocks['tot'].sum()  #12702379
bgs['tot10'].sum()  #12702379

# save blocks with Decennial 2010 + ACS 2019 data
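
# A hedged sanity check on the proration above: because the block-level 2010
# populations aggregate exactly to the block-group totals here, the prorated
# ACS columns should roughly reproduce the block-group sums as well.
import numpy as np

for col in bg_cols:
    if not np.isclose(blocks[col].sum(), bgs[col].sum(), rtol=1e-3):
        print(f"{col} did not prorate cleanly")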
Example #11
# checkDataFrame(bgs)

###########################################
#### PART 2: DISAGGREGATE ACS19 to 2010 BLOCKS
###########################################

# set acs cols - all demographic columns in block groups

bgs.dtypes
blocks.dtypes

acs_cols = list(bgs.columns)[12:31]
block_cols = list(blocks.columns)[15:62]

# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)

# We prorate the vote totals according to each block's share of the overall bg population:
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[acs_cols], weights)
blocks[acs_cols] = prorated

test = bgs[acs_cols].dtypes
test2 = bgs[acs_cols]
#### TESTING

# check total population at block level and block group level
blocks['tot19'].sum()  #9966182
bgs['tot19'].sum()  #9965265
blocks['tot'].sum()  #9883640
bgs['tot10'].sum()  #9883640
Example #12
il2010 = gpd.read_file(chicago_2010_file)
il2000 = gpd.read_file(chicago_2000_file)
il1990 = gpd.read_file(chicago_1990_file)
blocks = gpd.read_file(chicago_blocks_file)
il2000.crs = il2010.crs
il1990.crs = il2010.crs
blocks.to_crs(il2010.crs, inplace=True)

for c in columns:
    il2010[c] = il2010[c].astype(int)
    il2000[c] = il2000[c].astype(int)
    il1990[c] = il1990[c].astype(int)

pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0)
pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0)
weights2000 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum()
weights1990 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum()
weights2000 = maup.normalize(weights2000, level=0)
weights1990 = maup.normalize(weights1990, level=0)

il2010[columns2000] = maup.prorate(pieces2000, il2000[columns], weights=weights2000)
il2010[columns1990] = maup.prorate(pieces1990, il1990[columns], weights=weights1990)

il2010.plot(column=il2010["TOTPOP_2000"].isna())
plt.show()

print(il2010["NH_BLACK_2000"])
print(il2010["TOTPOP_2000"])

def relentropy(df, races, totpop_col):
    totpop = sum(x for x in df[totpop_col] if not isnan(x))  # isnan from the math module
Example #13
    blocks[coi_col] = blocks_within[coi_col]

# create district columns for each map
plan_dict['court']['DistNum'] = plan_dict['court']['District_1'].map(
    lambda x: str(x).zfill(3))
plan_dict['enacted']['DistNum'] = plan_dict['enacted']['HOUSE_TA_6'].map(
    lambda x: str(x).zfill(3))
plan_dict['reform']['DistNum'] = plan_dict['reform']['DISTRICT_N'].map(
    lambda x: str(x).zfill(3))

plan_dict['court'].set_index('DistNum', inplace=True)
plan_dict['enacted'].set_index('DistNum', inplace=True)
plan_dict['reform'].set_index('DistNum', inplace=True)

# assign blocks to map districts
blocks['court'] = maup.assign(blocks, plan_dict['court'])
blocks['enacted'] = maup.assign(blocks, plan_dict['enacted'])
blocks['reform'] = maup.assign(blocks, plan_dict['reform'])
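
# The three assignments above could equivalently be written as a loop, which
# keeps the plan names in one place (a style note; re-running it simply
# recomputes the same columns):
for plan_name in ("court", "enacted", "reform"):
    blocks[plan_name] = maup.assign(blocks, plan_dict[plan_name])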

# isolate the blocks in richmond COIs
coi_cols = [f"coi_{i}" for i in range(1, 14)]
richmond_blocks = blocks.loc[blocks[coi_cols].eq(True).any(axis=1)]

# save COI blocks
richmond_blocks.to_file('./COI/richmond_all_blocks.shp')
Example #14
# check for unique district columns
if plan_shp[dist_col].nunique() == num_dists:
    print('yes')
else:
    print('no')
    
# convert assignment column to string, set as index
plan_shp[dist_col] = plan_shp[dist_col].map(lambda x: str(x).zfill(3))
plan_shp.set_index(dist_col, inplace=True)



# maup assign - blocks
assignment = maup.assign(blocks, plan_shp)
blocks[dist_type] = assignment

# maup assign - precincts
assignment = maup.assign(precs, plan_shp)
precs[dist_type] = assignment


# generate and export crosswalks

prec_crosswalk = precs[[dist_type]]
block_crosswalk = blocks[[dist_type]]

prec_crosswalk.to_csv('./{0}_dash_prec_cross.csv'.format(state_name))
block_crosswalk.to_csv('./{0}_dash_block_cross.csv'.format(state_name))
Example #15
state = gpd.read_file(state_path)
state.crs
list(state.columns)  # SLDUST is state senate district
state.rename(columns={"SLDUST": "id"}, inplace=True)
state = state[["id", "geometry"]]

# state HOR data
st_house_path = "./raw-from-source/tl_2016_29_sldl/tl_2016_29_sldl.shp"
st_house = gpd.read_file(st_house_path)
st_house.crs
list(st_house.columns)  # SLDLST is the state house district
st_house.rename(columns={"SLDLST": "id"}, inplace=True)
st_house = st_house[["id", "geometry"]]

# Assigning precincts to U.S. congressional districts
assignment = maup.assign(prec, mscong_merging)
assignment.isna().sum()
prec["CD115FP"] = assignment

# Assigning precincts to state senate districts
assignment = maup.assign(prec, state)
assignment.isna().sum()
prec["SLDUST"] = assignment

# Assigning precincts to state house districts
assignment = maup.assign(prec, st_house)
assignment.isna().sum()
prec["SLDLST"] = assignment

prec.to_file("./output/mo_prec_labeled/mo_prec_labeled_nopop.shp")
Example #16
# Do we want to include CVAP data?
cvap = False

# Read in existing data and blocks.
existing = gpd.read_file(indir)
blocks = gpd.read_file(path.join(georoot,
                                 "blocks-demo-adjoined")).to_crs(existing.crs)

# Get the columns we want.
all_columns = list(set(list(blocks)) - {"GEOID", "geometry"})
nocvap_columns = list(
    set(c for c in list(blocks) if "_" not in c) - {"GEOID", "geometry"})
columns = all_columns if cvap else nocvap_columns

# Aggregate up to precincts.
assignment = maup.assign(blocks, existing)
existing[columns] = blocks[columns].groupby(assignment).sum()

# Fill NaNs with 0.
existing[columns] = existing[columns].fillna(0)

# Assert that our columns are nearly equal.
for column in columns:
    try:
        assert np.isclose(existing[column].sum(), blocks[column].sum())
    except AssertionError:
        print(f"The column {column} didn't sum properly.")

# Fix geometries and write to file.
existing["geometry"] = existing["geometry"].buffer(0)
if not path.exists(outdir):
    os.mkdir(outdir)
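
# The excerpt cuts off before the write itself; a plausible continuation,
# assuming the aggregated precincts belong in outdir (filename hypothetical):
existing.to_file(path.join(outdir, "precincts.shp"))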