def test_prorate_raises_if_data_is_not_dataframe_or_series(sources, targets):
    """prorate must reject `data` that is neither a DataFrame nor a Series."""
    overlaps = intersections(sources, targets)
    zero_weights = pandas.Series([0] * len(overlaps), index=overlaps.index)
    with pytest.raises(TypeError):
        prorate(overlaps, "not a series", weights=zero_weights)
def test_prorate_gives_expected_value(sources, targets):
    """Prorating source areas by each piece's share of its source area
    should reconstruct the target areas exactly.
    """
    overlaps = intersections(sources, targets, area_cutoff=0)
    # Label each piece with its source geometry, then weight by the
    # fraction of that source's area the piece covers.
    source_labels = overlaps.index.get_level_values("source").to_series(
        index=overlaps.index
    )
    area_share = overlaps.area / source_labels.map(sources.area)
    result = prorate(overlaps, sources.area, area_share)
    assert (result == targets.area).all()
def test_trivial_case(sources):
    """Prorating a frame onto itself with unit weights returns it unchanged."""
    data_cols = ["data1", "data2"]
    for col in data_cols:
        sources[col] = [10, 10, 10, 10]
    self_overlaps = intersections(sources, sources, area_cutoff=0)
    unit_weights = pandas.Series([1] * len(self_overlaps), index=self_overlaps.index)
    round_tripped = prorate(self_overlaps, sources[data_cols], unit_weights)
    assert (round_tripped == sources[data_cols]).all().all()
def test_crop_to():
    """Cropping old precincts to the new-precinct boundary before prorating
    should lose no more votes (per column) than the uncropped computation.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # Calculate without cropping
    pieces = maup.intersections(old_precincts, new_precincts, area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts[columns] = maup.prorate(
        pieces, old_precincts[columns], weights=weights
    )

    # Calculate with cropping.
    # BUG FIX: the cropped geometries must replace the active "geometry"
    # column. The previous code assigned them to a new "geometries" column,
    # which a GeoDataFrame ignores, so the "cropped" computation below was
    # actually running on the original, uncropped geometries.
    old_precincts["geometry"] = maup.crop_to(old_precincts, new_precincts)
    new_precincts_cropped = new_precincts.copy()
    pieces = maup.intersections(old_precincts, new_precincts_cropped, area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts_cropped[columns] = maup.prorate(
        pieces, old_precincts[columns], weights=weights
    )

    # NOTE(review): new_precincts_cropped is a plain copy of new_precincts
    # whose geometry is never modified, so this inequality looks like it
    # cannot hold — confirm whether the intent was to compare the *old*
    # precincts' area before/after cropping.
    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    diff_sum = 0
    for col in columns:
        diff = new_precincts_cropped[col].sum() - new_precincts[col].sum()
        # Cropping should never lose more votes than the uncropped run.
        assert diff >= 0
        diff_sum += diff
    # Ideally this would be strictly positive (which would mean less votes
    # are lost after cropping) but crop_to doesn't resolve the missing
    # votes errors yet.
    assert diff_sum >= 0
def test_prorate_dataframe_with_assignment(sources, targets):
    """prorate accepts a plain assignment Series as the relationship."""
    data_cols = ["data1", "data2"]
    for col in data_cols:
        sources[col] = [10, 10, 10, 10]
    assignment = pandas.Series({0: 0})
    unit_weight = pandas.Series({0: 1})
    # Use blocks to estimate population of each piece
    result = prorate(assignment, sources[data_cols], unit_weight)
    assert (result["data1"] == 10).all()
    assert (result["data2"] == 10).all()
    assert result.index == targets.index
def test_prorate_dataframe(sources, targets):
    """Prorating uniform columns by area share spreads values in
    proportion to each target's area.
    """
    data_cols = ["data1", "data2"]
    for col in data_cols:
        sources[col] = [10, 10, 10, 10]
    overlaps = intersections(sources, targets)
    # Each piece's weight is its fraction of the owning source's area.
    area_share = overlaps.area / overlaps.index.get_level_values("source").map(
        sources.area
    )
    # Use blocks to estimate population of each piece
    result = prorate(overlaps, sources[data_cols], area_share)
    assert (result["data1"] == 10 * targets.area).all()
    assert (result["data2"] == 10 * targets.area).all()
def test_example_case():
    """End-to-end example: prorate old-precinct vote totals onto new
    precincts, weighting each piece by block-level population.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    vote_cols = ["SEN18D", "SEN18R"]

    # area_cutoff=0 drops zero-area intersections (e.g. shared boundary
    # touches) that must not take part in the proration.
    overlaps = intersections(old_precincts, new_precincts, area_cutoff=0)

    # Weight by prorated population from blocks.
    piece_pops = blocks["TOTPOP"].groupby(assign(blocks, overlaps)).sum()

    # Use blocks to estimate population of each piece.
    new_precincts[vote_cols] = prorate(
        overlaps, old_precincts[vote_cols], weights=piece_pops
    )

    # A majority of the new precincts should end up with positive totals.
    positive_cells = (new_precincts[vote_cols] > 0).sum().sum()
    assert positive_cells > len(new_precincts) / 2
def prorate(target, source, targetcol, sourcecol, columns):
    """
    Prorates data from the source geometries down to the target geometries.

    :param target: Target geometries.
    :param source: Source geometries.
    :param targetcol: Column for target weights.
    :param sourcecol: Column for source weights.
    :param columns: Columns to prorate.
    :return: Geodataframe with prorated data.
    """
    # Map every target geometry to the source geometry that contains it.
    membership = maup.assign(target, source)
    # Each target's share of its source, by the chosen weight columns.
    share = target[targetcol] / membership.map(source[sourcecol])
    # Write the prorated columns back onto the target frame in place.
    target[columns] = maup.prorate(membership, source[columns], share)
    return target
# Notebook-style script: disaggregate block-group columns down to blocks.
# Bare expressions like `bgs.dtypes` / `.sum()` only display output in a
# REPL/notebook; they are no-ops when run as a script.
bgs.dtypes
blocks.dtypes
# Columns 12:37 are the block-group columns to disaggregate
# (presumably the ACS columns — confirm against the input file's layout).
bg_cols = list(bgs.columns)[12:37]
# Cast to float so division/proration below doesn't truncate.
bgs[bg_cols] = bgs[bg_cols].astype(float)
bgs.dtypes
# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)
# prorate ACS columns by 2010 pop: each block's weight is its share of
# its block group's 2010 population (blocks.tot / bgs.tot10).
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[bg_cols], weights)
blocks[bg_cols] = prorated
#### TESTING
# check total population at block level and block group level
# (block totals should approximately match block-group totals).
blocks['tot19'].sum() #12792129
bgs['tot19'].sum() #12791530
blocks['tot'].sum() #12702379
bgs['tot10'].sum() #12702379
# # save blocks with dec 10 + acs 19
# blocks.to_file(out_file)
#######################################################################
#### PART 2: AGGREGATE DEC10 + ACS19 FROM BLOCKS TO PLANS
#test = bgs.columns acs_cols = list(bgs.columns)[12:37] cvap_cols = list(bgs.columns)[38:] bg_cols = acs_cols + cvap_cols bgs[bg_cols] = bgs[bg_cols].astype(float) bgs.dtypes # assign blocks to block groups and disaggregate based on population assignment = maup.assign(blocks, bgs) # We prorate the vote totals according to each block's share of the overall bg population: weights = blocks.tot / assignment.map(bgs.tot10) prorated = maup.prorate(assignment, bgs[bg_cols], weights) blocks[bg_cols] = prorated #### TESTING # check total population at block level and block group level blocks['tot19'].sum() #3932891.53 bgs['tot19'].sum() #3932870.0 blocks['tot'].sum() #3751351.0 bgs['tot10'].sum() #3751351.0 # # save blocks with dec 10 + acs 19 # blocks.to_file(out_file) ####################################################################### #### PART 2: AGGREGATE DEC10 + ACS19 FROM BLOCKS TO PLANS
########################################### # set acs cols - all demographic columns in block groups bgs.dtypes blocks.dtypes acs_cols = list(bgs.columns)[12:31] block_cols = list(blocks.columns)[15:62] # assign blocks to block groups and disaggregate based on population assignment = maup.assign(blocks, bgs) # We prorate the vote totals according to each block's share of the overall bg population: weights = blocks.tot / assignment.map(bgs.tot10) prorated = maup.prorate(assignment, bgs[acs_cols], weights) blocks[acs_cols] = prorated test = bgs[acs_cols].dtypes test2 = bgs[acs_cols] #### TESTING # check total population at block level and block group level blocks['tot19'].sum() #9966182 bgs['tot19'].sum() #9965265 blocks['tot'].sum() #9883640 bgs['tot10'].sum() #9883640 # # save blocks with dec 10 + acs 19 # blocks.to_file(out_file)
def test_one_dimensional_intersections_dont_cause_error(sources):
    """Intersecting against a two-row subset should still prorate cleanly."""
    subset = sources.iloc[:2]
    overlaps = intersections(sources, subset)
    # Weight each piece by its share of the owning source's area.
    area_share = overlaps.area / overlaps.index.get_level_values("source").map(
        sources.area
    )
    result = prorate(overlaps, sources.area, area_share)
    assert (result == subset.area).all()
# Script fragment: harmonize CRSs, prorate 2000/1990 Illinois data onto the
# 2010 geometries, and plot/print results for inspection.
# (`columns`, `columns2000`, `columns1990` are defined earlier, outside
# this chunk.)
il1990.crs = il2010.crs
blocks.to_crs(il2010.crs, inplace=True)
# Ensure integer dtype on every column being prorated.
for c in columns:
    il2010[c] = il2010[c].astype(int)
    il2000[c] = il2000[c].astype(int)
    il1990[c] = il1990[c].astype(int)
# Pieces of overlap between each older vintage and the 2010 geometries;
# area_cutoff=0 drops zero-area (boundary-only) intersections.
pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0)
pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0)
# Weight pieces by block population, then normalize within each source.
weights2000 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum()
weights1990 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum()
weights2000 = maup.normalize(weights2000, level=0)
weights1990 = maup.normalize(weights1990, level=0)
il2010[columns2000] = maup.prorate(pieces2000, il2000[columns], weights=weights2000)
il2010[columns1990] = maup.prorate(pieces1990, il1990[columns], weights=weights1990)
# Visual check: highlight 2010 units that got no prorated 2000 population.
il2010.plot(column=il2010["TOTPOP_2000"].isna())
plt.show()
print(il2010["NH_BLACK_2000"])
print(il2010["TOTPOP_2000"])

def relentropy(df, races, totpop_col):
    # NOTE(review): this definition appears truncated at the end of this
    # chunk — the loop accumulates nothing into `res` yet, and
    # `everyoneelse` is assigned but not used within the visible portion.
    # Total population, ignoring NaN rows.
    totpop = sum(x for x in df[totpop_col] if not isnan(x))
    res = 0
    for j in range(0, df.shape[0]):
        jpop = df[totpop_col][j]
        everyoneelse = jpop
        # Skip empty or missing-population rows.
        if jpop == 0 or isnan(jpop):
            continue