def test_normalize():
    """Check maup.normalize on a small two-level weight series.

    The expected values are each weight divided by the total of the weights
    sharing its first index level (30 for level 0, 40 for level 1, 30 for
    level 2), so the normalized weights within each group sum to one.
    """
    index = pandas.MultiIndex.from_tuples(
        [(0, 1), (0, 2), (1, 2), (1, 3), (1, 4), (2, 4)]
    )
    weights = pandas.Series([10, 20, 25, 15, 0, 30], index=index)
    expected = pandas.Series(
        [1 / 3, 2 / 3, 25 / 40, 15 / 40, 0, 1], index=index
    )

    result = maup.normalize(weights)

    # The labels must match exactly; otherwise the subtraction below would
    # silently align on the index and introduce NaN entries.
    assert result.index.equals(expected.index)

    # Compare with a small absolute tolerance rather than exact equality:
    # the values are floating-point ratios, and exact comparison depends on
    # the precise order of operations inside normalize. A NaN in the result
    # makes the comparison False, so it still fails the test.
    assert ((result - expected).abs() <= 1e-12).all()
def test_crop_to():
    """Prorate two vote columns onto the new precincts, before and after crop_to.

    The same population-weighted proration is run twice: once on the layers
    as loaded, and once after calling maup.crop_to on the old precincts. The
    test then compares total area and the per-column vote totals of the two
    results.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    def prorate_onto(targets):
        """Prorate old_precincts[columns] onto targets, weighted by block TOTPOP."""
        pieces = maup.intersections(old_precincts, targets, area_cutoff=0)
        block_to_piece = maup.assign(blocks, pieces)
        piece_population = blocks["TOTPOP"].groupby(block_to_piece).sum()
        weights = maup.normalize(piece_population, level=0)
        return maup.prorate(pieces, old_precincts[columns], weights=weights)

    # Baseline: proration without any cropping.
    new_precincts[columns] = prorate_onto(new_precincts)

    # Second run: proration after cropping.
    # NOTE(review): the cropped shapes are stored under the column name
    # "geometries"; confirm this is the column the calls below are meant to
    # pick up, rather than the frame's active geometry column.
    old_precincts["geometries"] = maup.crop_to(old_precincts, new_precincts)
    new_precincts_cropped = new_precincts.copy()
    new_precincts_cropped[columns] = prorate_onto(new_precincts_cropped)

    # NOTE(review): new_precincts_cropped is a copy of new_precincts taken
    # just above; confirm what this area comparison is intended to measure.
    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    total_gain = 0
    for col in columns:
        gain = new_precincts_cropped[col].sum() - new_precincts[col].sum()
        # Cropping must never reduce the votes that reach the new precincts.
        assert gain >= 0
        total_gain += gain

    # Ideally this would be strictly positive (meaning fewer votes are lost
    # after cropping), but crop_to doesn't resolve the missing-votes errors
    # yet.
    assert total_gain >= 0
def test_example_case():
    """Run the example proration workflow on the bundled example layers.

    Votes in the old precincts are prorated onto the new precincts using
    block TOTPOP as weights. The test checks that more than
    len(new_precincts) / 2 of the prorated cells are positive, and that each
    column's total stays within 10% of the old precincts' total.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # area_cutoff=0 drops intersections that have no area (for example,
    # shared boundaries), which should not take part in the proration.
    pieces = intersections(old_precincts, new_precincts, area_cutoff=0)

    # Sum block population per piece, then normalize at index level 0 to
    # obtain the weights.
    block_population = blocks["TOTPOP"]
    block_to_piece = assign(blocks, pieces)
    piece_population = block_population.groupby(block_to_piece).sum()
    weights = normalize(piece_population, level=0)

    # Distribute the old precincts' vote columns across the pieces and on to
    # the new precincts.
    new_precincts[columns] = prorate(
        pieces, old_precincts[columns], weights=weights
    )

    positive_cells = (new_precincts[columns] > 0).sum().sum()
    assert positive_cells > len(new_precincts) / 2

    for col in columns:
        old_total = old_precincts[col].sum()
        new_total = new_precincts[col].sum()
        relative_error = abs(new_total - old_total) / old_total
        assert relative_error < 0.1
il1990 = gpd.read_file(chicago_1990_file) blocks = gpd.read_file(chicago_blocks_file) il2000.crs = il2010.crs il1990.crs = il2010.crs blocks.to_crs(il2010.crs, inplace=True) for c in columns: il2010[c] = il2010[c].astype(int) il2000[c] = il2000[c].astype(int) il1990[c] = il1990[c].astype(int) pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0) pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0) weights2000 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum() weights1990 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum() weights2000 = maup.normalize(weights2000, level=0) weights1990 = maup.normalize(weights1990, level=0) il2010[columns2000] = maup.prorate(pieces2000, il2000[columns], weights=weights2000) il2010[columns1990] = maup.prorate(pieces1990, il1990[columns], weights=weights1990) il2010.plot(column=il2010["TOTPOP_2000"].isna()) plt.show() print(il2010["NH_BLACK_2000"]) print(il2010["TOTPOP_2000"]) def relentropy(df, races, totpop_col): totpop = sum(x for x in df[totpop_col] if not isnan(x)) res = 0 for j in range(0, df.shape[0]):