예제 #1
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_prorate_raises_if_data_is_not_dataframe_or_series(sources, targets):
    """Passing a plain string as the data argument must raise TypeError."""
    pieces = intersections(sources, targets)
    zero_weights = pandas.Series([0] * len(pieces), index=pieces.index)
    invalid_data = "not a series"
    with pytest.raises(TypeError):
        prorate(pieces, invalid_data, weights=zero_weights)
예제 #2
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_prorate_gives_expected_value(sources, targets):
    """Prorating source areas by area share should reproduce the target areas."""
    pieces = intersections(sources, targets, area_cutoff=0)
    # Source label of every piece, re-indexed so it lines up with `pieces`.
    source_labels = pieces.index.get_level_values("source").to_series(
        index=pieces.index
    )
    # Weight of a piece = its area divided by the area of the source it came from.
    source_area_per_piece = source_labels.map(sources.area)
    weights = pieces.area / source_area_per_piece
    result = prorate(pieces, sources.area, weights)
    matches = result == targets.area
    assert matches.all()
예제 #3
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_trivial_case(sources):
    """Prorating a layer onto itself with unit weights must return the data unchanged."""
    columns = ["data1", "data2"]
    for name in columns:
        sources[name] = [10, 10, 10, 10]
    pieces = intersections(sources, sources, area_cutoff=0)
    unit_weights = pandas.Series([1] * len(pieces), index=pieces.index)
    result = prorate(pieces, sources[columns], unit_weights)
    unchanged = result == sources[columns]
    # First .all() reduces over rows, second over columns.
    assert unchanged.all().all()
예제 #4
0
def test_crop_to():
    """Compare prorated vote columns computed before and after calling crop_to.

    The SEN18D/SEN18R columns of the old precincts are prorated onto the new
    precincts twice, each time weighting the intersection pieces by the block
    TOTPOP assigned to them. The test asserts that the column totals from the
    second ("cropped") run are not smaller than those from the first run.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # Calculate without cropping
    pieces = maup.intersections(old_precincts, new_precincts, area_cutoff=0)
    # Sum block population per piece, then normalize the weights at index level 0.
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts[columns] = maup.prorate(pieces,
                                          old_precincts[columns],
                                          weights=weights)

    # Calculate with cropping
    # NOTE(review): the crop_to result is stored in a column named "geometries";
    # confirm whether it was meant to replace the geometry that
    # maup.intersections reads below.
    old_precincts["geometries"] = maup.crop_to(old_precincts, new_precincts)
    # The copy is taken after the uncropped columns were written above; those
    # columns are overwritten on the copy further down.
    new_precincts_cropped = new_precincts.copy()
    pieces = maup.intersections(old_precincts,
                                new_precincts_cropped,
                                area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts_cropped[columns] = maup.prorate(pieces,
                                                  old_precincts[columns],
                                                  weights=weights)

    # NOTE(review): new_precincts_cropped is a copy of new_precincts; confirm
    # that the two area totals are really expected to differ.
    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    # Accumulate, per column, how much larger the cropped total is.
    diff_sum = 0
    for col in columns:
        diff = new_precincts_cropped[col].sum() - new_precincts[col].sum()
        assert diff >= 0

        diff_sum += diff

    # Ideally this would be strictly positive (which would mean fewer votes are lost after cropping)
    # but crop_to doesn't resolve the missing votes errors yet.
    assert diff_sum >= 0
예제 #5
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_prorate_dataframe_with_assignment(sources, targets):
    """Prorate a two-column DataFrame through a plain assignment Series.

    Instead of intersection pieces, ``prorate`` receives a one-entry Series
    mapping label 0 to label 0 with weight 1. Both constant columns must come
    back as 10, and the result must be indexed like ``targets``.
    """
    sources["data1"] = [10, 10, 10, 10]
    sources["data2"] = [10, 10, 10, 10]
    columns = ["data1", "data2"]

    # Single assignment 0 -> 0, carried over with full weight.
    relationship = pandas.Series({0: 0})
    weight_by = pandas.Series({0: 1})

    prorated = prorate(relationship, sources[columns], weight_by)

    assert (prorated["data1"] == 10).all()
    assert (prorated["data2"] == 10).all()
    # `index == index` is elementwise: asserting on it raises ValueError for any
    # index that does not have exactly one label. Index.equals returns one bool
    # and turns a length or label mismatch into an ordinary assertion failure.
    assert prorated.index.equals(targets.index)
예제 #6
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_prorate_dataframe(sources, targets):
    """Prorating constant columns by area share should give 10 times each target's area."""
    columns = ["data1", "data2"]
    for name in columns:
        sources[name] = [10, 10, 10, 10]

    pieces = intersections(sources, targets)

    # Weight of a piece = its area divided by the area of the source it came from.
    source_area_per_piece = pieces.index.get_level_values("source").map(sources.area)
    weight_by = pieces.area / source_area_per_piece

    prorated = prorate(pieces, sources[columns], weight_by)

    for name in columns:
        assert (prorated[name] == 10 * targets.area).all()
예제 #7
0
def test_example_case():
    """Run the block-weighted proration example on the bundled sample shapefiles."""
    blocks, old_precincts, new_precincts = (
        geopandas.read_file(path)
        for path in (
            "zip://./examples/blocks.zip",
            "zip://./examples/precincts.zip",
            "zip://./examples/new_precincts.zip",
        )
    )
    columns = ["SEN18D", "SEN18R"]

    # area_cutoff=0 leaves out intersections that have no area (such as shared
    # boundaries), which should not take part in the proration.
    pieces = intersections(old_precincts, new_precincts, area_cutoff=0)

    # Weight every piece by the total block population assigned to it.
    blocks_to_pieces = assign(blocks, pieces)
    weights = blocks["TOTPOP"].groupby(blocks_to_pieces).sum()

    new_precincts[columns] = prorate(
        pieces, old_precincts[columns], weights=weights
    )

    # More than half as many positive cells as there are new precincts.
    positive_cells = (new_precincts[columns] > 0).sum().sum()
    assert positive_cells > len(new_precincts) / 2
예제 #8
0
def prorate(target, source, targetcol, sourcecol, columns):
    """
    Prorate data from the source geometries down to the target geometries.

    Every target row is assigned to a source row with ``maup.assign``. Its weight
    is its ``targetcol`` value divided by the ``sourcecol`` value of the source it
    was assigned to. The prorated values are written into ``target`` in place.

    :param target: Target geometries; modified in place.
    :param source: Source geometries.
    :param targetcol: Column for target weights.
    :param sourcecol: Column for source weights.
    :param columns: Columns to prorate.
    :return: ``target`` itself, with ``columns`` holding the prorated data.
    """
    target_to_source = maup.assign(target, source)
    # Weight total of the source that each target row was assigned to.
    source_totals = target_to_source.map(source[sourcecol])
    shares = target[targetcol] / source_totals
    target[columns] = maup.prorate(target_to_source, source[columns], shares)
    return target
# Inspect column dtypes (these bare expressions only display output in an
# interactive session).
bgs.dtypes
blocks.dtypes

# Block-group columns to prorate, selected by position (columns 12..36).
bg_cols = list(bgs.columns)[12:37]

# Cast the selected columns to float before prorating.
bgs[bg_cols] = bgs[bg_cols].astype(float)

bgs.dtypes

# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)

# prorate ACS columns by 2010 pop: each block's weight is its `tot` divided by
# the `tot10` of the block group it was assigned to
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[bg_cols], weights)
blocks[bg_cols] = prorated

#### TESTING

# check total population at block level and block group level
# (trailing comments are the values recorded by the author)
blocks['tot19'].sum()  #12792129
bgs['tot19'].sum()  #12791530
blocks['tot'].sum()  #12702379
bgs['tot10'].sum()  #12702379

# # save blocks with dec 10 + acs 19
# blocks.to_file(out_file)

#######################################################################
#### PART 2: AGGREGATE DEC10 + ACS19 FROM BLOCKS TO PLANS
예제 #10
0
#test = bgs.columns
# Block-group columns to prorate, selected by position: columns 12..36 and
# everything from column 38 onward.
acs_cols = list(bgs.columns)[12:37]
cvap_cols = list(bgs.columns)[38:]

bg_cols = acs_cols + cvap_cols

# Cast the selected columns to float before prorating.
bgs[bg_cols] = bgs[bg_cols].astype(float)

# Inspect the dtypes after the cast (only displays output interactively).
bgs.dtypes

# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)

# Prorate the selected block-group columns: each block's weight is its `tot`
# divided by the `tot10` of the block group it was assigned to.
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[bg_cols], weights)
blocks[bg_cols] = prorated

#### TESTING

# check total population at block level and block group level
# (trailing comments are the values recorded by the author)
blocks['tot19'].sum()  #3932891.53
bgs['tot19'].sum()  #3932870.0
blocks['tot'].sum()  #3751351.0
bgs['tot10'].sum()  #3751351.0

# # save blocks with dec 10 + acs 19
# blocks.to_file(out_file)

#######################################################################
#### PART 2: AGGREGATE DEC10 + ACS19 FROM BLOCKS TO PLANS
예제 #11
0
###########################################

# set acs cols - all demographic columns in block groups

# Inspect column dtypes (only displays output in an interactive session).
bgs.dtypes
blocks.dtypes

# Columns selected by position; block_cols is not used again in this fragment.
acs_cols = list(bgs.columns)[12:31]
block_cols = list(blocks.columns)[15:62]

# assign blocks to block groups and disaggregate based on population
assignment = maup.assign(blocks, bgs)

# Prorate the ACS columns: each block's weight is its `tot` divided by the
# `tot10` of the block group it was assigned to.
weights = blocks.tot / assignment.map(bgs.tot10)
prorated = maup.prorate(assignment, bgs[acs_cols], weights)
blocks[acs_cols] = prorated

# Scratch variables for inspecting the source columns and their dtypes.
test = bgs[acs_cols].dtypes
test2 = bgs[acs_cols]
#### TESTING

# check total population at block level and block group level
# (trailing comments are the values recorded by the author)
blocks['tot19'].sum()  #9966182
bgs['tot19'].sum()  #9965265
blocks['tot'].sum()  #9883640
bgs['tot10'].sum()  #9883640

# # save blocks with dec 10 + acs 19
# blocks.to_file(out_file)
예제 #12
0
파일: test_prorate.py 프로젝트: mggg/maup
def test_one_dimensional_intersections_dont_cause_error(sources):
    """Prorate onto the first two sources without passing an area cutoff."""
    first_two = sources.iloc[:2]
    pieces = intersections(sources, first_two)
    # Weight of a piece = its area divided by the area of the source it came from.
    source_area_per_piece = pieces.index.get_level_values("source").map(sources.area)
    weight_by = pieces.area / source_area_per_piece
    result = prorate(pieces, sources.area, weight_by)
    matches = result == sources.iloc[:2].area
    assert matches.all()
예제 #13
0
# Give il1990 the same CRS attribute as il2010.
# NOTE(review): this assigns the attribute directly; presumably the 1990
# coordinates are already in that CRS — confirm, since blocks is converted
# with to_crs on the next line instead.
il1990.crs = il2010.crs
blocks.to_crs(il2010.crs, inplace=True)

# Cast every column listed in `columns` (defined outside this fragment) to int
# in all three layers.
for c in columns:
    il2010[c] = il2010[c].astype(int)
    il2000[c] = il2000[c].astype(int)
    il1990[c] = il1990[c].astype(int)

# Intersect each older layer with the 2010 layer; area_cutoff=0 is passed to
# both calls.
pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0)
pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0)
# Weight each piece by the summed TOTPOP of the blocks assigned to it, then
# normalize at index level 0.
weights2000 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum()
weights1990 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum()
weights2000 = maup.normalize(weights2000, level=0)
weights1990 = maup.normalize(weights1990, level=0)

# Store the prorated 2000 and 1990 data on il2010 under the column names in
# columns2000 / columns1990 (both defined outside this fragment).
il2010[columns2000] = maup.prorate(pieces2000, il2000[columns], weights=weights2000)
il2010[columns1990] = maup.prorate(pieces1990, il1990[columns], weights=weights1990)

# Map which il2010 rows have a missing TOTPOP_2000 value.
il2010.plot(column=il2010["TOTPOP_2000"].isna())
plt.show()

print(il2010["NH_BLACK_2000"])
print(il2010["TOTPOP_2000"])

def relentropy(df, races, totpop_col):
    totpop = sum(x for x in df[totpop_col] if not isnan(x))
    res = 0
    for j in range(0, df.shape[0]):
        jpop = df[totpop_col][j] 
        everyoneelse = jpop
        if jpop == 0 or isnan(jpop): continue