def verify_gerrychain(df):
    """Report whether a GerryChain graph can be built from ``df``.

    The frame is first passed through ``fix_buffer`` and the result is
    handed to ``Graph.from_geodataframe``.  The graph itself is discarded;
    only success or failure is reported.

    Args:
        df: the geodataframe to check.

    Returns:
        True when the graph was created, False when any ``Exception`` was
        raised along the way (the error is printed, not re-raised).
    """
    created = False
    try:
        Graph.from_geodataframe(fix_buffer(df))
        print("GerryChain graph created")
        created = True
    except Exception as failure:
        # Deliberately broad: this is a yes/no probe, not a loader.
        print("Unable to create GerryChain graph: ", failure)
    return created
def test_uses_graph_geometries_by_default(self, geodataframe):
    """``Partition.plot`` should draw through ``GeoDataFrame.plot`` exactly once.

    The partition is built from a graph created with
    ``Graph.from_geodataframe`` and no geometries are passed to ``plot()``,
    so a single call to the patched ``GeoDataFrame.plot`` shows the graph's
    own geometries were used.

    ``GeoDataFrame.plot`` is replaced only for the duration of the test.
    Assigning the mock directly onto the class would leave it in place for
    every later test in the same process.
    """
    # Local import keeps this block self-contained.
    from unittest.mock import patch

    mock_plot = MagicMock()
    # patch.object restores the original class attribute (including
    # descriptor-based accessors) even if the assertion below fails.
    with patch.object(gp.GeoDataFrame, "plot", mock_plot):
        graph = Graph.from_geodataframe(geodataframe)
        partition = Partition(graph=graph, assignment={node: 0 for node in graph})
        partition.plot()
        assert mock_plot.call_count == 1
def make_dual_graph(st, year):
    """
    Build the dual graph of one state's tract shapefile and save it as JSON.

    The shapefile is read from ``./data/<st>_<year>_tract.shp`` and the graph
    is written to ``./data/<st>_tract.json`` (the output name does not
    include the year).  Every column of the shapefile is attached to the
    graph as node data before writing.

    Arguments:
    st -- 2 letter state postal code (e.g. GA for Georgia); folded to lowercase
    year -- year used in the shapefile name
    """
    state = st.lower()

    # Census tract TIGER/Line file for the requested state and year.
    tracts = gpd.read_file("./data/{}_{}_tract.shp".format(state, year))

    # If the graph is generated successfully a chain can be run on it.
    dual = Graph.from_geodataframe(tracts)
    dual.add_data(tracts, columns=tracts.columns)

    dual.to_json("./data/{}_tract.json".format(state))
"SEN16R", "SEN16L", ] for x in df.columns: if x in variables: df[x] = df[x].astype(int) #county_col = "COUNTYFP10" pop_col = "TOTPOP" df["CPOP"] = df["TOTPOP"] - df["NCPOP"] ccol = "CPOP" uid = "ID" num_districts = 14 graph = Graph.from_geodataframe(df, ignore_errors=True) graph.add_data(df, list(df)) graph = nx.relabel_nodes(graph, df[uid]) elections = [ Election("PRES16", { "Democratic": "PRES16D", "Republican": "PRES16R" }), Election("SEN16", { "Democratic": "SEN16D", "Republican": "SEN16R" }) ] #my_updaters = {"population" : updaters.Tally("TOTPOP", alias="population")}
def main(graph_json, shp, n_steps, output_dir, prefix, seed, pop_col,
         pop_tol, plan_col, reproject, election):
    """Run a ReCom chain and a reversible ReCom chain and save comparison output.

    The graph comes from ``graph_json`` when given, otherwise it is built
    from the shapefile ``shp``.  When both are given, the shapefile's
    geometries are attached to the JSON graph.  Both chains start from the
    plan stored in ``plan_col`` and run for ``n_steps`` steps with the same
    ``seed``.

    Args:
        graph_json: path to a graph JSON file, or a falsy value.
        shp: path to a shapefile, or a falsy value.
        n_steps: number of steps for each chain.
        output_dir: directory for all output (created if missing).
        prefix: prefix passed to the figure/statistics writers.
        seed: seed passed to both chains.
        pop_col: population column used for the tally and by the chains.
        pop_tol: population tolerance passed to the chains.
        plan_col: assignment used for the initial partition.
        reproject: when truthy, the shapefile is passed through ``reprojected``.
        election: falsy, or a pair whose items 0 and 1 are used as the
            'Democratic' and 'Republican' columns.

    Exits with status 1 when neither ``shp`` nor ``graph_json`` is given.
    """
    os.makedirs(output_dir, exist_ok=True)

    if not (shp or graph_json):
        print('Specify a shapefile or a NetworkX-format graph '
              'JSON file.', file=sys.stderr)
        sys.exit(1)

    def load_shapes():
        # Read the shapefile, reprojecting it when requested.
        shapes = gpd.read_file(shp)
        return reprojected(shapes) if reproject else shapes

    if graph_json:
        graph = Graph.from_json(graph_json)
        if shp:
            gdf = load_shapes()
            print('Appending geometries from shapefile to graph...')
            graph.geometry = gdf.geometry  # TODO: is this always valid?
    else:
        graph = Graph.from_geodataframe(load_shapes())
    # Geometry is available exactly when a shapefile was supplied.
    has_geometry = bool(shp)

    my_updaters = {'population': updaters.Tally(pop_col, alias='population')}
    if election:
        my_updaters['election'] = Election('election', {
            'Democratic': election[0],
            'Republican': election[1]
        })

    initial_state = GeographicPartition(graph,
                                        assignment=plan_col,
                                        updaters=my_updaters)

    # Both chains share every setting except reversibility.
    chain_settings = dict(graph=graph,
                          total_steps=n_steps,
                          initial_state=initial_state,
                          pop_col=pop_col,
                          pop_tol=pop_tol,
                          seed=seed)
    normal_chain = RecomChain(reversible=False, **chain_settings)
    reversible_chain = RecomChain(reversible=True, **chain_settings)

    normal_plans = list(tqdm(normal_chain))
    reversible_plans = list(tqdm(reversible_chain))

    cut_edges_fig(output_dir, prefix, normal_plans, reversible_plans)
    longest_boundary_fig(output_dir, prefix, normal_plans, reversible_plans)

    if has_geometry:
        demo_plans(output_dir, '_'.join([prefix, 'recom']),
                   normal_plans, n_steps, n_steps // 25)
        demo_plans(output_dir, '_'.join([prefix, 'reversible_recom']),
                   reversible_plans, n_steps, n_steps // 25)

    if election:
        election_hists(output_dir, 'dem_vote_share', 'election',
                       'Democratic', normal_plans, reversible_plans)

    acceptance_stats(output_dir, '_'.join([prefix, 'recom']), normal_plans)
    acceptance_stats(output_dir, '_'.join([prefix, 'reversible_recom']),
                     reversible_plans)
import pandas as pd
from local_tools import states
from states import STATES
import geopandas as gpd
from glob import glob
from gerrychain import Graph

# Census TIGER/Line 2010 archives; the placeholder is the state FIPS code.
BLOCKGROUP_URL = "https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{}_bg10.zip"
COUNTY_URL = "https://www2.census.gov/geo/tiger/TIGER2010/COUNTY/2010/tl_2010_{}_county10.zip"

# Output locations; the placeholder is the lower-cased, underscored state name.
BLOCKGROUP_GRAPH = "../districtContiguity/graphs/{}_blockgroups.json"
COUNTY_GRAPH = "../districtContiguity/graphs/{}_counties.json"

# Postal code -> state name in lowercase with spaces replaced by underscores.
postal_to_name = {
    postal: name.lower().replace(" ", "_")
    for name, postal in states.name_postal_code_mappings.items()
}


def _export_adjacency(url_template, path_template, code, fips):
    """Download one shape archive and write its adjacency graph as JSON.

    Only the GEOID (renamed from GEOID10) and geometry columns are kept, with
    GEOID as the index, before the graph is built.
    """
    shapes = gpd.read_file(url_template.format(fips))
    shapes = shapes.rename(columns={"GEOID10": "GEOID"})
    shapes = shapes[["GEOID", "geometry"]].set_index("GEOID")
    adjacency = Graph.from_geodataframe(shapes)
    adjacency.to_json(path_template.format(postal_to_name[code]))
    # A CSV export of the shapes to ../districtCenter/resources/ used to
    # follow here and is currently disabled.


# Block-group graphs.  The first 13 entries of STATES are skipped --
# presumably already processed in an earlier run; TODO confirm.
for code, st in list(STATES.items())[13:]:
    print("{} - {}".format(code, st["STFIPS"]))
    _export_adjacency(BLOCKGROUP_URL, BLOCKGROUP_GRAPH, code, st["STFIPS"])

# County graph, produced for Iowa only.
code, st = "IA", STATES["IA"]
print("{} - {}".format(code, st["STFIPS"]))
_export_adjacency(COUNTY_URL, COUNTY_GRAPH, code, st["STFIPS"])
def crossover_test():
    """Run every crossover operator once on the New Mexico precinct map.

    Two seed plans with 42 districts are drawn with ``recursive_tree_part``.
    Each crossover function is then called on that same pair, with map
    drawing and testing mode enabled, and the cut-edge counts of the two
    children are printed.
    """
    # Disabled alternative datasets, kept here for reference:
    #   Iowa:  k = 4, graph './input_data/iowa.json', unit 'GEOID10',
    #          area 'area', x 'INTPTLON10', y 'INTPTLAT10',
    #          shapefile './input_data/IA_counties', CRS epsg:26775
    #   Texas: k = 36, graph './input_data/tx.json', unit 'CNTYVTD',
    #          area 'Shape_area', x 'x_val', y 'y_val',
    #          shapefile './input_data/Texas_xy/Texas_xy.shp', CRS epsg:26775

    # New Mexico
    k = 42  # NM state senate districts
    unit_name = 'NAME10'
    gdf = gpd.read_file(
        './input_data/NM_precincts_edited/NM_precincts_edited.shp')
    graph = Graph.from_geodataframe(gdf)
    graph.add_data(gdf)
    ideal_pop = sum(graph.nodes[v]["TOTPOP"] for v in graph.nodes()) / k

    # Store each unit's centroid on its node.
    area_name = 'Area'
    x_name, y_name = 'x_val', 'y_val'
    centroids = gdf.centroid
    c_x, c_y = centroids.x, centroids.y
    for node in graph.nodes():
        attrs = graph.nodes[node]
        attrs["x_val"] = c_x[node]
        attrs["y_val"] = c_y[node]

    # Normalise position and area to floats under fixed attribute names.
    for node in graph.nodes():
        attrs = graph.nodes[node]
        attrs["x"] = float(attrs[x_name])
        attrs["y"] = float(attrs[y_name])
        attrs["area"] = float(attrs[area_name])

    plan_updaters = {
        "population": Tally("TOTPOP", alias="population"),
        "cut_edges": cut_edges,
        "centroids": centroids_x_y_area
    }

    new_plan1 = recursive_tree_part(graph, range(k), ideal_pop, "TOTPOP",
                                    .02, 3)
    part1 = Partition(graph, assignment=new_plan1, updaters=plan_updaters)
    new_plan2 = recursive_tree_part(graph, range(k), ideal_pop, "TOTPOP",
                                    .02, 3)
    part2 = Partition(graph, assignment=new_plan2, updaters=plan_updaters)

    max_adjust = 10000
    ep = 0.05

    # Every operator takes the same arguments, so drive them from a table.
    crossovers = (
        ("tiling crossover test:", tiling_crossover),
        ("seam crossover test:", seam_split_crossover),
        ("book chapter crossover test:", book_chapter_crossover),
        ("chen crossover test:", chen_crossover),
        ("half-half recom crossover test:", half_half_recom_crossover),
    )
    for banner, crossover in crossovers:
        print(banner)
        child1, child2 = crossover(part1, part2, k, ep, max_adjust,
                                   ideal_pop, draw_map=True, gdf=gdf,
                                   unit_name=unit_name, testing=True)
        print(len(child1.cut_edges), len(child2.cut_edges))
# Read the run settings from the command line.
args = parser.parse_args()
STEP_COUNT = args.steps
BURN_IN = int(0.1 * STEP_COUNT)  # 10% of the step count, truncated to an int
CITY_NAME = args.city
STATE = args.state
STATE_FIPS = str(args.fips)
TOT_WORKERS = args.workers

# Shared dictionary for collecting results
# (presumably multiprocessing.Manager, for use across worker processes -- confirm).
manager = Manager()
results = manager.dict()

race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS)

# build chain
graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
# Copy the "total" column onto the nodes as the "population" attribute.
nx.set_node_attributes(graph,
                       race_matrix["total"].to_dict(),
                       name="population")
# The initial assignment is the "partition" column of race_matrix.
init_partition = Partition(
    graph,
    assignment=race_matrix.to_dict()["partition"],
    updaters={"population": Tally("population")},
)


# validators
def mean_pop(part):
    # Mean of the per-district values of the "population" updater.
    return np.mean(list(part["population"].values()))


def min_pop(part):
    # Smallest of the per-district values of the "population" updater.
    return min(list(part["population"].values()))
#unit_name = 'GEOID10' #area_name = 'area' #x_name = 'INTPTLON10' #y_name = 'INTPTLAT10' #shapefile_name = 'IA_counties' #gdf = gpd.read_file('./input_data/'+shapefile_name) #gdf = gdf.to_crs({'init': 'epsg:26775'}) #NEW MEXICO k = 42 #NM state senate districts graph_name = 'New Mexico' unit_name = 'NAME10' num_districts = k plot_path = './input_data/NM_precincts_edited/NM_precincts_edited.shp' gdf = gpd.read_file(plot_path) graph = Graph.from_geodataframe(gdf) graph.add_data(gdf) ideal_pop = sum([graph.nodes[v]["TOTPOP"] for v in graph.nodes()]) / num_districts area_name = 'Area' centroids = gdf.centroid c_x = centroids.x c_y = centroids.y for node in graph.nodes(): graph.nodes[node]["x_val"] = c_x[node] graph.nodes[node]["y_val"] = c_y[node] x_name = 'x_val' y_name = 'y_val' ##TEXAS #k=36
# Load the shapefile and coerce the "CD" and "Seed_Demo" columns to integers.
state_gdf = gpd.read_file(plot_path)
state_gdf["CD"] = state_gdf["CD"].astype('int')
state_gdf["Seed_Demo"] = state_gdf["Seed_Demo"].astype('int')
# Replace "-" with "_" in the column names.
state_gdf.columns = state_gdf.columns.str.replace("-", "_")

#replace cut-off candidate names from shapefile with full names
state_gdf_cols = list(state_gdf.columns)
cand1_index = state_gdf_cols.index('RomneyR_12')
cand2_index = state_gdf_cols.index('ObamaD_12P')
# Swap the contiguous run of columns from 'RomneyR_12' through 'ObamaD_12P'
# (inclusive) for TX_columns.
# NOTE(review): assumes TX_columns has exactly as many names as that run;
# otherwise the column assignment below will not line up -- confirm.
state_gdf_cols[cand1_index:cand2_index + 1] = TX_columns
state_gdf.columns = state_gdf_cols
# Plain DataFrame copy of the data without the geometry column.
state_df = pd.DataFrame(state_gdf)
state_df = state_df.drop(['geometry'], axis=1)

#build graph from geo_dataframe #####################################################
graph = Graph.from_geodataframe(state_gdf)
graph.add_data(state_gdf)
# Store each unit's centroid coordinates on its node as C_X / C_Y.
centroids = state_gdf.centroid
c_x = centroids.x
c_y = centroids.y
for node in graph.nodes():
    graph.nodes[node]["C_X"] = c_x[node]
    graph.nodes[node]["C_Y"] = c_y[node]

#set up elections data structures ################################################
elections = list(elec_data["Election"])
elec_type = elec_data["Type"]
elec_cand_list = TX_columns
# Keep only the elections that are not listed in dropped_elecs.
elecs_bool = ~elec_data.Election.isin(list(dropped_elecs))
elec_data_trunc = elec_data[elecs_bool].reset_index(drop=True)
def run_full_chain(chain_name):
    """Run one Markov chain for a city and save entropy and compactness results.

    Settings (step count, city, state, FIPS code) are read from the command
    line.  The chain uses ``propose_chunk_flip`` proposals, always accepts,
    and is constrained by the population validators built below.  After a
    burn-in of ``BURN_IN_RATIO * STEP_COUNT`` steps, every
    ``THINNING_FACTOR``-th state is measured: its entropy (via
    ``chain_to_entropy``) and its minimum Polsby-Popper score are recorded.

    The scores are written to ``./results_2020/polsby_popper_oakland.npy``
    and the entropies are passed to ``save_results``.

    Args:
        chain_name: label used in progress messages and passed to
            ``save_results``.
    """
    # # twilio setup, requires proper env variables to be set up (so it will text you when the chain is done)
    # account = os.environ["TWILIO_ACCT"]
    # auth = os.environ["TWILIO_AUTH"]
    # client = Client(account, auth)

    # get hyperparams
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--steps",
        type=int,
        help="number of steps for each markov chain",
        default=100000,
    )
    parser.add_argument("city", type=str, help="city name, i.e. Atlanta")
    parser.add_argument("state", type=str, help="state code, i.e. GA")
    parser.add_argument(
        "fips", help="state FIPS code (zero-padded on the end), i.e. 130")
    args = parser.parse_args()

    STEP_COUNT = args.steps
    BURN_IN_RATIO = 0.1
    CITY_NAME = args.city
    STATE = args.state
    STATE_FIPS = str(args.fips)
    THINNING_FACTOR = 5  # measure entropy only once every these many iterations of MC

    race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS, fake=False)
    R_scratch = race_matrix[[
        "partition", "geometry"
    ]]  # scratch version of R for the polsby-popper computation
    print(race_matrix.head())

    # build chain
    graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
    # Copy the "total" column onto the nodes as the "population" attribute.
    nx.set_node_attributes(graph,
                           race_matrix["total"].to_dict(),
                           name="population")
    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    # validators
    # Each helper summarises the per-district values of the "population" updater.
    def mean_pop(part):
        return np.mean(list(part["population"].values()))

    def min_pop(part):
        return np.min(list(part["population"].values()))

    def sd_pop(part):
        return np.std(list(part["population"].values()))

    # TODO: only check if GISJOIN in minimum P-P partition have changed
    # TODO: cache set of GISJOINs for minimum partition for lowest P-P partition
    # TODO: compare this set to the new one when given a partition
    # TODO: if set is different, recompute P-P for whole partition, else do nothing
    def partition_polsby_popper(part, R=R_scratch):
        """Return the minimum Polsby-Popper score over the districts of ``part``.

        Args:
            part (gerrychain partition): partition map from a single step in
                the Markov Chain
            R (geopandas.GeoDataFrame): columns 'partition' and 'geometry' for
                getting the polygons; its 'partition' column is overwritten
                in place with the assignment of ``part``

        Returns:
            the smallest value of 4 * pi * area / length**2 over the district
            geometries obtained by dissolving ``R`` by 'partition'
        """
        # get all shapes from each district
        # compute polsby-popper on all districts, get min
        # Silences pandas' chained-assignment warning (a process-wide option).
        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        R_temp = R.copy(deep=True).dissolve(by="partition")
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length**2
                                                          )  # d is a polygon
        # srs = R["geometry"].map(polsby_popper).values
        # print(np.min(srs), np.mean(srs), np.max(srs))
        # return srs.min()
        return R_temp["geometry"].map(polsby_popper).min()
        # return min(polsby_popper_from_R(R).values())

    def polsby_popper_from_R(R):
        """A more stable version of geopandas dissolve."""
        from shapely.ops import unary_union

        # loop through all partitions and unary-union them, then return a dict indexed by partition id
        result = {}
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length**2
                                                          )  # d is a polygon
        for pid in R["partition"].unique():
            # get all geometries
            geom = R.loc[R["partition"] == pid]["geometry"].values
            result[pid] = polsby_popper(unary_union(geom))
        return result

    def partition_polsby_popper_min(
        part,
        R=R_scratch,
    ):
        """Return the minimum Polsby-Popper score of ``part``, with caching.

        The id, member index set and score of the least compact district are
        remembered between calls in the enclosing function's variables.  If
        the remembered district still has exactly the same members under
        ``part``, the remembered score is returned.  Otherwise all districts
        are rescored with ``polsby_popper_from_R`` and the cache is refreshed.

        ``R``'s 'partition' column is overwritten in place on every call.
        """
        nonlocal min_partition_id
        nonlocal min_partition_gisjoins
        nonlocal min_partition_p_p

        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        # Does the cached worst district still contain the same rows?
        same_gisjoins = (set(
            R.loc[R["partition"] == min_partition_id].index.values) ==
                         min_partition_gisjoins)
        if min_partition_id is not None and same_gisjoins:
            # no change, return the old one
            return min_partition_p_p
        else:
            # something changed, so recompute all partitions
            # R_temp = R.copy(deep=True).dissolve(by="partition")
            # p_p_scores = R_temp["geometry"].map(polsby_popper)
            # min_partition_p_p = p_p_scores.min()
            # min_partition_id = R_temp.iloc[np.argmin(p_p_scores.values)].name
            p_p_scores = polsby_popper_from_R(R)
            min_partition_p_p = min(p_p_scores.values())
            min_partition_id = min(p_p_scores.items(), key=lambda x: x[1])[0]
            min_partition_gisjoins = set(
                R.loc[R["partition"] == min_partition_id].index.values)
            # Only a message is printed here; the score is still returned.
            if (min_partition_p_p < 0.147):
                # initial oakland partition has min score of 0.147
                print("Rejected with score", min_partition_p_p)
            return min_partition_p_p

    # Bounds on the mean district population.  Despite the names, the offsets
    # are 2/3 of a standard deviation of the initial partition, not one.
    mean_one_sd_up = mean_pop(init_partition) + (2 / 3) * sd_pop(init_partition)
    mean_one_sd_down = mean_pop(init_partition) - (2 /
                                                   3) * sd_pop(init_partition)

    # Cache shared with partition_polsby_popper_min (declared nonlocal there).
    min_partition_id, min_partition_gisjoins, min_partition_p_p = None, set(
    ), None

    # initialize and run chains
    # TODO: record descent
    is_valid = Validator([
        # NOTE(review): this bound is the initial minimum population modulo
        # 50 -- confirm the "%" is intended.
        LowerBound(min_pop,
                   min_pop(init_partition) % 50),
        UpperBound(mean_pop, mean_one_sd_up),
        LowerBound(mean_pop, mean_one_sd_down),
        WithinPercentRangeOfBounds(sd_pop, 25),
        # contiguous,
        # LowerBound(
        #     partition_polsby_popper, bound=partition_polsby_popper(init_partition)
        # ),
        # LowerBound(
        #     partition_polsby_popper_min,
        #     bound=partition_polsby_popper_min(init_partition),
        # ),
        no_vanishing_districts,
    ])

    # make sure init_partition passes validators
    assert is_valid(init_partition)

    # Enough steps for the burn-in plus THINNING_FACTOR steps per measurement.
    chain = MarkovChain(
        proposal=propose_chunk_flip,
        constraints=is_valid,
        accept=always_accept,
        initial_state=init_partition,
        total_steps=(STEP_COUNT * THINNING_FACTOR) +
        int(STEP_COUNT * BURN_IN_RATIO),
    )

    print(f"Prereqs created, {chain_name} running...")

    # burn-in: advance the chain int(STEP_COUNT * BURN_IN_RATIO) steps without
    # recording anything
    # NOTE(review): the iterator returned by iter(chain) is discarded;
    # presumably the call is made for its reset side effect -- confirm.
    iter(chain)
    # print(f"Burn-in: ({int(STEP_COUNT * BURN_IN_RATIO)} steps)")
    for i in range(int(STEP_COUNT * BURN_IN_RATIO)):
        if i % 100 == 0:
            print(
                f"{chain_name} BURN IN => {i}/{int(STEP_COUNT * BURN_IN_RATIO)}"
            )
        next(chain)

    # print(f"Measurement: ({STEP_COUNT} steps)")
    entropies = []
    scores = []

    start_time = time.time()
    for i in range(STEP_COUNT * THINNING_FACTOR):
        if i % 25 == 0:
            print(
                f"{chain_name} ELAPSED {round(time.time() - start_time, 1)}s => {len(entropies)}/{STEP_COUNT}"
            )
        if i % THINNING_FACTOR == 0:
            # Measured step: record the entropy and compactness of this state.
            part = next(chain)
            entropies.append(chain_to_entropy(part, race_matrix))
            scores.append(partition_polsby_popper_min(part))
        else:
            # Thinned-out step: advance the chain without measuring.
            next(chain)

    np.save("./results_2020/polsby_popper_oakland.npy", np.array(scores))

    save_results(
        CITY_NAME,
        STEP_COUNT,
        chain_name,
        baseline=chain_to_entropy(init_partition, race_matrix),
        entropies=entropies,
    )