def dfs(request):
    polys1 = GeoSeries(
        [
            Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
            Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
            Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
        ]
    )
    polys2 = GeoSeries(
        [
            Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
            Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
            Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
        ]
    )

    df1 = GeoDataFrame({"geometry": polys1, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": polys2, "df2": [3, 4, 5]})
    if request.param == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]
    if request.param == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"

    # construct the expected frames; rows pair up on the shared _merge key,
    # and key 3 appears on only one side, producing the unmatched NaN rows
    expected = {}

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = (
        df2.copy()
        .iloc[[0, 1, 1, 2]]
        .reset_index()
        .rename(columns={"index": "index_right"})
    )
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["intersects"] = exp.drop("_merge", axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = df2.copy().reset_index().rename(columns={"index": "index_right"})
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 3, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["contains"] = exp.drop("_merge", axis=1).copy()

    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [3, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["within"] = exp.drop("_merge", axis=1).copy()

    return [request.param, df1, df2, expected]
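# Minimal illustration of the `_merge`-key trick used in the fixture above:
# rows sharing a key pair up in the outer merge, while a key present on only
# one side (3 here) yields NaNs on the other side, mimicking unmatched rows.
import pandas as pd

left = pd.DataFrame({"L": ["l0", "l1"], "_merge": [0, 1]})
right = pd.DataFrame({"R": ["r0", "r1"], "_merge": [0, 3]})
print(pd.merge(left, right, on="_merge", how="outer").drop("_merge", axis=1))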
def get_mascon_gdf(mascon_ds):
    """
    Convert the mascon group in the HDF5 file to a GeoDataFrame.

    Parameters
    ----------
    mascon_ds : HDF5 group
        The HDF5 group labeled "mascon".

    Returns
    -------
    mascon_gdf : GeoDataFrame
        A GeoDataFrame with the same data as in the HDF5 group.
    """
    # pull each dataset's first row into a column of a regular DataFrame
    mascon_dct = {}
    dataset_list = list(mascon_ds.keys())
    for d in dataset_list:
        mascon_dct.update({d: mascon_ds[d][0, :]})
    mascon_df = pd.DataFrame.from_dict(mascon_dct)

    # build one polygon per mascon row (polygeom and CRS are module-level)
    poly_geom = []
    for k, m in mascon_df.iterrows():
        poly_geom.append(polygeom(m))

    mascon_gdf = GeoDataFrame(mascon_df, crs=CRS, geometry=poly_geom)
    # mascon numbering is 1-based in the source data
    mascon_gdf.index = mascon_gdf.index + 1
    print('There are {} Mascons in this dataset.'.format(len(mascon_gdf)))
    return mascon_gdf
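# Usage sketch for get_mascon_gdf (assumptions: an h5py-readable file with a
# top-level "mascon" group of datasets, and polygeom()/CRS defined at module
# level as in the snippet above; the file name is hypothetical).
import h5py

with h5py.File('gsfc_mascons.h5', 'r') as f:  # hypothetical file name
    mascon_gdf = get_mascon_gdf(f['mascon'])
print(mascon_gdf.head())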
def make_grid(gdf, height, cut=True):
    """
    Return a grid, based on the shape of *gdf* and on a *height* value (in
    units of *gdf*). If cut=False, the grid will not be intersected with
    *gdf* (i.e. it makes a grid on the bounding box of *gdf*).

    Parameters
    ----------
    gdf: GeoDataFrame
        The collection of polygons to be covered by the grid.
    height: Integer
        The dimension (used as both height and width) of the cells to
        create, in units of *gdf*.
    cut: Boolean, default True
        Cut the grid to fit the shape of *gdf* (cells partially covering it
        will be truncated). If False, the returned grid will fit the
        bounding box of *gdf*.

    Returns
    -------
    grid: GeoDataFrame
        A collection of polygons.
    """
    from math import ceil
    from shapely.ops import unary_union

    # gdf.bounds columns are (minx, miny, maxx, maxy)
    xmin, ymin = [i.min() for i in gdf.bounds.T.values[:2]]
    xmax, ymax = [i.max() for i in gdf.bounds.T.values[2:]]
    rows = int(ceil((ymax - ymin) / height))
    cols = int(ceil((xmax - xmin) / height))

    x_left_origin = xmin
    x_right_origin = xmin + height
    y_top_origin = ymax
    y_bottom_origin = ymax - height

    # sweep column by column, top to bottom, collecting cell corner rings
    res_geoms = []
    for countcols in range(cols):
        y_top = y_top_origin
        y_bottom = y_bottom_origin
        for countrows in range(rows):
            res_geoms.append(
                ((x_left_origin, y_top), (x_right_origin, y_top),
                 (x_right_origin, y_bottom), (x_left_origin, y_bottom)))
            y_top = y_top - height
            y_bottom = y_bottom - height
        x_left_origin = x_left_origin + height
        x_right_origin = x_right_origin + height

    if cut:
        res = GeoDataFrame(
            geometry=pd.Series(res_geoms).apply(lambda x: Polygon(x)),
            crs=gdf.crs).intersection(unary_union(gdf.geometry).convex_hull)
        res = res[res.geometry.type == 'Polygon']
        res.index = [i for i in range(len(res))]
        return GeoDataFrame(geometry=res)
    else:
        return GeoDataFrame(
            index=[i for i in range(len(res_geoms))],
            geometry=pd.Series(res_geoms).apply(lambda x: Polygon(x)),
            crs=gdf.crs)
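# Usage sketch for make_grid above; the two input polygons are invented for
# illustration (make_grid assumes module-level pd, Polygon and GeoDataFrame,
# as in the snippets here).
import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import Polygon

shapes = GeoDataFrame(geometry=[
    Polygon([(0, 0), (4, 0), (4, 3), (0, 3)]),
    Polygon([(5, 5), (9, 5), (9, 9), (5, 9)]),
])
full_grid = make_grid(shapes, height=2, cut=False)   # grid over the bounding box
clipped_grid = make_grid(shapes, height=2, cut=True)  # clipped to the convex hull
print(len(full_grid), len(clipped_grid))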
def dfs(request):
    polys1 = GeoSeries(
        [Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
         Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
         Polygon([(6, 0), (9, 0), (9, 3), (6, 3)])])
    polys2 = GeoSeries(
        [Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
         Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
         Polygon([(7, 7), (10, 7), (10, 10), (7, 10)])])

    df1 = GeoDataFrame({'geometry': polys1, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': polys2, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = ['a', 'b', 'c']
        df2.index = ['d', 'e', 'f']

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(
        columns={'index': 'index_left'})
    part2 = df2.copy().iloc[[0, 1, 1, 2]].reset_index().rename(
        columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['intersects'] = exp.drop('_merge', axis=1).copy()

    part1 = df1.copy().reset_index().rename(
        columns={'index': 'index_left'})
    part2 = df2.copy().reset_index().rename(
        columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 3, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['contains'] = exp.drop('_merge', axis=1).copy()

    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [3, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['within'] = exp.drop('_merge', axis=1).copy()

    return [request.param, df1, df2, expected]
def dfs(request):
    polys1 = GeoSeries([
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)])
    ])
    polys2 = GeoSeries([
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)])
    ])

    df1 = GeoDataFrame({'geometry': polys1, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': polys2, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = ['a', 'b', 'c']
        df2.index = ['d', 'e', 'f']

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(columns={'index': 'index_left'})
    part2 = df2.copy().iloc[[0, 1, 1, 2]].reset_index().rename(
        columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['intersects'] = exp.drop('_merge', axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={'index': 'index_left'})
    part2 = df2.copy().reset_index().rename(columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 3, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['contains'] = exp.drop('_merge', axis=1).copy()

    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [3, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['within'] = exp.drop('_merge', axis=1).copy()

    return [request.param, df1, df2, expected]
def dfs(request):
    s1 = GeoSeries([Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
                    Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])])
    s2 = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                    Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = GeoDataFrame({'geometry': s1, 'col1': [1, 2]})
    df2 = GeoDataFrame({'geometry': s2, 'col2': [1, 2]})
    if request.param:
        df1.index = ['row1', 'row2']
    return df1, df2
def write_outputs(
    cfg: dict,
    bin_gdf: GeoDataFrame,
    eq_gdf: GeoDataFrame,
    write_index: bool = False,
) -> None:
    """
    Writes output GIS files and plots (i.e., maps or MFD plots). All of the
    options for what to write are specified in the `cfg`.

    :param cfg:
        Configuration for the evaluations, such as that parsed from the YAML
        config file.

    :param bin_gdf:
        :class:`GeoDataFrame` with the spatial bins for testing.

    :param eq_gdf:
        :class:`GeoDataFrame` with the observed earthquake catalog.

    :param write_index:
        Whether to write the index as a column in the output file.
    """
    logger.info("writing outputs")

    if "plots" in cfg["output"].keys():
        write_mfd_plots_to_gdf(bin_gdf, **cfg["output"]["plots"]["kwargs"])

    if "map_epsg" in cfg["config"]:
        bin_gdf = bin_gdf.to_crs(cfg["config"]["map_epsg"])

    if "bin_gdf" in cfg["output"].keys():
        outfile = cfg["output"]["bin_gdf"]["file"]
        out_format = outfile.split(".")[-1]
        bin_gdf["bin_index"] = bin_gdf.index
        bin_gdf.index = np.arange(len(bin_gdf))

        if out_format == "csv":
            write_bin_gdf_to_csv(outfile, bin_gdf)
        else:
            try:
                bin_gdf.drop("SpacemagBin", axis=1).to_file(
                    outfile,
                    driver=OUTPUT_FILE_MAP[out_format],
                    index=write_index,
                )
            except KeyError:
                raise Exception(f"No writer for {out_format} format")
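# Hedged sketch of the cfg layout that write_outputs reads, inferred from the
# key accesses in the function above; real configs may carry more options and
# the output file name here is hypothetical.
cfg = {
    "config": {"map_epsg": 4326},  # optional reprojection target
    "output": {
        "plots": {"kwargs": {}},              # forwarded to write_mfd_plots_to_gdf
        "bin_gdf": {"file": "bins.geojson"},  # extension selects the writer
    },
}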
def dissolve_un_m49_regions(
        self, world: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    codes_mapping = self._unm49_data[[
        'Region Code', 'Region Name', 'Sub-region Code', 'Sub-region Name',
        'Intermediate Region Code', 'Intermediate Region Name',
        'ISO-alpha3 Code', 'Country or Area', 'M49 Code'
    ]]
    world = world.merge(codes_mapping,
                        left_on='iso_a3',
                        right_on='ISO-alpha3 Code',
                        how='left')

    # dissolve countries into one geometry per region at each M49 level
    world_regions = []
    for level in ['Region', 'Sub-region', 'Intermediate Region']:
        code_label = f'{level} Code'
        name_label = f'{level} Name'
        regions_group = world.dissolve(by=code_label)[[
            'geometry', name_label
        ]].rename(columns={name_label: 'name'})
        regions_group.index.name = 'id'
        regions_group.index = regions_group.index.astype('int64')
        world_regions.append(regions_group)

    # Antarctica is not assigned to an M49 region, so append it separately
    antarctica = world[world['ISO-alpha3 Code'] == 'ATA'][[
        'geometry', 'M49 Code', 'Country or Area'
    ]]
    antarctica = antarctica.rename(columns={
        'M49 Code': 'id',
        'Country or Area': 'name'
    }).astype({
        'id': 'int64'
    }).set_index('id')
    world_regions.append(antarctica)

    world = pd.concat(world_regions)
    world['un_m49_numeric'] = world.index.astype('int64')
    world.index = world.index.astype('str')
    return world
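# Toy illustration of the dissolve(by=...) pattern above: rows sharing a key
# collapse into one row whose geometry is the union of the group's geometries
# (data invented for illustration).
import geopandas
from shapely.geometry import Polygon

toy = geopandas.GeoDataFrame({
    'Region Code': [2, 2],
    'Region Name': ['Africa', 'Africa'],
    'geometry': [Polygon([(0, 0), (1, 0), (1, 1)]),
                 Polygon([(2, 0), (3, 0), (3, 1)])],
})
print(toy.dissolve(by='Region Code')[['geometry', 'Region Name']])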
def make_grid(gdf, height, cut=True):
    """
    Return a grid, based on the shape of *gdf* and on a *height* value (in
    units of *gdf*). If cut=False, the grid will not be intersected with
    *gdf* (i.e. it makes a grid on the bounding box of *gdf*).

    Parameters
    ----------
    gdf: GeoDataFrame
        The collection of polygons to be covered by the grid.
    height: Integer
        The dimension (used as both height and width) of the cells to
        create, in units of *gdf*.
    cut: Boolean, default True
        Cut the grid to fit the shape of *gdf* (cells partially covering it
        will be truncated). If False, the returned grid will fit the
        bounding box of *gdf*.

    Returns
    -------
    grid: GeoDataFrame
        A collection of polygons.
    """
    from math import ceil
    from shapely.ops import unary_union

    xmin, ymin = [i.min() for i in gdf.bounds.T.values[:2]]
    xmax, ymax = [i.max() for i in gdf.bounds.T.values[2:]]
    rows = ceil((ymax - ymin) / height)
    cols = ceil((xmax - xmin) / height)

    x_left_origin = xmin
    x_right_origin = xmin + height
    y_top_origin = ymax
    y_bottom_origin = ymax - height

    res_geoms = []
    for countcols in range(cols):
        y_top = y_top_origin
        y_bottom = y_bottom_origin
        for countrows in range(rows):
            res_geoms.append((
                (x_left_origin, y_top), (x_right_origin, y_top),
                (x_right_origin, y_bottom), (x_left_origin, y_bottom)
            ))
            y_top = y_top - height
            y_bottom = y_bottom - height
        x_left_origin = x_left_origin + height
        x_right_origin = x_right_origin + height

    if cut:
        res = GeoDataFrame(
            geometry=pd.Series(res_geoms).apply(lambda x: Polygon(x)),
            crs=gdf.crs
        ).intersection(unary_union(gdf.geometry).convex_hull)
        res = res[res.geometry.type == 'Polygon']
        res.index = [i for i in range(len(res))]
        return GeoDataFrame(geometry=res)
    else:
        return GeoDataFrame(
            index=[i for i in range(len(res_geoms))],
            geometry=pd.Series(res_geoms).apply(lambda x: Polygon(x)),
            crs=gdf.crs
        )
# first, calculate a bounding box to restrict the diagram
min_x = min(stops_pts[:, 0]) - 5000
max_x = max(stops_pts[:, 0]) + 5000
min_y = min(stops_pts[:, 1]) - 5000
max_y = max(stops_pts[:, 1]) + 5000
bbox = np.array([[min_x, min_y], [max_x, max_y],
                 [min_x, max_y], [max_x, min_y]])

# find the voronoi
coords = np.vstack([stops_pts, bbox])
vor = Voronoi(coords)

# rearrange, so that regions are in the same order as their corresponding
# points, so the last four are the bbox dummy observations, and remove them
# (object dtype: the region vertex lists have varying lengths)
regions = np.array(vor.regions, dtype=object)[vor.point_region]
regions = regions[:-4]

# clip each region to the NYC boundary
clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)
    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

pickle.dump(stops, open('save/stops.p', 'wb'))
pickle.dump(nyc, open('save/nyc.p', 'wb'))
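# Self-contained sketch of the bounding-box trick above: appending four far
# dummy points puts every real point strictly inside the convex hull, so each
# real point gets a finite Voronoi region (coordinates invented).
import numpy as np
from scipy.spatial import Voronoi

pts = np.array([[0.0, 0.0], [1.0, 0.0], [0.5, 1.0]])
pad = 5.0
dummies = np.array([[-pad, -pad], [pad, pad], [-pad, pad], [pad, -pad]])
vor = Voronoi(np.vstack([pts, dummies]))
# ragged region lists need dtype=object before fancy indexing
regions = np.array(vor.regions, dtype=object)[vor.point_region][:-4]
print(all(-1 not in r for r in regions))  # True: all real regions are finite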
def test_write_index_to_file(tmpdir, df_points, driver, ext):
    fngen = FileNumber(tmpdir, "check", ext)

    def do_checks(df, index_is_used):
        # check combinations of index=None|True|False on GeoDataFrame/GeoSeries
        other_cols = list(df.columns)
        other_cols.remove("geometry")

        if driver == "ESRI Shapefile":
            # ESRI Shapefile will add FID if no other columns exist
            driver_col = ["FID"]
        else:
            driver_col = []

        if index_is_used:
            index_cols = list(df.index.names)
        else:
            index_cols = [None] * len(df.index.names)

        # replicate pandas' default index names for regular and MultiIndex
        if index_cols == [None]:
            index_cols = ["index"]
        elif len(index_cols) > 1 and not all(index_cols):
            for level, index_col in enumerate(index_cols):
                if index_col is None:
                    index_cols[level] = "level_" + str(level)

        # check GeoDataFrame with default index=None to autodetect
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=None)
        df_check = read_file(tempfilename)
        if len(other_cols) == 0:
            expected_cols = driver_col[:]
        else:
            expected_cols = []
        if index_is_used:
            expected_cols += index_cols
        expected_cols += other_cols + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # similar check on GeoSeries with index=None
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=None)
        df_check = read_file(tempfilename)
        if index_is_used:
            expected_cols = index_cols + ["geometry"]
        else:
            expected_cols = driver_col + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # check GeoDataFrame with index=True
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=True)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == index_cols + other_cols + ["geometry"]

        # similar check on GeoSeries with index=True
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=True)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == index_cols + ["geometry"]

        # check GeoDataFrame with index=False
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=False)
        df_check = read_file(tempfilename)
        if len(other_cols) == 0:
            expected_cols = driver_col + ["geometry"]
        else:
            expected_cols = other_cols + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # similar check on GeoSeries with index=False
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=False)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == driver_col + ["geometry"]

        return

    #
    # Checks where index is not used/saved
    #

    # index is a default RangeIndex
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    # index is a RangeIndex, starting from 1
    df.index += 1
    do_checks(df, index_is_used=False)

    # index is a Int64Index regular sequence from 1
    df_p.index = list(range(1, len(df) + 1))
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    # index was a default RangeIndex, but delete one row to make an Int64Index
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry).drop(5, axis=0)
    do_checks(df, index_is_used=False)

    # no other columns (except geometry)
    df = GeoDataFrame(geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    #
    # Checks where index is used/saved
    #

    # named index
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    df.index.name = "foo_index"
    do_checks(df, index_is_used=True)

    # named index, same as pandas' default name after .reset_index(drop=False)
df.index.name = "index" do_checks(df, index_is_used=True) # named MultiIndex df_p = df_points.copy() df_p["value3"] = df_p["value2"] - df_p["value1"] df_p.set_index(["value1", "value2"], inplace=True) df = GeoDataFrame(df_p, geometry=df_p.geometry) do_checks(df, index_is_used=True) # partially unnamed MultiIndex df.index.names = ["first", None] do_checks(df, index_is_used=True) # unnamed MultiIndex df.index.names = [None, None] do_checks(df, index_is_used=True) # unnamed Float64Index df_p = df_points.copy() df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry) df.index = df_p.index.astype(float) / 10 do_checks(df, index_is_used=True) # named Float64Index df.index.name = "centile" do_checks(df, index_is_used=True) # index as string df_p = df_points.copy() df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry) df.index = pd.TimedeltaIndex(range(len(df)), "days") # TODO: TimedeltaIndex is an invalid field type df.index = df.index.astype(str) do_checks(df, index_is_used=True) # unnamed DatetimeIndex df_p = df_points.copy() df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry) df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex( ["1999-12-27"] * len(df)) if driver == "ESRI Shapefile": # Shapefile driver does not support datetime fields df.index = df.index.astype(str) do_checks(df, index_is_used=True) # named DatetimeIndex df.index.name = "datetime" do_checks(df, index_is_used=True)
# project the tract vertices from the source projection (p1) to the target (p2)
p1_points = np.array(subshape)
p2_points = transform(p1, p2, p1_points[:, 0], p1_points[:, 1])
p2_points = np.array(p2_points).T

# create polygon
tract_polygons.append(Polygon(p2_points))
geoids.append(shape['properties']['GEOID'])

tracts = GeoDataFrame(index=range(len(tract_polygons)))

# initialize data
tracts['region'] = tract_polygons
tracts['geoid'] = geoids
tracts['population'] = np.tile(np.nan, len(tracts))
tracts['area'] = np.tile(np.nan, len(tracts))
tracts.index = range(len(tracts))

#
# trim tracts to nyc
#

# read in nyc boundary
nyc = nyc_boundary()

areas = []
print('Trimming tracts...')
for i in range(len(tracts)):
    if i % 100 == 0:
        print(i)

    # trim to nyc boundaries, no water (nybb_13a)
    tract = tracts.iloc[i]
    region = tract['region']
    if nyc.intersects(region):
        tracts.loc[i, 'region'] = nyc.intersection(region)
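# Hedged sketch of the same reprojection with the modern pyproj API (the
# legacy pyproj.transform(p1, p2, ...) call above is deprecated); the EPSG
# codes and sample coordinates are assumptions for illustration.
from pyproj import Transformer

transformer = Transformer.from_crs("EPSG:4326", "EPSG:2263", always_xy=True)
xs, ys = transformer.transform([-74.0, -73.9], [40.7, 40.8])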
min_x = min(stops_pts[:, 0]) - 5000
max_x = max(stops_pts[:, 0]) + 5000
min_y = min(stops_pts[:, 1]) - 5000
max_y = max(stops_pts[:, 1]) + 5000
bbox = np.array([[min_x, min_y], [max_x, max_y],
                 [min_x, max_y], [max_x, min_y]])

# find the voronoi
coords = np.vstack([stops_pts, bbox])
vor = Voronoi(coords)

# rearrange, so that regions are in the same order as their corresponding
# points, so the last four are the bbox dummy observations, and remove them
# (object dtype: the region vertex lists have varying lengths)
regions = np.array(vor.regions, dtype=object)[vor.point_region]
regions = regions[:-4]

clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)
    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

pickle.dump(stops, open('save/stops.p', 'wb'))
pickle.dump(nyc, open('save/nyc.p', 'wb'))
# rearrange, so that regions are in the same order as their corresponding
# points, so the last four are the bbox dummy observations, and remove them
# (object dtype: the region vertex lists have varying lengths)
regions = np.array(vor.regions, dtype=object)[vor.point_region]
regions = regions[:-4]

clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)
    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

# add clipped regions to dataframe
clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

# calculate area of each region (and its log); .loc replaces the removed .ix
stops['v_area'] = GeoSeries(index=np.arange(stops.shape[0]))
stops['v_larea'] = GeoSeries(index=np.arange(stops.shape[0]))
for i in np.arange(stops.shape[0]):
    stops.loc[i, 'v_area'] = stops.loc[i, 'region'].area
    stops.loc[i, 'v_larea'] = np.log(stops.loc[i, 'v_area'])
print("OK")

print("collapsing transfers...")

#
# 2.2 collapse all transfers into single stops
#
transfers = read_csv('data/indata/google_transit/transfers.txt')
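# A vectorized alternative to the per-row area loop above (a sketch, assuming
# stops['region'] holds shapely geometries): GeoSeries exposes .area directly.
stops['v_area'] = GeoSeries(stops['region']).area
stops['v_larea'] = np.log(stops['v_area'])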