def convert_GeoPandas_to_Bokeh_format(gdf: gpd.GeoDataFrame) -> ColumnDataSource:
    """
    Build a Bokeh ColumnDataSource from a GeoPandas GeoDataFrame.

    The 'geometry' column is left out of the result; in its place the
    columns 'x' and 'y' are filled row by row by ``getGeometryCoords``
    (always called with ``shape_type='polygon'``).

    :param: (GeoDataFrame) gdf: GeoPandas GeoDataFrame with polygon(s) under
        the column name 'geometry'.
    :return: ColumnDataSource for Bokeh.
    """
    attributes = gdf.drop(columns='geometry').copy()
    # The coordinates are read from the original frame, which still holds
    # the geometry column.
    for coord_type in ('x', 'y'):
        attributes[coord_type] = gdf.apply(
            getGeometryCoords,
            geom='geometry',
            coord_type=coord_type,
            shape_type='polygon',
            axis=1,
        )
    return ColumnDataSource(attributes)
def test_apply_geodataframe(self):
    """Identity ``apply`` along either axis must keep the frame's CRS."""
    frame = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
    assert frame.crs == 27700

    # apply preserves the CRS if the result is a GeoDataFrame
    column_wise = frame.apply(lambda col: col, axis=0)
    assert column_wise.crs == 27700

    row_wise = frame.apply(lambda row: row, axis=1)
    assert row_wise.crs == 27700
def extract_gdf_roads_from_key_pano(self, panos: gpd.GeoDataFrame):
    """Extract one road record per key pano and return them indexed by road ID.

    For every row of ``panos`` the first entry of its ``Roads`` list whose
    ``IsCurrent`` equals 1 is taken. Its panos are ordered by ``Order``; the
    first/last ``PID`` become ``src``/``dst`` and the pano coordinates
    (converted with ``bd_mc_to_wgs``) become a ``LineString`` geometry.

    Args:
        panos: Key pano records as a GeoDataFrame. A ``dict`` is also
            accepted and is converted with ``gpd.GeoDataFrame(panos).T``.

    Returns:
        The expanded road records, de-duplicated on ``ID``/``src``/``dst``
        and indexed by ``ID``.

    Raises:
        AssertionError: If ``panos`` is neither a dict nor a GeoDataFrame.
    """
    def _extract_helper(_roads):
        """Return the first current road as an enriched copy, else None."""
        for road in _roads:
            if road['IsCurrent'] != 1:
                continue

            # Work on a deep copy so the edits below never touch the road
            # dict stored in the caller's data.
            r = deepcopy(road)
            # sorted() returns a new list; the result must be stored,
            # otherwise src/dst/geometry are derived from unsorted panos.
            r['Panos'] = sorted(r['Panos'], key=lambda x: x['Order'])
            r['src'] = r['Panos'][0]['PID']
            r['dst'] = r['Panos'][-1]['PID']
            coords = [bd_mc_to_wgs(p['X'], p['Y']) for p in r['Panos']]
            if len(coords) == 1:
                # A LineString needs two vertices; repeat the single point.
                coords = coords * 2
            r['geometry'] = LineString(coords)

            return r

        return None

    if isinstance(panos, dict):
        panos = gpd.GeoDataFrame(panos).T
    assert isinstance(panos, gpd.GeoDataFrame), "Check Input"

    gdf_roads = panos.apply(
        lambda x: _extract_helper(x.Roads), axis=1, result_type='expand'
    ).drop_duplicates(['ID', 'src', 'dst'])
    gdf_roads.set_index("ID", inplace=True)

    return gdf_roads
def spatial_overlays(df1, df2, how='intersection'):
    '''Compute an overlay of two GeoDataFrames df1 and df2.

    Candidate pairs are found through the spatial index of ``df2`` queried
    with the bounding box of every ``df1`` geometry.

    :param df1: left GeoDataFrame (a copy is used; the input is not modified).
    :param df2: right GeoDataFrame (a copy is used; the input is not modified).
    :param how: ``'intersection'`` returns one row per intersecting pair with
        the columns ``idx1``/``idx2``, the attributes of both inputs and the
        intersection as ``geometry``; ``'difference'`` returns the rows of
        ``df1`` with every candidate ``df2`` geometry subtracted. Rows whose
        resulting geometry is empty are dropped in both modes.
    :return: the overlay as a GeoDataFrame.
    :raises ValueError: if ``how`` is not one of the two supported modes
        (previously such a call silently returned ``None``).
    '''
    if how not in ('intersection', 'difference'):
        raise ValueError(
            "how must be 'intersection' or 'difference', got {!r}".format(how))

    df1 = df1.copy()
    df2 = df2.copy()

    # Spatial index look-up shared by both modes: for every df1 row, the
    # positions of the df2 geometries whose bounding boxes overlap.
    spatial_index = df2.sindex
    df1['bbox'] = df1.geometry.apply(lambda geom: geom.bounds)
    df1['histreg'] = df1.bbox.apply(
        lambda bounds: list(spatial_index.intersection(bounds)))

    if how == 'intersection':
        nei = [[i, k]
               for i, hits in df1['histreg'].to_dict().items()
               for k in hits]

        # No CRS here: this frame has no geometry column yet, and newer
        # GeoPandas versions refuse a CRS without one.
        pairs = GeoDataFrame(nei, columns=['idx1', 'idx2'])
        pairs = pairs.merge(df1, left_on='idx1', right_index=True)
        # The spatial index yields positions, not index labels, so df2 is
        # aligned on a positional index before merging.
        pairs = pairs.merge(df2.reset_index(drop=True), left_on='idx2',
                            right_index=True, suffixes=('_1', '_2'))
        pairs['Intersection'] = [
            geom_1.intersection(geom_2).buffer(0)
            for geom_1, geom_2 in zip(pairs['geometry_1'], pairs['geometry_2'])
        ]

        helper_columns = {'geometry_1', 'geometry_2', 'histreg', 'bbox',
                          'Intersection'}
        cols = [col for col in pairs.columns if col not in helper_columns]
        dfinter = pairs[cols + ['Intersection']].copy()
        dfinter.rename(columns={'Intersection': 'geometry'}, inplace=True)
        dfinter = GeoDataFrame(dfinter, geometry='geometry', crs=df1.crs)
        dfinter = dfinter.loc[~dfinter.geometry.is_empty]
        return dfinter

    # how == 'difference'
    df1['new_g'] = df1.apply(
        lambda row: reduce(
            lambda acc, other: acc.difference(other).buffer(0),
            [row.geometry] + list(df2.iloc[row.histreg].geometry)),
        axis=1)
    df1.geometry = df1.new_g
    df1 = df1.loc[~df1.geometry.is_empty].copy()
    df1.drop(['bbox', 'histreg', 'new_g'], axis=1, inplace=True)
    return df1
def explode_multipolygons_to_polygons(
        polygon_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Split every MultiPolygon row into one row per member polygon.

    Rows with any other geometry type are copied unchanged. All attribute
    values of a MultiPolygon row are repeated on each of its parts.

    :param polygon_gdf: input frame with a 'geometry' column.
    :return: a new GeoDataFrame built from the exploded rows; its CRS is
        always set to EPSG:3879, whatever the CRS of the input.
    """
    exploded_rows = []

    # Explicit iteration instead of a side-effecting ``apply``: ``apply`` is
    # meant to compute a result, and older pandas versions may call the
    # function twice on the first row, which would duplicate it here.
    for _, row in polygon_gdf.iterrows():
        geometry = row['geometry']
        if geometry.geom_type == 'MultiPolygon':
            for part in geometry.geoms:
                new_row = row.to_dict()
                new_row['geometry'] = part
                exploded_rows.append(new_row)
        else:
            exploded_rows.append(row.to_dict())

    gdf = gpd.GeoDataFrame(exploded_rows, crs=CRS.from_epsg(3879))

    if len(polygon_gdf) != len(gdf):
        log.debug(
            f'Exploaded {len(gdf)} polygons from {len(polygon_gdf)} multipolygons'
        )

    return gdf
def long_lat_to_utm(points: Union[list, np.ndarray], graph=None) -> np.ndarray:
    """
    Project a collection of long-lat points with ``ox.projection.project_gdf``.

    :param points: points to be projected, shape = (N, 2)
    :param graph: optional graph; when given, ``str(graph.graph['crs'])`` is
        passed as the target CRS, otherwise ``to_crs=None`` is passed to the
        projection helper
    :return: array of projected (x, y) points, passed through ``np.squeeze``
    """
    coords = np.atleast_2d(points)

    frame = GeoDataFrame({
        'index': np.arange(len(coords)),
        'x': coords[:, 0],
        'y': coords[:, 1],
    })
    frame['geometry'] = frame.apply(lambda rec: Point(rec['x'], rec['y']), axis=1)
    frame.crs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'  # long lat crs

    target_crs = None if graph is None else str(graph.graph['crs'])
    projected = ox.projection.project_gdf(frame, to_crs=target_crs)

    # Overwrite the long/lat columns with the projected coordinates.
    projected['x'] = [pt.x for pt in projected['geometry']]
    projected['y'] = [pt.y for pt in projected['geometry']]

    return np.squeeze(projected[['x', 'y']].to_numpy())
def pretty_plot(gg: GeoDataFrame, islands: GeoDataFrame, poly_viewsheds: GeoDataFrame, save_figure_to: str, proj=PROJECTION):
    """Plot islands, viewsheds, the non-trivial geometries of ``gg`` and the
    paths between mutually visible pairs, then save the figure.

    :param gg: geometries applied element-wise below, so presumably a
        GeoSeries despite the annotation — TODO confirm. Its index must
        provide the levels ``idx_a`` and ``idx_b`` (they are read after
        ``reset_index``).
    :param islands: polygons drawn as the base layer; its CRS is reused for
        the centroids.
    :param poly_viewsheds: polygons drawn in light gray on top of the islands.
    :param save_figure_to: path handed to ``plt.savefig``.
    :param proj: geoplot projection used by every layer.
    """
    # Keep only geometries that are non-empty and have a non-negligible area.
    x = gg[gg.apply(lambda x: not x.is_empty and x.area > 1e-9)]
    xa = GeoDataFrame(x.centroid, geometry=0, crs=islands.crs)
    xa.columns = ['geometry']
    xa_tmp = xa.reset_index()
    # Key every centroid by its (a, b) pair and by the reversed pair, so a
    # self-join can line up a->b with b->a.
    xa_tmp['idx'] = xa_tmp.apply(lambda y: (y.idx_a, y.idx_b), axis=1)
    xa_tmp['idx_other'] = xa_tmp.apply(lambda y: (y.idx_b, y.idx_a), axis=1)
    xa_tmp = xa_tmp.set_index('idx')
    paths = xa_tmp.join(xa_tmp, on='idx_other', lsuffix='_ab', rsuffix='_ba')
    # Drop pairs without a counterpart. ``notna`` catches every missing-value
    # marker (NaN or None); the former ``is not np.nan`` identity test only
    # matched the exact ``np.nan`` object.
    paths = paths[paths['geometry_ab'].notna() & paths['geometry_ba'].notna()]

    ax = gplt.polyplot(
        islands,
        projection=proj,
        figsize=(20, 20),
        color='darkgray'
    )
    gplt.polyplot(
        poly_viewsheds,
        projection=proj,
        ax=ax,
        linewidth=0,
        facecolor='lightgray',
        alpha=0.3
    )
    gplt.polyplot(
        x,
        projection=proj,
        ax=ax,
        linewidth=0,
        facecolor='red',
        alpha=0.3
    )
    gplt.sankey(
        paths,
        start='geometry_ab',
        end='geometry_ba',
        ax=ax,
        projection=proj,
        alpha=0.05,
        rasterized=False
    )
    plt.savefig(save_figure_to)
def _add_nuts_regions_to_df(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Add NUTS region objects to the stations dataframe

    :param gdf: The stations geopandas dataframe created from _get_station_df()
    :return: The result of applying the NUTS look-up to every row: the
        original columns plus one ``NUTS_<level>`` entry per NUTS region that
        contains the row's geometry
    """
    # Local import so this block does not depend on the file-level imports.
    import pandas as pd

    # get a NUTS geoframe for local speed
    nuts_gdf = NutsRegions.get_nuts_geoframe(nuts_version=CURRENT_NUTS_VERSION)

    def add_nuts(s: pd.Series) -> pd.Series:
        """Append the db records of all regions containing ``s.geometry``."""
        rv = nuts_gdf[nuts_gdf.contains(s.geometry)][['LEVL_CODE', 'db_record']]
        # Non in-place calls: ``rv`` is a filtered slice of ``nuts_gdf``.
        rv = rv.set_index('LEVL_CODE', drop=True).rename(
            index={0: 'NUTS_0', 1: 'NUTS_1', 2: 'NUTS_2', 3: 'NUTS_3'})
        # ``Series.append`` was removed in pandas 2.0; ``concat`` is the
        # documented replacement and gives the same result.
        return pd.concat([s, rv.T.iloc[0]])

    # make sure the nuts_df and the stations_df have the same crs
    gdf = gdf.to_crs(nuts_gdf.crs)

    # apply the nuts columns based on location
    return gdf.apply(add_nuts, axis=1)
def extract_gdf_panos_from_key_pano(self, panos: gpd.GeoDataFrame, update_dir=True, sim_thred=.15):
    """Extract the individual panos referenced by the key pano information.

    Args:
        panos (GeoDataFrame | dict): Key pano records; each must provide a
            ``Roads`` list. A dict is converted with
            ``gpd.GeoDataFrame(panos).T``.
        update_dir (bool, optional): Whether to derive ``MoveDir`` from the
            key panos and flip roads that run against it. Defaults to True.
        sim_thred (float, optional): Roads whose ``dir_sim`` is below this
            value are treated as reversed. Defaults to .15.

    Returns:
        GeoDataFrame: One row per pano, indexed by ``PID``, with point
        geometries in EPSG:4326, sorted by ``RID`` and ``Order``.

    Raises:
        AssertionError: If ``panos`` is neither a dict nor a GeoDataFrame.
    """
    def _cal_dir_diff(x):
        # Rotate a heading by 180 degrees, wrapped into [0, 360).
        return ((x + 360) - 180) % 360

    def _extract_pano_helper(item):
        """Return the panos of the first current road, tagged with its ID."""
        for r in item["Roads"]:
            if r['IsCurrent'] != 1:
                continue

            # sorted() returns a new list, so its result must be kept.
            ordered = sorted(r['Panos'], key=lambda x: x['Order'])
            for pano in ordered:
                pano['RID'] = r['ID']

            return ordered

        return None

    def _update_key_pano_move_dir(df, gdf_key_panos):
        # TODO: clarify the reason
        """Update the moving direction of panos, for the heading of the last
        point in each segment is usually zero.

        Args:
            df (GeoDataFrame): The original panos dataframe (modified in place)
            gdf_key_panos (GeoDataFrame): The key panos dataframe

        Returns:
            [GeoDataFrame]: The panos dataframe after changing the moving direction
        """
        count_dict = df.RID.value_counts().to_dict()

        # the first pano in the road
        con = df.Order == 0
        # TODO: additionally crawl the end points
        df.loc[con, 'MoveDir'] = df[con].apply(
            lambda x: gdf_key_panos.loc[x.name].MoveDir if x.name in gdf_key_panos.index else 0, axis=1)
        con1 = con & df.apply(lambda x: count_dict[x.RID] > 1, axis=1)
        df.loc[con1, 'dir_sim'] = df[con1].apply(
            lambda x: math.cos(azimuth_diff(x.MoveDir, x.DIR)) + 1, axis=1)

        # the last pano in the road
        df.sort_values(["RID", 'Order'], ascending=[True, False], inplace=True)
        idx = df.groupby("RID").head(1).index
        df.loc[idx, 'MoveDir'] = df.loc[idx].apply(
            lambda x: gdf_key_panos.loc[x.name]['MoveDir'] if x.name in gdf_key_panos.index else -1, axis=1)

        return df

    def _update_neg_dir_move_dir(df, sim_thred=.15):
        """Add a check: if a road runs in the opposite direction, reverse its
        pano order.

        Args:
            df (GeoDataFrame): The panos dataframe (modified in place)
            sim_thred (float): ``dir_sim`` values below this mark a road as
                reversed

        Returns:
            [GeoDataFrame]: The same dataframe with ``revert``, ``Order`` and
            ``MoveDir`` updated
        """
        df.loc[:, 'revert'] = False

        rids = df[df.dir_sim < sim_thred].RID.values
        # update Order
        idxs = df.query("RID in @rids").index
        df.loc[idxs, 'revert'] = True

        _max_idx_dict = df.loc[idxs].groupby('RID').Order.max()
        df.loc[idxs, 'Order'] = df.loc[idxs].apply(
            lambda x: _max_idx_dict.loc[x.RID] - x.Order, axis=1)

        # update MoveDir
        idxs = df.query("RID in @rids and not MoveDir>=0").index
        df.loc[idxs, 'MoveDir'] = df.loc[idxs, 'DIR'].apply(_cal_dir_diff)

        # # update MoveDir for the positive dir
        # rids = gdf_panos[gdf_panos.dir_sim > 2 - sim_thred].RID.values
        # idxs = gdf_panos.query(f"RID in @rids and not MoveDir>=0").index
        # gdf_panos.loc[idxs, 'MoveDir'] = gdf_panos.loc[idxs, 'DIR']

        return df

    if isinstance(panos, dict):
        panos = gpd.GeoDataFrame(panos).T
    assert isinstance(panos, gpd.GeoDataFrame), "Check Input"

    df = pd.DataFrame.from_records(
        np.concatenate(
            panos.apply(lambda x: _extract_pano_helper(x), axis=1).values)).drop_duplicates()
    df = gpd.GeoDataFrame(
        df,
        geometry=df.apply(lambda x: Point(*bd_mc_to_wgs(x['X'], x['Y'])), axis=1),
        crs='EPSG:4326')
    df.set_index("PID", inplace=True)

    if update_dir:
        _update_key_pano_move_dir(df, panos)
        _update_neg_dir_move_dir(df, sim_thred)

    # NOTE(review): the flattened source does not show whether the three
    # statements below belong inside ``if update_dir`` — confirm against the
    # original file. Both readings agree for the default ``update_dir=True``.
    con = df.MoveDir.isna()
    df.loc[con, 'MoveDir'] = df.loc[con, 'DIR']
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    df.MoveDir = df.MoveDir.astype(int)
    df.sort_values(["RID", 'Order'], ascending=[True, True], inplace=True)

    return df
def cluster_shapes(self):
    """
    Build stop-to-stop shape segments from the GTFS database, map their end
    stops to stop clusters, split the segments by nearby stops, merge
    segments that share the same child stops, match them and plot the result.

    The method prints the final frame and hands it to ``self.plot_geopandas``;
    it has no return statement.

    :return: None
    """
    # Planned pipeline:
    # get unique stop-to-stop shapes, with trips aggregated
    # split by nearby stops
    # match splitted, and aggregate trips
    # identify branches: large overlap but crosses buffer, insert pseudo stop at branch
    # split everything again
    # match splitted

    # this query returns shapes for of the maximum trips, both directions
    # CTE a: one trip per (route, direction) among trips with
    #        7*3600 <= start_time_ds < 8*3600 (presumably 07:00-08:00 in
    #        seconds since start of day — TODO confirm);
    # CTE b: consecutive stop pairs (seq, seq+1) of those trips;
    # CTE c: the shape points between the two stops' shape breaks, as
    #        comma-joined lat/lon strings;
    # final SELECT: one row per stop pair having more than one coordinate.
    df = self.gtfs.execute_custom_query_pandas("""WITH a AS ( SELECT routes.name AS name, shape_id, route_I, trip_I, routes.type, direction_id, max(end_time_ds-start_time_ds) AS trip_duration, count(*) AS n_trips FROM trips LEFT JOIN routes USING(route_I) WHERE start_time_ds >= 7*3600 AND start_time_ds < 8*3600 GROUP BY routes.route_I, direction_id ), b AS( SELECT q1.trip_I AS trip_I, q1.stop_I AS from_stop_I, q2.stop_I AS to_stop_I, q1.seq AS seq, q1.shape_break AS from_shape_break, q2.shape_break AS to_shape_break FROM (SELECT stop_I, trip_I, shape_break, seq FROM stop_times) q1, (SELECT stop_I, trip_I, shape_break, seq AS seq FROM stop_times) q2 WHERE q1.seq=q2.seq-1 AND q1.trip_I=q2.trip_I AND q1.trip_I IN (SELECT trip_I FROM a) ), c AS( SELECT b.*, name, direction_id, route_I, a.shape_id, group_concat(lat) AS lats, group_concat(lon) AS lons, count(*) AS n_coords FROM b, a, shapes WHERE b.trip_I = a.trip_I AND shapes.shape_id=a.shape_id AND b.from_shape_break <= shapes.seq AND b.to_shape_break >= shapes.seq GROUP BY route_I, direction_id, b.seq ORDER BY route_I, b.seq ) SELECT from_stop_I, to_stop_I, group_concat(trip_I) AS trip_ids, group_concat(direction_id) AS direction_ids, lats, lons FROM c WHERE n_coords > 1 GROUP BY from_stop_I, to_stop_I ORDER BY count(*) DESC""")
    # Turn the comma-joined lon/lat strings into one LineString per row
    # (x = lon, y = lat).
    df["geometry"] = df.apply(lambda row: shapely.LineString([
        (float(lon), float(lat)) for lon, lat in zip(row["lons"].split(","), row["lats"].split(","))
    ]), axis=1)

    gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry=df["geometry"])
    #gdf = gdf.to_crs(self.crs_eurefin)
    # The frame was created with self.crs_wgs, so this targets the CRS it
    # already has (the projected alternative is commented out above).
    gdf = gdf.to_crs(self.crs_wgs)

    gdf = gdf.drop(["lats", "lons"], axis=1)

    stops_set = set(gdf["from_stop_I"]) | set(gdf["to_stop_I"])
    # Remember the original (from, to) stop ids before they are replaced by
    # cluster ids below.
    gdf["orig_parent_stops"] = list(
        zip(gdf['from_stop_I'], gdf['to_stop_I']))
    clustered_stops = self.cluster_stops(stops_set)

    # stop_I -> [new_stop_I, geometry]
    cluster_dict = clustered_stops[[
        "new_stop_I", "stop_I", "geometry"
    ]].set_index('stop_I').T.to_dict('list')
    # new_stop_I -> [geometry]; not read again in this method
    geom_dict = clustered_stops[[
        "new_stop_I", "geometry"
    ]].set_index("new_stop_I").T.to_dict('list')
    gdf["to_stop_I"] = gdf.apply(
        lambda row: cluster_dict[row["to_stop_I"]][0], axis=1)
    gdf["from_stop_I"] = gdf.apply(
        lambda row: cluster_dict[row["from_stop_I"]][0], axis=1)
    # to/from_stop_I: cluster id
    # orig_parent_stops: old id
    # child_stop_I: cluster id
    splitted_gdf = self.split_shapes_by_nearby_stops(clustered_stops, gdf)
    # Serialise each row's child stops to a comma-joined string so it can be
    # used as a group-by key.
    splitted_gdf['child_stop_I'] = splitted_gdf.apply(
        lambda row: ",".join([str(int(x)) for x in row.child_stop_I]), axis=1)
    splitted_gdf_grouped = splitted_gdf.groupby(['child_stop_I'])
    # Per group: collect all original stop pairs, keep the first geometry.
    # NOTE(review): ``axis=1`` is passed as an extra keyword to ``agg`` —
    # confirm it is accepted by the pandas version in use.
    splitted_gdf_grouped = splitted_gdf_grouped.agg(
        {
            'orig_parent_stops': lambda x: tuple(x),
            'geometry': lambda x: x.iloc[0]
        }, axis=1)

    splitted_gdf = splitted_gdf_grouped.reset_index()

    # Constant column, 1 for every row.
    splitted_gdf['value'] = splitted_gdf.apply(lambda row: 1, axis=1)
    #splitted_gdf = splitted_gdf.set_geometry(splitted_gdf["geometry"], crs=self.crs_eurefin)

    splitted_gdf = self.match_shapes(splitted_gdf)
    # Random integer in [1, 10) per row.
    splitted_gdf["rand"] = np.random.randint(1, 10, splitted_gdf.shape[0])
    print(splitted_gdf)
    self.plot_geopandas(splitted_gdf, alpha=0.3)
"""
# Question 3.2
# The "Pediacities NYC Neighborhoods" dataset needs to be downloaded first.
Dataset link: http://catalog.opendata.city/dataset/pediacities-nyc-neighborhoods
Airports to consider:
    'John F. Kennedy International Airport'
    'LaGuardia Airport'
"""
NYC_geofile = 'NYCNeighborhoods.geojson'
NYCneighborhoods = GeoDataFrame.from_file(NYC_geofile)
airport = GeoDataFrame(NYCneighborhoods['geometry'][(NYCneighborhoods.neighborhood == 'John F. Kennedy International Airport') |
                                                    (NYCneighborhoods.neighborhood == 'LaGuardia Airport')])
# Merge the airport polygons once; both queries below reuse the result.
airport_area = airport['geometry'].unary_union

df = GeoDataFrame(df)

# Trips originate/pick up at NYC area airports
# (points are built from the coordinate columns directly instead of a
# row-wise apply, which is much slower on large trip tables)
df['geometry'] = [Point(lon, lat)
                  for lon, lat in zip(df['Pickup_longitude'], df['Pickup_latitude'])]
pickup = df['geometry'].intersects(airport_area)
df_pickup_at_airport = df[pickup]
print('Number of trips originate at NYC area airports is {0:d}'.format(len(df_pickup_at_airport)))
print('Average fair of trips originate at NYC area airports is {0:5.2f}'.format(df_pickup_at_airport['Fare_amount'].mean()))

# Trips terminate/drop off at NYC area airports
df['geometry'] = [Point(lon, lat)
                  for lon, lat in zip(df['Dropoff_longitude'], df['Dropoff_latitude'])]
dropoff = df['geometry'].intersects(airport_area)
df_dropoff_at_airport = df[dropoff]
print('Number of trips terminate at NYC area airports is {0:d}'.format(len(df_dropoff_at_airport)))
print('Average fair of trips terminate at NYC area airports is {0:5.2f}'.format(df_dropoff_at_airport['Fare_amount'].mean()))

# Some other interesting characteristics:
print('Average tip of trips originate at NYC area airports is {0:5.2f}'.format(df_pickup_at_airport['Tip_amount'].mean()))
#### Plotting

### Extract x and y data for plotting

print('Creating the plot')

# Zones: explode to single polygons, attach coordinate columns, then keep an
# attribute-only copy.
zones1 = multipoly_to_poly(view_zones)
for _coord in ('x', 'y'):
    zones1[_coord] = zones1.apply(getPolyCoords, coord_type=_coord, axis=1)
zones2 = zones1.drop('geometry', axis=1)

# Whole region: the union of all zones as a single 'Canterbury' row.
cant1 = GeoDataFrame(['Canterbury'], geometry=[zones1.unary_union])
cant1.columns = ['site', 'geometry']
for _coord in ('x', 'y'):
    cant1[_coord] = cant1.apply(getPolyCoords, coord_type=_coord, axis=1)
cant2 = cant1.drop('geometry', axis=1)

## Catchments
catch1 = multipoly_to_poly(site_catch2)
for _coord in ('x', 'y'):
    catch1[_coord] = catch1.apply(getPolyCoords, coord_type=_coord, axis=1)
catch2 = catch1.drop('geometry', axis=1)

### Combine with time series data
data1 = merge(cat1.unstack('time').reset_index(), zones2, on=['zone'])
time_index = hy_summ2.time.unique().tolist()
# 'cat' holds the values of the last entry of time_index.
data1['cat'] = data1[time_index[-1]]
def map_cluster_diff(clusters_a,
                     clusters_b,
                     intersection_color='#00ff00',
                     diff_ab_color='#0000ff',
                     diff_ba_color='#ff0000',
                     tiles='OpenStreetMap',
                     width='100%',
                     height='100%'):
    """Returns the styled differences between two sets of clusters.

    Every cluster of A is paired with the cluster of B it overlaps most.
    For each pair three polygons are produced: A & B, A - B and B - A.
    Clusters of A without any overlap are reported as "A - B"; clusters of
    B that overlap no cluster of A as "B - A". Polygons with zero area are
    dropped.

    Args:
        clusters_a (GeoDataFrame): The first set of clusters; needs the
            columns `geometry` and `cluster_id`.
        clusters_b (GeoDataFrame): The second set of clusters; same columns.
        intersection_color (color code): The color to use for A & B.
        diff_ab_color (color code): The color to use for A - B.
        diff_ba_color (color code): The color to use for B - A.
        tiles (string): Not used by this function; kept for signature
            compatibility.
        width (integer or percentage): Not used by this function.
        height (integer or percentage): Not used by this function.

    Returns:
        A GeoDataFrame in EPSG:4326 with the columns `geometry`, `diff`
        (a label such as ``A(1) & B(7)``) and `style` (a dict with
        `fillColor`, `weight`, `color` and `fillOpacity`). Rows are ordered
        A - B, then B - A, then intersections. No map object is created
        here.
    """
    # Reproject unconditionally. The former ``crs['init'] != '4326'`` test
    # was always true for init-style dicts (whose value is 'epsg:4326') and
    # fails on pyproj CRS objects, which are not subscriptable.
    clusters_a = clusters_a.to_crs(epsg=4326)
    clusters_b = clusters_b.to_crs(epsg=4326)

    spatial_index_b = clusters_b.sindex

    intersection_polygons = []
    intersection_polygons_attr = []
    diff_ab_polygs = []
    diff_ab_polygs_attr = []
    diff_ba_polygs = []
    diff_ba_polygs_attr = []
    new_polygs = []
    new_polygs_attr = []

    for _, row_a in clusters_a.iterrows():
        poly = row_a['geometry']
        cid = row_a['cluster_id']

        candidates = clusters_b.iloc[list(
            spatial_index_b.intersection(poly.bounds))]

        # Best match = candidate with the largest intersection area. ``None``
        # marks "no overlap", so a real cluster id of -1 is handled correctly.
        max_area = 0.0
        best_match = None
        for _, row_b in candidates.iterrows():
            intersect_polyg = poly.intersection(row_b['geometry'])
            area = intersect_polyg.area
            if area > max_area:
                max_area = area
                best_match = (row_b['cluster_id'], row_b['geometry'],
                              intersect_polyg)

        if best_match is None:
            new_polygs.append(poly)
            new_polygs_attr.append('A (' + str(cid) + ')')
            continue

        cid_b, poly_b, intersect_polyg = best_match
        intersection_polygons.append(intersect_polyg)
        diff_ab_polygs.append(poly.difference(poly_b))
        diff_ba_polygs.append(poly_b.difference(poly))

        intersection_polygons_attr.append('A(' + str(cid) + ') & B(' +
                                          str(cid_b) + ')')
        diff_ab_polygs_attr.append('A(' + str(cid) + ') - B(' +
                                   str(cid_b) + ')')
        diff_ba_polygs_attr.append('B(' + str(cid_b) + ') - A(' +
                                   str(cid) + ')')

    # Clusters of B that do not overlap any cluster of A.
    spatial_index_a = clusters_a.sindex
    old_polys = []
    old_poly_attr = []
    for _, row_b in clusters_b.iterrows():
        poly = row_b['geometry']
        cid = row_b['cluster_id']

        candidates = clusters_a.iloc[list(
            spatial_index_a.intersection(poly.bounds))]
        overlaps_a = any(
            poly.intersection(other).area > 0.0
            for other in candidates['geometry'])
        if not overlaps_a:
            old_polys.append(poly)
            old_poly_attr.append('B (' + str(cid) + ')')

    intersection_gdf = GeoDataFrame(list(
        zip(intersection_polygons, intersection_polygons_attr)),
                                    columns=['geometry', 'diff'],
                                    crs=clusters_a.crs)
    diff_ab_gdf = GeoDataFrame(list(
        zip(diff_ab_polygs + new_polygs,
            diff_ab_polygs_attr + new_polygs_attr)),
                               columns=['geometry', 'diff'],
                               crs=clusters_a.crs)
    diff_ba_gdf = GeoDataFrame(list(
        zip(diff_ba_polygs + old_polys,
            diff_ba_polygs_attr + old_poly_attr)),
                               columns=['geometry', 'diff'],
                               crs=clusters_a.crs)

    # Filter out erroneous rows
    intersection_gdf = intersection_gdf[(intersection_gdf.geometry.area > 0.0)]
    diff_ab_gdf = diff_ab_gdf[(diff_ab_gdf.geometry.area > 0.0)]
    diff_ba_gdf = diff_ba_gdf[(diff_ba_gdf.geometry.area > 0.0)]

    # Add colors
    intersection_style = {
        'fillColor': intersection_color,
        'weight': 2,
        'color': 'black',
        'fillOpacity': 0.8
    }
    diff_ab_style = {
        'fillColor': diff_ab_color,
        'weight': 2,
        'color': 'black',
        'fillOpacity': 0.8
    }
    diff_ba_style = {
        'fillColor': diff_ba_color,
        'weight': 2,
        'color': 'black',
        'fillOpacity': 0.8
    }

    # One style dict per row; a plain list also works for empty frames,
    # where a row-wise apply would not yield a column.
    intersection_gdf['style'] = [intersection_style] * len(intersection_gdf)
    diff_ab_gdf['style'] = [diff_ab_style] * len(diff_ab_gdf)
    diff_ba_gdf['style'] = [diff_ba_style] * len(diff_ba_gdf)

    # Concatenate results
    return concat([diff_ab_gdf, diff_ba_gdf, intersection_gdf],
                  ignore_index=True,
                  sort=False)
def write_bin_gdf_to_csv(filename, bin_gdf: GeoDataFrame, index: bool = False):
    """Write the bins to a CSV file with the geometry serialized as WKT.

    The columns ``geometry`` and ``SpacemagBin`` are left out of the file;
    the geometry is written as text in a ``wkt`` column instead.

    :param filename: target passed to ``DataFrame.to_csv``.
    :param bin_gdf: bins with ``geometry`` and ``SpacemagBin`` columns.
        Note: a ``wkt`` column is added to this frame in place.
    :param index: whether the index is written to the file.
    :raises KeyError: if ``geometry`` or ``SpacemagBin`` is missing.
    """
    # ``.wkt`` is the supported accessor; the ``to_wkt()`` geometry method is
    # deprecated in newer Shapely releases.
    bin_gdf["wkt"] = [geom.wkt for geom in bin_gdf["geometry"]]
    bin_wkt = bin_gdf.drop(["geometry", "SpacemagBin"], axis=1)
    bin_wkt.to_csv(filename, index=index)
# One glyph layer per faculty member; all share the column names and alpha.
for _glyph, _size, _label, _source in (
        (p.circle, 9, "BN Jain", psource1),
        (p.square, 7, "Sumit Darak", psource2),
        (p.asterisk, 8, "Rajiv Ratn", psource3),
):
    _glyph(x='long', y='lat', size=_size, color="color", alpha=0.7,
           legend_label=_label, source=_source)

p.legend.location = "top_left"
p.legend.click_policy = "hide"

outfp = r"C:/Users/Jatin/Desktop/points.html"
save(obj=p, filename=outfp)
show(p)

# Build a point table from the coordinate lists.
lat = coordinates[1]
long = coordinates[2]
geometry = [Point(xy) for xy in zip(long, lat)]
gdf = GeoDataFrame(geometry=geometry)
for _column, _coord in (('long', 'x'), ('lat', 'y')):
    gdf[_column] = gdf.apply(getPointCoords, geom='geometry', coord_type=_coord, axis=1)
gdf = gdf.drop('geometry', axis=1).copy()

gdf['name'] = coordinates[0]
gdf['color'] = "red"
gdf['faculty'] = "Rajiv Ratn"

# Scale the degree values with the constant k.
k = 6378137
gdf["long"] = gdf["long"] * (k * np.pi/180.0)
gdf["lat"] = np.log(np.tan((90 + gdf["lat"]) * np.pi/360.0)) * k
gdf.head()

# All three names refer to the same frame object.
df1 = df2 = df3 = gdf
def main(main_df: gpd.GeoDataFrame,
         other_df: gpd.GeoDataFrame,
         how='inner',
         op='intersects',
         other_columns=None,
         final_columns=None,
         main_df_preprocessor=None,
         main_row_preprocessor=None,
         other_df_preprocessor=None,
         other_row_preprocessor=None,
         df_postprocessor=None,
         row_postprocessor=None,
         agg=None,
         remove_index_columns=True):
    """
    Matches two dataframes and outputs main_df with fields from matching `other_df` records.
    If one record of `main_df` matches two or more rows in `other_df`, the row is duplicated
    as in relational databases.

    Parameters
    ==========

    * `main_df` the dataframe whose objects will be kept in the result
    * `other_df` the dataframe from which the other attributes will be added to `main_df`
    * `how`: database-like join, either `'inner'` or `'left'` or `'right'`. The value
      `'cross'` is handled separately: every `main_df` row is paired with every
      `other_df` row and no spatial join or preprocessing is run.
    * `op` geometry operation (`'intersects'`, `'within'`), passed to `gpd.sjoin`
    * `other_columns`: passed to `subset` to choose the `other_df` columns to attach.
      When `None`, all columns of `other_df` except `geometry` are used, joined
      with `;`. When it is an empty string, nothing is attached.
    * `final_columns`: passed to `subset` to choose the columns of the returned frame.
    * `main_df_preprocessor`: function that will take `main_df` before spatial join and
      return another dataframe that will be used in the join. But output dataframe will
      contain rows and geometries from the original `main_df`.
    * `main_row_preprocessor`: function that will be applied to rows of `main_df` before
      spatial join. Must return Shapely geometry object. This geometries will be used for
      spatial join, but the original ones will be in the output dataframe.
    * `other_df_preprocessor`, `other_row_preprocessor` work the same way.
    * `df_postprocessor` and `row_postprocessor`: work the same way with the result
      dataframe before aggregation, and get a DF with `geometry` coming from `main_df`,
      and `geometry_other` coming from `other_df`. `df_postprocessor` should return the
      new (Geo)DataFrame, whereas `row_postprocessor` should return a shapely.geometry
      object. After applying postprocessor, `geometry_other` is discarded.
    * `agg`: (default `None`) is Pandas aggregation object (a JSON string is decoded
      first). If it's provided, the result dataframe will be grouped by `main_df` index,
      and the other columns will be aggregated according to this parameter; columns of
      `main_df` default to `'first'`. Otherwise, rows may be repeated.
    * `remove_index_columns`: when true, existing `index_main` / `index_other` columns
      are dropped from both inputs IN PLACE; when false, their presence raises
      `ValueError`.

    Returns a GeoDataFrame with the CRS of `main_df`.
    """
    # TODO: change row_postprocessor to return entire rows instead of geometry

    # saving original dataframes to use them for `merge`
    from gistalt import subset
    if remove_index_columns:
        # Note: inplace=True, so the caller's frames lose these columns too.
        main_df.drop(['index_main', 'index_other'], axis=1, inplace=True, errors='ignore')
        other_df.drop(['index_main', 'index_other'], axis=1, inplace=True, errors='ignore')
    elif (set(main_df) | set(other_df)) & {'index_main', 'index_other'}:
        raise ValueError(
            'DataFrames should not have `index_main` or `index_other` columns, because this function creates them again. Please rename them to avoid collisions and confusion.'
        )

    # NOTE(review): the nesting below is reconstructed from a flattened source;
    # the preprocessing and `sjoin` steps are assumed to belong to the `else`
    # branch — confirm against the original file.
    if how == 'cross':
        # Cartesian product through a constant helper key. After reset_index
        # both frames carry an `index` column, which the merge suffixes turn
        # into `index_main` / `index_other`.
        main_df_origin = gpd.GeoDataFrame(main_df.copy().reset_index(), crs=main_df.crs)
        main_df_origin['__join__'] = 1
        other_df_origin = gpd.GeoDataFrame(other_df.copy().reset_index(), crs=other_df.crs)
        other_df_origin['__join__'] = 1
        joined_dfs = main_df_origin.merge(other_df_origin, on='__join__', suffixes=('_main', '_other'))[[
            'index_main', 'index_other'
        ]]
        main_df_origin.drop(['__join__', 'index'], axis=1, inplace=True)
        other_df_origin.drop(['__join__', 'index'], axis=1, inplace=True)
    else:
        main_df_origin = main_df.copy()
        other_df_origin = other_df.copy()

        # df preprocessor has priority over row preprocessor if both are provided
        if main_df_preprocessor:
            main_df = df_crash_wrapper(main_df_preprocessor)(main_df.copy())
        elif main_row_preprocessor:
            tqdm.pandas(desc='Preprocessing main rows')
            main_df = main_df.copy()
            main_df['geometry'] = main_df.apply(
                row_crash_wrapper(main_row_preprocessor), axis=1)

        if other_df_preprocessor:
            other_df = df_crash_wrapper(other_df_preprocessor)(other_df.copy())
        elif other_row_preprocessor:
            tqdm.pandas(desc='Preprocessing other rows')
            other_df = other_df.copy()
            other_df['geometry'] = other_df.apply(
                row_crash_wrapper(other_row_preprocessor), axis=1)

        # making dataframes with geometries only, for `sjoin`
        main_df_geom = gpd.GeoDataFrame(main_df.copy()[['geometry']], crs=main_df.crs)
        other_df_geom = gpd.GeoDataFrame(other_df.copy()[['geometry']], crs=other_df.crs)

        # deciding which way to `sjoin`. Left df should be smaller than the right one.
        if len(main_df_geom) <= len(other_df_geom):
            joined_dfs = gpd.sjoin(main_df_geom, other_df_geom, how=how, op=op, lsuffix='main', rsuffix='other')
            joined_dfs = joined_dfs[[
                'index_other'
            ]].reset_index().rename(columns={'index': 'index_main'})
        else:
            # The frames are swapped, so a left join becomes a right join and
            # vice versa.
            if how == 'left':
                how = 'right'
            elif how == 'right':
                how = 'left'
            joined_dfs = gpd.sjoin(other_df_geom, main_df_geom, how=how, op=op, lsuffix='other', rsuffix='main')
            joined_dfs = joined_dfs[[
                'index_main'
            ]].reset_index().rename(columns={'index': 'index_other'})

    # if other_columns is not specified, we need at least to remove 'geometry' of the other df
    if other_columns is None:
        other_columns = ';'.join(set(other_df_origin) - {'geometry'})

    # Attach the index pairs to the original (un-preprocessed) main rows.
    result_df = main_df_origin.merge(joined_dfs, right_on='index_main', left_index=True)

    if other_columns not in (None, ''):
        other_df_origin_subset = subset(other_df_origin, other_columns)
        result_df = result_df.merge(other_df_origin_subset, left_on='index_other', right_index=True, suffixes=('', '_other'))

    if len(result_df) == 0:
        return gpd.GeoDataFrame(result_df, crs=main_df_origin.crs)

    if (df_postprocessor or row_postprocessor) and len(result_df) > 0:
        result_df = result_df.merge(
            gpd.GeoDataFrame(other_df_origin[[
                'geometry'
            ]].rename(columns={'geometry': 'geometry_other'}), crs=other_df_origin.crs),
            left_on='index_other',
            right_index=True
        )  # here only the geometry_other column is merged, no need for suffixes
        # df postprocessor takes priority over the row postprocessor.
        if df_postprocessor:
            result_df = df_crash_wrapper(df_postprocessor)(result_df)
        else:
            tqdm.pandas(desc='Applying postprocessor')
            result_df['geometry'] = result_df.progress_apply(
                row_crash_wrapper(row_postprocessor), axis=1)

    if agg:
        if isinstance(agg, str):
            agg = json.loads(agg)
        # Columns of main_df default to 'first'; the caller's spec overrides.
        agg_dict = {k: 'first' for k in list(main_df_origin)}
        agg_dict.update(agg)
        # print(list(result_df))
        # print(agg_dict)
        result_df = result_df.groupby(by=['index_main']).agg(agg_dict)

    # `other_columns` is a string here, so this is a substring test.
    if 'geometry' not in other_columns:
        result_df.drop('geometry_other', errors='ignore', inplace=True, axis=1)
    return gpd.GeoDataFrame(subset(result_df, final_columns), crs=main_df_origin.crs)
def make_SpacemagBins_from_bin_gdf(
    bin_gdf: gpd.GeoDataFrame,
    min_mag: Optional[float] = 6.0,
    max_mag: Optional[float] = 9.0,
    bin_width: Optional[float] = 0.2,
) -> gpd.GeoDataFrame:
    """
    Creates a GeoPandas GeoDataFrame with
    :class:`~openquake.hme.utils.bins.SpacemagBin` that forms the basis of most
    of the spatial hazard model testing.

    The input frame is modified in place: a ``SpacemagBin`` column is added,
    and its ``to_dict`` and ``to_json`` attributes are replaced on this
    instance by the serialization helpers defined below.

    :param bin_gdf:
        GeoDataFrame with one spatial bin per row; the row index is used as
        the ``bin_id`` of each :class:`SpacemagBin`.

    :param min_mag:
        Minimum earthquake magnitude for MFD-based analysis.

    :param max_mag:
        Maximum earthquake magnitude for MFD-based analysis.

    :param bin_width:
        Width of earthquake/MFD bins.

    :returns:
        The same GeoDataFrame object, with
        :class:`~openquake.hme.utils.bins.SpacemagBin` as a column.
    """

    def bin_to_mag(row):
        # ``row.name`` is the row's index label (public accessor for what the
        # private ``_name`` attribute stores).
        return SpacemagBin(
            row.geometry,
            bin_id=row.name,
            min_mag=min_mag,
            max_mag=max_mag,
            bin_width=bin_width,
        )

    bin_gdf["SpacemagBin"] = bin_gdf.apply(bin_to_mag, axis=1)

    # create serialization functions and add to instantiated GeoDataFrame
    def to_dict():
        """Map every index label to the dict form of its SpacemagBin."""
        out_dict = {
            i: bin_gdf.loc[i, "SpacemagBin"].to_dict()
            for i in bin_gdf.index
        }

        return out_dict

    bin_gdf.to_dict = to_dict

    def to_json(fp):
        """Write the ``to_dict`` output as JSON to the path ``fp``."""

        def to_serializable(val):
            """
            modified from Hynek (https://hynek.me/articles/serialization/)
            """
            if isinstance(val, datetime.datetime):
                return val.isoformat() + "Z"
            # elif isinstance(val, enum.Enum):
            #    return val.value
            elif attr.has(val.__class__):
                return attr.asdict(val)
            elif isinstance(val, np.integer):
                return int(val)
            elif isinstance(val, Exception):
                return {
                    "error": val.__class__.__name__,
                    "args": val.args,
                }
            # Anything else falls back to its string representation.
            return str(val)

        with open(fp, "w") as ff:
            json.dump(bin_gdf.to_dict(), ff, default=to_serializable)

    bin_gdf.to_json = to_json

    return bin_gdf
def add_earthquakes_to_bins(
    earthquake_gdf: gpd.GeoDataFrame,
    bin_df: gpd.GeoDataFrame,
    category: str = "observed",
    h3_res: int = 3,
) -> None:
    """
    Takes a GeoPandas GeoDataFrame of observed earthquakes (i.e., an
    instrumental earthquake catalog) and adds them to the earthquake lists of
    each :class:`SpacemagBin` based on location and magnitude.

    The spatial binning is done by computing the H3 cell of every earthquake
    (``h3.geo_to_h3`` at resolution ``h3_res``) and looking that cell id up in
    the index of ``bin_df``. Earthquakes whose cell is not in ``bin_df``, or
    whose magnitude lies more than half a bin width outside the bin's
    ``[min_mag, max_mag]`` range, are skipped.

    This function modifies both GeoDataFrames in memory (``Eq`` and ``bin_id``
    columns are added to ``earthquake_gdf``) and does not return any value.

    :param earthquake_gdf:
        GeoDataFrame of earthquakes, one per row, with a ``magnitude``
        column; each row is turned into an earthquake object by
        ``_make_earthquake_from_row``.

    :param bin_df:
        GeoDataFrame of the bins, indexed by H3 cell id, with a `SpacemagBin`
        column that has a :class:`SpacemagBin` object.

    :param category:
        Type of earthquake catalog. Default value is `observed` which is,
        typically, the catalog used to make the model. If a separate testing
        catalog is to be considered, then this is added using the
        `prospective` value. Any other value adds nothing to the bins.

    :param h3_res:
        H3 resolution used to compute each earthquake's cell id.

    :Returns:
        `None`.
    """
    earthquake_gdf["Eq"] = earthquake_gdf.apply(_make_earthquake_from_row,
                                                axis=1)
    earthquake_gdf["bin_id"] = [
        h3.geo_to_h3(eq.latitude, eq.longitude, h3_res)
        for eq in earthquake_gdf.Eq
    ]

    for _, eq in earthquake_gdf.iterrows():
        # Only the look-up is guarded: an earthquake outside every bin is
        # expected and skipped, while other errors are no longer swallowed
        # by a bare ``except``.
        try:
            spacemag_bin = bin_df.loc[eq.bin_id, "SpacemagBin"]
        except KeyError:
            continue

        half_width = spacemag_bin.bin_width / 2
        if eq.magnitude < spacemag_bin.min_mag - half_width:
            continue
        if eq.magnitude > spacemag_bin.max_mag + half_width:
            continue

        nearest_bc = _nearest_bin(eq.Eq.magnitude,
                                  spacemag_bin.mag_bin_centers)

        if category == "observed":
            spacemag_bin.mag_bins[nearest_bc].observed_earthquakes.append(
                eq["Eq"])
            spacemag_bin.observed_earthquakes[nearest_bc].append(eq["Eq"])
        elif category == "prospective":
            spacemag_bin.mag_bins[nearest_bc].prospective_earthquakes.append(
                eq["Eq"])
            spacemag_bin.prospective_earthquakes[nearest_bc].append(eq["Eq"])
def main(input_reference, data_path):
    """
    Produce cloud-mask, active-fire composite, scene-classification and
    hot-spot products for a list of catalogue references.

    For every comma-separated reference in ``input_reference['value']`` the
    catalogue is queried and the first hit is kept. Each product is then
    processed in two passes:

    1. cloud mask (20%), UInt16 and contrast-enhanced byte composites of the
       bands B12/B8A/B04, and a 10 m scene-classification (SCL) raster;
    2. hot-spot detection from band B12 and that product's own SCL raster,
       followed by vectorization of the result to GeoJSON.

    All outputs are written to the current working directory with file names
    derived from the product identifier; intermediate ``.vrt``/``.tif`` files
    are removed.

    :param input_reference: mapping whose ``'value'`` entry is a
        comma-separated string of catalogue end points.
    :param data_path: mapping whose ``'value'`` entry is handed to
        ``analyse`` together with each search result row.
    :return: ``None``.
    """
    os.environ['OTB_MAX_RAM_HINT'] = '4096'

    ciop = cioppy.Cioppy()

    search_params = {}

    # Keep only the first catalogue hit for every reference.
    temp_results = [
        ciop.search(
            end_point=entry,
            params=search_params,
            output_fields='identifier,self,wkt,startdate,enddate,enclosure,orbitDirection,cc',
            model='EOP')[0]
        for entry in input_reference['value'].split(',')
    ]

    sentinel2_search = GeoDataFrame(temp_results)

    sentinel2_search['startdate'] = pd.to_datetime(
        sentinel2_search['startdate'])
    sentinel2_search['enddate'] = pd.to_datetime(sentinel2_search['enddate'])
    # presumably `loads` is a WKT parser (e.g. shapely.wkt.loads) -- TODO confirm
    sentinel2_search['wkt'] = sentinel2_search['wkt'].apply(loads)

    # Append whatever columns `analyse` derives for each product.
    sentinel2_search = sentinel2_search.merge(
        sentinel2_search.apply(
            lambda row: analyse(row, data_path['value']), axis=1),
        left_index=True,
        right_index=True)

    bands = ['B12', 'B8A', 'B04']

    for index, row in sentinel2_search.iterrows():

        # cloud mask
        logging.info('Cloud mask 20%')
        mask_prb = get_mask_prob(row)
        output_name = '{}_CLOUD_MASK_20.tif'.format(row['identifier'])
        cloud_mask(mask_prb, 20, output_name)
        cog(output_name)
        metadata(output_name,
                 'Cloud mask 20% {}'.format(row['identifier']),
                 row)

        # Stack the composite bands into a temporary 10 m VRT.
        vrt_bands = [get_band_path(row, band) for band in bands]

        vrt = '{0}.vrt'.format(row['identifier'])
        ds = gdal.BuildVRT(vrt,
                           vrt_bands,
                           srcNodata=0,
                           xRes=10,
                           yRes=10,
                           separate=True)
        ds.FlushCache()

        tif = '{}_ACTIVE_FIRE_UInt16.tif'.format(row['identifier'])
        logging.info('Convert {} to UInt16'.format(row['identifier']))
        metadata(tif,
                 'RGB UInt16 Composite {}'.format(row['identifier']),
                 row)
        gdal.Translate(tif, vrt, outputType=gdal.GDT_UInt16)
        cog(tif)

        # Byte version, rescaled from 0-10000 to 0-255, used only as the
        # input of the contrast enhancement and deleted afterwards.
        tif = '{0}.tif'.format(row['identifier'])
        logging.info('Convert {} to byte'.format(row['identifier']))
        gdal.Translate(tif,
                       vrt,
                       outputType=gdal.GDT_Byte,
                       scaleParams=[[0, 10000, 0, 255]])

        tif_e = '{}_ACTIVE_FIRE.tif'.format(row['identifier'])
        contrast_enhancement(tif, tif_e)

        os.remove(tif)
        os.remove(vrt)

        cog(tif_e)
        metadata(tif_e, 'RGB Composite {}'.format(row['identifier']), row)

        # Scene classification resampled to 10 m.
        vrt = '{0}.vrt'.format(row['identifier'])
        ds = gdal.BuildVRT(vrt, [get_band_path(row, 'SCL')], separate=True)
        ds.FlushCache()

        scl_tif = '{0}_SCL.tif'.format(row['identifier'])
        metadata(scl_tif,
                 'Scene Classification {}'.format(row['identifier']),
                 row)
        gdal.Translate(scl_tif,
                       vrt,
                       xRes=10,
                       yRes=10,
                       outputType=gdal.GDT_Byte,
                       resampleAlg=gdal.GRA_Mode)
        cog(scl_tif)

    bands = ['B12']

    for index, row in sentinel2_search.iterrows():

        vrt_bands = [get_band_path(row, band) for band in bands]

        vrt = '{0}.vrt'.format(row['identifier'])
        ds = gdal.BuildVRT(vrt,
                           vrt_bands,
                           srcNodata=0,
                           xRes=10,
                           yRes=10,
                           separate=True)
        ds.FlushCache()

        tif = '{0}.tif'.format(row['identifier'])
        gdal.Translate(tif, vrt, outputType=gdal.GDT_UInt16)

        # Use this product's own SCL raster. Relying on the variable left
        # over from the previous loop would apply the last product's scene
        # classification to every product.
        scl_tif = '{0}_SCL.tif'.format(row['identifier'])

        hot_spot_name = '{}_HOT_SPOT.tif'.format(row['identifier'])
        metadata(hot_spot_name,
                 'Hot spot {}'.format(row['identifier']),
                 row)
        logging.info('Hot spot detection for {}'.format(row['identifier']))
        hot_spot(tif, scl_tif, hot_spot_name)
        cog(hot_spot_name)

        logging.info('Vectorize detected hot spots in {}'.format(
            row['identifier']))
        results_gdf = polygonize(hot_spot_name, row['startdate'],
                                 row['identifier'])
        results_gdf.to_file('{}_HOT_SPOT_VECTOR.geojson'.format(
            row['identifier']),
                            driver='GeoJSON')
        metadata('{}_HOT_SPOT_VECTOR.geojson'.format(row['identifier']),
                 'Hot spot vector {}'.format(row['identifier']),
                 row)

        os.remove(tif)
        os.remove(vrt)
def heatmap(map: GeoDataFrame,
            calibration_data: GeoDataFrame,
            windfield: Windfield,
            title=None,
            plot_calibration_data=False,
            mask=None,
            max_wind=10,
            colormap='Blues'):
    """
    Draw the wind speed predicted by a wind field as a heat map.

    Parameters
    ----------
    map: GeoDataFrame
        The background map; its outline is drawn in black and it is passed
        to ``setup_plot`` to obtain the axes and the grid extent
    calibration_data: GeoDataFrame
        The original observation data; rows must provide ``x``, ``y``,
        ``u`` and ``v``
    windfield: Windfield
        The wind field whose ``predict`` method is evaluated on the grid
    title: string, optional
        The title of the plot, default is None
    plot_calibration_data: boolean, optional
        Whether to draw the calibration data as arrows, default is False
    mask: GeoSeries (or GeoDataFrame), optional
        If provided, grid points not contained in any of its geometries
        are displayed with a speed of zero, default is None
    max_wind: int, optional
        The wind speed mapped to the top of the color scale; anything
        above it is drawn with that same color. Default is 10.
    colormap: string, optional
        The color map to use, see
        https://matplotlib.org/examples/color/colormaps_reference.html
        Default is 'Blues'
    """
    # Setup
    _fig, ax, x0, x1, xn, y0, y1, yn = setup_plot(map=map, title=title)

    # Arrow dimensions are derived from the longer side of the plot.
    scale = max(x1 - x0, y1 - y0) / 150
    arrow_head_width = max(x1 - x0, y1 - y0) / 100

    # Background map as a black outline
    map.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=1)

    # Calibration data as arrows
    if plot_calibration_data:

        def _draw_observation(row):
            return plot_wind_vector(ax=ax,
                                    x=row.x,
                                    y=row.y,
                                    u=row.u,
                                    v=row.v,
                                    scale=scale,
                                    arrow_head_width=arrow_head_width)

        calibration_data.apply(_draw_observation, axis=1)

    # Evaluation grid: rows run from y1 down to y0 so that the first row is
    # the top of the image (matches origin='upper' below).
    grid_y, grid_x = np.mgrid[y1:y0:yn * 1j, x0:x1:xn * 1j]
    n_rows, n_cols = grid_x.shape
    flat_x = grid_x.reshape((n_rows * n_cols, ))
    flat_y = grid_y.reshape((n_rows * n_cols, ))

    wind = windfield.predict(pd.Series(flat_x), pd.Series(flat_y))
    speed = np.sqrt(wind.u**2 + wind.v**2)
    speed = np.reshape(speed.values, (n_rows, n_cols))

    if mask is not None:
        # Zero out every grid point that no mask geometry contains.
        inside = [
            mask.contains(Point(px, py)).any()
            for px, py in zip(flat_x, flat_y)
        ]
        inside = np.reshape(inside, (n_rows, n_cols))
        speed = np.where(inside, speed, np.zeros((n_rows, n_cols)))

    plt.imshow(speed,
               cmap=colormap,
               interpolation='bilinear',
               origin='upper',
               extent=[x0, x1, y0, y1],
               vmin=0,
               vmax=max_wind)
def compute_toc_tiers_from_metro_rail(
    stations: geopandas.GeoDataFrame,
    toc_buses: geopandas.GeoDataFrame,
    clip: geopandas.GeoDataFrame,
    cushion: float = DEFAULT_CUSHION,
) -> geopandas.GeoDataFrame:
    """
    Compute TOC tiers for metro rail stations.

    Parameters
    ==========

    stations: geopandas.GeoDataFrame
        The stations list for Metro Rail.
    toc_buses: geopandas.GeoDataFrame
        The list of bus lines that satisfies TOC.
    clip: geopandas.GeoDataFrame
        Clip the resulting geodataframe by this (probably the City of LA).
        Only the geometry of its first row is used.
    cushion: float
        Multiplier applied to the tier buffer distances.

    Returns
    =======

    A GeoDataFrame with one row per station/intersecting-route pair,
    the columns ``tier_1`` to ``tier_4`` reprojected to WGS 84, a ``mode``
    column set to ``"metro"``, restricted to rows whose ``tier_3`` geometry
    intersects the clip geometry.
    """
    feet_crs = f"EPSG:{SOCAL_FEET}"

    # Project into feet for the purpose of drawing buffers.
    stations_feet = stations.to_crs(feet_crs)

    # Stations that are physically the same but listed under different
    # lines: join every station against 660-unit buffers around all
    # stations and record the line of the buffer it falls within.
    # NOTE(review): newer geopandas releases replace sjoin's `op` keyword
    # with `predicate` -- confirm against the pinned geopandas version.
    buffered_stations = stations_feet.set_geometry(stations_feet.buffer(660.0))
    nearby_lines = buffered_stations[["geometry", "LINE", "LINENUM"]].rename(
        columns={
            "LINE": "intersecting_route_name",
            "LINENUM": "intersecting_route",
        })
    station_intersections = geopandas.sjoin(
        stations_feet,
        nearby_lines,
        how="left",
        op="within",
    )

    # Intersections that are explicitly marked through LINENUM2; the name
    # of the intersecting line is looked up by its line number.
    explicit_transfers = stations_feet[stations_feet.LINENUM2 != 0].rename(
        columns={"LINENUM2": "intersecting_route"})
    line_names = stations_feet.set_index("LINENUM")[["LINE"]].rename(
        columns={"LINE": "intersecting_route_name"})
    station_intersections2 = explicit_transfers.merge(
        line_names,
        how="left",
        left_on="intersecting_route",
        right_index=True,
    )

    # Concatenate the two means of finding intersecting routes.
    station_intersections = pandas.concat(
        [station_intersections, station_intersections2], axis=0, sort=False)

    # Filter self intersections, then rows without a join partner, and keep
    # one row per TPIS_NAME.
    is_other_station = (
        station_intersections.index_right != station_intersections.index)
    station_intersections = station_intersections[is_other_station]
    station_intersections = station_intersections.dropna(
        subset=["index_right"])
    station_intersections = station_intersections.drop(
        columns=["index_right"])
    station_intersections = station_intersections.drop_duplicates(
        subset=["TPIS_NAME"])

    # Find all of the buses that are rapid buses, and determine
    # where their routes intersect with the stations.
    def _row_is_rapid_bus(route):
        return is_rapid_bus(route.agency, route.route_short_name)

    toc_rapid_buses = toc_buses[toc_buses.apply(_row_is_rapid_bus, axis=1)]

    stations_with_buffer = stations_feet.assign(
        buffered=stations_feet.buffer(660.0)).set_geometry("buffered")
    rapid_bus_routes = toc_rapid_buses.to_crs(feet_crs)[[
        "geometry", "route_short_name", "agency"
    ]]
    rapid_bus_intersections = geopandas.sjoin(
        stations_with_buffer,
        rapid_bus_routes,
        how="left",
        op="intersects",
    )
    rapid_bus_intersections = rapid_bus_intersections.rename(
        columns={
            "index_right": "intersecting_route",
            "route_short_name": "intersecting_route_name",
            "agency": "intersecting_route_agency",
        })
    # Switch back to the station point geometry before dropping the buffer.
    rapid_bus_intersections = rapid_bus_intersections.set_geometry(
        "geometry").drop(columns=["buffered"])

    # Concatenate the station intersections and the rapid bus intersections.
    combined = pandas.concat(
        [station_intersections, rapid_bus_intersections],
        axis=0,
        sort=False,
    )
    combined = combined.rename(columns={
        "LINE": "line",
        "LINENUM": "line_id",
        "STATION": "station"
    })
    stations = combined[[
        "line",
        "line_id",
        "station",
        "geometry",
        "intersecting_route",
        "intersecting_route_name",
        "intersecting_route_agency",
    ]]

    # Determine tier 3 and tier 4 TOC zones. Tiers 1 and 2 are always empty
    # for rail stations; tier 4 only exists where an intersecting route was
    # found.
    def assign_tiers_to_rail_stations(row):
        has_intersection = not pandas.isna(row.intersecting_route)
        if has_intersection:
            tier_4 = row.geometry.buffer(750.0 * cushion)
        else:
            tier_4 = shapely.geometry.GeometryCollection()
        return pandas.Series({
            "tier_1": shapely.geometry.GeometryCollection(),
            "tier_2": shapely.geometry.GeometryCollection(),
            "tier_3": row.geometry.buffer(2640.0 * cushion),
            "tier_4": tier_4,
        })

    tiers = stations.apply(assign_tiers_to_rail_stations, axis=1)
    station_toc_tiers = pandas.concat([stations, tiers], axis=1)

    # Reproject back into WGS 84: each tier column is wrapped in a GeoSeries
    # carrying the feet CRS and converted, then the active geometry follows.
    wgs84_crs = f"EPSG:{WGS84}"
    reprojected_tiers = {
        tier: geopandas.GeoSeries(station_toc_tiers[tier],
                                  crs=feet_crs).to_crs(wgs84_crs)
        for tier in ("tier_1", "tier_2", "tier_3", "tier_4")
    }
    station_toc_tiers = station_toc_tiers.assign(
        **reprojected_tiers).to_crs(wgs84_crs)

    # Drop all stations that don't intersect the City of LA and return.
    station_toc_tiers["mode"] = "metro"
    clip_geometry = clip.iloc[0].geometry
    within_clip = station_toc_tiers.set_geometry("tier_3").intersects(
        clip_geometry)
    return station_toc_tiers[within_clip]