df['lat'], c=df[indicator], s=10, cmap=cmap, norm=norm, zorder=1) jana = "DEF" if TARGET_LETTERS[location] in "ABC": jana = "ABC" jana_stops = { loc: stop for loc, stop in stops.items() if TARGET_LETTERS[loc] in jana } lats = [ gtfs.get_stop_coordinates(stop)[0] for loc, stop in jana_stops.items() ] lons = [ gtfs.get_stop_coordinates(stop)[1] for loc, stop in jana_stops.items() ] ax.plot(lons, lats, linewidth=2, c="black", zorder=2) for location2, stop2 in jana_stops.items(): lat, lon = gtfs.get_stop_coordinates(stop2) ax.scatter(lon, lat, s=50, c="black", zorder=3) if not stop2 == stop: ax.scatter(lon, lat, s=30, c="white", zorder=4) if stop2 == stop: ax.scatter(lon, lat, s=50, c="black", zorder=5)
class RouteMapMaker:
    """
    Builds simplified route-map geometries from a GTFS feed.

    Stop-to-stop shape sections are read from the feed, nearby stops are
    merged into clusters, the sections are cut at clustered stops that lie
    close to them, and the resulting parts are plotted.
    """

    # Stop-to-stop shape sections for one trip per route and direction among
    # the trips starting between 07:00 and 08:00, with the shape coordinates
    # of each section concatenated into comma separated strings.
    _STOP_SECTION_QUERY = (
        "WITH a AS ( "
        "SELECT routes.name AS name, shape_id, route_I, trip_I, routes.type, "
        "direction_id, max(end_time_ds-start_time_ds) AS trip_duration, "
        "count(*) AS n_trips "
        "FROM trips LEFT JOIN routes USING(route_I) "
        "WHERE start_time_ds >= 7*3600 AND start_time_ds < 8*3600 "
        "GROUP BY routes.route_I, direction_id ), "
        "b AS( "
        "SELECT q1.trip_I AS trip_I, q1.stop_I AS from_stop_I, "
        "q2.stop_I AS to_stop_I, q1.seq AS seq, "
        "q1.shape_break AS from_shape_break, "
        "q2.shape_break AS to_shape_break "
        "FROM (SELECT stop_I, trip_I, shape_break, seq FROM stop_times) q1, "
        "(SELECT stop_I, trip_I, shape_break, seq AS seq FROM stop_times) q2 "
        "WHERE q1.seq=q2.seq-1 AND q1.trip_I=q2.trip_I "
        "AND q1.trip_I IN (SELECT trip_I FROM a) ), "
        "c AS( "
        "SELECT b.*, name, direction_id, route_I, a.shape_id, "
        "group_concat(lat) AS lats, group_concat(lon) AS lons, "
        "count(*) AS n_coords "
        "FROM b, a, shapes "
        "WHERE b.trip_I = a.trip_I AND shapes.shape_id=a.shape_id "
        "AND b.from_shape_break <= shapes.seq "
        "AND b.to_shape_break >= shapes.seq "
        "GROUP BY route_I, direction_id, b.seq "
        "ORDER BY route_I, b.seq ) "
        "SELECT from_stop_I, to_stop_I, group_concat(trip_I) AS trip_ids, "
        "group_concat(direction_id) AS direction_ids, lats, lons "
        "FROM c WHERE n_coords > 1 "
        "GROUP BY from_stop_I, to_stop_I "
        "ORDER BY count(*) DESC"
    )

    def __init__(self, gtfs_name):
        """
        :param gtfs_name: either a key of FEED_DICT (str), in which case the
            feed is opened from its "gtfs_dir" entry, or an already
            constructed GTFS-like object that is used as is
        """
        if isinstance(gtfs_name, str):
            self.gtfs = GTFS(FEED_DICT[gtfs_name]["gtfs_dir"])
        else:
            self.gtfs = gtfs_name
        self.bunching_value = 99
        self.line_spacing = 0.0001
        self.shapes = False
        self.crs_wgs = {'init': 'epsg:4326'}
        # cluster_stops() buffers stops by a distance given in meters and
        # therefore needs this projected CRS; without the attribute the
        # method fails with an AttributeError.
        self.crs_eurefin = {'init': 'epsg:3067'}

    @staticmethod
    def _polygon_parts(geometries):
        """
        Flattens (multi-part) geometries into a list of single parts.

        :param geometries: iterable of geometries
        :return: list with the members of ``.geoms`` for every geometry that
            has that attribute, and the geometry itself otherwise
        """
        parts = []
        for geometry in geometries:
            # A dissolve may yield a plain Polygon as well as a MultiPolygon,
            # and multi-part geometries are not directly iterable in
            # Shapely 2.x, hence the lookup through ``.geoms``.
            parts.extend(getattr(geometry, "geoms", [geometry]))
        return parts

    def cluster_shapes(self):
        """
        Reads stop-to-stop shapes from the feed, maps their end stops to
        stop clusters, splits the shapes by nearby clustered stops, joins
        the parts with the buffered end point clusters and plots the result.

        :return: None
        """
        # get unique stop-to-stop shapes, with trips aggregated
        # split by nearby stops
        # match splitted, and aggregate trips
        # identify branches: large overlap but crosses buffer, insert pseudo stop at branch
        # split everything again
        # match splitted
        df = self.gtfs.execute_custom_query_pandas(self._STOP_SECTION_QUERY)
        df["geometry"] = [
            shapely.LineString([
                (float(lon), float(lat))
                for lon, lat in zip(lons.split(","), lats.split(","))
            ])
            for lons, lats in zip(df["lons"], df["lats"])
        ]
        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry="geometry")
        gdf = gdf.drop(["lats", "lons"], axis=1)

        stops_set = set(gdf["from_stop_I"]) | set(gdf["to_stop_I"])
        gdf["orig_parent_stops"] = list(
            zip(gdf['from_stop_I'], gdf['to_stop_I']))
        clustered_stops = self.cluster_stops(stops_set)
        stop_to_cluster = dict(
            zip(clustered_stops["stop_I"], clustered_stops["new_stop_I"]))
        gdf["to_stop_I"] = [stop_to_cluster[s] for s in gdf["to_stop_I"]]
        gdf["from_stop_I"] = [stop_to_cluster[s] for s in gdf["from_stop_I"]]

        # to/from_stop_I: cluster id
        # orig_parent_stops: old id
        # child_stop_I: cluster id
        splitted_gdf = self.split_shapes_by_nearby_stops(clustered_stops, gdf)
        splitted_gdf['child_stop_I'] = [
            ",".join([str(int(x)) for x in section])
            for section in splitted_gdf["child_stop_I"]
        ]
        grouped = splitted_gdf.groupby(['child_stop_I']).agg({
            'orig_parent_stops': lambda x: tuple(x),
            'geometry': lambda x: x.iloc[0]
        })
        # The aggregation is not guaranteed to hand back a GeoDataFrame, and
        # match_shapes() needs one for its spatial join.
        splitted_gdf = GeoDataFrame(grouped.reset_index(),
                                    geometry="geometry",
                                    crs=self.crs_wgs)
        splitted_gdf['value'] = 1
        splitted_gdf = self.match_shapes(splitted_gdf)
        splitted_gdf["rand"] = np.random.randint(1, 10, splitted_gdf.shape[0])
        print(splitted_gdf)
        self.plot_geopandas(splitted_gdf, alpha=0.3)

    def split_shapes_by_nearby_stops(self, stops, shapes, buffer=0.01):
        """
        Splits shapes by the clustered stops that lie within buffer of them.

        The caller's ``shapes`` frame is left unmodified.

        :param stops: GeoDataFrame with columns new_stop_I, stop_I, geometry
        :param shapes: GeoDataFrame with columns geometry, from_stop_I,
            to_stop_I and orig_parent_stops
        :param buffer: buffer distance around each shape, in the units of
            the shapes' coordinates
        :return: GeoDataFrame with columns child_stop_I, orig_parent_stops
            and geometry, one row per shape part
        """
        # stops within buffer
        # splitter
        # retain the "parent" stop section
        stops_crs = getattr(stops, "crs", None)
        shapes_crs = getattr(shapes, "crs", None)

        # One row per stop cluster; keep the member stop ids as a tuple.
        stops = stops.groupby(['new_stop_I']).agg({
            'stop_I': lambda x: tuple(x),
            'geometry': lambda x: x.iloc[0]
        }).reset_index()
        # The aggregation is not guaranteed to return a GeoDataFrame.
        stops = GeoDataFrame(stops, geometry="geometry", crs=stops_crs)
        # sjoin drops the geometry of the right frame, so keep a copy.
        stops["point_geom"] = stops["geometry"]

        # Join on the buffered shape, but remember the original line.
        buffered = shapes.copy()
        buffered["line_geom"] = shapes["geometry"]
        buffered["geometry"] = shapes["geometry"].buffer(buffer)
        gdf_joined = sjoin(buffered, stops, how="left", op='intersects')
        gdf_joined["geometry"] = gdf_joined["line_geom"]
        gdf_joined = gdf_joined.drop(["line_geom"], axis=1)

        gdf_grouped = gdf_joined.groupby(
            ["orig_parent_stops", 'from_stop_I', 'to_stop_I']).agg({
                'point_geom': lambda x: tuple(x),
                'new_stop_I': lambda x: tuple(x),
                'geometry': lambda x: x.iloc[0]
            })
        gdf_joined = gdf_grouped.reset_index().apply(
            self.split_shape_by_points, axis=1)

        # One output row per (shape part, stop section) pair.
        records = [
            {
                "child_stop_I": stop_section,
                "orig_parent_stops": row["orig_parent_stops"],
                "geometry": shape_part,
            }
            for row in gdf_joined.to_dict('records')
            for shape_part, stop_section in zip(row['shape_parts'],
                                                row['child_stop_Is'])
        ]
        return GeoDataFrame(
            records,
            columns=['child_stop_I', 'orig_parent_stops', 'geometry'],
            geometry="geometry",
            crs=shapes_crs)

    def check_shape_orientation(self, shape, from_stop_point, to_stop_point):
        """
        Checks that the shape goes from the from stop to the to stop and not
        the opposite direction.

        Not implemented: the method currently does nothing and returns None.

        :param shape:
        :param from_stop_point:
        :param to_stop_point:
        :return: None
        """

    def split_shape_by_points(self, row):
        """
        Splits the shape of one row at the stops matched to it.

        :param row: mapping/Series with keys geometry (the line),
            from_stop_I, to_stop_I (parent stop clusters), point_geom
            (tuple of stop points) and new_stop_I (tuple of stop cluster ids)
        :return: the same row with two keys added: shape_parts (list of
            geometries) and child_stop_Is (list of stop pairs), both of the
            same length
        """
        shape = row["geometry"]
        shape_parents = [row["from_stop_I"], row["to_stop_I"]]
        points = row["point_geom"]
        point_ids = row["new_stop_I"]
        # TODO: change this to also output the cluster point ids for the end points so that matching is possible directly

        # No stop matched the shape: the first entry is not a point.
        if not isinstance(points[0], shapely.Point):
            row["shape_parts"] = [shape]
            row["child_stop_Is"] = [shape_parents]
            return row

        # distance along the shape of the location closest to each stop
        stop_by_distance = {
            shape.project(point): stop_id
            for point, stop_id in zip(points, point_ids)
        }

        shape_parts = []
        stop_sections = []
        rest_of_shape = shape
        previous_stop = shape_parents[0]
        # loops through the stops in their order along the shape, skipping
        # the first and the last one
        if len(stop_by_distance) >= 3:
            for distance in sorted(stop_by_distance)[1:-1]:
                stop_id = stop_by_distance[distance]
                if stop_id in shape_parents:
                    continue
                new_point = shape.interpolate(distance)
                # TODO: this step only works with a modified version of split(), replace with a custom function
                geometries = split(rest_of_shape, new_point)
                pieces = list(getattr(geometries, "geoms", geometries))
                if len(pieces) == 2:
                    # Record the stop section only together with its shape
                    # part, so that both lists stay aligned.
                    shape_parts.append(pieces[0])
                    stop_sections.append((int(previous_stop), int(stop_id)))
                    previous_stop = stop_id
                    rest_of_shape = pieces[1]
                elif pieces:
                    rest_of_shape = pieces[0]

        shape_parts.append(rest_of_shape)
        stop_sections.append((previous_stop, shape_parents[1]))
        row["shape_parts"] = shape_parts
        row["child_stop_Is"] = stop_sections
        return row

    def match_shapes(self, shapes, buffer=0.01):
        """
        Buffers the end points of all shapes, merges overlapping buffers
        into polygons and joins each shape to the polygon it lies
        completely within.

        :param shapes: GeoDataFrame of line geometries
        :param buffer: buffer distance around the end points, in the units
            of crs_wgs
        :return: left spatial join of shapes with the merged polygons; the
            polygon number is in column new_stop_I
        """
        lines = list(shapes["geometry"])
        end_points = ([Point(line.coords[0]) for line in lines] +
                      [Point(line.coords[-1]) for line in lines])
        point_gdf = GeoDataFrame(crs=self.crs_wgs, geometry=end_points)
        buffered = GeoDataFrame(crs=self.crs_wgs,
                                geometry=point_gdf.buffer(buffer))
        buffered["everything"] = 1
        gdf_poly = buffered.dissolve(by="everything")

        single_parts = GeoDataFrame(
            crs=self.crs_wgs,
            geometry=self._polygon_parts(gdf_poly["geometry"]))
        single_parts['new_stop_I'] = single_parts.index
        return sjoin(shapes, single_parts, how="left", op='within')

    def identify_branches(self, shapes, buffer=0.01):
        """
        Checks for other shapes that exit the buffer of another shape. In
        these cases a pseudo stop is created, for further splitting of
        shapes.

        Not implemented: the method currently does nothing and returns None.

        :param shapes:
        :param buffer:
        :return: None
        """

    def get_linestrings_for_stop_section(self, stop_tuple, trip_id,
                                         from_shape_brake, to_shape_brake):
        """
        Returns the geometry between two stops.

        The shape of the trip is used when self.shapes is truthy and the
        shape has more than one distinct latitude and longitude; otherwise
        a straight line between the stop coordinates is returned.

        :param stop_tuple: (from stop_I, to stop_I)
        :param trip_id: trip whose shape is looked up
        :param from_shape_brake: shape break of the from stop
        :param to_shape_brake: shape break of the to stop
        :return: LineString, or None when no shape is used and both stops
            have the same coordinates
        """
        try:
            # Explicit raises instead of asserts: asserts are stripped when
            # Python runs with -O, which would silently change the flow.
            if not self.shapes:
                raise ValueError("shape lookup is disabled")
            shapedict = get_shape_between_stops(
                self.gtfs.conn.cursor(), trip_id, stop_tuple[0],
                stop_tuple[1], (from_shape_brake, to_shape_brake))
            if len(set(shapedict["lat"])) <= 1:
                raise ValueError("shape has no distinct latitudes")
            if len(set(shapedict["lon"])) <= 1:
                raise ValueError("shape has no distinct longitudes")
            return shapely.LineString([
                (lon, lat)
                for lat, lon in zip(shapedict["lat"], shapedict["lon"])
            ])
        except (ValueError, AssertionError):
            lat0, lon0 = self.gtfs.get_stop_coordinates(stop_tuple[0])
            lat1, lon1 = self.gtfs.get_stop_coordinates(stop_tuple[1])
            if lat0 == lat1 and lon0 == lon1:
                return None
            return shapely.LineString([(lon0, lat0), (lon1, lat1)])

    def route_parallels(self, line, route, all_routes, bunching_value=5,
                        line_spacing=0.0001):
        """
        Offsets line sideways so that routes sharing it are drawn side by
        side.

        :param line: geometry to offset
        :param route: the route to place; must be a member of all_routes
        :param all_routes: list of the routes sharing the line, in drawing
            order
        :param bunching_value: from this many routes on, the line is
            returned without offset
        :param line_spacing: distance between neighbouring parallel lines
        :return: the offset line; the line itself when there are at least
            bunching_value routes; None when line is falsy or the offset
            could not be computed
        """
        if not line:
            return None
        n_parallels = len(all_routes)
        if n_parallels >= bunching_value:
            return line

        half_width = ((n_parallels - 1) * line_spacing) / 2
        offsets = np.linspace(-1 * half_width, half_width, n_parallels)
        offset = offsets[all_routes.index(route)]
        side = "left" if offset < 0 else "right"
        try:
            return line.parallel_offset(abs(offset), side)
        except Exception:
            # Best effort: report the line that could not be offset and
            # carry on without a geometry.
            print(line, offset)
            return None

    def get_route_ranking(self, df):
        """
        Collects, per stop section, the routes using it.

        A section and its reverse share one entry, keyed by the direction
        seen first; routes seen in the key direction are appended, routes
        seen in the reverse direction are inserted at the front.

        :param df: DataFrame with columns from_stop_I, to_stop_I, route_I,
            trip_I, from_shape_break and to_shape_break
        :return: tuple of two dicts keyed by (from_stop_I, to_stop_I): the
            list of route_I's, and the (trip_I, from_shape_break,
            to_shape_break) of the first row seen for the section
        """
        route_order_for_stop_sections = {}
        stop_section_shapes = {}
        for row in df.itertuples():
            section_tuple = (row.from_stop_I, row.to_stop_I)
            alt_section_tuple = (row.to_stop_I, row.from_stop_I)
            if section_tuple in route_order_for_stop_sections:
                route_order_for_stop_sections[section_tuple].append(
                    row.route_I)
            elif alt_section_tuple in route_order_for_stop_sections:
                route_order_for_stop_sections[alt_section_tuple].insert(
                    0, row.route_I)
            else:
                route_order_for_stop_sections[section_tuple] = [row.route_I]
                stop_section_shapes[section_tuple] = (row.trip_I,
                                                      row.from_shape_break,
                                                      row.to_shape_break)
        return route_order_for_stop_sections, stop_section_shapes

    def get_geometry(self, stop_tuple, route, all_routes, cluster_dict):
        """
        Returns the straight line between two stops, offset for route.

        :param stop_tuple: (from stop, to stop)
        :param route: the route to place
        :param all_routes: list of the routes sharing the section
        :param cluster_dict: dict from stop to a sequence whose first item
            is the coordinate of the stop
        :return: see route_parallels; None when both stops are the same
        """
        if stop_tuple[0] == stop_tuple[1]:
            return None
        line = shapely.LineString(
            [cluster_dict[stop_tuple[0]][0], cluster_dict[stop_tuple[1]][0]])
        return self.route_parallels(line, route, all_routes,
                                    bunching_value=self.bunching_value,
                                    line_spacing=self.line_spacing)

    def cluster_stops(self, stops_set, distance=100):
        """
        Merges stops that are within distance together into one stop.

        Steps: change projection for accurate buffer distance, merge the
        buffer polygons, select single parts, calculate centroids.

        NOTE(review): the centroids are reprojected to crs_wgs because the
        other methods of this class work in crs_wgs -- confirm that this is
        the intended output CRS.

        :param stops_set: iterable that lists stop_I's
        :param distance: int, distance to merge, meters
        :return: GeoDataFrame with one row per stop: the columns of the
            stops table, new_stop_I (cluster number) and the cluster
            centroid as geometry
        """
        df = self.gtfs.execute_custom_query_pandas(
            """SELECT * FROM stops WHERE stop_I IN ({stops_set})"""
            .format(stops_set=",".join([str(x) for x in stops_set])))
        df["geometry"] = [
            Point((lon, lat)) for lon, lat in zip(df["lon"], df["lat"])
        ]
        gdf = GeoDataFrame(df, crs=self.crs_wgs, geometry="geometry")
        gdf = gdf.to_crs(self.crs_eurefin)

        # Stops whose buffers of distance / 2 touch end up in one polygon.
        gdf_poly = gdf.copy()
        gdf_poly["geometry"] = gdf_poly["geometry"].buffer(distance / 2)
        gdf_poly["everything"] = 1
        gdf_poly = gdf_poly.dissolve(by="everything")

        single_parts = GeoDataFrame(
            crs=self.crs_eurefin,
            geometry=self._polygon_parts(gdf_poly["geometry"]))
        single_parts['new_stop_I'] = single_parts.index
        gdf_joined = sjoin(gdf, single_parts, how="left", op='within')
        gdf_joined = gdf_joined.drop('geometry', axis=1)

        single_parts["geometry"] = single_parts.centroid
        single_parts = single_parts.to_crs(self.crs_wgs)
        return single_parts.merge(gdf_joined, on="new_stop_I")

    def plot_geopandas(self, gdf, **kwargs):
        """
        Plots gdf coloured by its "rand" column and shows the figure.

        :param gdf: GeoDataFrame with a column named "rand"
        :param kwargs: passed on to gdf.plot; a new figure is created
            unless an ``ax`` is given
        :return: None
        """
        if kwargs.get("ax") is None:
            # Draw on the freshly created axes instead of leaving them
            # empty next to the plot.
            _, kwargs["ax"] = plt.subplots()
        gdf.plot(column="rand", **kwargs)
        plt.show()