def parse_od_settings(
    path_travel_mode: str,
    path_routing_mode: str,
    routing_conf: RoutingConf,
    orig_lat,
    orig_lon,
    dest_lat,
    dest_lon,
    aqi_updater: Union[GraphAqiUpdater, None],
) -> OdSettings:
    """Validates origin-destination routing parameters and packs them to an OdSettings object.

    Raises:
        RoutingException: If a parameter is invalid or the requested routing
            feature is disabled in the current configuration.
    """
    try:
        travel_mode = TravelMode(path_travel_mode)
    except Exception:
        raise RoutingException(ErrorKey.INVALID_TRAVEL_MODE_PARAM.value)

    # retain support for legacy path variable 'short'
    if path_routing_mode == 'short':
        routing_mode = RoutingMode.FAST
    else:
        try:
            routing_mode = RoutingMode(path_routing_mode)
        except Exception:
            raise RoutingException(ErrorKey.INVALID_ROUTING_MODE_PARAM.value)

    # reject travel modes that are disabled in the current configuration
    if travel_mode == TravelMode.BIKE and not conf.cycling_enabled:
        raise RoutingException(ErrorKey.BIKE_ROUTING_NOT_AVAILABLE.value)
    if travel_mode == TravelMode.WALK and not conf.walking_enabled:
        raise RoutingException(ErrorKey.WALK_ROUTING_NOT_AVAILABLE.value)

    # reject routing modes that are disabled in the current configuration
    if routing_mode == RoutingMode.GREEN and not conf.gvi_paths_enabled:
        raise RoutingException(ErrorKey.GREEN_PATH_ROUTING_NOT_AVAILABLE.value)
    if routing_mode == RoutingMode.QUIET and not conf.quiet_paths_enabled:
        raise RoutingException(ErrorKey.QUIET_PATH_ROUTING_NOT_AVAILABLE.value)

    if routing_mode == RoutingMode.CLEAN:
        if not conf.clean_paths_enabled:
            raise RoutingException(ErrorKey.CLEAN_PATH_ROUTING_NOT_AVAILABLE.value)
        # clean path routing additionally needs fresh real-time AQI data
        aqi_ready = (
            aqi_updater
            and aqi_updater.get_aqi_update_status_response()['aqi_data_updated']
        )
        if not aqi_ready:
            raise RoutingException(ErrorKey.NO_REAL_TIME_AQI_AVAILABLE.value)

    if travel_mode == TravelMode.WALK and routing_mode == RoutingMode.SAFE:
        raise RoutingException(ErrorKey.SAFE_PATHS_ONLY_AVAILABLE_FOR_BIKE.value)

    orig_point = geom_utils.project_geom(
        geom_utils.get_point_from_lat_lon({'lat': float(orig_lat), 'lon': float(orig_lon)})
    )
    dest_point = geom_utils.project_geom(
        geom_utils.get_point_from_lat_lon({'lat': float(dest_lat), 'lon': float(dest_lon)})
    )
    sens = routing_conf.sensitivities_by_routing_mode[routing_mode]
    return OdSettings(orig_point, dest_point, travel_mode, routing_mode, sens)
def get_nodata_zones(wfs_hsy_url: str, layer: str, hma_mask: str, export_gpkg: str):
    """1) Downloads polygon layer of municipalities of Helsinki Metropolitan Area,
    2) Creates buffered polygons from the boundary lines of these polygons,
    3) Exports the boundary-buffers to geopackage.

    Args:
        wfs_hsy_url: URL of the HSY WFS service to download municipalities from.
        layer: Name of the WFS layer containing the municipality polygons.
        hma_mask: File path to the HMA mask polygon (readable by geopandas).
        export_gpkg: File path of the geopackage to export the results to.
    """
    mask_poly: Polygon = geom_utils.project_geom(
        gpd.read_file(hma_mask)['geometry'][0]).buffer(500)
    municipalities = get_wfs_feature(wfs_hsy_url, layer)
    municipalities.to_file(export_gpkg, layer='hma_municipalities', driver='GPKG')
    boundaries = []
    for municipality in municipalities.itertuples():
        # bug fix: previously the loop variable was immediately overwritten with
        # the whole (multi)geometry, so the full boundary buffer was appended
        # once per part; now each polygon part is buffered individually
        for poly in municipality.geometry.geoms:
            boundaries.append(poly.boundary.buffer(22))
    dissolved_buffer: Polygon = unary_union(boundaries)
    intersected_buffer = dissolved_buffer.intersection(mask_poly)
    boundary_gdf = gpd.GeoDataFrame(
        data=[{'nodata_zone': 1}],
        geometry=[intersected_buffer],
        crs=CRS.from_epsg(3879))
    boundary_gdf.to_file(export_gpkg, layer='municipal_boundaries', driver='GPKG')
def test_path_set_1_fastest_path_geom(path_set_1):
    """Verifies the projected length of the fastest path of path set 1."""
    _, path_fc = path_set_1
    fast_paths = [
        feat for feat in path_fc['features']
        if feat['properties']['type'] == 'fast'
    ]
    coords = fast_paths[0]['geometry']['coordinates']
    projected = project_geom(LineString(coords))
    assert round(projected.length, 2) == 1340.0
def test_data_for_linking_edges_has_right_values(
    new_nearest_node: OdNodeData,
    new_linking_edge_data: List[dict],
):
    """Checks that the attributes of new linking edges are correctly derived
    from the edge on which the new node was created.
    """
    assert len(new_linking_edge_data) == 2
    first_link = new_linking_edge_data[0]
    assert len(new_nearest_node.link_to_edge_spec.edge) == 28
    base_edge = new_nearest_node.link_to_edge_spec.edge
    len_ratio = first_link[E.length.value] / base_edge[E.length.value]
    assert round(len_ratio, 2) == 0.1
    # link length must match both its projected geometry and its WGS geometry
    assert first_link[E.length.value] == round(first_link[E.geometry.value].length, 2)
    wgs_proj_length = geom_utils.project_geom(first_link[E.geom_wgs.value]).length
    assert round(wgs_proj_length, 2) == first_link[E.length.value]
    # length and cost attributes must scale by the length ratio
    assert first_link[E.length.value] == round(len_ratio * base_edge[E.length.value], 2)
    assert first_link[E.bike_time_cost.value] == round(
        len_ratio * base_edge[E.bike_time_cost.value], 2)
    assert first_link[E.bike_safety_cost.value] == round(
        len_ratio * base_edge[E.bike_safety_cost.value], 2)
    # total noise exposure must equal the link length
    total_noise_exposure = sum(first_link[E.noises.value].values())
    assert round(total_noise_exposure, 2) == first_link[E.length.value]
    # all (non-AQ) cost attributes must scale by the length ratio as well
    for key in new_nearest_node.link_to_edge_spec.edge.keys():
        if key.startswith('c_') and not key.startswith('c_aq'):
            assert round(first_link[key]) == round(len_ratio * base_edge[key])
def test_path_set_1_clean_path_geom(path_set_1):
    """Verifies the projected length of the clean path of path set 1."""
    _, path_fc = path_set_1
    matching = [
        feat for feat in path_fc['features']
        if feat['properties']['id'] == path_id
    ]
    coords = matching[0]['geometry']['coordinates']
    projected = project_geom(LineString(coords))
    assert round(projected.length, 2) == 1372.8
def edge_attrs_near_point(lat, lon):
    """Returns the attributes of the graph edge nearest to the given WGS84
    coordinates as a JSON response (null if no edge is found).
    """
    lat_lon = {'lat': float(lat), 'lon': float(lon)}
    point = geom_utils.project_geom(geom_utils.get_point_from_lat_lon(lat_lon))
    nearest = G.find_nearest_edge(point)
    if nearest:
        return jsonify(G.format_edge_dict_for_debugging(nearest.attrs))
    return jsonify(None)
def test_func(geometry: dict):
    """Asserts that the given GeoJSON geometry dict is a valid LineString
    with a positive projected length.
    """
    assert geometry['type'] == 'LineString'
    coordinates = geometry['coordinates']
    assert isinstance(coordinates, list)
    linestring = LineString(coordinates)
    assert isinstance(linestring, LineString)
    assert linestring.is_valid
    # the projected length must be a positive number
    projected = project_geom(linestring)
    assert isinstance(projected.length, (float, int))
    assert projected.length >= 0.1
def test_noise_graph_update(self):
    """Tests that noise attributes are correctly joined to graph edges from noise CSVs.

    Uses the idiomatic assertIsNone/assertIsNotNone instead of (in)equality
    comparisons against None (`== None` / assertEqual(x, None)), consistent
    with the pytest variant of this test.
    """
    in_graph_file = 'data/test_graph.graphml'
    out_graph_file = 'temp/test_graph_noises.graphml'
    data_extent_file = 'data/HMA.geojson'
    noise_csv_dir = 'noise_csv/'
    data_extent: Polygon = geom_utils.project_geom(
        gpd.read_file(data_extent_file)['geometry'][0])
    graph = ig_utils.read_graphml(in_graph_file, log)
    noise_graph_update.set_default_and_na_edge_noises(graph, data_extent, log)
    noise_graph_update.noise_graph_update(graph, noise_csv_dir, log)
    ig_utils.export_to_graphml(graph, out_graph_file)
    graph = ig_utils.read_graphml(out_graph_file)
    self.assertEqual(graph.ecount(), 3702)
    for edge in graph.es:
        attrs = edge.attributes()
        # check that edge IDs are correct
        self.assertEqual(edge.index, attrs[E.id_ig.value])
        if isinstance(attrs[E.geometry.value], LineString):
            # note: this will fail if some of the edges are outside the noise data extent
            self.assertIsNotNone(edge[E.noises.value])
            self.assertIsInstance(edge[E.noises.value], dict)
            self.assertIsNotNone(edge[E.noise_source.value])
            self.assertIsInstance(edge[E.noise_source.value], str)
        else:
            # for edges without geometry the noise attributes should be nodata
            self.assertIsNone(edge[E.noises.value])
            self.assertIsNone(edge[E.noise_source.value])
        # if edge noises are nodata then also noise source must be nodata
        if edge[E.noises.value] is None:
            self.assertIsNone(edge[E.noise_source.value])
        # if edge noises are not nodata but {} then noise source must also be just '' (not nodata)
        if edge[E.noises.value] == {}:
            self.assertEqual(edge[E.noise_source.value], '')
        # if edge has noises it must also have noise source
        if edge[E.noises.value]:
            self.assertNotEqual(edge[E.noise_source.value], '')
            self.assertIsNotNone(edge[E.noise_source.value])
        # if edge has noise source it must have also noises
        if edge[E.noise_source.value]:
            self.assertNotEqual(edge[E.noises.value], '')
            self.assertIsNotNone(edge[E.noises.value])
def test_path_set_1_quiet_path_geom(path_set_1):
    """Verifies the projected length of the quiet path of path set 1."""
    data = path_set_1
    features = data['path_FC']['features']
    matching = [feat for feat in features if feat['properties']['id'] == path_id]
    coords = matching[0]['geometry']['coordinates']
    projected = project_geom(LineString(coords))
    assert round(projected.length, 2) == 1475.14
def main(conf: GraphNoiseJoinConf):
    """Joins noise data to the edges of a graph and exports the result to file."""
    extent_geom = gpd.read_file(conf.noise_data_extent_fp)['geometry'][0]
    data_extent: Polygon = geom_utils.project_geom(extent_geom)
    graph = ig_utils.read_graphml(conf.graph_in_fp, log)
    set_default_and_na_edge_noises(graph, data_extent)
    noise_graph_update(graph, conf.noise_data_csv_dir)
    ig_utils.export_to_graphml(graph, conf.graph_out_fp)
    log.info(f'exported graph of {graph.ecount()} edges')
    log.info('all done')
def test_updates_noises_from_csv_to_graph():
    """Tests that noise attributes are correctly joined to graph edges from noise CSVs."""
    in_graph_file = f'{base_dir}/data/test_graph.graphml'
    out_graph_file = f'{base_dir}/temp/test_graph_noises.graphml'
    data_extent_file = f'{base_dir}/data/HMA.geojson'
    noise_csv_dir = f'{base_dir}/noise_csv/'
    data_extent: Polygon = geom_utils.project_geom(
        gpd.read_file(data_extent_file)['geometry'][0]
    )
    graph = ig_utils.read_graphml(in_graph_file)
    noise_graph_update.set_default_and_na_edge_noises(graph, data_extent)
    noise_graph_update.noise_graph_update(graph, noise_csv_dir)
    ig_utils.export_to_graphml(graph, out_graph_file)

    # read the exported graph back and verify its noise attributes
    graph = ig_utils.read_graphml(out_graph_file)
    assert graph.ecount() == 3702
    for edge in graph.es:
        attrs = edge.attributes()
        noises = edge[E.noises.value]
        source = edge[E.noise_source.value]
        # edge IDs must match igraph indexes
        assert edge.index == attrs[E.id_ig.value]
        if isinstance(attrs[E.geometry.value], LineString):
            # note: this will fail if some of the edges are outside the noise data extent
            assert noises is not None
            assert isinstance(noises, dict)
            assert source is not None
            assert isinstance(source, str)
        else:
            # edges without geometry must have nodata noise attributes
            assert noises is None
            assert source is None
        # nodata noises imply nodata noise source
        if noises is None:
            assert source is None
        # empty noises ({}) imply an empty ('') noise source (not nodata)
        if noises == {}:
            assert source == ''
        # noises imply a non-empty noise source
        if noises:
            assert source != ''
            assert source is not None
        # a noise source implies non-empty noises
        if source:
            assert noises != ''
            assert noises is not None
def test_routes_quiet_paths_on_same_street(
    quiet_paths_on_one_street,
    test_line_geometry,
    test_fast_path_prop_types,
    test_edge_props
):
    """Tests that if origin and destination are on the same street, the resultsets are still as expected."""
    edge_fc, path_fc = quiet_paths_on_one_street
    # edges
    edge_features = edge_fc['features']
    assert len(edge_features) == 1
    test_line_geometry(edge_features[0]['geometry'])
    test_edge_props(edge_features[0]['properties'])
    # paths
    path_features = path_fc['features']
    assert len(path_features) == 1
    test_line_geometry(path_features[0]['geometry'])
    test_fast_path_prop_types(path_features[0]['properties'])
    # geom length
    path_line = LineString(path_features[0]['geometry']['coordinates'])
    assert round(project_geom(path_line).length, 2) == 82.73
def get_link_edge_data(
    new_node_id: int,
    link_to_edge_spec: LinkToEdgeSpec,
    create_inbound_links: bool,
    create_outbound_links: bool
) -> Tuple[dict, ...]:
    """
    Returns complete edge attribute dictionaries for new linking edges.

    Args:
        new_node_id: An identifier of the new node.
        link_to_edge_spec: Specifies the edge on which the new node was created
            (all of its attributes) and the snap point at which it is split.
        create_inbound_links: A boolean variable indicating whether links should be inbound.
        create_outbound_links: A boolean variable indicating whether links should be outbound.

    Returns:
        A tuple of 0, 2 or 4 edge attribute dictionaries (two per requested
        link direction).
    """
    e_node_from = link_to_edge_spec.edge[E.uv.value][0]
    e_node_to = link_to_edge_spec.edge[E.uv.value][1]
    # create geometry objects for the links by splitting the original edge
    # geometry at the snap point
    link1, link2 = geom_utils.split_line_at_point(
        link_to_edge_spec.edge[E.geometry.value], link_to_edge_spec.snap_point)
    link1_wgs, link2_wgs = tuple(
        geom_utils.project_geom(
            link, geom_epsg=gp_conf.proj_crs_epsg, to_epsg=4326)
        for link in (link1, link2))
    # reversed copies of the link geometries are used for the links that run
    # in the opposite direction to the original edge geometry
    link1_rev, link1_wgs_rev, link2_rev, link2_wgs_rev = (LineString(
        link.coords[::-1]) for link in (link1, link1_wgs, link2, link2_wgs))
    # outbound links run from the new node towards the original edge's endpoints
    outbound_links = tuple(
        __project_link_edge_attrs(u, v, geom, geom_wgs, link_to_edge_spec.edge)
        for u, v, geom, geom_wgs in (
            (new_node_id, e_node_from, link1_rev, link1_wgs_rev),
            (new_node_id, e_node_to, link2, link2_wgs),
        )) if create_outbound_links else ()
    # inbound links run from the original edge's endpoints towards the new node
    inbound_links = tuple(
        __project_link_edge_attrs(u, v, geom, geom_wgs, link_to_edge_spec.edge)
        for u, v, geom, geom_wgs in (
            (e_node_from, new_node_id, link1, link1_wgs),
            (e_node_to, new_node_id, link2_rev, link2_wgs_rev))) if create_inbound_links else ()
    return outbound_links + inbound_links
def get_noise_data(
    hel_wfs_download: bool = False,
    process_hel: bool = False,
    process_espoo: bool = False,
    process_syke: bool = False,
    mask_poly_file: str = None,
    noise_layer_info_csv: str = None,
    noise_data_hel_gpkg: str = None,
    processed_data_gpkg: str = None,
    wfs_hki_url: str = None,
) -> None:
    """Downloads and preprocesses noise data of Helsinki (WFS), Espoo and SYKE.

    Processed noise polygon layers are exported to processed_data_gpkg with a
    common db_low attribute (the lower bound of the noise level of each polygon).

    Raises:
        ValueError: If noise_data_hel_gpkg or processed_data_gpkg is not given.
        Exception: If noise_layer_info_csv cannot be read.
    """
    if None in [noise_data_hel_gpkg, processed_data_gpkg]:
        raise ValueError(
            'Arguments noise_data_hel_gpkg and processed_data_gpkg must be specified'
        )

    try:
        noise_layer_info = pd.read_csv(noise_layer_info_csv).to_dict('records')
    except Exception:
        log.error('Missing or invalid argument noise_layer_info_csv')
        log.error(traceback.format_exc())
        # fail fast: noise_layer_info is required below; previously execution
        # continued and crashed later with a NameError far from the cause
        raise

    if os.path.exists(processed_data_gpkg):
        log.info(f'Removing previously processed data in {processed_data_gpkg}')
        try:
            os.remove(processed_data_gpkg)
        except Exception:
            # best-effort cleanup; export below may still append to old file
            log.error('Error in removing data')

    mask_poly = geom_utils.project_geom(
        gpd.read_file(mask_poly_file)['geometry'][0]).buffer(500)

    if hel_wfs_download:
        log.info('Starting to download noise data from Helsinki (WFS)')
        log.info(f'Initializing WFS connection to {wfs_hki_url}')
        wfs_hki = WebFeatureService(url=wfs_hki_url)
        log.info(
            f'Initialized WFS connection with name: {wfs_hki.identification.title} '
            f'and version: {wfs_hki.version}')
        log.info(
            f'Found available methods: {[operation.name for operation in wfs_hki.operations]}'
        )
        for layer in noise_layer_info:
            if layer[L.source.name] == 'hel':
                try:
                    log.info(
                        f'Downloading WFS layer from {wfs_hki.identification.title}: {layer["name"]}'
                    )
                    noise_features = get_wfs_feature(wfs_hki_url, layer['name'])
                    noise_features.to_file(noise_data_hel_gpkg,
                                           layer=layer['export_name'],
                                           driver='GPKG')
                    log.info(
                        f'Exported features to file: {layer["export_name"]}')
                except Exception:
                    # keep downloading the remaining layers even if one fails
                    log.error(traceback.format_exc())
        log.info('Noise data from Helsinki downloaded (WFS)')
    else:
        log.info('Skipping noise data download from Helsinki WFS')

    log.info('Starting to process noise data')
    for layer in noise_layer_info:
        read_data = False
        if layer[L.source.name] == 'hel' and process_hel:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(noise_data_hel_gpkg, layer=layer['export_name'])
            read_data = True
        if layer[L.source.name] == 'espoo' and process_espoo:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(layer['name'])
            read_data = True
        if layer[L.source.name] == 'syke' and process_syke:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(layer['name'])
            gdf = filter_out_features_outside_mask(
                gdf,
                geom_utils.project_geom(mask_poly, geom_epsg=3879, to_epsg=3047))
            gdf = gdf.to_crs(epsg=3879)
            # extract db low from strings like '55-60' and '>70'
            gdf[layer['noise_attr']] = [
                int(db[-2:]) if (len(db) == 3) else int(db[:2])
                for db in gdf[layer['noise_attr']]
            ]
            read_data = True
        if read_data:
            gdf = explode_multipolygons_to_polygons(gdf)
            gdf = gdf.rename(columns={layer['noise_attr']: L.db_low.name})
            gdf[['geometry', L.db_low.name]].to_file(processed_data_gpkg,
                                                     layer=layer['export_name'],
                                                     driver='GPKG')
    log.info('All data processed')
def new_nearest_node(graph_handler: GraphHandler) -> OdNodeData:
    """Yields a new nearest node created for a fixed test origin point."""
    projected_point = geom_utils.project_geom(
        Point(24.97086446863051, 60.21352729760156))
    nearest = od_handler.get_nearest_node(
        graph_handler, projected_point, avoid_node_creation=False)
    yield nearest
log.info( f'found {real_edge_count - len(edges_within)} edges of {real_edge_count} outside noise data extent' ) # set noise attributes of edges within the data extent to default values (no noise) for edge in edges_within.itertuples(): graph.es[getattr(edge, E.id_ig.name)][E.noises.value] = {} graph.es[getattr(edge, E.id_ig.name)][E.noise_source.value] = '' if (__name__ == '__main__'): log = Logger(printing=True, log_file='noise_graph_update.log', level='debug') in_graph_file = 'data/hma.graphml' out_graph_file = 'out_graph/hma.graphml' data_extent_file = 'data/HMA.geojson' noise_csv_dir = 'out_csv/' data_extent: Polygon = geom_utils.project_geom( gpd.read_file(data_extent_file)['geometry'][0]) graph = ig_utils.read_graphml(in_graph_file, log) set_default_and_na_edge_noises(graph, data_extent, log) noise_graph_update(graph, noise_csv_dir, log) ig_utils.export_to_graphml(graph, out_graph_file) log.info(f'exported graph of {graph.ecount()} edges') log.info('all done')
def convert_otp_graph_to_igraph(
    node_csv_file: str,
    edge_csv_file: str,
    hma_poly_file: str,
    igraph_out_file: str,
    b_export_otp_data_to_gpkg: bool = False,
    b_export_decomposed_igraphs_to_gpkg: bool = False,
    b_export_final_graph_to_gpkg: bool = False,
    debug_otp_graph_gpkg: str = 'debug/otp_graph_features.gpkg',
    debug_igraph_gpkg: str = 'debug/otp2igraph_features.gpkg',
) -> ig.Graph:
    """Converts OTP graph data (node and edge CSV files) to an igraph graph.

    Reads nodes and edges exported from OTP, filters out edges unsuitable for
    walking and cycling, builds a directed igraph graph, drops edges outside
    the (buffered) HMA polygon, removes the smallest disconnected subgraphs
    and isolated nodes, and optionally exports intermediate and final data to
    geopackages for debugging.

    Args:
        node_csv_file: File path to the OTP node CSV (';'-separated).
        edge_csv_file: File path to the OTP edge CSV (';'-separated).
        hma_poly_file: File path to the HMA polygon (readable by geopandas).
        igraph_out_file: File path for the final GraphML export (skipped if falsy).
        b_export_otp_data_to_gpkg: If True, exports raw OTP data for debugging.
        b_export_decomposed_igraphs_to_gpkg: If True, exports subgraph edges for debugging.
        b_export_final_graph_to_gpkg: If True, exports the final graph for debugging.
        debug_otp_graph_gpkg: File path of the debug geopackage for OTP data.
        debug_igraph_gpkg: File path of the debug geopackage for igraph data.

    Returns:
        The final igraph graph object.
    """
    hma_poly = geom_utils.project_geom(gpd.read_file(hma_poly_file)['geometry'][0])

    # 1) read nodes from CSV
    n = pd.read_csv(node_csv_file, sep=';')
    log.info(f'read {len(n.index)} nodes')
    log.debug(f'node column types: {n.dtypes}')
    log.debug(f'nodes head: {n.head()}')
    log.info('creating node gdf')
    # parse WKT geometries; rows with missing geometry get an empty Point
    n[Node.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else Point()
        for geom in n[Node.geometry.name]
    ]
    # keep the original WGS84 geometry in a separate column before reprojecting
    n[Node.geom_wgs.name] = n[Node.geometry.name]
    n = gpd.GeoDataFrame(n, geometry=Node.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting nodes to etrs')
    n = n.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'nodes head: {n.head()}')

    # 2) read edges from CSV
    e = pd.read_csv(edge_csv_file, sep=';')
    log.info(f'read {len(e.index)} edges')
    log.debug(f'edge column types: {e.dtypes}')
    log.debug(f'edges head: {e.head()}')
    log.info('creating edge gdf')
    # parse WKT geometries; rows with missing geometry get an empty LineString
    e[Edge.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else LineString()
        for geom in e[Edge.geometry.name]
    ]
    e[Edge.geom_wgs.name] = e[Edge.geometry.name]
    e = gpd.GeoDataFrame(e, geometry=Edge.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting edges to etrs')
    e = e.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'edges head: {e.head()}')

    # 3) export graph data to gpkg
    if b_export_otp_data_to_gpkg:
        log.info('writing otp graph data to gpkg')
        e.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg,
                                                     layer='edges',
                                                     driver='GPKG')
        log.info(f'exported edges to {debug_otp_graph_gpkg} (layer=edges)')
        n.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg,
                                                     layer='nodes',
                                                     driver='GPKG')
        log.info(f'exported nodes to {debug_otp_graph_gpkg} (layer=nodes)')

    # 4) filter out edges that are unsuitable for both walking and cycling
    def filter_df_by_query(df: pd.DataFrame, query: str, name: str = 'rows'):
        # runs df.query and logs how many rows were filtered out by it
        count_before = len(df.index)
        df_filt = df.query(query).copy()
        filt_ratio = (count_before-len(df_filt.index)) / count_before
        log.info(f'filtered out {count_before-len(df_filt.index)} {name} ({round(filt_ratio * 100, 1)} %) by {query}')
        return df_filt

    e_filt = filter_df_by_query(e, f'{Edge.allows_walking.name} == True or {Edge.allows_biking.name} == True', name='edges')
    e_filt = filter_df_by_query(e_filt, f'{Edge.is_no_thru_traffic.name} == False', name='edges')

    # 5) create dictionaries for converting otp ids to ig ids and vice versa
    log.debug('create maps for converting otp ids to ig ids')
    n[Node.id_ig.name] = np.arange(len(n.index))
    ids_otp_ig = {}
    ids_ig_otp = {}
    for node in n.itertuples():
        ids_otp_ig[getattr(node, Node.id_otp.name)] = getattr(node, Node.id_ig.name)
        ids_ig_otp[getattr(node, Node.id_ig.name)] = getattr(node, Node.id_otp.name)

    # 6) add nodes to graph
    log.info('adding nodes to graph')
    G = ig.Graph(directed=True)
    G.add_vertices(len(n.index))
    for attr in Node:
        if attr.name in n.columns:
            G.vs[attr.value] = list(n[attr.name])
        else:
            log.warning(f'node column {attr.name} not present in dataframe')

    # 7) add edges to graph
    log.info('adding edges to graph')

    # get edge lengths by projected geometry
    e_filt[Edge.length.name] = [
        round(geom.length, 4) if isinstance(geom, LineString) else 0.0
        for geom in e_filt[Edge.geometry.name]
    ]

    def get_ig_uv(edge):
        # maps the otp (from, to) node ids of an edge to igraph node ids
        return (ids_otp_ig[edge['node_orig_id']], ids_otp_ig[edge['node_dest_id']])

    e_filt['uv_ig'] = e_filt.apply(lambda row: get_ig_uv(row), axis=1)
    e_filt[Edge.id_ig.name] = np.arange(len(e_filt.index))
    G.add_edges(list(e_filt['uv_ig']))
    for attr in Edge:
        if attr.name in e_filt.columns:
            G.es[attr.value] = list(e_filt[attr.name])
        else:
            log.warning(f'edge column {attr.name} not present in dataframe')

    # 8) delete edges outside Helsinki Metropolitan Area (HMA)
    hma_buffered = hma_poly.buffer(100)

    def intersects_hma(geom: Union[LineString, None]):
        # edges without geometry are kept (treated as intersecting)
        if not geom or geom.is_empty:
            return True
        return geom.intersects(hma_buffered)

    e_gdf = ig_utils.get_edge_gdf(G)
    log.info('finding edges that intersect with HMA')
    e_gdf['in_hma'] = [intersects_hma(line) for line in e_gdf[Edge.geometry.name]]
    e_gdf_del = e_gdf.query('in_hma == False').copy()
    out_ratio = round(100 * len(e_gdf_del.index)/len(e_gdf.index), 1)
    log.info(f'found {len(e_gdf_del.index)} ({out_ratio} %) edges outside HMA')
    log.info('deleting edges')
    before_count = G.ecount()
    G.delete_edges(e_gdf_del.index.tolist())
    after_count = G.ecount()
    log.info(f'deleted {before_count-after_count} edges')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 9) find and inspect subgraphs by decomposing the graph
    sub_graphs = G.decompose(mode='STRONG')
    log.info(f'found {len(sub_graphs)} subgraphs')

    graph_sizes = [graph.ecount() for graph in sub_graphs]
    log.info(f'subgraphs with more than 10 edges: {len([s for s in graph_sizes if s > 10])}')
    log.info(f'subgraphs with more than 50 edges: {len([s for s in graph_sizes if s > 50])}')
    log.info(f'subgraphs with more than 100 edges: {len([s for s in graph_sizes if s > 100])}')
    log.info(f'subgraphs with more than 500 edges: {len([s for s in graph_sizes if s > 500])}')
    log.info(f'subgraphs with more than 10000 edges: {len([s for s in graph_sizes if s > 10000])}')

    small_graphs = [graph for graph in sub_graphs if graph.ecount() <= 15]
    medium_graphs = [graph for graph in sub_graphs if (graph.ecount() > 15 and graph.ecount() <= 500)]
    big_graphs = [graph for graph in sub_graphs if graph.ecount() > 500]

    small_graph_edges = []
    for graph_id, graph in enumerate(small_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        small_graph_edges.extend(edges)

    medium_graph_edges = []
    for graph_id, graph in enumerate(medium_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        medium_graph_edges.extend(edges)

    big_graph_edges = []
    for graph_id, graph in enumerate(big_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        big_graph_edges.extend(edges)

    if b_export_decomposed_igraphs_to_gpkg:
        log.info('exporting subgraphs to gpkg')
        # graphs with <= 15 edges
        small_graph_edges_gdf = gpd.GeoDataFrame(small_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        small_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='small_graph_edges', driver='GPKG')
        # graphs with 15–500 edges
        medium_graph_edges_gdf = gpd.GeoDataFrame(medium_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        medium_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='medium_graph_edges', driver='GPKG')
        # graphs with > 500 edges
        big_graph_edges_gdf = gpd.GeoDataFrame(big_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        big_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='big_graph_edges', driver='GPKG')
        log.info('graphs exported')

    # 10) delete smallest subgraphs from the graph
    del_edge_ids = [edge[Edge.id_ig.name] for edge in small_graph_edges]
    log.info(f'deleting {len(del_edge_ids)} isolated edges')
    before_count = G.ecount()
    G.delete_edges(del_edge_ids)
    after_count = G.ecount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) edges')

    # 11) delete isolated nodes from the graph
    del_node_ids = [v.index for v in G.vs.select(_degree_eq=0)]
    log.info(f'deleting {len(del_node_ids)} isolated nodes')
    before_count = G.vcount()
    G.delete_vertices(del_node_ids)
    after_count = G.vcount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) nodes')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 12) export graph data to GeoDataFrames for debugging
    if b_export_final_graph_to_gpkg:
        log.info(f'exporting final graph to {debug_igraph_gpkg} for debugging')
        e_gdf = ig_utils.get_edge_gdf(G, attrs=[Edge.id_otp, Edge.id_ig], ig_attrs=['source', 'target'])
        n_gdf = ig_utils.get_node_gdf(G, ig_attrs=['index'])
        e_gdf.to_file(debug_igraph_gpkg, layer='final_graph_edges', driver='GPKG')
        n_gdf.to_file(debug_igraph_gpkg, layer='final_graph_nodes', driver='GPKG')

    if igraph_out_file:
        ig_utils.export_to_graphml(G, igraph_out_file)

    return G