Example #1
def parse_od_settings(path_travel_mode: str, path_routing_mode: str,
                      routing_conf: RoutingConf, orig_lat, orig_lon, dest_lat,
                      dest_lon, aqi_updater: Union[GraphAqiUpdater,
                                                   None]) -> OdSettings:

    try:
        travel_mode = TravelMode(path_travel_mode)
    except Exception:
        raise RoutingException(ErrorKey.INVALID_TRAVEL_MODE_PARAM.value)

    if path_routing_mode == 'short':
        # retain support for legacy path variable 'short'
        routing_mode = RoutingMode.FAST
    else:
        try:
            routing_mode = RoutingMode(path_routing_mode)
        except Exception:
            raise RoutingException(ErrorKey.INVALID_ROUTING_MODE_PARAM.value)

    if travel_mode == TravelMode.BIKE and not conf.cycling_enabled:
        raise RoutingException(ErrorKey.BIKE_ROUTING_NOT_AVAILABLE.value)

    if travel_mode == TravelMode.WALK and not conf.walking_enabled:
        raise RoutingException(ErrorKey.WALK_ROUTING_NOT_AVAILABLE.value)

    if routing_mode == RoutingMode.GREEN and not conf.gvi_paths_enabled:
        raise RoutingException(ErrorKey.GREEN_PATH_ROUTING_NOT_AVAILABLE.value)

    if routing_mode == RoutingMode.QUIET and not conf.quiet_paths_enabled:
        raise RoutingException(ErrorKey.QUIET_PATH_ROUTING_NOT_AVAILABLE.value)

    if routing_mode == RoutingMode.CLEAN:
        if not conf.clean_paths_enabled:
            raise RoutingException(
                ErrorKey.CLEAN_PATH_ROUTING_NOT_AVAILABLE.value)
        aqi_status = (aqi_updater.get_aqi_update_status_response()
                      if aqi_updater else {})
        if not aqi_status.get('aqi_data_updated'):
            raise RoutingException(ErrorKey.NO_REAL_TIME_AQI_AVAILABLE.value)

    if travel_mode == TravelMode.WALK and routing_mode == RoutingMode.SAFE:
        raise RoutingException(
            ErrorKey.SAFE_PATHS_ONLY_AVAILABLE_FOR_BIKE.value)

    orig_latLon = {'lat': float(orig_lat), 'lon': float(orig_lon)}
    dest_latLon = {'lat': float(dest_lat), 'lon': float(dest_lon)}
    orig_point = geom_utils.project_geom(
        geom_utils.get_point_from_lat_lon(orig_latLon))
    dest_point = geom_utils.project_geom(
        geom_utils.get_point_from_lat_lon(dest_latLon))
    sens = routing_conf.sensitivities_by_routing_mode[routing_mode]

    return OdSettings(orig_point, dest_point, travel_mode, routing_mode, sens)
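A hypothetical call site (sketch only: routing_conf is assumed to be a loaded RoutingConf instance and 'bike' is assumed to be a valid TravelMode value), showing how the legacy path variable 'short' resolves to RoutingMode.FAST:

# hypothetical usage sketch; all argument values are illustrative
od_settings = parse_od_settings(
    path_travel_mode='bike',       # assumed to parse into TravelMode.BIKE
    path_routing_mode='short',     # legacy alias -> RoutingMode.FAST
    routing_conf=routing_conf,     # assumed: RoutingConf loaded elsewhere
    orig_lat='60.215', orig_lon='24.980',
    dest_lat='60.205', dest_lon='25.010',
    aqi_updater=None,              # only required for RoutingMode.CLEAN
)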
Example #2
def get_nodata_zones(wfs_hsy_url: str, layer: str, hma_mask: str,
                     export_gpkg: str):
    """1) Downloads polygon layer of municipalities of Helsinki Metropolitan Area, 2)
    Creates buffered polygons from the boundary lines of these polygons,
    3) Exports the boundary-buffers to geopackage.
    """
    mask_poly: Polygon = geom_utils.project_geom(
        gpd.read_file(hma_mask)['geometry'][0]).buffer(500)

    municipalities = get_wfs_feature(wfs_hsy_url, layer)
    municipalities.to_file(export_gpkg,
                           layer='hma_municipalities',
                           driver='GPKG')
    boundaries = []
    for municipality in municipalities.itertuples():
        for poly in municipality.geometry.geoms:
            boundaries.append(poly.boundary.buffer(22))

    dissolved_buffer: Polygon = unary_union(boundaries)
    intersected_buffer = dissolved_buffer.intersection(mask_poly)

    boundary_gdf = gpd.GeoDataFrame(data=[{
        'nodata_zone': 1
    }],
                                    geometry=[intersected_buffer],
                                    crs=CRS.from_epsg(3879))
    boundary_gdf.to_file(export_gpkg,
                         layer='municipal_boundaries',
                         driver='GPKG')
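Why the inner loop iterates geometry.geoms: a municipality stored as a MultiPolygon contributes one boundary per member polygon. A minimal, self-contained shapely sketch of the buffer-and-dissolve step (toy coordinates, not project data):

from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union

# two disjoint square "municipality parts"
part_a = Polygon([(0, 0), (100, 0), (100, 100), (0, 100)])
part_b = Polygon([(300, 0), (400, 0), (400, 100), (300, 100)])
municipality_geom = MultiPolygon([part_a, part_b])

# buffer each part's boundary line separately, then dissolve the buffers
boundary_buffers = [poly.boundary.buffer(22) for poly in municipality_geom.geoms]
dissolved = unary_union(boundary_buffers)
print(dissolved.geom_type)  # MultiPolygon (two separate boundary zones)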
Example #3
def test_path_set_1_fastest_path_geom(path_set_1):
    _, path_fc = path_set_1
    s_path = [feat for feat in path_fc['features'] if feat['properties']['type'] == 'fast'][0]
    geom = s_path['geometry']
    line = LineString(geom['coordinates'])
    proj_line = project_geom(line)
    assert round(proj_line.length, 2) == 1340.0
Example #4
def test_data_for_linking_edges_has_right_values(
    new_nearest_node: OdNodeData,
    new_linking_edge_data: List[dict],
):
    assert len(new_linking_edge_data) == 2
    link_edge = new_linking_edge_data[0]
    assert len(new_nearest_node.link_to_edge_spec.edge) == 28

    edge = new_nearest_node.link_to_edge_spec.edge
    link_edge_len_ratio = link_edge[E.length.value] / edge[E.length.value]
    assert round(link_edge_len_ratio, 2) == 0.1
    assert link_edge[E.length.value] == round(
        link_edge[E.geometry.value].length, 2)
    assert round(
        geom_utils.project_geom(link_edge[E.geom_wgs.value]).length,
        2) == link_edge[E.length.value]
    assert link_edge[E.length.value] == round(
        link_edge_len_ratio * edge[E.length.value], 2)
    assert link_edge[E.bike_time_cost.value] == round(
        link_edge_len_ratio * edge[E.bike_time_cost.value], 2)
    assert link_edge[E.bike_safety_cost.value] == round(
        link_edge_len_ratio * edge[E.bike_safety_cost.value], 2)
    link_edge_total_noise_exp = sum(link_edge[E.noises.value].values())
    assert round(link_edge_total_noise_exp, 2) == link_edge[E.length.value]
    for key in new_nearest_node.link_to_edge_spec.edge.keys():
        if key.startswith('c_') and not key.startswith('c_aq'):
            assert round(link_edge[key]) == round(link_edge_len_ratio *
                                                  edge[key])
Example #5
def test_path_set_1_clean_path_geom(path_set_1):
    _, path_fc = path_set_1
    c_path = [feat for feat in path_fc['features'] if feat['properties']['id'] == path_id][0]
    geom = c_path['geometry']
    line = LineString(geom['coordinates'])
    proj_line = project_geom(line)
    assert round(proj_line.length, 2) == 1372.8
Example #6
def edge_attrs_near_point(lat, lon):
    point = geom_utils.project_geom(
        geom_utils.get_point_from_lat_lon({
            'lat': float(lat),
            'lon': float(lon)
        }))
    edge = G.find_nearest_edge(point)
    return jsonify(
        G.format_edge_dict_for_debugging(edge.attrs) if edge else None)
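geom_utils.project_geom is internal to this project; a rough standalone equivalent, assuming it reprojects WGS84 input to the metric EPSG:3879 CRS used elsewhere in these examples (a pyproj/shapely sketch, not the project's code):

from pyproj import Transformer
from shapely.geometry import Point
from shapely.ops import transform

# assumption: WGS84 (EPSG:4326) in, ETRS-GK25 (EPSG:3879) out
_to_gk25 = Transformer.from_crs(4326, 3879, always_xy=True)

def project_to_gk25(geom):
    """Reproject a shapely geometry from lon/lat to metric coordinates."""
    return transform(_to_gk25.transform, geom)

point = project_to_gk25(Point(24.97, 60.21))  # lon, lat order (always_xy)
print(round(point.x), round(point.y))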
Example #7
def test_func(geometry: dict):
    assert geometry['type'] == 'LineString'
    coords = geometry['coordinates']
    assert isinstance(coords, list)
    line = LineString(coords)
    assert isinstance(line, LineString)
    assert line.is_valid
    # test that the length is a positive number
    proj_line = project_geom(line)
    assert isinstance(proj_line.length, (float, int))
    assert proj_line.length >= 0.1
Example #8
    def test_noise_graph_update(self):
        in_graph_file = 'data/test_graph.graphml'
        out_graph_file = 'temp/test_graph_noises.graphml'
        data_extent_file = 'data/HMA.geojson'
        noise_csv_dir = 'noise_csv/'

        data_extent: Polygon = geom_utils.project_geom(
            gpd.read_file(data_extent_file)['geometry'][0])
        graph = ig_utils.read_graphml(in_graph_file, log)

        noise_graph_update.set_default_and_na_edge_noises(
            graph, data_extent, log)

        noise_graph_update.noise_graph_update(graph, noise_csv_dir, log)
        ig_utils.export_to_graphml(graph, out_graph_file)

        graph = ig_utils.read_graphml(out_graph_file)

        self.assertEqual(graph.ecount(), 3702)

        for edge in graph.es:
            attrs = edge.attributes()

            # check that edge IDs are correct
            self.assertEqual(edge.index, attrs[E.id_ig.value])

            if isinstance(attrs[E.geometry.value], LineString):
                # note: this will fail if some of the edges are outside the noise data extent
                self.assertNotEqual(edge[E.noises.value], None)
                self.assertIsInstance(edge[E.noises.value], dict)
                self.assertNotEqual(edge[E.noise_source.value], None)
                self.assertIsInstance(edge[E.noise_source.value], str)
            else:
                # for edges without geometry the noise attributes should be nodata
                self.assertEqual(edge[E.noises.value], None)
                self.assertEqual(edge[E.noise_source.value], None)

            # if edge noises are nodata then also noise source must be nodata
            if edge[E.noises.value] is None:
                self.assertEqual(edge[E.noise_source.value], None)

            # if edge noises are not nodata but {} then noise source must also be just '' (not nodata)
            if edge[E.noises.value] == {}:
                self.assertEqual(edge[E.noise_source.value], '')

            # if edge has noises it must also have noise source
            if edge[E.noises.value]:
                self.assertNotEqual(edge[E.noise_source.value], '')
                self.assertNotEqual(edge[E.noise_source.value], None)

            # if edge has noise source it must have also noises
            if edge[E.noise_source.value]:
                self.assertNotEqual(edge[E.noises.value], '')
                self.assertNotEqual(edge[E.noises.value], None)
Example #9
def test_path_set_1_quiet_path_geom(path_set_1):
    data = path_set_1
    path_fc = data['path_FC']
    q_path = [
        feat for feat in path_fc['features']
        if feat['properties']['id'] == path_id
    ][0]
    geom = q_path['geometry']
    line = LineString(geom['coordinates'])
    proj_line = project_geom(line)
    assert round(proj_line.length, 2) == 1475.14
Example #10
def main(conf: GraphNoiseJoinConf):
    data_extent: Polygon = geom_utils.project_geom(
        gpd.read_file(conf.noise_data_extent_fp)['geometry'][0])
    graph = ig_utils.read_graphml(conf.graph_in_fp, log)

    set_default_and_na_edge_noises(graph, data_extent)

    noise_graph_update(graph, conf.noise_data_csv_dir)

    ig_utils.export_to_graphml(graph, conf.graph_out_fp)
    log.info(f'exported graph of {graph.ecount()} edges')
    log.info('all done')
Example #11
def test_updates_noises_from_csv_to_graph():
    in_graph_file = f'{base_dir}/data/test_graph.graphml'
    out_graph_file = f'{base_dir}/temp/test_graph_noises.graphml'
    data_extent_file = f'{base_dir}/data/HMA.geojson'
    noise_csv_dir = f'{base_dir}/noise_csv/'

    data_extent: Polygon = geom_utils.project_geom(gpd.read_file(data_extent_file)['geometry'][0])
    graph = ig_utils.read_graphml(in_graph_file)

    noise_graph_update.set_default_and_na_edge_noises(graph, data_extent)

    noise_graph_update.noise_graph_update(graph, noise_csv_dir)
    ig_utils.export_to_graphml(graph, out_graph_file)

    graph = ig_utils.read_graphml(out_graph_file)

    assert graph.ecount() == 3702

    for edge in graph.es:
        attrs = edge.attributes()

        # check that edge IDs are correct
        assert edge.index == attrs[E.id_ig.value]

        if isinstance(attrs[E.geometry.value], LineString):
            # note: this will fail if some of the edges are outside the noise data extent
            assert edge[E.noises.value] is not None
            assert isinstance(edge[E.noises.value], dict)
            assert edge[E.noise_source.value] is not None
            assert isinstance(edge[E.noise_source.value], str)
        else:
            # for edges without geometry the noise attributes should be nodata
            assert edge[E.noises.value] is None
            assert edge[E.noise_source.value] is None

        # if edge noises are nodata then also noise source must be nodata
        if edge[E.noises.value] is None:
            assert edge[E.noise_source.value] is None

        # if edge noises are not nodata but {} then noise source must also be just '' (not nodata)
        if edge[E.noises.value] == {}:
            assert edge[E.noise_source.value] == ''

        # if edge has noises it must also have noise source
        if edge[E.noises.value]:
            assert edge[E.noise_source.value] != ''
            assert edge[E.noise_source.value] is not None

        # if edge has noise source it must have also noises
        if edge[E.noise_source.value]:
            assert edge[E.noises.value] != ''
            assert edge[E.noises.value] is not None
Example #12
def test_routes_quiet_paths_on_same_street(quiet_paths_on_one_street,
                                           test_line_geometry,
                                           test_fast_path_prop_types,
                                           test_edge_props):
    """Tests that if origin and destination are on the same street, the resultsets are still as expected."""
    edge_fc, path_fc = quiet_paths_on_one_street
    # edges
    assert len(edge_fc['features']) == 1
    test_line_geometry(edge_fc['features'][0]['geometry'])
    test_edge_props(edge_fc['features'][0]['properties'])
    # paths
    assert len(path_fc['features']) == 1
    test_line_geometry(path_fc['features'][0]['geometry'])
    test_fast_path_prop_types(path_fc['features'][0]['properties'])
    # geom length
    coords = path_fc['features'][0]['geometry']['coordinates']
    line = LineString(coords)
    line_proj = project_geom(line)
    assert round(line_proj.length, 2) == 82.73
Example #13
def get_link_edge_data(new_node_id: int, link_to_edge_spec: LinkToEdgeSpec,
                       create_inbound_links: bool,
                       create_outbound_links: bool) -> Tuple[dict, ...]:
    """
    Returns complete edge attribute dictionaries for new linking edges.

    Args:
        new_node_id: An identifier of the new node.
        link_to_edge_spec: The attributes of the edge on which the new node was
            created and the snap point geometry of the new node (on that edge).
        create_inbound_links: Whether inbound linking edges should be created.
        create_outbound_links: Whether outbound linking edges should be created.
    """
    e_node_from = link_to_edge_spec.edge[E.uv.value][0]
    e_node_to = link_to_edge_spec.edge[E.uv.value][1]

    # create geometry objects for the links
    link1, link2 = geom_utils.split_line_at_point(
        link_to_edge_spec.edge[E.geometry.value], link_to_edge_spec.snap_point)
    link1_wgs, link2_wgs = tuple(
        geom_utils.project_geom(
            link, geom_epsg=gp_conf.proj_crs_epsg, to_epsg=4326)
        for link in (link1, link2))
    link1_rev, link1_wgs_rev, link2_rev, link2_wgs_rev = (
        LineString(link.coords[::-1])
        for link in (link1, link1_wgs, link2, link2_wgs))

    outbound_links = tuple(
        __project_link_edge_attrs(u, v, geom, geom_wgs, link_to_edge_spec.edge)
        for u, v, geom, geom_wgs in (
            (new_node_id, e_node_from, link1_rev, link1_wgs_rev),
            (new_node_id, e_node_to, link2, link2_wgs),
        )) if create_outbound_links else ()

    inbound_links = tuple(
        __project_link_edge_attrs(u, v, geom, geom_wgs, link_to_edge_spec.edge)
        for u, v, geom, geom_wgs in (
            (e_node_from, new_node_id, link1, link1_wgs),
            (e_node_to, new_node_id, link2_rev, link2_wgs_rev),
        )) if create_inbound_links else ()

    return outbound_links + inbound_links
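geom_utils.split_line_at_point is likewise project-internal; a plain-shapely sketch of the same idea via shapely.ops.substring (a stand-in illustration, not the project's implementation):

from shapely.geometry import LineString, Point
from shapely.ops import substring

edge_geom = LineString([(0, 0), (50, 0), (100, 0)])
snap_point = Point(30, 0.4)  # e.g. an origin projected near the edge

# measure the distance along the line to the nearest point, then cut there
d = edge_geom.project(snap_point)
link1 = substring(edge_geom, 0, d)
link2 = substring(edge_geom, d, edge_geom.length)
print(link1.length, link2.length)  # 30.0 and 70.0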
Example #14
def get_noise_data(
    hel_wfs_download: bool = False,
    process_hel: bool = False,
    process_espoo: bool = False,
    process_syke: bool = False,
    mask_poly_file: str = None,
    noise_layer_info_csv: str = None,
    noise_data_hel_gpkg: str = None,
    processed_data_gpkg: str = None,
    wfs_hki_url: str = None,
) -> None:

    if None in [noise_data_hel_gpkg, processed_data_gpkg]:
        raise ValueError(
            'Arguments noise_data_hel_gpkg and processed_data_gpkg must be specified'
        )

    try:
        noise_layer_info = pd.read_csv(noise_layer_info_csv).to_dict('records')
    except Exception:
        log.error('Missing or invalid argument noise_layer_info_csv')
        log.error(traceback.format_exc())
        raise

    if os.path.exists(processed_data_gpkg):
        log.info(
            f'Removing previously processed data in {processed_data_gpkg}')
        try:
            os.remove(processed_data_gpkg)
        except Exception:
            log.error('Error in removing data')

    mask_poly = geom_utils.project_geom(
        gpd.read_file(mask_poly_file)['geometry'][0]).buffer(500)

    if hel_wfs_download:
        log.info('Starting to download noise data from Helsinki (WFS)')
        log.info(f'Initializing WFS connection to {wfs_hki_url}')
        wfs_hki = WebFeatureService(url=wfs_hki_url)
        log.info(
            f'Initialized WFS connection with name: {wfs_hki.identification.title} '
            f'and version: {wfs_hki.version}')
        log.info(
            f'Found available methods: {[operation.name for operation in wfs_hki.operations]}'
        )

        for layer in noise_layer_info:
            if layer[L.source.name] == 'hel':
                try:
                    log.info(
                        f'Downloading WFS layer from {wfs_hki.identification.title}: {layer["name"]}'
                    )
                    noise_features = get_wfs_feature(wfs_hki_url,
                                                     layer['name'])
                    noise_features.to_file(noise_data_hel_gpkg,
                                           layer=layer['export_name'],
                                           driver='GPKG')
                    log.info(
                        f'Exported features to file: {layer["export_name"]}')
                except Exception:
                    log.error(traceback.format_exc())

        log.info('Noise data from Helsinki downloaded (WFS)')
    else:
        log.info('Skipping noise data download from Helsinki WFS')

    log.info('Starting to process noise data')
    for layer in noise_layer_info:
        read_data = False
        if layer[L.source.name] == 'hel' and process_hel:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(noise_data_hel_gpkg,
                                layer=layer['export_name'])
            read_data = True
        if layer[L.source.name] == 'espoo' and process_espoo:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(layer['name'])
            read_data = True
        if layer[L.source.name] == 'syke' and process_syke:
            log.info(
                f'Processing layer from {layer["source"]}: {layer["name"]}')
            gdf = gpd.read_file(layer['name'])
            gdf = filter_out_features_outside_mask(
                gdf,
                geom_utils.project_geom(mask_poly,
                                        geom_epsg=3879,
                                        to_epsg=3047))
            gdf = gdf.to_crs(epsg=3879)
            # extract db low from strings like '55-60' and '>70'
            gdf[layer['noise_attr']] = [
                int(db[-2:]) if (len(db) == 3) else int(db[:2])
                for db in gdf[layer['noise_attr']]
            ]
            read_data = True
        if read_data:
            gdf = explode_multipolygons_to_polygons(gdf)
            gdf = gdf.rename(columns={layer['noise_attr']: L.db_low.name})
            gdf[['geometry',
                 L.db_low.name]].to_file(processed_data_gpkg,
                                         layer=layer['export_name'],
                                         driver='GPKG')

    log.info('All data processed')
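The db-low extraction above depends on exactly two label shapes in the SYKE noise data ('55-60' style ranges and '>70' style open bounds); a standalone check of that one-liner:

def parse_db_low(db: str) -> int:
    """Extract the lower dB bound from labels like '55-60' and '>70'."""
    return int(db[-2:]) if len(db) == 3 else int(db[:2])

assert parse_db_low('>70') == 70    # len 3 -> take the last two digits
assert parse_db_low('55-60') == 55  # len 5 -> take the first two digits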
def new_nearest_node(graph_handler: GraphHandler) -> OdNodeData:
    point = geom_utils.project_geom(Point(24.97086446863051,
                                          60.21352729760156))
    yield od_handler.get_nearest_node(graph_handler,
                                      point,
                                      avoid_node_creation=False)

def set_default_and_na_edge_noises(graph, data_extent: Polygon, log) -> None:
    # ... (the beginning of the function is omitted from this snippet; it
    # selects the edges located within data_extent into edges_within, out of
    # real_edge_count edges in total)
    log.info(
        f'found {real_edge_count - len(edges_within)} edges of {real_edge_count} outside noise data extent'
    )

    # set noise attributes of edges within the data extent to default values (no noise)
    for edge in edges_within.itertuples():
        graph.es[getattr(edge, E.id_ig.name)][E.noises.value] = {}
        graph.es[getattr(edge, E.id_ig.name)][E.noise_source.value] = ''


if __name__ == '__main__':
    log = Logger(printing=True,
                 log_file='noise_graph_update.log',
                 level='debug')
    in_graph_file = 'data/hma.graphml'
    out_graph_file = 'out_graph/hma.graphml'
    data_extent_file = 'data/HMA.geojson'
    noise_csv_dir = 'out_csv/'

    data_extent: Polygon = geom_utils.project_geom(
        gpd.read_file(data_extent_file)['geometry'][0])
    graph = ig_utils.read_graphml(in_graph_file, log)

    set_default_and_na_edge_noises(graph, data_extent, log)

    noise_graph_update(graph, noise_csv_dir, log)

    ig_utils.export_to_graphml(graph, out_graph_file)
    log.info(f'exported graph of {graph.ecount()} edges')
    log.info('all done')
def convert_otp_graph_to_igraph(
    node_csv_file: str,
    edge_csv_file: str,
    hma_poly_file: str,
    igraph_out_file: str,
    b_export_otp_data_to_gpkg: bool = False,
    b_export_decomposed_igraphs_to_gpkg: bool = False,
    b_export_final_graph_to_gpkg: bool = False,
    debug_otp_graph_gpkg: str = 'debug/otp_graph_features.gpkg',
    debug_igraph_gpkg: str = 'debug/otp2igraph_features.gpkg',
) -> ig.Graph:

    hma_poly = geom_utils.project_geom(gpd.read_file(hma_poly_file)['geometry'][0])

    # 1) read nodes from CSV
    n = pd.read_csv(node_csv_file, sep=';')
    log.info(f'read {len(n.index)} nodes')
    log.debug(f'node column types: {n.dtypes}')
    log.debug(f'nodes head: {n.head()}')
    log.info('creating node gdf')
    n[Node.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else Point() for geom in n[Node.geometry.name]
    ]
    n[Node.geom_wgs.name] = n[Node.geometry.name]
    n = gpd.GeoDataFrame(n, geometry=Node.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting nodes to etrs')
    n = n.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'nodes head: {n.head()}')

    # 2) read edges from CSV
    e = pd.read_csv(edge_csv_file, sep=';')
    log.info(f'read {len(e.index)} edges')
    log.debug(f'edge column types: {e.dtypes}')
    log.debug(f'edges head: {e.head()}')
    log.info('creating edge gdf')
    e[Edge.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else LineString() for geom in e[Edge.geometry.name]
    ]
    e[Edge.geom_wgs.name] = e[Edge.geometry.name]
    e = gpd.GeoDataFrame(e, geometry=Edge.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting edges to etrs')
    e = e.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'edges head: {e.head()}')

    # 3) export graph data to gpkg
    if b_export_otp_data_to_gpkg:
        log.info('writing otp graph data to gpkg')
        e.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='edges', driver='GPKG')
        log.info(f'exported edges to {debug_otp_graph_gpkg} (layer=edges)')
        n.drop(columns=[Node.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='nodes', driver='GPKG')
        log.info(f'exported nodes to {debug_otp_graph_gpkg} (layer=nodes)')

    # 4) filter out edges that are unsuitable for both walking and cycling
    def filter_df_by_query(df: pd.DataFrame, query: str, name: str = 'rows'):
        count_before = len(df.index)
        df_filt = df.query(query).copy()
        filt_ratio = (count_before-len(df_filt.index)) / count_before
        log.info(f'filtered out {count_before-len(df_filt.index)} {name} ({round(filt_ratio * 100, 1)} %) by {query}')
        return df_filt

    e_filt = filter_df_by_query(e, f'{Edge.allows_walking.name} == True or {Edge.allows_biking.name} == True', name='edges')
    e_filt = filter_df_by_query(e_filt, f'{Edge.is_no_thru_traffic.name} == False', name='edges')

    # 5) create dictionaries for converting otp ids to ig ids and vice versa
    log.debug('create maps for converting otp ids to ig ids')
    n[Node.id_ig.name] = np.arange(len(n.index))
    ids_otp_ig = {}
    ids_ig_otp = {}
    for node in n.itertuples():
        ids_otp_ig[getattr(node, Node.id_otp.name)] = getattr(node, Node.id_ig.name)
        ids_ig_otp[getattr(node, Node.id_ig.name)] = getattr(node, Node.id_otp.name)

    # 6) add nodes to graph
    log.info('adding nodes to graph')
    G = ig.Graph(directed=True)
    G.add_vertices(len(n.index))
    for attr in Node:
        if attr.name in n.columns:
            G.vs[attr.value] = list(n[attr.name])
        else:
            log.warning(f'node column {attr.name} not present in dataframe')

    # 7) add edges to graph
    log.info('adding edges to graph')

    # get edge lengths by projected geometry
    e_filt[Edge.length.name] = [
        round(geom.length, 4) if isinstance(geom, LineString) else 0.0 for geom in e_filt[Edge.geometry.name]
    ]

    def get_ig_uv(edge):
        return (ids_otp_ig[edge['node_orig_id']], ids_otp_ig[edge['node_dest_id']])

    e_filt['uv_ig'] = e_filt.apply(lambda row: get_ig_uv(row), axis=1)
    e_filt[Edge.id_ig.name] = np.arange(len(e_filt.index))
    G.add_edges(list(e_filt['uv_ig']))
    for attr in Edge:
        if attr.name in e_filt.columns:
            G.es[attr.value] = list(e_filt[attr.name])
        else:
            log.warning(f'edge column {attr.name} not present in dataframe')

    # 8) delete edges outside Helsinki Metropolitan Area (HMA)
    hma_buffered = hma_poly.buffer(100)

    def intersects_hma(geom: Union[LineString, None]):
        if not geom or geom.is_empty:
            return True  # keep edges that have no geometry
        return geom.intersects(hma_buffered)

    e_gdf = ig_utils.get_edge_gdf(G)
    log.info('finding edges that intersect with HMA')
    e_gdf['in_hma'] = [intersects_hma(line) for line in e_gdf[Edge.geometry.name]]
    e_gdf_del = e_gdf.query('in_hma == False').copy()
    out_ratio = round(100 * len(e_gdf_del.index)/len(e_gdf.index), 1)
    log.info(f'found {len(e_gdf_del.index)} ({out_ratio} %) edges outside HMA')

    log.info('deleting edges')
    before_count = G.ecount()
    G.delete_edges(e_gdf_del.index.tolist())
    after_count = G.ecount()
    log.info(f'deleted {before_count-after_count} edges')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 9) find and inspect subgraphs by decomposing the graph
    sub_graphs = G.decompose(mode='STRONG')
    log.info(f'found {len(sub_graphs)} subgraphs')

    graph_sizes = [graph.ecount() for graph in sub_graphs]
    log.info(f'subgraphs with more than 10 edges: {len([s for s in graph_sizes if s > 10])}')
    log.info(f'subgraphs with more than 50 edges: {len([s for s in graph_sizes if s > 50])}')
    log.info(f'subgraphs with more than 100 edges: {len([s for s in graph_sizes if s > 100])}')
    log.info(f'subgraphs with more than 500 edges: {len([s for s in graph_sizes if s > 500])}')
    log.info(f'subgraphs with more than 10000 edges: {len([s for s in graph_sizes if s > 10000])}')

    small_graphs = [graph for graph in sub_graphs if graph.ecount() <= 15]
    medium_graphs = [graph for graph in sub_graphs if (graph.ecount() > 15 and graph.ecount() <= 500)]
    big_graphs = [graph for graph in sub_graphs if graph.ecount() > 500]

    small_graph_edges = []
    for graph_id, graph in enumerate(small_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        small_graph_edges.extend(edges)

    medium_graph_edges = []
    for graph_id, graph in enumerate(medium_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        medium_graph_edges.extend(edges)

    big_graph_edges = []
    for graph_id, graph in enumerate(big_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        big_graph_edges.extend(edges)

    if b_export_decomposed_igraphs_to_gpkg:
        log.info('exporting subgraphs to gpkg')
        # graphs with <= 15 edges
        small_graph_edges_gdf = gpd.GeoDataFrame(small_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        small_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='small_graph_edges', driver='GPKG')
        # graphs with 16–500 edges
        medium_graph_edges_gdf = gpd.GeoDataFrame(medium_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        medium_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='medium_graph_edges', driver='GPKG')
        # graphs with > 500 edges
        big_graph_edges_gdf = gpd.GeoDataFrame(big_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        big_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='big_graph_edges', driver='GPKG')
        log.info('graphs exported')

    # 10) delete smallest subgraphs from the graph
    del_edge_ids = [edge[Edge.id_ig.name] for edge in small_graph_edges]
    log.info(f'deleting {len(del_edge_ids)} isolated edges')
    before_count = G.ecount()
    G.delete_edges(del_edge_ids)
    after_count = G.ecount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) edges')

    # 11) delete isolated nodes from the graph
    del_node_ids = [v.index for v in G.vs.select(_degree_eq=0)]
    log.info(f'deleting {len(del_node_ids)} isolated nodes')
    before_count = G.vcount()
    G.delete_vertices(del_node_ids)
    after_count = G.vcount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) nodes')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 12) export final graph data to GeoPackage for debugging

    if b_export_final_graph_to_gpkg:
        log.info(f'exporting final graph to {debug_igraph_gpkg} for debugging')
        e_gdf = ig_utils.get_edge_gdf(G, attrs=[Edge.id_otp, Edge.id_ig], ig_attrs=['source', 'target'])
        n_gdf = ig_utils.get_node_gdf(G, ig_attrs=['index'])
        e_gdf.to_file(debug_igraph_gpkg, layer='final_graph_edges', driver='GPKG')
        n_gdf.to_file(debug_igraph_gpkg, layer='final_graph_nodes', driver='GPKG')

    if igraph_out_file:
        ig_utils.export_to_graphml(G, igraph_out_file)

    return G
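A minimal, self-contained igraph sketch of the decomposition steps (9-10 above): find strongly connected components, map small components back to the parent graph via the edge id attribute and delete their edges (toy graph; the 'id_ig' attribute name is assumed as in the code above):

import igraph as ig

# toy directed graph: a 3-node cycle plus a detached mutual pair
G = ig.Graph(directed=True)
G.add_vertices(5)
G.add_edges([(0, 1), (1, 2), (2, 0), (3, 4), (4, 3)])
G.es['id_ig'] = list(range(G.ecount()))

sub_graphs = G.decompose(mode='strong')
small = [g for g in sub_graphs if g.ecount() <= 2]

# collect the original edge ids of the small components, then delete them
del_ids = [e['id_ig'] for g in small for e in g.es]
G.delete_edges(del_ids)
print(G.ecount())  # 3: only the cycle remains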