def test_noise_graph_update(self):
        """Join noise data to the test graph and validate the exported edge attributes.

        Steps exercised: read the test graph, initialise the edge noise
        attributes with ``set_default_and_na_edge_noises``, join noises from
        the CSV directory with ``noise_graph_update``, export the graph to
        GraphML and read the exported file back. All assertions are made
        against the re-read graph.
        """
        in_graph_file = 'data/test_graph.graphml'
        out_graph_file = 'temp/test_graph_noises.graphml'
        data_extent_file = 'data/HMA.geojson'
        noise_csv_dir = 'noise_csv/'

        data_extent: Polygon = geom_utils.project_geom(
            gpd.read_file(data_extent_file)['geometry'][0])
        graph = ig_utils.read_graphml(in_graph_file, log)

        noise_graph_update.set_default_and_na_edge_noises(
            graph, data_extent, log)

        noise_graph_update.noise_graph_update(graph, noise_csv_dir, log)
        ig_utils.export_to_graphml(graph, out_graph_file)

        graph = ig_utils.read_graphml(out_graph_file)

        self.assertEqual(graph.ecount(), 3702)

        for edge in graph.es:
            attrs = edge.attributes()
            noises = attrs[E.noises.value]
            noise_source = attrs[E.noise_source.value]

            # check that edge IDs are correct
            self.assertEqual(edge.index, attrs[E.id_ig.value])

            if isinstance(attrs[E.geometry.value], LineString):
                # note: this will fail if some of the edges are outside the noise data extent
                self.assertIsNotNone(noises)
                self.assertIsInstance(noises, dict)
                self.assertIsNotNone(noise_source)
                self.assertIsInstance(noise_source, str)
            else:
                # for edges without geometry the noise attributes should be nodata
                self.assertIsNone(noises)
                self.assertIsNone(noise_source)

            # if edge noises are nodata then also noise source must be nodata
            if noises is None:
                self.assertIsNone(noise_source)

            # if edge noises are not nodata but {} then noise source must also be just '' (not nodata)
            if noises == {}:
                self.assertEqual(noise_source, '')

            # if edge has noises it must also have noise source
            if noises:
                self.assertIsNotNone(noise_source)
                self.assertNotEqual(noise_source, '')

            # if edge has noise source it must have also noises
            if noise_source:
                self.assertIsNotNone(noises)
                # noises is a dict, so "no noises" is {} (comparing it to '' can never fail)
                self.assertNotEqual(noises, {})
Example #2
0
def main(conf: GraphNoiseJoinConf):
    """Join noise data to the graph described by ``conf`` and export the result.

    Reads the noise data extent and the input graph from the paths in
    ``conf``, initialises the edge noise attributes, updates them from the
    noise CSV directory and writes the graph to ``conf.graph_out_fp``.
    """
    extent_gdf = gpd.read_file(conf.noise_data_extent_fp)
    data_extent: Polygon = geom_utils.project_geom(extent_gdf['geometry'][0])

    graph = ig_utils.read_graphml(conf.graph_in_fp, log)

    # set default / nodata noise attributes first, then join the noises from CSV
    set_default_and_na_edge_noises(graph, data_extent)
    noise_graph_update(graph, conf.noise_data_csv_dir)

    ig_utils.export_to_graphml(graph, conf.graph_out_fp)

    edge_count = graph.ecount()
    log.info(f'exported graph of {edge_count} edges')
    log.info('all done')
def test_updates_noises_from_csv_to_graph():
    """Join noise data to the test graph and validate the exported edge attributes.

    Steps exercised: read the test graph, initialise the edge noise attributes
    with ``set_default_and_na_edge_noises``, join noises from the CSV directory
    with ``noise_graph_update``, export the graph to GraphML and read the
    exported file back. All assertions are made against the re-read graph.
    """
    in_graph_file = f'{base_dir}/data/test_graph.graphml'
    out_graph_file = f'{base_dir}/temp/test_graph_noises.graphml'
    data_extent_file = f'{base_dir}/data/HMA.geojson'
    noise_csv_dir = f'{base_dir}/noise_csv/'

    data_extent: Polygon = geom_utils.project_geom(gpd.read_file(data_extent_file)['geometry'][0])
    graph = ig_utils.read_graphml(in_graph_file)

    noise_graph_update.set_default_and_na_edge_noises(graph, data_extent)

    noise_graph_update.noise_graph_update(graph, noise_csv_dir)
    ig_utils.export_to_graphml(graph, out_graph_file)

    graph = ig_utils.read_graphml(out_graph_file)

    assert graph.ecount() == 3702

    for edge in graph.es:
        attrs = edge.attributes()
        noises = attrs[E.noises.value]
        noise_source = attrs[E.noise_source.value]

        # check that edge IDs are correct
        assert edge.index == attrs[E.id_ig.value]

        if isinstance(attrs[E.geometry.value], LineString):
            # note: this will fail if some of the edges are outside the noise data extent
            assert noises is not None
            assert isinstance(noises, dict)
            assert noise_source is not None
            assert isinstance(noise_source, str)
        else:
            # for edges without geometry the noise attributes should be nodata
            assert noises is None
            assert noise_source is None

        # if edge noises are nodata then also noise source must be nodata
        if noises is None:
            assert noise_source is None

        # if edge noises are not nodata but {} then noise source must also be just '' (not nodata)
        if noises == {}:
            assert noise_source == ''

        # if edge has noises it must also have noise source
        if noises:
            assert noise_source is not None
            assert noise_source != ''

        # if edge has noise source it must have also noises
        if noise_source:
            assert noises is not None
            # noises is a dict, so "no noises" is {} (comparing it to '' can never fail)
            assert noises != {}
Example #4
0
        log.info('Writing edges to PostGIS')
        write_to_postgis = db.get_db_writer(log)
        write_to_postgis(edges_2_db[[E.id_way.name, 'geometry']],
                         edge_table_db_name)

        log.info(
            'Wrote graph edges to db, run land_cover_overlay_analysis.py next')
        exit()

    else:
        log.info(
            f'Edges were already exported to db table: {edge_table_db_name}')

    # get mean GSV GVI per edge
    gsv_gvi_list_by_way_id = get_gsv_gvi_list_by_way_id(
        log, edge_gdf, gsv_gvi_gdf)
    mean_gsv_gvi_by_way_id = get_mean_gsv_gvi_by_way_id(
        log, gsv_gvi_list_by_way_id, edge_gdf)

    # fetch low and high vegetation shares from db per edge buffer (way ID)
    low_veg_share_by_way_id = lc_analysis.get_low_veg_share_by_way_id()
    high_veg_share_by_way_id = lc_analysis.get_high_veg_share_by_way_id()

    graph = update_gvi_attributes_to_graph(graph, mean_gsv_gvi_by_way_id,
                                           low_veg_share_by_way_id,
                                           high_veg_share_by_way_id)

    ig_utils.export_to_graphml(graph, graph_file_out)

    log.info(f'Exported graph to file {graph_file_out}')
    log.info(
        f'found {real_edge_count - len(edges_within)} edges of {real_edge_count} outside noise data extent'
    )

    # set noise attributes of edges within the data extent to default values (no noise)
    for edge in edges_within.itertuples():
        graph.es[getattr(edge, E.id_ig.name)][E.noises.value] = {}
        graph.es[getattr(edge, E.id_ig.name)][E.noise_source.value] = ''


if __name__ == '__main__':
    # Script entry point: join noise data to the HMA graph using hard-coded
    # input/output paths and log progress to noise_graph_update.log.
    log = Logger(
        printing=True,
        log_file='noise_graph_update.log',
        level='debug',
    )

    in_graph_file = 'data/hma.graphml'
    out_graph_file = 'out_graph/hma.graphml'
    data_extent_file = 'data/HMA.geojson'
    noise_csv_dir = 'out_csv/'

    # the first geometry of the extent file is used as the noise data extent
    extent_geom = gpd.read_file(data_extent_file)['geometry'][0]
    data_extent: Polygon = geom_utils.project_geom(extent_geom)
    graph = ig_utils.read_graphml(in_graph_file, log)

    # initialise edge noise attributes, then update them from the noise CSVs
    set_default_and_na_edge_noises(graph, data_extent, log)
    noise_graph_update(graph, noise_csv_dir, log)

    ig_utils.export_to_graphml(graph, out_graph_file)
    log.info(f'exported graph of {graph.ecount()} edges')
    log.info('all done')
Example #6
0
def main(conf: GraphGreenViewJoinConf):
    """Join green view (GVI) attributes to the graph described by ``conf``.

    The function works in two phases:

    1. If the edge table ``conf.db_edge_table`` is not yet in the database,
       30 m buffers of the unique edge geometries are written there and the
       process exits, so that the land cover overlay analysis can be run next.
    2. Otherwise the mean GSV GVI per way ID is calculated, the low and high
       vegetation shares are fetched from the database, the graph is updated
       with them and exported to ``conf.graph_file_out``.

    Raises:
        SystemExit: after the edges have been exported to the database
            (phase 1).
    """
    # local import: sys.exit() is used instead of the site-provided exit(),
    # which is intended for the interactive shell only
    import sys

    edge_table_db_name = conf.db_edge_table

    execute_sql = db.get_sql_executor(log)
    db_tables = db.get_db_table_names(execute_sql)

    # load GSV GVI points from GPKG
    gsv_gvi_gdf = load_gsv_gvi_gdf(conf.greenery_points_fp)

    # load street network graph from GraphML
    graph = ig_utils.read_graphml(conf.graph_file_in)
    log.info(f'Read graph of {graph.ecount()} edges')

    # load edge_gdf
    edge_gdf: GeoDataFrame = ig_utils.get_edge_gdf(
        graph, attrs=[E.id_ig, E.length, E.id_way])
    edge_gdf = edge_gdf.drop_duplicates(E.id_way.name, keep='first')
    # drop edges without geometry
    edge_gdf = edge_gdf[edge_gdf['geometry'].apply(
        lambda geom: isinstance(geom, LineString))]
    log.info(f'Subset edge_gdf to {len(edge_gdf)} unique geometries')

    # export edges to db if not there yet for land cover overlay analysis
    if edge_table_db_name not in db_tables:
        # add simplified buffers to edge_gdf
        edges_2_db = edge_gdf.copy()
        log.info('Calculating 30m buffers from edge geometries')
        edges_2_db['b30'] = [
            geom.buffer(30, resolution=3) for geom in edges_2_db['geometry']
        ]
        # make the buffer the active geometry and keep the line as a plain column
        edges_2_db = edges_2_db.rename(columns={
            'geometry': 'line_geom',
            'b30': 'geometry'
        })
        edges_2_db = edges_2_db.set_geometry('geometry')

        log.info('Writing edges to PostGIS')
        write_to_postgis = db.get_db_writer(log)
        write_to_postgis(edges_2_db[[E.id_way.name, 'geometry']],
                         edge_table_db_name)

        log.info(
            'Wrote graph edges to db, run land_cover_overlay_analysis.py next')
        sys.exit()

    log.info(
        f'Edges were already exported to db table: {edge_table_db_name}')

    # get mean GSV GVI per edge
    gsv_gvi_list_by_way_id = get_gsv_gvi_list_by_way_id(edge_gdf, gsv_gvi_gdf)
    mean_gsv_gvi_by_way_id = get_mean_gsv_gvi_by_way_id(
        gsv_gvi_list_by_way_id, edge_gdf)

    # fetch low and high vegetation shares from db per edge buffer (way ID)
    low_veg_share_by_way_id = lc_analysis.get_low_veg_share_by_way_id(
        conf.db_low_veg_share_table)
    high_veg_share_by_way_id = lc_analysis.get_high_veg_share_by_way_id(
        conf.db_high_veg_share_table)

    graph = update_gvi_attributes_to_graph(graph, mean_gsv_gvi_by_way_id,
                                           low_veg_share_by_way_id,
                                           high_veg_share_by_way_id)

    ig_utils.export_to_graphml(graph, conf.graph_file_out)

    log.info(f'Exported graph to file {conf.graph_file_out}')
def _ratio_percent(part: int, total: int) -> float:
    """Return ``part / total`` as a percentage rounded to one decimal (0.0 if total is 0)."""
    return round(100 * part / total, 1) if total else 0.0


def _count_edge_id_mismatches(graph) -> int:
    """Count edges whose id_ig attribute differs from their igraph index."""
    return sum(1 for edge in graph.es if edge[Edge.id_ig.value] != edge.index)


def _reassign_ig_ids(graph) -> None:
    """Write the current igraph indexes to the id_ig attributes of edges and nodes.

    The number of edges with an outdated id is logged before and after.
    """
    log.info(f'invalid edge ids: {_count_edge_id_mismatches(graph)}')
    graph.es[Edge.id_ig.value] = [e.index for e in graph.es]
    graph.vs[Node.id_ig.value] = [v.index for v in graph.vs]
    log.info(f'invalid edge ids: {_count_edge_id_mismatches(graph)} (after re-indexing)')


def _collect_subgraph_edges(sub_graphs) -> list:
    """Return the edge dicts of the given subgraphs, each tagged with its 'graph_id'."""
    collected = []
    for graph_id, graph in enumerate(sub_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        collected.extend(edges)
    return collected


def convert_otp_graph_to_igraph(
    node_csv_file: str,
    edge_csv_file: str,
    hma_poly_file: str,
    igraph_out_file: str,
    b_export_otp_data_to_gpkg: bool = False,
    b_export_decomposed_igraphs_to_gpkg: bool = False,
    b_export_final_graph_to_gpkg: bool = False,
    debug_otp_graph_gpkg: str = 'debug/otp_graph_features.gpkg',
    debug_igraph_gpkg: str = 'debug/otp2igraph_features.gpkg',
) -> ig.Graph:
    """Build a directed igraph graph from OTP node and edge CSV files.

    Processing steps: read nodes and edges (';'-separated CSV with WKT
    geometries) and reproject them, drop edges that allow neither walking nor
    biking or that are marked as no-thru-traffic, build the graph, delete
    edges that do not intersect the (100 m buffered) HMA polygon, delete
    strongly connected subgraphs of at most 15 edges, delete isolated nodes
    and re-assign igraph indexes to the id_ig attributes.

    Args:
        node_csv_file: Path to the node CSV file.
        edge_csv_file: Path to the edge CSV file.
        hma_poly_file: Path to a file whose first geometry is the HMA extent.
        igraph_out_file: GraphML output path; nothing is exported if falsy.
        b_export_otp_data_to_gpkg: Export the raw nodes and edges to
            ``debug_otp_graph_gpkg``.
        b_export_decomposed_igraphs_to_gpkg: Export the edges of the
            decomposed subgraphs to ``debug_igraph_gpkg``.
        b_export_final_graph_to_gpkg: Export the final nodes and edges to
            ``debug_igraph_gpkg``.
        debug_otp_graph_gpkg: GeoPackage path for the raw OTP data.
        debug_igraph_gpkg: GeoPackage path for the igraph debug layers.

    Returns:
        The resulting graph.
    """

    hma_poly = geom_utils.project_geom(gpd.read_file(hma_poly_file)['geometry'][0])

    # 1) read nodes from CSV
    n = pd.read_csv(node_csv_file, sep=';')
    log.info(f'read {len(n.index)} nodes')
    log.debug(f'node column types: {n.dtypes}')
    log.debug(f'nodes head: {n.head()}')
    log.info('creating node gdf')
    # missing (non-string) geometries are replaced with empty points
    n[Node.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else Point() for geom in n[Node.geometry.name]
    ]
    n[Node.geom_wgs.name] = n[Node.geometry.name]
    n = gpd.GeoDataFrame(n, geometry=Node.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting nodes to etrs')
    n = n.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'nodes head: {n.head()}')

    # 2) read edges from CSV
    e = pd.read_csv(edge_csv_file, sep=';')
    log.info(f'read {len(e.index)} edges')
    log.debug(f'edge column types: {e.dtypes}')
    log.debug(f'edges head: {e.head()}')
    log.info('creating edge gdf')
    # missing (non-string) geometries are replaced with empty linestrings
    e[Edge.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else LineString() for geom in e[Edge.geometry.name]
    ]
    e[Edge.geom_wgs.name] = e[Edge.geometry.name]
    e = gpd.GeoDataFrame(e, geometry=Edge.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting edges to etrs')
    e = e.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'edges head: {e.head()}')

    # 3) export graph data to gpkg
    if b_export_otp_data_to_gpkg:
        log.info('writing otp graph data to gpkg')
        e.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='edges', driver='GPKG')
        log.info(f'exported edges to {debug_otp_graph_gpkg} (layer=edges)')
        # the node frame holds the Node.geom_wgs column (created in step 1)
        n.drop(columns=[Node.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='nodes', driver='GPKG')
        log.info(f'exported nodes to {debug_otp_graph_gpkg} (layer=nodes)')

    # 4) filter out edges that are unsuitable for both walking and cycling
    def filter_df_by_query(df: pd.DataFrame, query: str, name: str = 'rows'):
        count_before = len(df.index)
        df_filt = df.query(query).copy()
        count_removed = count_before - len(df_filt.index)
        # an empty input frame would otherwise cause a division by zero
        filt_ratio = count_removed / count_before if count_before else 0.0
        log.info(f'filtered out {count_removed} {name} ({round(filt_ratio * 100, 1)} %) by {query}')
        return df_filt

    e_filt = filter_df_by_query(e, f'{Edge.allows_walking.name} == True or {Edge.allows_biking.name} == True', name='edges')
    e_filt = filter_df_by_query(e_filt, f'{Edge.is_no_thru_traffic.name} == False', name='edges')

    # 5) create a dictionary for converting otp ids to ig ids
    log.debug('create maps for converting otp ids to ig ids')
    n[Node.id_ig.name] = np.arange(len(n.index))
    ids_otp_ig = {
        getattr(node, Node.id_otp.name): getattr(node, Node.id_ig.name) for node in n.itertuples()
    }

    # 6) add nodes to graph
    log.info('adding nodes to graph')
    G = ig.Graph(directed=True)
    G.add_vertices(len(n.index))
    for attr in Node:
        if attr.name in n.columns:
            G.vs[attr.value] = list(n[attr.name])
        else:
            log.warning(f'node column {attr.name} not present in dataframe')

    # 7) add edges to graph
    log.info('adding edges to graph')

    # get edge lengths by projected geometry
    e_filt[Edge.length.name] = [
        round(geom.length, 4) if isinstance(geom, LineString) else 0.0 for geom in e_filt[Edge.geometry.name]
    ]

    def get_ig_uv(edge):
        return (ids_otp_ig[edge['node_orig_id']], ids_otp_ig[edge['node_dest_id']])

    e_filt['uv_ig'] = e_filt.apply(get_ig_uv, axis=1)
    e_filt[Edge.id_ig.name] = np.arange(len(e_filt.index))
    G.add_edges(list(e_filt['uv_ig']))
    for attr in Edge:
        if attr.name in e_filt.columns:
            G.es[attr.value] = list(e_filt[attr.name])
        else:
            log.warning(f'edge column {attr.name} not present in dataframe')

    # 8) delete edges outside Helsinki Metropolitan Area (HMA)
    hma_buffered = hma_poly.buffer(100)

    def intersects_hma(geom: Union[LineString, None]):
        # edges without geometry are kept (treated as being inside HMA)
        if not geom or geom.is_empty:
            return True
        return geom.intersects(hma_buffered)

    e_gdf = ig_utils.get_edge_gdf(G)
    log.info('finding edges that intersect with HMA')
    e_gdf['in_hma'] = [intersects_hma(line) for line in e_gdf[Edge.geometry.name]]
    e_gdf_del = e_gdf.query('in_hma == False').copy()
    out_ratio = _ratio_percent(len(e_gdf_del.index), len(e_gdf.index))
    log.info(f'found {len(e_gdf_del.index)} ({out_ratio} %) edges outside HMA')

    log.info('deleting edges')
    before_count = G.ecount()
    G.delete_edges(e_gdf_del.index.tolist())
    after_count = G.ecount()
    log.info(f'deleted {before_count-after_count} edges')

    # deleting edges shifts igraph indexes, so the id_ig attributes must be re-assigned
    _reassign_ig_ids(G)

    # 9) find and inspect subgraphs by decomposing the graph
    sub_graphs = G.decompose(mode='STRONG')
    log.info(f'found {len(sub_graphs)} subgraphs')

    graph_sizes = [graph.ecount() for graph in sub_graphs]
    for threshold in (10, 50, 100, 500, 10000):
        count = sum(1 for size in graph_sizes if size > threshold)
        log.info(f'subgraphs with more than {threshold} edges: {count}')

    # edges of the smallest subgraphs are always needed (they are deleted in step 10)
    small_graphs = [graph for graph in sub_graphs if graph.ecount() <= 15]
    small_graph_edges = _collect_subgraph_edges(small_graphs)

    if b_export_decomposed_igraphs_to_gpkg:
        # edges of the larger subgraphs are only needed for this debug export
        medium_graphs = [graph for graph in sub_graphs if (graph.ecount() > 15 and graph.ecount() <= 500)]
        big_graphs = [graph for graph in sub_graphs if graph.ecount() > 500]
        medium_graph_edges = _collect_subgraph_edges(medium_graphs)
        big_graph_edges = _collect_subgraph_edges(big_graphs)

        log.info('exporting subgraphs to gpkg')
        # graphs with <= 15 edges
        small_graph_edges_gdf = gpd.GeoDataFrame(small_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        small_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='small_graph_edges', driver='GPKG')
        # graphs with 16–500 edges
        medium_graph_edges_gdf = gpd.GeoDataFrame(medium_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        medium_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='medium_graph_edges', driver='GPKG')
        # graphs with > 500 edges
        big_graph_edges_gdf = gpd.GeoDataFrame(big_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        big_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='big_graph_edges', driver='GPKG')
        log.info('graphs exported')

    # 10) delete smallest subgraphs from the graph
    # the id_ig values of the subgraph edges refer to edge indexes of G (re-assigned in step 8)
    del_edge_ids = [edge[Edge.id_ig.name] for edge in small_graph_edges]
    log.info(f'deleting {len(del_edge_ids)} isolated edges')
    before_count = G.ecount()
    G.delete_edges(del_edge_ids)
    after_count = G.ecount()
    del_ratio = _ratio_percent(before_count - after_count, before_count)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) edges')

    # 11) delete isolated nodes from the graph
    del_node_ids = [v.index for v in G.vs.select(_degree_eq=0)]
    log.info(f'deleting {len(del_node_ids)} isolated nodes')
    before_count = G.vcount()
    G.delete_vertices(del_node_ids)
    after_count = G.vcount()
    del_ratio = _ratio_percent(before_count - after_count, before_count)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) nodes')

    # indexes changed again after the deletions above
    _reassign_ig_ids(G)

    # 12) export graph data to GeoDataFrames for debugging

    if b_export_final_graph_to_gpkg:
        log.info(f'exporting final graph to {debug_igraph_gpkg} for debugging')
        e_gdf = ig_utils.get_edge_gdf(G, attrs=[Edge.id_otp, Edge.id_ig], ig_attrs=['source', 'target'])
        n_gdf = ig_utils.get_node_gdf(G, ig_attrs=['index'])
        e_gdf.to_file(debug_igraph_gpkg, layer='final_graph_edges', driver='GPKG')
        n_gdf.to_file(debug_igraph_gpkg, layer='final_graph_nodes', driver='GPKG')

    if igraph_out_file:
        ig_utils.export_to_graphml(G, igraph_out_file)

    return G
Example #8
0
    graph.es[E.id_way.value] = list(edge_gdf['way_id'])


edge_gdf = ig_utils.get_edge_gdf(
    graph,
    attrs=[E.id_ig, E.length, E.bike_safety_factor],
    ig_attrs=['source', 'target'])

set_biking_lengths(graph, edge_gdf)
set_uv(graph, edge_gdf)
set_way_ids(graph, edge_gdf)

# set combined GVI to GVI attribute & export graph
graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value])
ig_utils.export_to_graphml(graph,
                           out_graph,
                           n_attrs=out_node_attrs,
                           e_attrs=out_edge_attrs)

# create GeoJSON files for vector tiles
geojson = utils.create_geojson(graph)
utils.write_geojson(geojson, out_geojson, overwrite=True, id_attr=True)
utils.write_geojson(geojson,
                    out_geojson_noise_gvi,
                    overwrite=True,
                    db_prop=True,
                    gvi_prop=True)

# for research use, set combined GVI that omits low vegetation to GVI attribute and export graph
graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_high_veg.value])
ig_utils.export_to_graphml(graph,
                           out_graph_research,
def graph_export(conf: GraphExportConf, ):
    """Export the processed graph as GraphML and GeoJSON files.

    Reads ``{conf.base_dir}/graph_in/{conf.graph_id}.graphml`` and writes the
    following files to ``{conf.base_dir}/graph_out/``:

    - the graph with the combined GSV + vegetation GVI as the GVI attribute
    - two GeoJSON files created from the graph
    - a "research" graph (``_r`` suffix) with the GVI that omits low vegetation
    - a clip of the research graph (``_r_hel-clip`` suffix) by the extent read
      from ``conf.hel_extent_fp`` buffered by 500

    The noise and GVI edge attributes are left out of the exported graphs when
    ``conf.with_noise_data`` / ``conf.with_greenery_data`` are falsy.
    """
    in_graph = fr'{conf.base_dir}/graph_in/{conf.graph_id}.graphml'
    out_graph = fr'{conf.base_dir}/graph_out/{conf.graph_id}.graphml'
    out_graph_research = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r.graphml'
    out_graph_research_hel = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r_hel-clip.graphml'
    out_geojson_noise_gvi = fr'{conf.base_dir}/graph_out/{conf.graph_id}_noise_gvi.geojson'
    out_geojson = fr'{conf.base_dir}/graph_out/{conf.graph_id}.geojson'

    hel_extent = gpd.read_file(conf.hel_extent_fp)

    out_node_attrs = [N.geometry]
    out_edge_attrs = [
        E.id_ig, E.uv, E.id_way, E.geometry, E.geom_wgs, E.length,
        E.allows_biking, E.is_stairs, E.bike_safety_factor, E.noises, E.gvi
    ]

    if not conf.with_noise_data:
        out_edge_attrs.remove(E.noises)

    if not conf.with_greenery_data:
        out_edge_attrs.remove(E.gvi)

    log.info(f'Reading graph file: {in_graph}')
    graph = ig_utils.read_graphml(in_graph)

    edge_gdf = ig_utils.get_edge_gdf(graph,
                                     attrs=[E.id_ig, E.length],
                                     ig_attrs=['source', 'target'])

    set_uv(graph, edge_gdf)
    set_way_ids(graph, edge_gdf)

    # missing, zero or non-finite safety factors fall back to 1
    graph.es[E.bike_safety_factor.value] = [
        round(v, 2) if (v and np.isfinite(v)) else 1
        for v in graph.es[E.bike_safety_factor.value]
    ]

    # set combined GVI to GVI attribute & export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # create GeoJSON files for vector tiles
    geojson = utils.create_geojson(graph)
    utils.write_geojson(geojson, out_geojson, overwrite=True, id_attr=True)
    utils.write_geojson(geojson,
                        out_geojson_noise_gvi,
                        overwrite=True,
                        db_prop=True,
                        gvi_prop=True)

    # for research use, set combined GVI that omits low vegetation to GVI attribute and export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_high_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph_research,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # export clip of the graph by the extent of Helsinki

    node_gdf = ig_utils.get_node_gdf(graph, attrs=[N.id_ig])
    # replace geometry with buffered one (500 m)
    hel_extent['geometry'] = [
        geom.buffer(500) for geom in hel_extent['geometry']
    ]
    inside_hel = gpd.sjoin(node_gdf, hel_extent)
    # a set keeps the membership test below O(1) per node instead of O(n)
    inside_hel_ids = set(inside_hel[N.id_ig.name])
    outside_hel_ids = [
        id_ig for id_ig in node_gdf[N.id_ig.name]
        if id_ig not in inside_hel_ids
    ]

    # NOTE(review): assumes the id_ig node attribute equals the igraph vertex index - confirm
    graph.delete_vertices(outside_hel_ids)
    # delete isolated nodes
    del_node_ids = [v.index for v in graph.vs.select(_degree_eq=0)]
    graph.delete_vertices(del_node_ids)
    # reassign igraph indexes to edge and node attributes
    graph.es[E.id_ig.value] = [e.index for e in graph.es]
    graph.vs[N.id_ig.value] = [v.index for v in graph.vs]
    # recalculate uv_id edge attributes
    edge_gdf = ig_utils.get_edge_gdf(graph, ig_attrs=['source', 'target'])
    set_uv(graph, edge_gdf)

    # export clipped graph
    ig_utils.export_to_graphml(graph,
                               out_graph_research_hel,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)