コード例 #1
0
ファイル: process_routing.py プロジェクト: cjber/ahah
def get_buffers(
    poi: cudf.DataFrame,
    postcodes: cudf.DataFrame,
    k: int,
) -> cudf.DataFrame:
    """
    Estimate the buffer distance needed around each POI node.

    A brute-force nearest-neighbour model is fitted on the POI coordinates
    and queried with every postcode, so each postcode is matched to its `k`
    nearest POIs. For every POI `node_id` that is matched at least once, the
    buffer is the largest matched distance (cast to int), and the postcode
    nodes matched to each POI are gathered into a list.

    Parameters
    ----------
    poi : cudf.DataFrame
        Dataframe of all POIs; the `easting`, `northing` and `node_id`
        columns are read here.
    postcodes : cudf.DataFrame
        Dataframe of postcodes; the `easting`, `northing` and `node_id`
        columns are read here.
    k : int
        Number of neighbours (nearest POIs) to find for each postcode.

    Returns
    -------
    cudf.DataFrame
        POI dataframe with an added `pc_node` column (list of unique
        postcode node ids) and `buffer` column, de-duplicated on `node_id`.
        Rows containing any null value are dropped.
    """
    coord_cols = ["easting", "northing"]
    knn = NearestNeighbors(
        n_neighbors=k,
        output_type="cudf",
        algorithm="brute",
    ).fit(poi[coord_cols])
    distances, indices = knn.kneighbors(postcodes[coord_cols])

    # Long table of (postcode node, POI row index) pairs.
    # NOTE(review): the join is index-based, so this presumably relies on
    # `postcodes` sharing its index with the KNN output - confirm.
    neighbour_cols = indices.columns.tolist()
    pc_to_poi = postcodes.join(indices)[["node_id"] + neighbour_cols]
    pc_to_poi = pc_to_poi.set_index("node_id").stack().rename("poi_idx")
    # After stacking, `level_0` carries the postcode node id and `level_1`
    # the neighbour column label, which is not needed.
    pc_to_poi = pc_to_poi.reset_index().rename(columns={"level_0": "pc_node"})
    pc_to_poi = pc_to_poi.drop("level_1", axis=1)

    # Collect the postcode nodes per POI, then attach the POI columns. The
    # right join keeps POIs that were never returned as a neighbour.
    poi_nn = pc_to_poi.groupby("poi_idx").agg(list).join(poi, how="right")

    def _unique_nodes(nodes):
        # POIs without any matched postcode have no list; leave them as-is.
        if nodes is None:
            return nodes
        return list(set(nodes))

    # retain only unique postcode ids
    poi_nn["pc_node"] = poi_nn["pc_node"].to_pandas().apply(_unique_nodes)

    # Flatten the KNN outputs to one row per (postcode, neighbour) pair.
    dist_long = distances.stack().rename("dist").reset_index()
    dist_long = dist_long.drop("level_1", axis=1)
    idx_long = indices.stack().rename("ind").reset_index()
    idx_long = idx_long.drop("level_1", axis=1)

    # Look up the POI node id for every neighbour hit.
    # NOTE(review): `iloc` is positional, so this assumes the rows of
    # `poi_nn` are in the same order as the rows of `poi` - confirm.
    matched_nodes = poi_nn[["node_id"]].iloc[idx_long["ind"].values]["node_id"]
    matched_nodes = matched_nodes.reset_index(drop=True)

    buffers = cudf.DataFrame({
        "node_id": matched_nodes,
        "buffer": dist_long["dist"].values,
    })
    # Sorting descending before de-duplicating keeps the largest distance
    # recorded for each POI node.
    buffers = buffers.sort_values("buffer", ascending=False)
    buffers = buffers.drop_duplicates("node_id")
    buffers["buffer"] = buffers["buffer"].astype("int")

    # this will drop rows that did not appear in the KNN i.e unneeded poi
    # NOTE(review): `dropna()` has no subset, so a null in any other column
    # also removes the row - confirm this is intended.
    with_buffers = poi_nn.merge(buffers, on="node_id", how="left")
    return with_buffers.dropna().drop_duplicates("node_id")