def nearest_nodes(df: cudf.DataFrame, nodes: cudf.DataFrame) -> cudf.DataFrame:
    """
    Find nearest road node to each point of interest

    Uses `cuml` nearest neighbours for GPU accelerated nearest points.
    It is assumed that all points use a planar coordinate system like BNG.

    Rows of `df` with a missing easting or northing are dropped before the
    search, so the result can have fewer rows than the input.

    Parameters
    ----------
    df : cudf.DataFrame
        POI df containing coordinate information
        (`easting` and `northing` columns)
    nodes : cudf.DataFrame
        Road nodes with coordinate information
        (`easting`, `northing` and `node_id` columns)

    Returns
    -------
    cudf.DataFrame:
        The POI rows that have both coordinates, with an added `node_id`
        column holding the id of the nearest road node. The index labels of
        the surviving rows are kept as they were in `df`.
    """
    coord_cols = ["easting", "northing"]

    df = df.dropna(subset=coord_cols)
    nbrs = NearestNeighbors(n_neighbors=1, output_type="cudf", algorithm="brute").fit(
        nodes[coord_cols]
    )
    _, indices = nbrs.kneighbors(df[coord_cols])

    # One nearest node per remaining POI row, in the same row order as `df`.
    nearest_ids = nodes.iloc[indices]["node_id"].reset_index(drop=True)

    # Column assignment aligns on index labels, not on position. After the
    # dropna above `df` may have gaps in its index while `nearest_ids` is
    # numbered 0..n-1, so assigning it directly would pair POIs with the wrong
    # node (or with nulls). Give the result the labels of `df` first.
    nearest_ids.index = df.index
    df["node_id"] = nearest_ids
    return df
def apply_op(
    self,
    gdf: cudf.DataFrame,
    columns_ctx: dict,
    input_cols,
    target_cols=["base"],
    stats_context=None,
):
    """Drop rows containing nulls in the resolved target columns.

    Parameters
    ----------
    gdf : cudf.DataFrame
        Frame to filter; the filtered rows are returned as a new frame.
    columns_ctx : dict
        Column context handed to ``self.get_columns`` and
        ``self.update_columns_ctx``.
    input_cols
        Input column group, forwarded to the two helpers above.
    target_cols
        Target column group, forwarded to ``self.get_columns``.
    stats_context
        Not used by this method.

    Returns
    -------
    cudf.DataFrame
        The filtered frame with its index renumbered from zero.
    """
    subset = self.get_columns(columns_ctx, input_cols, target_cols)

    # A falsy (e.g. empty) selection is passed on as ``subset=None``.
    filtered = gdf.dropna(subset=subset if subset else None)
    filtered.reset_index(drop=True, inplace=True)

    # Record the columns of the filtered frame in the column context.
    self.update_columns_ctx(columns_ctx, input_cols, filtered.columns, subset)
    return filtered
def transform(self, columns: ColumnNames, gdf: cudf.DataFrame) -> cudf.DataFrame:
    """Return ``gdf`` without the rows that have nulls in ``columns``.

    Parameters
    ----------
    columns : ColumnNames
        Columns to check for nulls. A falsy (e.g. empty) selection is passed
        on as ``subset=None``.
    gdf : cudf.DataFrame
        Frame to filter.

    Returns
    -------
    cudf.DataFrame
        The filtered frame with its index renumbered from zero.
    """
    subset = columns if columns else None
    filtered = gdf.dropna(subset=subset)
    # Discard the old labels so the result is indexed 0..n-1.
    filtered.reset_index(drop=True, inplace=True)
    return filtered