def test_zero_polygons():
    f_pos, r_pos, points = cuspatial.read_polygon_shapefile(
        os.path.join(shapefiles_path, "empty_poly.shp")
    )
    assert_eq(f_pos, cudf.Series(dtype=np.int32, name="f_pos"))
    assert_eq(r_pos, cudf.Series(dtype=np.int32, name="r_pos"))
    assert_eq(
        points,
        cudf.DataFrame(
            {
                "x": cudf.Series(dtype=np.float64),
                "y": cudf.Series(dtype=np.float64),
            }
        ),
    )
def test_one_polygon():
    f_pos, r_pos, points = cuspatial.read_polygon_shapefile(
        os.path.join(shapefiles_path, "one_poly.shp")
    )
    assert_eq(f_pos, cudf.Series([0], dtype=np.int32, name="f_pos"))
    assert_eq(r_pos, cudf.Series([0], dtype=np.int32, name="r_pos"))
    assert_eq(
        points,
        cudf.DataFrame(
            {
                "x": cudf.Series([-10, 5, 5, -10, -10], dtype=np.float64),
                "y": cudf.Series([-10, -10, 5, 5, -10], dtype=np.float64),
            }
        ),
    )
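# A minimal sketch (not part of the test suite) of how a fixture like
# "one_poly.shp" could be generated with geopandas/shapely. The output path
# and the "name" attribute column are assumptions for illustration, and
# shapefile writers may reorder ring vertices to enforce winding conventions.
import geopandas as gpd
from shapely.geometry import Polygon

fixture = gpd.GeoDataFrame(
    {"name": ["square"]},
    geometry=[Polygon([(-10, -10), (5, -10), (5, 5), (-10, 5)])],
)
fixture.to_file("one_poly.shp", driver="ESRI Shapefile")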
import os
import tempfile

import geopandas as gpd
import numpy as np


def point_in_polygon_gpu(
    points_df,  # cudf.DataFrame with x and y columns of point coordinates
    poly_df: gpd.GeoDataFrame,  # geopandas.GeoDataFrame with polygon shapes
    points_x_col: str = "x",
    points_y_col: str = "y",
    poly_label_col: str = None,
):
    """
    Find polygon labels for each of the input points.
    This is a GPU accelerated version that requires cuspatial!

    Parameters
    ----------
    points_df : cudf.DataFrame
        A dataframe in GPU memory containing the x and y coordinates.
    points_x_col : str
        Name of the x coordinate column in points_df. Default is "x".
    points_y_col : str
        Name of the y coordinate column in points_df. Default is "y".
    poly_df : geopandas.GeoDataFrame
        A geodataframe in CPU memory containing polygon geometries in each
        row.
    poly_label_col : str
        Name of the column in poly_df that will be used to label the points,
        e.g. "placename". Default is to automatically use the first column
        unless otherwise specified.

    Returns
    -------
    point_labels : cudf.Series
        A column of labels that indicates which polygon the points fall into.
    """
    import cudf
    import cuspatial

    poly_df_: gpd.GeoDataFrame = poly_df.reset_index()

    # Simply use the first column of the geodataframe as the label if none is
    # provided. See https://stackoverflow.com/a/22736342/6611055
    poly_label_col: str = poly_label_col or poly_df.columns[0]
    point_labels: cudf.Series = cudf.Series(index=points_df.index).astype(
        poly_df[poly_label_col].dtype
    )

    # Load the CPU-based GeoDataFrame into a GPU-based cuspatial-friendly
    # format. This is a workaround until the related feature request at
    # https://github.com/rapidsai/cuspatial/issues/165 is implemented.
    with tempfile.TemporaryDirectory() as tmpdir:
        # Save the geodataframe to a temporary shapefile,
        # so that we can load it into GPU memory using cuspatial
        tmpshpfile = os.path.join(tmpdir, "poly_df.shp")
        poly_df_.to_file(filename=tmpshpfile, driver="ESRI Shapefile")

        # Load polygon_offsets, ring_offsets and polygon xy points
        # from the temporary shapefile into GPU memory
        poly_offsets, poly_ring_offsets, poly_points = cuspatial.read_polygon_shapefile(
            filename=tmpshpfile
        )

    # Run the actual point-in-polygon algorithm!
    # Note that cuspatial's point_in_polygon function has a 31 polygon limit,
    # hence the for-loop below. See also
    # https://github.com/rapidsai/cuspatial/blob/branch-0.15/notebooks/nyc_taxi_years_correlation.ipynb
    num_poly: int = len(poly_df_)
    point_in_poly_iter: list = list(np.arange(0, num_poly, 31)) + [num_poly]
    for i in range(len(point_in_poly_iter) - 1):
        start, end = point_in_poly_iter[i], point_in_poly_iter[i + 1]
        poly_labels: cudf.DataFrame = cuspatial.point_in_polygon(
            test_points_x=points_df[points_x_col],
            test_points_y=points_df[points_y_col],
            poly_offsets=poly_offsets[start:end],
            poly_ring_offsets=poly_ring_offsets,
            poly_points_x=poly_points.x,
            poly_points_y=poly_points.y,
        )
        # Label each point with the polygon it falls in
        for label in poly_labels.columns:
            point_labels.loc[poly_labels[label]] = poly_df_.loc[label][poly_label_col]

    return point_labels
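# Hypothetical usage sketch for point_in_polygon_gpu. The file name
# "boroughs.geojson" and the label column "boro_name" are assumptions for
# illustration only.
import cudf
import geopandas as gpd

taxi_points = cudf.DataFrame({"x": [-73.99, -73.87], "y": [40.73, 40.77]})
boroughs = gpd.read_file("boroughs.geojson")  # assumed local polygon file

labels = point_in_polygon_gpu(
    points_df=taxi_points,
    poly_df=boroughs,
    points_x_col="x",
    points_y_col="y",
    poly_label_col="boro_name",  # assumed label column
)
print(labels.to_pandas().value_counts(dropna=False))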
print("data ingesting time (from SSD) in ms={}".format((end - start) * 1000)) start = time.time() x1 = Series(df["Start_Lon"]) y1 = Series(df["Start_Lat"]) x2 = Series(df["End_Lon"]) y2 = Series(df["End_Lat"]) end = time.time() print("data frame to gdf column conversion time in ms={}".format( (end - start) * 1000)) NYC_boroughs = gpd.read_file( 'https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON' ) NYC_boroughs.to_file('NYC_boroughs.shp') NYC_gpu = cuspatial.read_polygon_shapefile('NYC_boroughs.shp') plyreader = shapefile.Reader("NYC_boroughs.shp") polygons = plyreader.shapes() plys = [] for ply in polygons: plys.append(shape(ply)) start = time.time() bm1 = cpp_point_in_polygon_bitmap(x1, y1, NYC_gpu[0], NYC_gpu[1], NYC_gpu[2]['x'], NYC_gpu[2]['y']) bm2 = cpp_point_in_polygon_bitmap(x2, y2, NYC_gpu[0], NYC_gpu[1], NYC_gpu[2]['x'], NYC_gpu[2]['y']) end = time.time() print("Python GPU Time in ms (end-to-end)={}".format((end - start) * 1000))
def test_non_existent_file():
    with pytest.raises(RuntimeError):
        f_pos, r_pos, points = cuspatial.read_polygon_shapefile(
            "non_exist.shp"
        )