Beispiel #1
0
def test_zero_polygons():
    """Check that ``empty_poly.shp`` is read as empty offsets and points."""
    shp_path = os.path.join(shapefiles_path, "empty_poly.shp")
    feature_offsets, ring_offsets, vertices = (
        cuspatial.read_polygon_shapefile(shp_path))

    # Expected results: two empty int32 offset series and an empty
    # float64 x/y coordinate frame.
    expected_f_pos = cudf.Series(dtype=np.int32, name="f_pos")
    expected_r_pos = cudf.Series(dtype=np.int32, name="r_pos")
    expected_vertices = cudf.DataFrame({
        "x": cudf.Series(dtype=np.float64),
        "y": cudf.Series(dtype=np.float64),
    })

    assert_eq(feature_offsets, expected_f_pos)
    assert_eq(ring_offsets, expected_r_pos)
    assert_eq(vertices, expected_vertices)
Beispiel #2
0
def test_one_polygon():
    """Check the offsets and vertices read from ``one_poly.shp``."""
    shp_path = os.path.join(shapefiles_path, "one_poly.shp")
    feature_offsets, ring_offsets, vertices = (
        cuspatial.read_polygon_shapefile(shp_path))

    # A single feature with a single ring: both offset series hold just 0.
    expected_f_pos = cudf.Series([0], dtype=np.int32, name="f_pos")
    expected_r_pos = cudf.Series([0], dtype=np.int32, name="r_pos")

    # Five vertices; the last one repeats the first.
    expected_x = cudf.Series([-10, 5, 5, -10, -10], dtype=np.float64)
    expected_y = cudf.Series([-10, -10, 5, 5, -10], dtype=np.float64)
    expected_vertices = cudf.DataFrame({"x": expected_x, "y": expected_y})

    assert_eq(feature_offsets, expected_f_pos)
    assert_eq(ring_offsets, expected_r_pos)
    assert_eq(vertices, expected_vertices)
Beispiel #3
0
def point_in_polygon_gpu(
    points_df,  # cudf.DataFrame with x and y columns of point coordinates
    poly_df: gpd.GeoDataFrame,  # geopandas.GeoDataFrame with polygon shapes
    points_x_col: str = "x",
    points_y_col: str = "y",
    poly_label_col: str = None,
):
    """
    Assign a polygon label to each input point, using cuspatial on the GPU.

    The polygons are written to a temporary ESRI Shapefile and read back with
    cuspatial, then tested against the points in batches of at most 31
    polygons.

    Parameters
    ----------
    points_df : cudf.DataFrame
        Dataframe in GPU memory holding the x and y point coordinates.
    poly_df : geopandas.GeoDataFrame
        Geodataframe in CPU memory with one polygon geometry per row.
    points_x_col : str
        Name of the x coordinate column in points_df. Default is "x".
    points_y_col : str
        Name of the y coordinate column in points_df. Default is "y".
    poly_label_col : str
        Name of the poly_df column whose values are used as labels, e.g.
        "placename". When left as None (or any falsy value), the first
        column of poly_df is used.

    Returns
    -------
    point_labels : cudf.Series
        Series indexed like points_df, cast to the dtype of the label
        column, holding the label of the polygon each point falls into.

    """
    import cudf
    import cuspatial

    indexed_polys = poly_df.reset_index()

    # Fall back to the first geodataframe column when no label column is given
    # See https://stackoverflow.com/a/22736342/6611055
    label_col = poly_label_col or poly_df.columns[0]
    blank_labels = cudf.Series(index=points_df.index)
    labels_out = blank_labels.astype(poly_df[label_col].dtype)

    # Round-trip the CPU geodataframe through a temporary shapefile so that
    # cuspatial can load it into GPU memory. Workaround until the feature
    # request at https://github.com/rapidsai/cuspatial/issues/165 lands.
    with tempfile.TemporaryDirectory() as scratch_dir:
        shp_path = os.path.join(scratch_dir, "poly_df.shp")
        indexed_polys.to_file(filename=shp_path, driver="ESRI Shapefile")

        # Polygon offsets, ring offsets and polygon xy vertices
        feature_offsets, ring_offsets, vertices = (
            cuspatial.read_polygon_shapefile(filename=shp_path))

    # cuspatial's point_in_polygon function has a 31 polygon limit, so the
    # polygons are processed in consecutive slices of that size. See also
    # https://github.com/rapidsai/cuspatial/blob/branch-0.15/notebooks/nyc_taxi_years_correlation.ipynb
    polygon_count = len(indexed_polys)
    boundaries = list(np.arange(0, polygon_count, 31)) + [polygon_count]
    for lo, hi in zip(boundaries[:-1], boundaries[1:]):
        hits = cuspatial.point_in_polygon(
            test_points_x=points_df[points_x_col],
            test_points_y=points_df[points_y_col],
            poly_offsets=feature_offsets[lo:hi],
            poly_ring_offsets=ring_offsets,
            poly_points_x=vertices.x,
            poly_points_y=vertices.y,
        )

        # Each result column is used as a mask selecting the points to label
        for poly_id in hits.columns:
            label_value = indexed_polys.loc[poly_id][label_col]
            inside = hits[poly_id]
            labels_out.loc[inside] = label_value

    return labels_out
# Benchmark fragment: times point-in-polygon bitmap queries of trip start and
# end coordinates against the NYC borough polygons.
# NOTE(review): `start`, `end` and `df` used below are defined before this
# fragment begins; the first print reports a timing measured there.
print("data ingesting time (from SSD) in ms={}".format((end - start) * 1000))

# Time wrapping the four coordinate columns of `df` in Series objects.
start = time.time()
x1 = Series(df["Start_Lon"])
y1 = Series(df["Start_Lat"])
x2 = Series(df["End_Lon"])
y2 = Series(df["End_Lat"])
end = time.time()
print("data frame to gdf column conversion time in ms={}".format(
    (end - start) * 1000))

# Fetch the borough boundaries as GeoJSON (requires network access), save them
# as a shapefile in the working directory, then load that shapefile with
# cuspatial.
NYC_boroughs = gpd.read_file(
    'https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON'
)
NYC_boroughs.to_file('NYC_boroughs.shp')
NYC_gpu = cuspatial.read_polygon_shapefile('NYC_boroughs.shp')

# Read the same shapefile a second time and convert every shape with `shape`.
# NOTE(review): `plys` is not used in the visible part of this script;
# presumably it feeds a CPU-side comparison further down — confirm.
plyreader = shapefile.Reader("NYC_boroughs.shp")
polygons = plyreader.shapes()
plys = []
for ply in polygons:
    plys.append(shape(ply))

# Time the two bitmap queries: one for the start coordinates, one for the end
# coordinates. NYC_gpu[0] and NYC_gpu[1] are the first two items returned by
# read_polygon_shapefile and NYC_gpu[2] supplies the 'x' and 'y' columns.
start = time.time()
bm1 = cpp_point_in_polygon_bitmap(x1, y1, NYC_gpu[0], NYC_gpu[1],
                                  NYC_gpu[2]['x'], NYC_gpu[2]['y'])
bm2 = cpp_point_in_polygon_bitmap(x2, y2, NYC_gpu[0], NYC_gpu[1],
                                  NYC_gpu[2]['x'], NYC_gpu[2]['y'])
end = time.time()
print("Python GPU Time in ms (end-to-end)={}".format((end - start) * 1000))
Beispiel #5
0
def test_non_existent_file():
    """Check that reading a missing shapefile raises ``RuntimeError``."""
    missing_path = "non_exist.shp"
    with pytest.raises(RuntimeError):
        _f_pos, _r_pos, _points = cuspatial.read_polygon_shapefile(
            missing_path)