def _copy_type_metadata(self, other, include_index: bool = True):
    """
    Copy type metadata from each column of `other` to the corresponding
    column of `self`.

    See `ColumnBase._with_type_metadata` for more information.

    Parameters
    ----------
    other
        Object whose `_data` columns (and optionally `_index`) supply the
        dtypes to copy. Columns are paired with those of `self` by
        position.
    include_index : bool, default True
        When True, and both objects have a non-None `_index`, the index
        type metadata is copied as well.

    Returns
    -------
    `self`, after its columns (and possibly its index) have been replaced.
    """
    column_triples = zip(
        self._data.keys(), self._data.values(), other._data.values()
    )
    for label, own_col, source_col in column_triples:
        # libcudf APIs lose all information about GeoColumns, operating
        # solely on the underlying base data. Therefore, our only recourse
        # is to recreate a new GeoColumn with the same underlying data.
        # Since there's no easy way to create a GeoColumn from a
        # NumericalColumn, we're forced to do so manually.
        if isinstance(source_col, GeoColumn):
            own_col = GeoColumn(
                source_col._geo, source_col._meta, cudf.Index(own_col)
            )

        retyped = own_col._with_type_metadata(source_col.dtype)
        self._data.set_by_label(label, retyped, validate=False)

    # Nothing more to do unless index metadata was requested and both
    # sides actually have an index.
    if not include_index:
        return self
    if self._index is None or other._index is None:
        return self

    self._index._copy_type_metadata(other._index)

    # If `other` has a CategoricalIndex but `self._index` is still not one
    # after the metadata copy, rebuild `self._index` from its underlying
    # column through `cudf.Index`.
    # NOTE(review): presumably this lets `cudf.Index` pick the index class
    # that matches the column's new dtype - confirm.
    categorical_index = cudf.core.index.CategoricalIndex
    if isinstance(other._index, categorical_index) and not isinstance(
        self._index, categorical_index
    ):
        self._index = cudf.Index(self._index._column)

    return self
def __init__(
    self,
    data: Union[gpd.GeoSeries],
    index: Union[cudf.Index, pd.Index] = None,
    dtype=None,
    name=None,
    nan_as_null=True,
):
    """
    Build a GeoSeries from geometry data.

    Parameters
    ----------
    data
        One of: a `GeoColumn` (used as the column directly), a `GeoSeries`
        (its `_column` is reused), a geopandas GeoSeries, or a pandas
        Series (wrapped in a geopandas GeoSeries before conversion).
    index
        Index for the new series. When omitted, `data.index` is used if
        `data` is a geopandas GeoSeries or a `GeoSeries`; otherwise a
        `cudf.RangeIndex` covering `len(data)` is created.
    dtype, name, nan_as_null
        Forwarded unchanged to the parent constructor.

    Raises
    ------
    TypeError
        If `data` is not one of the supported types.
    """
    # Condition index: inherit it from geo-series inputs when not given.
    if index is None and isinstance(data, (gpGeoSeries, GeoSeries)):
        index = data.index

    # Condition data
    # NOTE(review): a plain pandas Series is wrapped only after the index
    # lookup above, so its own index is not carried over - confirm that
    # this is intended.
    if isinstance(data, pd.Series):
        data = gpGeoSeries(data)

    # Create column
    if isinstance(data, GeoColumn):
        column = data
    elif isinstance(data, GeoSeries):
        column = data._column
    elif isinstance(data, gpGeoSeries):
        adapter = GeoPandasAdapter(data)
        buffers = GeoArrowBuffers(adapter.get_geoarrow_host_buffers())
        pandas_meta = GeoMeta(adapter.get_geopandas_meta())
        column = GeoColumn(buffers, pandas_meta)
    else:
        raise TypeError(
            f"Incompatible object passed to GeoSeries ctor {type(data)}"
        )

    # The default index is built only after the type check so that
    # unsupported, un-sized input (e.g. None) reports the TypeError above
    # instead of failing inside len().
    if index is None:
        index = cudf.RangeIndex(0, len(data))

    super().__init__(column, index, dtype, name, nan_as_null)
def __init__(self, data: gpGeoDataFrame = None):
    """
    Constructs a GPU GeoDataFrame from a GeoPandas dataframe.

    Parameters
    ----------
    data : A geopandas.geodataframe.GeoDataFrame object, or None, in which
        case only the parent constructor runs.

    Raises
    ------
    ValueError
        If `data` is neither None nor a geopandas GeoDataFrame.
    """
    super().__init__()

    if data is None:
        return
    if not isinstance(data, gpGeoDataFrame):
        raise ValueError("Invalid type passed to GeoDataFrame ctor")

    self.index = data.index
    for label in data.columns:
        series = data[label]

        # Non-geometry columns are stored as they come from geopandas.
        if not is_geometry_type(series):
            self._data[label] = series
            continue

        # Geometry columns go through the GeoArrow adapter to become a
        # GeoColumn built from the adapter's buffers and metadata.
        adapter = GeoPandasAdapter(series)
        buffers = GeoArrowBuffers(adapter.get_geoarrow_host_buffers())
        pandas_meta = GeoMeta(adapter.get_geopandas_meta())
        self._data[label] = GeoColumn(buffers, pandas_meta)
def test_points():
    """A flat xy buffer of four values yields two points that convert to
    the expected geopandas GeoSeries."""
    buffers = GeoArrowBuffers({"points_xy": [0, 1, 2, 3]})

    # The raw coordinates are exposed unchanged, two values per point.
    expected_xy = cudf.Series([0, 1, 2, 3])
    cudf.testing.assert_series_equal(expected_xy, buffers.points.xy)
    assert len(buffers.points) == 2

    actual = GeoSeries(GeoColumn(buffers)).to_pandas()
    expected = gpGeoSeries([Point(0, 1), Point(2, 3)])
    pd.testing.assert_series_equal(actual, expected)
def test_homogeneous_lines():
    """Four linestrings of three points each convert to the expected
    geopandas GeoSeries."""
    # Offsets 0, 6, 12, 18, 24: six coordinate values per linestring.
    offsets = np.array(range(5)) * 6
    buffers = GeoArrowBuffers(
        {"lines_xy": range(24), "lines_offsets": offsets}
    )

    expected_xy = cudf.Series(range(24))
    cudf.testing.assert_series_equal(expected_xy, buffers.lines.xy)
    assert len(buffers.lines) == 4

    actual = GeoSeries(GeoColumn(buffers)).to_pandas()
    expected = gpGeoSeries(
        [
            LineString([[0, 1], [2, 3], [4, 5]]),
            LineString([[6, 7], [8, 9], [10, 11]]),
            LineString([[12, 13], [14, 15], [16, 17]]),
            LineString([[18, 19], [20, 21], [22, 23]]),
        ]
    )
    pd.testing.assert_series_equal(actual, expected)
def test_multipoints():
    """Four multipoints of two points each compare equal to the expected
    geopandas GeoSeries under `assert_eq`."""
    buffers = GeoArrowBuffers(
        {
            "mpoints_xy": np.arange(0, 16),
            "mpoints_offsets": [0, 4, 8, 12, 16],
        }
    )

    expected_xy = cudf.Series(np.arange(0, 16))
    assert_eq(expected_xy, buffers.multipoints.xy)
    assert len(buffers.multipoints) == 4

    actual = GeoSeries(GeoColumn(buffers))
    expected = gpGeoSeries(
        [
            MultiPoint([Point([0, 1]), Point([2, 3])]),
            MultiPoint([Point(4, 5), Point(6, 7)]),
            MultiPoint([Point(8, 9), Point(10, 11)]),
            MultiPoint([Point(12, 13), Point(14, 15)]),
        ]
    )
    assert_eq(actual, expected)
def test_polygons():
    """A mix of single-ring polygons, polygons with a second ring, and one
    multipolygon converts to the expected geopandas GeoSeries."""
    # Ten rings of three points each; every ring is closed by appending
    # its first point again, giving eight coordinate values per ring.
    open_rings = np.arange(60).reshape(10, 6)
    polygons_xy = np.concatenate((open_rings, open_rings[:, 0:2]), axis=1)

    buffers = GeoArrowBuffers(
        {
            "polygons_xy": polygons_xy.flatten(),
            "polygons_polygons": np.array([0, 1, 3, 5, 7, 9, 10]),
            "polygons_rings": np.arange(11) * 8,
            "mpolygons": [2, 4],
        }
    )

    expected_xy = cudf.Series(polygons_xy.flatten())
    cudf.testing.assert_series_equal(expected_xy, buffers.polygons.xy)
    assert len(buffers.polygons) == 5

    # The third expected entry groups two polygons into one MultiPolygon.
    grouped = MultiPolygon(
        [
            (
                ((18, 19), (20, 21), (22, 23)),
                [((24, 25), (26, 27), (28, 29))],
            ),
            (
                ((30, 31), (32, 33), (34, 35)),
                [((36, 37), (38, 39), (40, 41))],
            ),
        ]
    )
    expected = gpGeoSeries(
        [
            Polygon(((0, 1), (2, 3), (4, 5))),
            Polygon(
                ((6, 7), (8, 9), (10, 11)),
                [((12, 13), (14, 15), (16, 17))],
            ),
            grouped,
            Polygon(
                ((42, 43), (44, 45), (46, 47)),
                [((48, 49), (50, 51), (52, 53))],
            ),
            Polygon(((54, 55), (56, 57), (58, 59))),
        ]
    )

    actual = GeoSeries(GeoColumn(buffers)).to_pandas()
    pd.testing.assert_series_equal(actual, expected)
def test_mixed_lines():
    """Linestrings mixed with one multilinestring compare equal to the
    expected geopandas GeoSeries under `assert_eq`."""
    buffers = GeoArrowBuffers(
        {
            "lines_xy": range(24),
            "lines_offsets": np.array(range(5)) * 6,
            # With this entry the expected output groups the second and
            # third linestrings into a single MultiLineString.
            "mlines": [1, 3],
        }
    )

    expected_xy = cudf.Series(range(24))
    assert_eq(expected_xy, buffers.lines.xy)
    assert len(buffers.lines) == 3

    grouped = MultiLineString(
        [
            LineString([[6, 7], [8, 9], [10, 11]]),
            LineString([[12, 13], [14, 15], [16, 17]]),
        ]
    )
    expected = gpGeoSeries(
        [
            LineString([[0, 1], [2, 3], [4, 5]]),
            grouped,
            LineString([[18, 19], [20, 21], [22, 23]]),
        ]
    )

    actual = GeoSeries(GeoColumn(buffers))
    assert_eq(actual, expected)
def test_points():
    """A flat xy buffer of four values yields two points that compare
    equal to the expected geopandas GeoSeries under `assert_eq`."""
    buffers = GeoArrowBuffers({"points_xy": [0, 1, 2, 3]})

    expected_xy = cudf.Series([0, 1, 2, 3])
    assert_eq(expected_xy, buffers.points.xy)
    assert len(buffers.points) == 2

    actual = GeoSeries(GeoColumn(buffers))
    expected = gpGeoSeries([Point(0, 1), Point(2, 3)])
    assert_eq(actual, expected)