def from_shapely(data):
    """Convert shapely based data into a spatialpandas.GeoDataFrame.

    Args:
        data: A list of shapely geometries, or a list of dictionaries
            that each hold a shapely geometry under the 'geometry' key.

    Returns:
        A GeoDataFrame holding the geometry data. Empty input, and input
        matching neither supported layout, is returned unchanged.
    """
    from spatialpandas import GeoDataFrame, GeoSeries
    from shapely.geometry.base import BaseGeometry

    def _is_geometry_record(item):
        # A record is a dict carrying a shapely object under 'geometry'.
        return (isinstance(item, dict) and 'geometry' in item
                and isinstance(item['geometry'], BaseGeometry))

    if not data:
        return data

    if all(isinstance(item, BaseGeometry) for item in data):
        return GeoSeries(data).to_frame()

    if all(_is_geometry_record(item) for item in data):
        # The column set is taken from the first record.
        gathered = {name: [] for name in data[0]}
        for record in data:
            for name, value in record.items():
                if isscalar(value) or isinstance(value, BaseGeometry):
                    gathered[name].append(value)
                else:
                    gathered[name].append(np.asarray(value))
        gathered['geometry'] = GeoSeries(gathered['geometry'])
        return GeoDataFrame(gathered)

    return data
def test_multipoint_cx_series_selection(gp_multipoint, rect):
    """GeoSeries.cx on multipoints matches the geopandas .cx selection."""
    x0, y0, x1, y1 = rect
    x_range, y_range = slice(x0, x1), slice(y0, y1)

    # Reference selection performed by geopandas
    selected = gp_multipoint.cx[x_range, y_range]
    expected = GeoSeries(selected, dtype='multipoint')

    # Same selection performed by spatialpandas
    result = GeoSeries(gp_multipoint).cx[x_range, y_range]

    assert_series_equal(expected, result, obj='GeoSeries')
def test_parquet_dask(gp_multipoint, gp_multiline, tmp_path):
    """Write a dask GeoDataFrame to parquet and read it back with bounds."""
    # Trim both geometry series to a common length
    n = min(len(gp_multipoint), len(gp_multiline))
    frame = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n)),
    })
    ddf = dd.from_pandas(frame, npartitions=3)

    # Round trip through parquet
    path = tmp_path / 'ddf.parq'
    ddf.to_parquet(path)
    ddf_read = read_parquet_dask(path)

    # The reader must hand back the dask geo frame type
    assert isinstance(ddf_read, DaskGeoDataFrame)

    # Partition bounds must be restored for every geometry column
    assert set(ddf_read._partition_bounds) == {'points', 'lines'}
    for name in ('points', 'lines'):
        pd.testing.assert_frame_equal(
            ddf[name].partition_bounds,
            ddf_read._partition_bounds[name],
        )
def test_pack_partitions(gp_multipoint, gp_multiline):
    """pack_partitions repartitions and orders rows by hilbert distance."""
    # Trim both geometry series to a common length
    n = min(len(gp_multipoint), len(gp_multiline))
    columns = {
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n)),
    }
    df = GeoDataFrame(columns).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    ddf_packed = ddf.pack_partitions(npartitions=4)

    # The requested partition count must be honoured
    assert ddf_packed.npartitions == 4

    # Distances of the packed rows, in the order they are stored
    total_bounds = df.lines.total_bounds
    per_partition = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds))
    hilbert_distances = per_partition.compute().values

    # Distances of the original rows, sorted ascending
    original = df.lines.hilbert_distance(total_bounds=total_bounds).values
    expected_distances = np.sort(original)

    np.testing.assert_equal(expected_distances, hilbert_distances)
def test_points_intersects_multipolygon(gp_points, gp_multipolygon):
    """Point/PointArray/GeoSeries.intersects agree with geopandas for a MultiPolygon."""
    # Scalar shapely MultiPolygon and the geopandas reference answer
    sg_multipolygon = gp_multipolygon[0]
    expected = gp_points.intersects(sg_multipolygon)

    # spatialpandas counterparts
    multipolygon = MultiPolygon.from_shapely(sg_multipolygon)
    points = PointArray.from_geopandas(gp_points)
    first_label = 10
    points_series = GeoSeries(
        points, index=np.arange(first_label, first_label + len(points)))

    # Point.intersects, element by element
    per_point = [point_el.intersects(multipolygon) for point_el in points]
    np.testing.assert_equal(np.array(per_point), expected)

    # PointArray.intersects over the whole array
    np.testing.assert_equal(points.intersects(multipolygon), expected)

    # PointArray.intersects restricted to (reversed) indices
    inds = np.flipud(np.arange(len(points)))
    np.testing.assert_equal(
        points.intersects(multipolygon, inds), np.flipud(expected))

    # GeoSeries.intersects keeps the series index
    pd.testing.assert_series_equal(
        points_series.intersects(multipolygon),
        pd.Series(expected, index=points_series.index),
    )
def test_points_intersects_line(gp_points, gp_line):
    """Point/PointArray/GeoSeries.intersects agree with geopandas for a Line."""
    # Scalar shapely line and the geopandas reference answer
    sg_line = gp_line[0]
    expected = gp_points.intersects(sg_line)

    # spatialpandas counterparts
    line = Line.from_shapely(sg_line)
    points = PointArray.from_geopandas(gp_points)
    first_label = 10
    points_series = GeoSeries(
        points, index=np.arange(first_label, first_label + len(points)))

    # Point.intersects, element by element
    per_point = [point_el.intersects(line) for point_el in points]
    np.testing.assert_equal(np.array(per_point), expected)

    # PointArray.intersects over the whole array
    np.testing.assert_equal(points.intersects(line), expected)

    # PointArray.intersects restricted to (reversed) indices
    inds = np.flipud(np.arange(len(points)))
    np.testing.assert_equal(
        points.intersects(line, inds), np.flipud(expected))

    # GeoSeries.intersects keeps the series index
    pd.testing.assert_series_equal(
        points_series.intersects(line),
        pd.Series(expected, index=points_series.index),
    )
def test_points_intersects_multipoint(gp_points, gp_multipoint):
    """Point/PointArray/GeoSeries.intersects agree with geopandas for a MultiPoint."""
    # Scalar shapely MultiPoint to test against
    sg_multipoint = gp_multipoint[0]

    if len(gp_points) > 0:
        # Append the last point of the multipoint so that at least one
        # of the tested points is known to intersect it.
        augmented = list(gp_points) + [gp_multipoint[0][-1]]
        gp_points = from_shapely(augmented)

    # Reference answer
    expected = gp_points.intersects(sg_multipoint)

    # spatialpandas counterparts
    multipoint = MultiPoint.from_shapely(sg_multipoint)
    points = PointArray.from_geopandas(gp_points)
    first_label = 10
    points_series = GeoSeries(
        points, index=np.arange(first_label, first_label + len(points)))

    # Point.intersects, element by element
    per_point = [point_el.intersects(multipoint) for point_el in points]
    np.testing.assert_equal(np.array(per_point), expected)

    # PointArray.intersects over the whole array
    np.testing.assert_equal(points.intersects(multipoint), expected)

    # PointArray.intersects restricted to (reversed) indices
    inds = np.flipud(np.arange(len(points)))
    np.testing.assert_equal(
        points.intersects(multipoint, inds), np.flipud(expected))

    # GeoSeries.intersects keeps the series index
    pd.testing.assert_series_equal(
        points_series.intersects(multipoint),
        pd.Series(expected, index=points_series.index),
    )
def test_multipoint_cx_series_selection_dask(gp_multipoint, rect):
    """.cx on a dask-backed multipoint GeoSeries matches geopandas."""
    x0, y0, x1, y1 = rect
    x_range, y_range = slice(x0, x1), slice(y0, y1)

    # Reference selection performed by geopandas
    selected = gp_multipoint.cx[x_range, y_range]
    expected = GeoSeries(selected, dtype='multipoint')

    # Same selection on a three-partition dask series
    sp_multipoint = dd.from_pandas(GeoSeries(gp_multipoint), npartitions=3)
    lazy_result = sp_multipoint.cx[x_range, y_range]
    result = lazy_result.compute()

    assert_series_equal(expected, result, obj='GeoSeries')
def test_multipoint_cx_frame_selection(gp_multipoint, rect):
    """GeoDataFrame.cx on multipoints matches the geopandas .cx selection."""
    x0, y0, x1, y1 = rect
    x_range, y_range = slice(x0, x1), slice(y0, y1)

    # Reference selection performed by geopandas, wrapped in a frame
    selected = gp_multipoint.cx[x_range, y_range]
    expected = GeoDataFrame(GeoSeries(selected, dtype='multipoint'))

    # Same selection performed on a spatialpandas frame
    frame = GeoSeries(gp_multipoint).to_frame()
    result = frame.cx[x_range, y_range]

    assert_frame_equal(expected, result, obj='GeoDataFrame')
def test_pack_partitions_to_parquet_glob(gp_multipoint1, gp_multiline1,
                                         gp_multipoint2, gp_multiline2,
                                         tmp_path):
    """Two packed parquet datasets can be read back together via a glob."""
    # First dataset: packed into 3 partitions
    n = min(len(gp_multipoint1), len(gp_multiline1))
    df1 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint1[:n]),
        'lines': GeoSeries(gp_multiline1[:n]),
        'a': list(range(n)),
    }).set_geometry('lines')
    ddf1 = dd.from_pandas(df1, npartitions=3)
    path1 = tmp_path / 'ddf1.parq'
    ddf_packed1 = ddf1.pack_partitions_to_parquet(str(path1), npartitions=3)

    # Second dataset: packed into 4 partitions
    n = min(len(gp_multipoint2), len(gp_multiline2))
    df2 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint2[:n]),
        'lines': GeoSeries(gp_multiline2[:n]),
        'a': list(range(n)),
    }).set_geometry('lines')
    ddf2 = dd.from_pandas(df2, npartitions=3)
    path2 = tmp_path / 'ddf2.parq'
    ddf_packed2 = ddf2.pack_partitions_to_parquet(str(path2), npartitions=4)

    # Read both datasets at once through a glob pattern
    ddf_globbed = read_parquet_dask(tmp_path / "ddf*.parq", geometry="lines")

    # 3 + 4 partitions at most; empty partitions may reduce the count
    assert ddf_globbed.npartitions <= 7

    # Contents are the concatenation of both packed frames
    packed = (ddf_packed1, ddf_packed2)
    expected_df = pd.concat([part.compute() for part in packed])
    df_globbed = ddf_globbed.compute()
    pd.testing.assert_frame_equal(df_globbed, expected_df)

    # Partition bounds are the concatenated bounds, renumbered from zero
    expected_bounds = {}
    for name in ('points', 'lines'):
        stacked = pd.concat(
            [part._partition_bounds[name] for part in packed])
        expected_bounds[name] = stacked.reset_index(drop=True)
    expected_bounds['points'].index.name = 'partition'
    expected_bounds['lines'].index.name = 'partition'

    for name in ('points', 'lines'):
        pd.testing.assert_frame_equal(
            expected_bounds[name], ddf_globbed._partition_bounds[name])

    assert ddf_globbed.geometry.name == 'lines'
def test_pack_partitions_to_parquet(gp_multipoint, gp_multiline,
                                    use_temp_format, tmp_path_factory):
    """pack_partitions_to_parquet writes hilbert-ordered, readable parquet.

    Args:
        gp_multipoint: geopandas series of multipoint geometries.
        gp_multiline: geopandas series of multiline geometries.
        use_temp_format: Whether to pass an explicit ``tempdir_format``.
        tmp_path_factory: pytest factory used to create a fresh directory.
    """
    # mktemp returns a plain pathlib.Path. Using a Path as a context manager
    # has been a no-op since Python 3.9 and is removed in Python 3.13, so
    # the directory is simply bound to a name instead of entered with `with`.
    tmp_path = tmp_path_factory.mktemp("spatialpandas", numbered=True)

    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    if use_temp_format:
        (tmp_path / 'scratch').mkdir(parents=True, exist_ok=True)
        tempdir_format = str(
            tmp_path / 'scratch' / 'part-{uuid}-{partition:03d}')
    else:
        tempdir_format = None

    _retry_args = dict(
        wait_exponential_multiplier=10,
        wait_exponential_max=20000,
        stop_max_attempt_number=4,
    )

    ddf_packed = ddf.pack_partitions_to_parquet(
        str(path),
        npartitions=12,
        tempdir_format=tempdir_format,
        _retry_args=_retry_args,
    )

    # Check the number of partitions
    # (< 12 can happen in the case of empty partitions)
    assert ddf_packed.npartitions <= 12

    # Check that rows are now sorted in order of hilbert distance
    total_bounds = df.lines.total_bounds
    hilbert_distances = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds)
    ).compute().values

    # Compute expected hilbert distances
    expected_distances = np.sort(
        df.lines.hilbert_distance(total_bounds=total_bounds).values)

    np.testing.assert_equal(expected_distances, hilbert_distances)
    assert ddf_packed.geometry.name == 'points'

    # Read columns
    columns = ['a', 'lines']
    ddf_read_cols = read_parquet_dask(path, columns=columns)
    pd.testing.assert_frame_equal(
        ddf_read_cols.compute(), ddf_packed[columns].compute())
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """Round-trip a GeoDataFrame through to_parquet / read_parquet.

    Args:
        gp_point: geopandas series of point geometries.
        gp_multipoint: geopandas series of multipoint geometries.
        gp_multiline: geopandas series of multiline geometries.
        tmp_path: pytest temporary directory.
    """
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })

    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    df_read = read_parquet(path)
    assert isinstance(df_read, GeoDataFrame)

    # Iterating a DataFrame yields its column labels, so the previous
    # `all(df == df_read)` only tested that the label strings are truthy
    # and could never fail. Reduce the element-wise comparison over both
    # axes so that the cell values are actually checked.
    assert (df == df_read).all().all()
def test_parquet_columns(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """read_parquet returns only the requested subset of columns."""
    # Trim the geometry series to a common length
    n = min(len(gp_multipoint), len(gp_multiline))
    data = {
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n)),
    }
    df = GeoDataFrame(data)

    # Write everything, then read back two columns only
    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    columns = ['a', 'multiline']
    df_read = read_parquet(str(path), columns=columns)

    assert isinstance(df_read, GeoDataFrame)
    expected = df[columns]
    pd.testing.assert_frame_equal(expected, df_read)
def test_pack_partitions_to_parquet(gp_multipoint, gp_multiline,
                                    use_temp_format, tmp_path):
    """pack_partitions_to_parquet writes hilbert-ordered, readable parquet."""
    # Trim both geometry series to a common length
    n = min(len(gp_multipoint), len(gp_multiline))
    data = {
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n)),
    }
    df = GeoDataFrame(data).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    # Optionally route temporary partitions through a scratch location
    tempdir_format = None
    if use_temp_format:
        tempdir_format = str(
            tmp_path / 'scratch' / 'part-{uuid}-{partition:03d}')

    ddf_packed = ddf.pack_partitions_to_parquet(
        path, npartitions=12, tempdir_format=tempdir_format)

    # At most 12 partitions; empty partitions may reduce the count
    assert ddf_packed.npartitions <= 12

    # Distances of the packed rows, in stored order
    total_bounds = df.lines.total_bounds
    per_partition = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds))
    hilbert_distances = per_partition.compute().values

    # Distances of the original rows, sorted ascending
    original = df.lines.hilbert_distance(total_bounds=total_bounds).values
    expected_distances = np.sort(original)

    np.testing.assert_equal(expected_distances, hilbert_distances)
    assert ddf_packed.geometry.name == 'points'

    # Reading a column subset (plus 'hilbert_distance') matches the
    # same subset taken from the packed frame
    columns = ['a', 'lines']
    ddf_read_cols = read_parquet_dask(
        path, columns=columns + ['hilbert_distance'])
    pd.testing.assert_frame_equal(
        ddf_read_cols.compute(), ddf_packed[columns].compute())
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    """A GeoDataFrame with a named index survives a parquet round trip."""
    # Trim the geometry series to a common length
    n = min(len(gp_multipoint), len(gp_multiline))
    data = {
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n)),
    }
    df = GeoDataFrame(data)
    df.index.name = 'range_idx'

    # Write, then read back all four columns explicitly
    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    wanted = ['point', 'multipoint', 'multiline', 'a']
    df_read = read_parquet(str(path), columns=wanted)

    assert isinstance(df_read, GeoDataFrame)
    pd.testing.assert_frame_equal(df, df_read)
    # The index name must be preserved as well
    assert df_read.index.name == df.index.name
def test_parquet_dask(gp_multipoint, gp_multiline, tmp_path_factory):
    """Write a dask GeoDataFrame to parquet and read it back with bounds.

    Args:
        gp_multipoint: geopandas series of multipoint geometries.
        gp_multiline: geopandas series of multiline geometries.
        tmp_path_factory: pytest factory used to create a fresh directory.
    """
    # mktemp returns a plain pathlib.Path. Using a Path as a context manager
    # has been a no-op since Python 3.9 and is removed in Python 3.13, so
    # the directory is simply bound to a name instead of entered with `with`.
    tmp_path = tmp_path_factory.mktemp("spatialpandas", numbered=True)

    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    ddf.to_parquet(str(path))
    ddf_read = read_parquet_dask(str(path))

    # Check type
    assert isinstance(ddf_read, DaskGeoDataFrame)

    # Check that partition bounds were loaded. Only the bounds of
    # non-empty input partitions are expected in the result.
    nonempty = np.nonzero(
        np.asarray(ddf.map_partitions(len).compute() > 0))[0]
    assert set(ddf_read._partition_bounds) == {'points', 'lines'}

    expected_partition_bounds = (
        ddf['points'].partition_bounds.iloc[nonempty].reset_index(drop=True))
    expected_partition_bounds.index.name = 'partition'
    pd.testing.assert_frame_equal(
        expected_partition_bounds,
        ddf_read._partition_bounds['points'],
    )

    expected_partition_bounds = (
        ddf['lines'].partition_bounds.iloc[nonempty].reset_index(drop=True))
    expected_partition_bounds.index.name = 'partition'
    pd.testing.assert_frame_equal(
        expected_partition_bounds,
        ddf_read._partition_bounds['lines'],
    )

    assert ddf_read.geometry.name == 'points'
def split(cls, dataset, start, end, datatype, **kwargs):
    """Split a dataset into one object per row of its dataframe.

    Args:
        dataset: Dataset whose ``data`` is a spatialpandas dataframe.
        start: Not used by this implementation.
        end: Not used by this implementation.
        datatype: None to get cloned datasets, 'dictionary' or 'columns'
            to get dicts, 'array' or 'dataframe' to get converted objects.
        **kwargs: Forwarded to ``array()`` / ``dframe()``.

    Returns:
        A list with one entry per row (empty if the dataset is empty).

    Raises:
        ValueError: If ``datatype`` is not one of the supported values.
    """
    from spatialpandas import GeoDataFrame, GeoSeries
    from ...element import Polygons

    if not len(dataset.data):
        return []

    results = []
    xdim, ydim = cls.geom_dims(dataset)
    # Every declared dimension that is not a geometry coordinate
    value_dims = [
        dim for dim in dataset.kdims + dataset.vdims
        if dim not in (xdim, ydim)
    ]
    first_row = dataset.data.iloc[0]
    col = cls.geo_column(dataset.data)
    geom_type = cls.geom_type(dataset)

    if datatype is not None:
        # Template dataset, built from the first row, whose data is
        # swapped out for every row converted below.
        template_arr = geom_to_array(first_row[col], geom_type=geom_type)
        template = {(xdim.name, ydim.name): template_arr}
        template.update(
            {dim.name: first_row[dim.name] for dim in value_dims})
        ds = dataset.clone(template, datatype=['dictionary'])

    if cls.has_holes(dataset):
        holes = cls.holes(dataset)
    else:
        holes = None

    for i, row in dataset.data.iterrows():
        if datatype is None:
            # NOTE(review): only a column literally named 'geometry' is
            # wrapped in a GeoSeries, regardless of `col` -- confirm.
            single = {}
            for c in dataset.data.columns:
                single[c] = GeoSeries([row[c]]) if c == 'geometry' else [row[c]]
            results.append(dataset.clone(GeoDataFrame(single)))
            continue

        gt = geom_type or get_geom_type(dataset.data, col)
        coords = geom_to_array(row[col], geom_type=gt)
        d = {xdim.name: coords[:, 0], ydim.name: coords[:, 1]}
        for dim in value_dims:
            d[dim.name] = row[dim.name]

        if datatype in ('dictionary', 'columns'):
            if holes is not None:
                # NOTE(review): `i` is the index label from iterrows();
                # presumably equal to the row position -- confirm.
                d[Polygons._hole_key] = holes[i]
            d['geom_type'] = gt
            results.append(d)
            continue

        ds.data = d
        if datatype == 'array':
            results.append(ds.array(**kwargs))
        elif datatype == 'dataframe':
            results.append(ds.dframe(**kwargs))
        else:
            raise ValueError("%s datatype not support" % datatype)
    return results
def test_multipoint_array_to_geopandas(gp_multipoint):
    """A multipoint GeoSeries converts back to the geopandas original."""
    sp_series = GeoSeries(gp_multipoint, dtype='multipoint')
    result = sp_series.to_geopandas()
    assert_series_equal(result, gp_multipoint)
def test_pack_partitions_to_parquet_list_bounds(
        gp_multipoint1, gp_multiline1,
        gp_multipoint2, gp_multiline2,
        bounds, tmp_path,
):
    """Reading a list of packed datasets with ``bounds`` filters partitions."""
    # First dataset: packed into 3 partitions
    n = min(len(gp_multipoint1), len(gp_multiline1))
    df1 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint1[:n]),
        'lines': GeoSeries(gp_multiline1[:n]),
        'a': list(range(n)),
    }).set_geometry('lines')
    ddf1 = dd.from_pandas(df1, npartitions=3)
    path1 = tmp_path / 'ddf1.parq'
    ddf_packed1 = ddf1.pack_partitions_to_parquet(str(path1), npartitions=3)

    # Second dataset: packed into 4 partitions
    n = min(len(gp_multipoint2), len(gp_multiline2))
    df2 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint2[:n]),
        'lines': GeoSeries(gp_multiline2[:n]),
        'a': list(range(n)),
    }).set_geometry('lines')
    ddf2 = dd.from_pandas(df2, npartitions=3)
    path2 = tmp_path / 'ddf2.parq'
    ddf_packed2 = ddf2.pack_partitions_to_parquet(str(path2), npartitions=4)

    # Read both datasets from an explicit list of paths, limited to bounds
    sources = [str(tmp_path / "ddf1.parq"), str(tmp_path / "ddf2.parq")]
    ddf_read = read_parquet_dask(sources, geometry="points", bounds=bounds)

    # 3 + 4 partitions at most; empty partitions may reduce the count
    assert ddf_read.npartitions <= 7

    # Contents equal the partition-level cx selection of both packed frames
    xslice = slice(bounds[0], bounds[2])
    yslice = slice(bounds[1], bounds[3])
    selected1 = ddf_packed1.cx_partitions[xslice, yslice].compute()
    selected2 = ddf_packed2.cx_partitions[xslice, yslice].compute()
    expected_df = pd.concat([selected1, selected2])
    df_read = ddf_read.compute()
    pd.testing.assert_frame_equal(df_read, expected_df)

    # Expected partition bounds: start from the concatenated 'points' bounds
    points_bounds = pd.concat([
        ddf_packed1._partition_bounds['points'],
        ddf_packed2._partition_bounds['points'],
    ]).reset_index(drop=True)

    # Normalise the query rectangle so that x0 <= x1 and y0 <= y1
    x0, y0, x1, y1 = bounds
    if not (x0 <= x1):
        x0, x1 = x1, x0
    if not (y0 <= y1):
        y0, y1 = y1, y0

    # Keep the partitions whose 'points' bounds are not entirely outside
    outside = (
        (points_bounds.x1 < x0)
        | (points_bounds.y1 < y0)
        | (points_bounds.x0 > x1)
        | (points_bounds.y0 > y1)
    )
    partition_inds = ~outside
    points_bounds = points_bounds[partition_inds].reset_index(drop=True)

    # The same partitions are kept for the 'lines' bounds
    all_lines_bounds = pd.concat([
        ddf_packed1._partition_bounds['lines'],
        ddf_packed2._partition_bounds['lines'],
    ]).reset_index(drop=True)
    lines_bounds = all_lines_bounds[partition_inds].reset_index(drop=True)

    points_bounds.index.name = 'partition'
    lines_bounds.index.name = 'partition'

    # Compare against the bounds reported by the reader
    pd.testing.assert_frame_equal(
        points_bounds, ddf_read._partition_bounds['points'])
    pd.testing.assert_frame_equal(
        lines_bounds, ddf_read._partition_bounds['lines'])

    # The requested geometry column must be the active one
    assert ddf_read.geometry.name == 'points'
def test_multiline_array_to_geopandas(gp_multiline):
    """A multiline GeoSeries converts back to the geopandas original."""
    sp_series = GeoSeries(gp_multiline, dtype='multiline')
    result = sp_series.to_geopandas()
    assert_series_equal(result, gp_multiline)
def to_spatialpandas(data, xdim, ydim, columns=None, geom='point'):
    """Converts list of dictionary format geometries to a spatialpandas
    GeoDataFrame.

    Args:
        data: List of dictionaries representing individual geometries
        xdim: Name of x-coordinates column
        ydim: Name of y-coordinates column
        columns: List of additional (non-geometry) columns to include in
            the result; defaults to no additional columns
        geom: The type of geometry. 'Polygon', 'Line' and 'Ring' select
            the corresponding geometry; any other value selects points.
            Polygons are also selected whenever any entry of ``data``
            contains the Polygons hole key.

    Returns:
        A spatialpandas.GeoDataFrame version of the data

    Raises:
        ValueError: If an entry lacks the ``xdim`` or ``ydim`` key.
        DataError: If an entry declares holes but the number of hole
            lists differs from the number of NaN-separated sub-geometries.
    """
    from spatialpandas import GeoSeries, GeoDataFrame
    from spatialpandas.geometry import (
        Point, Line, Polygon, Ring, LineArray, PolygonArray, PointArray,
        MultiLineArray, MultiPolygonArray, MultiPointArray, RingArray
    )
    from ...element import Polygons

    # Avoid a shared mutable default argument.
    if columns is None:
        columns = []

    poly = any(Polygons._hole_key in d for d in data) or geom == 'Polygon'
    if poly:
        geom_type = Polygon
        single_array, multi_array = PolygonArray, MultiPolygonArray
    elif geom == 'Line':
        geom_type = Line
        single_array, multi_array = LineArray, MultiLineArray
    elif geom == 'Ring':
        geom_type = Ring
        single_array, multi_array = RingArray, MultiLineArray
    else:
        geom_type = Point
        single_array, multi_array = PointArray, MultiPointArray

    # First pass: split every entry into its NaN-separated sub-geometries
    # and decide whether a single or a multi geometry array is required.
    array_type = None
    hole_arrays, geom_arrays = [], []
    for entry in data:
        # Work on a copy so that the caller's dictionaries are not modified
        record = dict(entry)
        if xdim not in record or ydim not in record:
            raise ValueError('Could not find geometry dimensions')
        xs, ys = record.pop(xdim), record.pop(ydim)
        xscalar, yscalar = isscalar(xs), isscalar(ys)
        if xscalar and yscalar:
            xs, ys = np.array([xs]), np.array([ys])
        elif xscalar:
            # Broadcast a scalar coordinate against the other axis
            xs = np.full_like(ys, xs)
        elif yscalar:
            ys = np.full_like(xs, ys)
        geom_array = np.column_stack([xs, ys])

        if geom_type in (Polygon, Ring):
            geom_array = ensure_ring(geom_array)

        # Rows containing NaN mark the boundaries between sub-geometries
        splits = np.where(
            np.isnan(geom_array[:, :2].astype('float')).sum(axis=1))[0]
        if len(splits):
            split_geoms = np.split(geom_array, splits + 1)
        else:
            split_geoms = [geom_array]
        split_holes = record.pop(Polygons._hole_key, None)
        if split_holes is not None:
            if len(split_holes) != len(split_geoms):
                raise DataError(
                    'Polygons with holes containing multi-geometries '
                    'must declare a list of holes for each geometry.',
                    SpatialPandasInterface)
            else:
                split_holes = [[ensure_ring(np.asarray(h)) for h in hs]
                               for hs in split_holes]

        geom_arrays.append(split_geoms)
        hole_arrays.append(split_holes)
        if geom_type is Point:
            if len(splits) > 1 or any(len(g) > 1 for g in split_geoms):
                array_type = multi_array
            elif array_type is None:
                array_type = single_array
        elif len(splits):
            array_type = multi_array
        elif array_type is None:
            array_type = single_array

    # Second pass: flatten the sub-geometries into the buffer layout
    # expected by the selected array type and collect the column values.
    converted = defaultdict(list)
    for entry, arrays, holes in zip(data, geom_arrays, hole_arrays):
        parts = []
        for i, g in enumerate(arrays):
            if i != (len(arrays) - 1):
                # Drop the trailing NaN separator row
                g = g[:-1]
            if len(g) < (3 if poly else 2) and geom_type is not Point:
                # Too few vertices to form this geometry type
                continue
            if poly:
                parts.append([])
                subparts = parts[-1]
            else:
                subparts = parts
            subparts.append(g[:, :2])
            if poly and holes is not None:
                subparts += [np.array(h) for h in holes[i]]

        for c, v in entry.items():
            converted[c].append(v)

        if array_type is PointArray:
            parts = parts[0].flatten()
        elif array_type is MultiPointArray:
            parts = np.concatenate([sp.flatten() for sp in parts])
        elif array_type is multi_array:
            parts = [[ssp.flatten() for ssp in sp] if poly else sp.flatten()
                     for sp in parts]
        else:
            parts = ([np.asarray(sp).flatten() for sp in parts[0]]
                     if poly else parts[0].flatten())
        converted['geometry'].append(parts)

    if converted:
        geometries = converted['geometry']
        if array_type is PointArray:
            geometries = np.concatenate(geometries)
        geom_array = array_type(geometries)
        if poly:
            geom_array = geom_array.oriented()
        converted['geometry'] = GeoSeries(geom_array)
    else:
        # No input entries: return an empty geometry column
        converted['geometry'] = GeoSeries(single_array([]))
    return GeoDataFrame(converted, columns=['geometry'] + columns)
def iloc(cls, dataset, index):
    """Integer-position indexing over the rows and columns of a dataset.

    For geometry columns that do not have the MultiPoint dtype the
    selection is delegated to ``DataFrame.iloc``. For MultiPoint columns
    the row positions address the individual points across all
    geometries, and the matching points are re-packed per geometry.

    Args:
        dataset: Dataset whose ``data`` is a spatialpandas dataframe.
        index: ``(rows, cols)`` tuple. ``rows`` may be a scalar, a slice
            or an iterable of positions; ``cols`` may be a slice, a
            scalar or an iterable of dimensions.

    Returns:
        A scalar when both rows and cols are scalar (non-MultiPoint
        data), otherwise the selected dataframe.

    Raises:
        DataError: If the selected columns do not include every
            geometry dimension.
    """
    from spatialpandas import GeoSeries
    from spatialpandas.geometry import MultiPointDtype

    rows, cols = index
    geom_dims = cls.geom_dims(dataset)
    geom_col = cls.geo_column(dataset.data)
    scalar = False
    columns = list(dataset.data.columns)
    if isinstance(cols, slice):
        cols = [d.name for d in dataset.dimensions()][cols]
    elif np.isscalar(cols):
        scalar = np.isscalar(rows)
        cols = [dataset.get_dimension(cols).name]
    else:
        cols = [dataset.get_dimension(d).name for d in index[1]]
    if not all(d in cols for d in geom_dims):
        raise DataError(
            "Cannot index a dimension which is part of the "
            "geometry column of a spatialpandas DataFrame.", cls)
    # Geometry dimensions all map onto the single geometry column
    cols = list(unique_iterator([
        columns.index(geom_col) if c in geom_dims else columns.index(c)
        for c in cols
    ]))

    if not isinstance(dataset.data[geom_col].dtype, MultiPointDtype):
        if scalar:
            # `rows` is a scalar in this branch and must not be
            # subscripted (the previous `rows[0]` raised TypeError).
            return dataset.data.iloc[rows, cols[0]]
        elif isscalar(rows):
            rows = [rows]
        return dataset.data.iloc[rows, cols]

    geoms = dataset.data[geom_col]
    # `count` is the number of points in the geometries already visited
    count = 0
    new_geoms, indexes = [], []
    for i, geom in enumerate(geoms):
        # buffer_values holds interleaved x/y values: two per point
        length = int(len(geom.buffer_values) / 2)
        if np.isscalar(rows):
            if count <= rows < (count + length):
                idx = (rows - count) * 2
                data = geom.buffer_values[idx:idx + 2]
                new_geoms.append(type(geom)(data))
                indexes.append(i)
                break
        elif isinstance(rows, slice):
            if rows.start is not None and rows.start > (count + length):
                # The slice starts after this geometry. The running
                # offset must still advance, otherwise no later
                # geometry can ever match the slice.
                count += length
                continue
            elif rows.stop is not None and rows.stop < count:
                break
            start = (None if rows.start is None
                     else max(rows.start - count, 0) * 2)
            stop = (None if rows.stop is None
                    else min(rows.stop - count, length) * 2)
            if rows.step is not None:
                dataset.param.warning(
                    ".iloc step slicing currently not supported for "
                    "the multi-tabular data format.")
            sliced = geom.buffer_values[start:stop]
            if len(sliced):
                indexes.append(i)
                new_geoms.append(type(geom)(sliced))
        else:
            # Positions of the x and y value of every requested point
            # that falls inside this geometry
            sub_rows = [
                v for r in rows
                for v in ((r - count) * 2, (r - count) * 2 + 1)
                if count <= r < (count + length)
            ]
            if sub_rows:
                indexes.append(i)
                idxs = np.array(sub_rows, dtype=int)
                new_geoms.append(type(geom)(geom.buffer_values[idxs]))
        count += length

    new = dataset.data.iloc[indexes].copy()
    # Column assignment aligns on the index, so the replacement series
    # must carry the index of the selected rows rather than 0..n-1.
    new[geom_col] = GeoSeries(new_geoms, index=new.index)
    return new
def geohashes_to_geoseries(s: pd.Series) -> GeoSeries:
    """Build a spatialpandas GeoSeries of points from geohashes.

    The result keeps the index of the (series-coerced) input.
    """
    hashes = pd.Series(s)
    # Each geohash is decoded with geohash_decode_xy
    decoded = hashes.apply(geohash_decode_xy)
    point_array = PointArray(decoded)
    return GeoSeries(point_array, index=hashes.index)
def test_multipolygon_array_to_geopandas(gp_multipolygon):
    """A multipolygon GeoSeries converts back to the geopandas original."""
    sp_series = GeoSeries(gp_multipolygon, dtype='multipolygon')
    result = sp_series.to_geopandas()
    assert_series_equal(result, gp_multipolygon)
def test_ring_array_to_geopandas(gp_ring):
    """A ring GeoSeries converts back to the geopandas original."""
    sp_series = GeoSeries(gp_ring, dtype='ring')
    result = sp_series.to_geopandas()
    assert_series_equal(result, gp_ring)