def validate(cls, dataset, vdims=True):
    dim_types = 'key' if vdims else 'all'
    geom_dims = cls.geom_dims(dataset)
    if len(geom_dims) != 2:
        raise DataError('Expected %s instance to declare two key '
                        'dimensions corresponding to the geometry '
                        'coordinates but %d dimensions were found '
                        'which did not refer to any columns.'
                        % (type(dataset).__name__, len(geom_dims)), cls)
    not_found = [d.name for d in dataset.dimensions(dim_types)
                 if d not in geom_dims and d.name not in dataset.data]
    if not_found:
        raise DataError("Supplied data does not contain specified "
                        "dimensions, the following dimensions were "
                        "not found: %s" % repr(not_found), cls)
def init(cls, eltype, data, kdims, vdims):
    import pandas as pd
    from geopandas import GeoDataFrame, GeoSeries

    if kdims is None:
        kdims = eltype.kdims

    if isinstance(data, GeoSeries):
        data = data.to_frame()

    if isinstance(data, list):
        if all(isinstance(d, geom_types) for d in data):
            data = [{'geometry': d} for d in data]
        if all(isinstance(d, dict) and 'geometry' in d
               and isinstance(d['geometry'], geom_types) for d in data):
            data = GeoDataFrame(data)
        if not isinstance(data, GeoDataFrame):
            vdims = vdims or eltype.vdims
            data = from_multi(eltype, data, kdims, vdims)
    elif not isinstance(data, GeoDataFrame):
        raise ValueError("GeoPandasInterface only supports geopandas "
                         "DataFrames, not %s." % type(data))
    elif 'geometry' not in data:
        cls.geo_column(data)

    if vdims is None:
        vdims = [col for col in data.columns
                 if not isinstance(data[col], GeoSeries)]

    index_names = (data.index.names if isinstance(data, pd.DataFrame)
                   else [data.index.name])
    if index_names == [None]:
        index_names = ['index']

    for kd in kdims + vdims:
        kd = dimension_name(kd)
        if kd in data.columns:
            continue
        if any(kd == ('index' if name is None else name)
               for name in index_names):
            data = data.reset_index()
            break

    try:
        shp_types = {gt[5:] if 'Multi' in gt else gt for gt in data.geom_type}
    except Exception:
        shp_types = []
    if len(shp_types) > 1:
        raise DataError('The GeoPandasInterface can only read dataframes which '
                        'share a common geometry type, found %s types.'
                        % shp_types, cls)

    return data, {'kdims': kdims, 'vdims': vdims}, {}
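# Illustrative sketch of the list handling in init above (hypothetical data;
# assumes shapely and geopandas are installed): a list of shapely geometries
# is first wrapped into records and then promoted to a GeoDataFrame, e.g.
#
#     from shapely.geometry import Point
#     data = [Point(0, 0), Point(1, 1)]
#     # -> [{'geometry': Point(0, 0)}, {'geometry': Point(1, 1)}]
#     # -> GeoDataFrame(data) with a single 'geometry' column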
def sort(cls, dataset, by=[], reverse=False):
    geo_dims = cls.geom_dims(dataset)
    if any(d in geo_dims for d in by):
        raise DataError("GeoPandasInterface does not allow sorting "
                        "by geometry dimension.", cls)
    return PandasInterface.sort(dataset, by, reverse)
def validate(cls, dataset, validate_vdims):
    from shapely.geometry.base import BaseGeometry
    geom_dims = cls.geom_dims(dataset)
    if len(geom_dims) != 2:
        raise DataError('Expected %s instance to declare two key '
                        'dimensions corresponding to the geometry '
                        'coordinates but %d dimensions were found '
                        'which did not refer to any columns.'
                        % (type(dataset).__name__, len(geom_dims)), cls)
    elif 'geometry' not in dataset.data:
        raise DataError("Could not find a 'geometry' column in the data.")
    elif not isinstance(dataset.data['geometry'], BaseGeometry):
        raise DataError("The 'geometry' column should be a shapely "
                        "geometry type, found %s type instead."
                        % type(dataset.data['geometry']).__name__)
def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
    geo_dims = cls.geom_dims(dataset)
    if any(d in geo_dims for d in dimensions):
        raise DataError("GeoPandasInterface does not allow grouping "
                        "by geometry dimension.", cls)
    return PandasInterface.groupby(dataset, dimensions, container_type,
                                   group_type, **kwargs)
def validate(cls, dataset, vdims=True):
    dim_types = 'key' if vdims else 'all'
    not_found = [d for d in dataset.dimensions(dim_types, label='name')[2:]
                 if d not in dataset.data.columns]
    if not_found:
        raise DataError("Supplied data does not contain specified "
                        "dimensions, the following dimensions were "
                        "not found: %s" % repr(not_found))
def init(cls, eltype, data, kdims, vdims):
    if not isinstance(data, GeoDataFrame):
        raise ValueError("GeoPandasInterface only supports geopandas DataFrames.")
    elif 'geometry' not in data:
        raise DataError("GeoPandas dataframe must contain a geometry column; "
                        "to plot non-geographic data use a pandas DataFrame.", cls)
    if kdims is not None:
        if len(kdims) != 2:
            raise DataError("Expected two kdims to use GeoDataFrame, "
                            "found %d." % len(kdims))
    else:
        kdims = eltype.kdims
    if vdims is None:
        vdims = eltype.vdims
    return data, {'kdims': kdims, 'vdims': vdims}, {}
def geom_from_dict(geom, xdim, ydim, single_type, multi_type):
    from shapely.geometry import (
        Point, LineString, Polygon, MultiPoint, MultiPolygon, MultiLineString)
    if (xdim, ydim) in geom:
        xs, ys = np.asarray(geom.pop((xdim, ydim))).T
    elif xdim in geom and ydim in geom:
        xs, ys = geom.pop(xdim), geom.pop(ydim)
    else:
        raise ValueError('Could not find geometry dimensions')
    xscalar, yscalar = isscalar(xs), isscalar(ys)
    if xscalar and yscalar:
        xs, ys = np.array([xs]), np.array([ys])
    elif xscalar:
        xs = np.full_like(ys, xs)
    elif yscalar:
        ys = np.full_like(xs, ys)
    geom_array = np.column_stack([xs, ys])
    splits = np.where(np.isnan(geom_array[:, :2].astype('float')).sum(axis=1))[0]
    if len(splits):
        split_geoms = [g[:-1] if i == (len(splits) - 1) else g
                       for i, g in enumerate(np.split(geom_array, splits + 1))]
    else:
        split_geoms = [geom_array]
    split_holes = geom.pop('holes', None)
    if split_holes is not None and len(split_holes) != len(split_geoms):
        raise DataError('Polygons with holes containing multi-geometries '
                        'must declare a list of holes for each geometry.')
    if single_type is Point:
        if len(splits) > 1 or any(len(g) > 1 for g in split_geoms):
            geom = MultiPoint(np.concatenate(split_geoms))
        else:
            geom = Point(*split_geoms[0])
    elif len(splits):
        if multi_type is MultiPolygon:
            if split_holes is None:
                split_holes = [[]] * len(split_geoms)
            geom = MultiPolygon(list(zip(split_geoms, split_holes)))
        else:
            geom = MultiLineString(split_geoms)
    elif single_type is Polygon:
        if split_holes is None or not len(split_holes):
            split_holes = [None]
        geom = Polygon(split_geoms[0], split_holes[0])
    else:
        geom = LineString(split_geoms[0])
    return geom
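# Minimal usage sketch for geom_from_dict (hypothetical inputs; assumes shapely
# is installed): a columnar dict is converted to the declared geometry type,
# with NaN-separated coordinate runs split into a multi-geometry, e.g.
#
#     from shapely.geometry import LineString, MultiLineString
#     geom_from_dict({'x': [0, 1, 2], 'y': [0, 1, 0]}, 'x', 'y',
#                    LineString, MultiLineString)
#     # -> LineString through (0, 0), (1, 1) and (2, 0); a NaN row in the
#     #    coordinate arrays would instead yield a MultiLineString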
def from_multi(eltype, data, kdims, vdims):
    """Converts list formats into geopandas.GeoDataFrame.

    Args:
        eltype: Element type to convert
        data: The original data
        kdims: The declared key dimensions
        vdims: The declared value dimensions

    Returns:
        A GeoDataFrame containing the data in the list based format.
    """
    from geopandas import GeoDataFrame

    new_data = []
    types = []
    xname, yname = (kd.name for kd in kdims[:2])
    for d in data:
        types.append(type(d))
        if isinstance(d, dict):
            d = {k: v if isscalar(v) else np.asarray(v) for k, v in d.items()}
            new_data.append(d)
            continue
        new_el = eltype(d, kdims, vdims)
        if new_el.interface is GeoPandasInterface:
            types[-1] = GeoDataFrame
            new_data.append(new_el.data)
            continue
        new_dict = {}
        for d in new_el.dimensions():
            if d in (xname, yname):
                scalar = False
            else:
                scalar = new_el.interface.isscalar(new_el, d)
            vals = new_el.dimension_values(d, not scalar)
            new_dict[d.name] = vals[0] if scalar else vals
        new_data.append(new_dict)
    if len(set(types)) > 1:
        raise DataError('Mixed types not supported')
    if new_data and types[0] is GeoDataFrame:
        data = pd.concat(new_data)
    else:
        columns = [d.name for d in kdims + vdims if d not in (xname, yname)]
        geom = GeoPandasInterface.geom_type(eltype)
        if not len(data):
            return GeoDataFrame([], columns=['geometry'] + columns)
        data = to_geopandas(new_data, xname, yname, columns, geom)
    return data
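# Rough sketch of from_multi (hypothetical data; the element type and column
# names are assumptions): each list entry becomes one row of the resulting
# GeoDataFrame, e.g.
#
#     import holoviews as hv
#     paths = [{'x': [0, 1, 2], 'y': [0, 1, 0], 'value': 1},
#              {'x': [3, 4, 5], 'y': [0, 1, 0], 'value': 2}]
#     from_multi(hv.Path, paths, hv.Path.kdims, [hv.Dimension('value')])
#     # -> GeoDataFrame with one LineString per entry in the 'geometry'
#     #    column and a scalar 'value' column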
def init(cls, eltype, data, kdims, vdims):
    import pandas as pd
    from geopandas import GeoDataFrame

    if isinstance(data, list):
        if all(isinstance(d, geom_types) for d in data):
            data = [{'geometry': d} for d in data]
        if all('geometry' in d and isinstance(d['geometry'], geom_types)
               for d in data):
            data = GeoDataFrame(data)
    elif not isinstance(data, GeoDataFrame):
        raise ValueError("GeoPandasInterface only supports geopandas DataFrames.")
    elif 'geometry' not in data:
        raise DataError("GeoPandas dataframe must contain a geometry column; "
                        "to plot non-geographic data use a pandas DataFrame.", cls)

    if kdims is None:
        kdims = eltype.kdims
    if vdims is None:
        vdims = eltype.vdims

    index_names = (data.index.names if isinstance(data, pd.DataFrame)
                   else [data.index.name])
    if index_names == [None]:
        index_names = ['index']

    for kd in kdims + vdims:
        kd = dimension_name(kd)
        if kd in data.columns:
            continue
        if any(kd == ('index' if name is None else name)
               for name in index_names):
            data = data.reset_index()
            break

    shp_types = {gt[5:] if 'Multi' in gt else gt for gt in data.geom_type}
    if len(shp_types) > 1:
        raise DataError('The GeoPandasInterface can only read dataframes which '
                        'share a common geometry type, found %s types.'
                        % shp_types, cls)
    return data, {'kdims': kdims, 'vdims': vdims}, {}
def iloc(cls, dataset, index):
    from geopandas import GeoSeries
    from shapely.geometry import MultiPoint
    rows, cols = index
    geom_dims = cls.geom_dims(dataset)
    geom_col = cls.geo_column(dataset.data)
    scalar = False
    columns = list(dataset.data.columns)
    if isinstance(cols, slice):
        cols = [d.name for d in dataset.dimensions()][cols]
    elif np.isscalar(cols):
        scalar = np.isscalar(rows)
        cols = [dataset.get_dimension(cols).name]
    else:
        cols = [dataset.get_dimension(d).name for d in index[1]]
    if not all(d in cols for d in geom_dims):
        raise DataError("Cannot index a dimension which is part of the "
                        "geometry column of a geopandas DataFrame.", cls)
    cols = list(unique_iterator([
        columns.index(geom_col) if c in geom_dims else columns.index(c)
        for c in cols]))

    geom_type = dataset.data[geom_col].geom_type.iloc[0]
    if geom_type != 'MultiPoint':
        if scalar:
            return dataset.data.iloc[rows[0], cols[0]]
        elif isscalar(rows):
            rows = [rows]
        return dataset.data.iloc[rows, cols]

    geoms = dataset.data[geom_col]
    count = 0
    new_geoms, indexes = [], []
    for i, geom in enumerate(geoms):
        length = len(geom)
        if np.isscalar(rows):
            if count <= rows < (count + length):
                new_geoms.append(geom[rows - count])
                indexes.append(i)
                break
        elif isinstance(rows, slice):
            if rows.start is not None and rows.start > (count + length):
                continue
            elif rows.stop is not None and rows.stop < count:
                break
            start = None if rows.start is None else max(rows.start - count, 0)
            stop = None if rows.stop is None else min(rows.stop - count, length)
            if rows.step is not None:
                dataset.param.warning(
                    ".iloc step slicing currently not supported for "
                    "the multi-tabular data format.")
            indexes.append(i)
            new_geoms.append(geom[start:stop])
        elif isinstance(rows, (list, set)):
            sub_rows = [(r - count) for r in rows
                        if count <= r < (count + length)]
            if not sub_rows:
                continue
            indexes.append(i)
            new_geoms.append(MultiPoint([geom[r] for r in sub_rows]))
        count += length

    new = dataset.data.iloc[indexes].copy()
    new[geom_col] = GeoSeries(new_geoms)
    return new
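# Hedged sketch of the MultiPoint branch in iloc (hypothetical element and
# data; the element constructor is an assumption): integer indexing counts
# individual points across rows rather than selecting whole rows, e.g.
#
#     import geoviews as gv
#     from shapely.geometry import MultiPoint
#     points = gv.Points([MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2)])])
#     points.iloc[1]
#     # -> element holding only the second point overall, (1, 1),
#     #    taken from the first MultiPoint row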
def validate(cls, dataset, vdims=True):
    if vdims and len(dataset.vdims) > 1:
        raise DataError("Iris cubes do not support more than one value dimension", cls)