Example #1
0
 def validate(cls, dataset, vdims=True):
     """Check that the dataset declares geometry dims and existing columns.

     Args:
         dataset: Object exposing ``dimensions(kind)`` and a ``data``
             container supporting ``in`` lookups by column name.
         vdims: Selects which dimension group is checked (see note below).

     Raises:
         DataError: If ``cls.geom_dims(dataset)`` does not return exactly
             two dimensions, or if a non-geometry dimension name is
             missing from ``dataset.data``.
     """
     # NOTE(review): vdims=True selects only 'key' dimensions and
     # vdims=False selects 'all' -- this looks inverted; confirm intent.
     if vdims:
         dim_types = 'key'
     else:
         dim_types = 'all'

     geom_dims = cls.geom_dims(dataset)
     n_geom_dims = len(geom_dims)
     if n_geom_dims != 2:
         message = ('Expected %s instance to declare two key '
                    'dimensions corresponding to the geometry '
                    'coordinates but %d dimensions were found '
                    'which did not refer to any columns.'
                    % (type(dataset).__name__, n_geom_dims))
         raise DataError(message, cls)

     # Geometry dimensions live inside the geometry column, so only the
     # remaining dimensions have to exist as columns.
     missing = []
     for dim in dataset.dimensions(dim_types):
         if dim in geom_dims:
             continue
         if dim.name not in dataset.data:
             missing.append(dim.name)

     if missing:
         raise DataError("Supplied data does not contain specified "
                          "dimensions, the following dimensions were "
                          "not found: %s" % repr(missing), cls)
Example #2
0
    def init(cls, eltype, data, kdims, vdims):
        """Normalize supported inputs into a GeoDataFrame plus dimensions.

        Args:
            eltype: Element type supplying default ``kdims`` (and default
                ``vdims`` when list data is converted via ``from_multi``).
            data: A GeoDataFrame, a GeoSeries, or a list of geometries,
                of dicts holding a ``'geometry'`` entry, or of items that
                ``from_multi`` can convert.
            kdims: Declared key dimensions, or None to use ``eltype.kdims``.
            vdims: Declared value dimensions, or None to infer them from
                the non-geometry columns.

        Returns:
            Tuple ``(data, {'kdims': kdims, 'vdims': vdims}, {})``.

        Raises:
            ValueError: If ``data`` is neither a list nor a GeoDataFrame
                (after GeoSeries conversion).
            DataError: If the frame mixes more than one geometry type.
        """
        import pandas as pd
        from geopandas import GeoDataFrame, GeoSeries

        if kdims is None:
            kdims = eltype.kdims

        if isinstance(data, GeoSeries):
            data = data.to_frame()

        if isinstance(data, list):
            # Bare geometries are wrapped so they can be handled like
            # dictionaries with a 'geometry' entry.
            if all(isinstance(d, geom_types) for d in data):
                data = [{'geometry': d} for d in data]
            if all(
                    isinstance(d, dict) and 'geometry' in d
                    and isinstance(d['geometry'], geom_types) for d in data):
                data = GeoDataFrame(data)
            if not isinstance(data, GeoDataFrame):
                vdims = vdims or eltype.vdims
                data = from_multi(eltype, data, kdims, vdims)
        elif not isinstance(data, GeoDataFrame):
            raise ValueError("GeoPandasInterface only support geopandas "
                             "DataFrames not %s." % type(data))
        elif 'geometry' not in data:
            # Return value unused; called for whatever checks it performs.
            cls.geo_column(data)

        if vdims is None:
            vdims = [
                col for col in data.columns
                if not isinstance(data[col], GeoSeries)
            ]

        index_names = data.index.names if isinstance(
            data, pd.DataFrame) else [data.index.name]
        if index_names == [None]:
            index_names = ['index']

        # If any declared dimension refers to the index rather than a
        # column, move the index into the columns (once).
        for kd in kdims + vdims:
            kd = dimension_name(kd)
            if kd in data.columns:
                continue
            if any(kd == ('index' if name is None else name)
                   for name in index_names):
                data = data.reset_index()
                break

        try:
            # 'MultiPolygon' -> 'Polygon' etc., so single and multi
            # variants of a geometry count as the same type.
            shp_types = {
                gt[5:] if 'Multi' in gt else gt
                for gt in data.geom_type
            }
        except Exception:
            # Best effort: geometry types could not be determined, so skip
            # the mixed-type check. Unlike a bare ``except`` this lets
            # KeyboardInterrupt and SystemExit propagate.
            shp_types = set()
        if len(shp_types) > 1:
            raise DataError(
                'The GeopandasInterface can only read dataframes which '
                'share a common geometry type, found %s types.' % shp_types,
                cls)

        return data, {'kdims': kdims, 'vdims': vdims}, {}
Example #3
0
 def sort(cls, dataset, by=None, reverse=False):
     """Sort the dataset by non-geometry dimensions.

     Args:
         dataset: Dataset to sort.
         by: Dimensions to sort by; None (the default) means no
             dimensions, equivalent to the former ``[]`` default.
         reverse: Passed through to ``PandasInterface.sort``.

     Returns:
         Whatever ``PandasInterface.sort(dataset, by, reverse)`` returns.

     Raises:
         DataError: If any entry of ``by`` is a geometry dimension.
     """
     # A None sentinel avoids sharing one mutable list across all calls.
     if by is None:
         by = []
     geo_dims = cls.geom_dims(dataset)
     if any(d in geo_dims for d in by):
         raise DataError(
             "SpatialPandasInterface does not allow sorting "
             "by geometry dimension.", cls)
     return PandasInterface.sort(dataset, by, reverse)
Example #4
0
 def validate(cls, dataset, validate_vdims):
     """Check geometry dimensions and the 'geometry' entry of the data.

     Args:
         dataset: Object whose ``data`` supports ``in`` and item lookup
             by the ``'geometry'`` key.
         validate_vdims: Accepted for interface compatibility; not used
             by this implementation.

     Raises:
         DataError: If ``cls.geom_dims(dataset)`` does not return exactly
             two dimensions, if ``dataset.data`` has no ``'geometry'``
             entry, or if that entry is not a shapely ``BaseGeometry``.
     """
     from shapely.geometry.base import BaseGeometry
     geom_dims = cls.geom_dims(dataset)
     if len(geom_dims) != 2:
         raise DataError(
             'Expected %s instance to declare two key '
             'dimensions corresponding to the geometry '
             'coordinates but %d dimensions were found '
             'which did not refer to any columns.' %
             (type(dataset).__name__, len(geom_dims)), cls)
     elif 'geometry' not in dataset.data:
         raise DataError("Could not find a 'geometry' column in the data.")
     elif not isinstance(dataset.data['geometry'], BaseGeometry):
         # The trailing space after "shapely" keeps the two literals from
         # fusing into "shapelygeometry".
         raise DataError("The 'geometry' column should be a shapely "
                         "geometry type, found %s type instead." %
                         type(dataset.data['geometry']).__name__)
Example #5
0
    def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
        """Group by non-geometry dimensions via ``PandasInterface.groupby``.

        Raises:
            DataError: If any requested dimension is a geometry dimension.
        """
        geometry_dims = cls.geom_dims(dataset)
        for dim in dimensions:
            if dim in geometry_dims:
                raise DataError("GeoPandasInterface does not allow grouping "
                                "by geometry dimension.", cls)

        grouped = PandasInterface.groupby(
            dataset, dimensions, container_type, group_type, **kwargs)
        return grouped
Example #6
0
 def validate(cls, dataset, vdims=True):
     """Ensure every dimension after the first two names a data column.

     Args:
         dataset: Object exposing ``dimensions(kind, label='name')`` and
             ``data.columns``.
         vdims: Selects which dimension group is checked (see note below).

     Raises:
         DataError: Listing the dimension names that are not found in
             ``dataset.data.columns``.
     """
     # NOTE(review): vdims=True selects 'key', vdims=False selects 'all';
     # this looks inverted -- confirm intent.
     if vdims:
         dim_types = 'key'
     else:
         dim_types = 'all'

     # The first two dimensions are skipped; presumably they are the
     # geometry coordinates rather than columns -- verify against caller.
     names = dataset.dimensions(dim_types, label='name')
     missing = []
     for name in names[2:]:
         if name not in dataset.data.columns:
             missing.append(name)

     if missing:
         raise DataError("Supplied data does not contain specified "
                          "dimensions, the following dimensions were "
                          "not found: %s" % repr(missing))
Example #7
0
    def init(cls, eltype, data, kdims, vdims):
        """Validate a GeoDataFrame and resolve its key/value dimensions.

        Args:
            eltype: Element type supplying default ``kdims``/``vdims``.
            data: Must be a GeoDataFrame containing a 'geometry' column.
            kdims: Exactly two key dimensions, or None for the defaults.
            vdims: Value dimensions, or None for the defaults.

        Returns:
            Tuple ``(data, {'kdims': kdims, 'vdims': vdims}, {})``.

        Raises:
            ValueError: If ``data`` is not a GeoDataFrame.
            DataError: If the geometry column is missing or ``kdims`` does
                not contain exactly two entries.
        """
        if not isinstance(data, GeoDataFrame):
            raise ValueError(
                "GeoPandasInterface only support geopandas DataFrames.")
        if 'geometry' not in data:
            raise DataError(
                "GeoPandas dataframe must contain geometry column, "
                "to plot non-geographic data use pandas DataFrame.", cls)

        # Fall back to the element defaults when nothing was declared.
        if kdims is None:
            kdims = eltype.kdims
        elif len(kdims) != 2:
            raise DataError(
                "Expected two kdims to use GeoDataFrame, found %d." %
                len(kdims))

        if vdims is None:
            vdims = eltype.vdims

        dimensions = {'kdims': kdims, 'vdims': vdims}
        return data, dimensions, {}
Example #8
0
def geom_from_dict(geom, xdim, ydim, single_type, multi_type):
    """Build a shapely geometry from a dictionary of coordinates.

    Coordinates are read either from a combined ``(xdim, ydim)`` entry or
    from separate ``xdim`` and ``ydim`` entries. Rows containing NaN act
    as separators between the parts of a multi-geometry.

    Note that the coordinate entries and any ``'holes'`` entry are popped
    from ``geom``, i.e. the input dictionary is mutated.

    Args:
        geom: Dictionary holding the coordinates (and optionally holes).
        xdim: Key of the x-coordinates.
        ydim: Key of the y-coordinates.
        single_type: shapely single-geometry class (Point, LineString or
            Polygon) selecting which kind of geometry to build.
        multi_type: shapely multi-geometry class; checked against
            MultiPolygon when NaN separators are present.

    Returns:
        A shapely geometry instance.

    Raises:
        ValueError: If the coordinate entries cannot be found.
        DataError: If holes are supplied but their number differs from
            the number of sub-geometries.
    """
    from shapely.geometry import (Point, LineString, Polygon, MultiPoint,
                                  MultiPolygon, MultiLineString)
    if (xdim, ydim) in geom:
        xs, ys = np.asarray(geom.pop((xdim, ydim))).T
    elif xdim in geom and ydim in geom:
        xs, ys = geom.pop(xdim), geom.pop(ydim)
    else:
        raise ValueError('Could not find geometry dimensions')

    # Broadcast scalar coordinates so both columns have equal length.
    xscalar, yscalar = isscalar(xs), isscalar(ys)
    if xscalar and yscalar:
        xs, ys = np.array([xs]), np.array([ys])
    elif xscalar:
        xs = np.full_like(ys, xs)
    elif yscalar:
        ys = np.full_like(xs, ys)
    geom_array = np.column_stack([xs, ys])

    # Indices of rows where either coordinate is NaN (the separators).
    splits = np.where(np.isnan(
        geom_array[:, :2].astype('float')).sum(axis=1))[0]
    n_splits = len(splits)
    if n_splits:
        # Splitting just after each separator leaves the NaN row at the
        # end of every segment except the final one, so strip it from all
        # of those (not just from a single segment).
        split_geoms = [
            g[:-1] if i < n_splits else g
            for i, g in enumerate(np.split(geom_array, splits + 1))
        ]
    else:
        split_geoms = [geom_array]
    split_holes = geom.pop('holes', None)
    if split_holes is not None and len(split_holes) != len(split_geoms):
        raise DataError('Polygons with holes containing multi-geometries '
                        'must declare a list of holes for each geometry.')

    if single_type is Point:
        if n_splits > 1 or any(len(g) > 1 for g in split_geoms):
            geom = MultiPoint(np.concatenate(split_geoms))
        else:
            geom = Point(*split_geoms[0])
    elif n_splits:
        if multi_type is MultiPolygon:
            if split_holes is None:
                split_holes = [[]] * len(split_geoms)
            geom = MultiPolygon(list(zip(split_geoms, split_holes)))
        else:
            geom = MultiLineString(split_geoms)
    elif single_type is Polygon:
        if split_holes is None or not len(split_holes):
            split_holes = [None]
        geom = Polygon(split_geoms[0], split_holes[0])
    else:
        geom = LineString(split_geoms[0])
    return geom
Example #9
0
def from_multi(eltype, data, kdims, vdims):
    """Convert list-based formats into a geopandas.GeoDataFrame.

    Each list item is either a dictionary of columns or something the
    element type can be constructed from. All items must resolve to the
    same type.

    Args:
        eltype: Element type to convert
        data: The original data
        kdims: The declared key dimensions
        vdims: The declared value dimensions

    Returns:
        A GeoDataFrame containing the data in the list based format.

    Raises:
        DataError: If the list mixes items of different types.
    """

    from geopandas import GeoDataFrame

    xname, yname = (kd.name for kd in kdims[:2])
    converted, item_types = [], []
    for item in data:
        item_types.append(type(item))

        # Dictionaries only need their non-scalar values coerced to arrays.
        if isinstance(item, dict):
            converted.append({
                key: (val if isscalar(val) else np.asarray(val))
                for key, val in item.items()
            })
            continue

        element = eltype(item, kdims, vdims)
        if element.interface is GeoPandasInterface:
            # Already backed by a GeoDataFrame; reuse its data directly.
            item_types[-1] = GeoDataFrame
            converted.append(element.data)
            continue

        # Otherwise flatten the element into a dictionary of columns; the
        # coordinate dimensions are always kept as full arrays.
        record = {}
        for dim in element.dimensions():
            is_scalar = (dim not in (xname, yname)
                         and element.interface.isscalar(element, dim))
            values = element.dimension_values(dim, not is_scalar)
            record[dim.name] = values[0] if is_scalar else values
        converted.append(record)

    if len(set(item_types)) > 1:
        raise DataError('Mixed types not supported')

    if converted and item_types[0] is GeoDataFrame:
        return pd.concat(converted)

    columns = [
        dim.name for dim in kdims + vdims if dim not in (xname, yname)
    ]
    geom = GeoPandasInterface.geom_type(eltype)
    if not len(data):
        return GeoDataFrame([], columns=['geometry'] + columns)
    return to_geopandas(converted, xname, yname, columns, geom)
Example #10
0
    def init(cls, eltype, data, kdims, vdims):
        """Normalize supported inputs into a GeoDataFrame plus dimensions.

        Args:
            eltype: Element type supplying default ``kdims``/``vdims``.
            data: A GeoDataFrame, or a list of geometries or of dicts
                holding a ``'geometry'`` entry.
            kdims: Declared key dimensions, or None to use ``eltype.kdims``.
            vdims: Declared value dimensions, or None to use
                ``eltype.vdims``.

        Returns:
            Tuple ``(data, {'kdims': kdims, 'vdims': vdims}, {})``.

        Raises:
            ValueError: If ``data`` is not (convertible to) a GeoDataFrame
                or the frame lacks a 'geometry' column.
            DataError: If the frame mixes more than one geometry type.
        """
        import pandas as pd
        from geopandas import GeoDataFrame

        if isinstance(data, list):
            # Bare geometries are wrapped so they can be handled like
            # dictionaries with a 'geometry' entry.
            if all(isinstance(d, geom_types) for d in data):
                data = [{'geometry': d} for d in data]
            # Only dictionaries are probed for a 'geometry' key; other
            # item types would give misleading ``in`` results.
            if all(isinstance(d, dict) and 'geometry' in d
                   and isinstance(d['geometry'], geom_types)
                   for d in data):
                data = GeoDataFrame(data)
            if not isinstance(data, GeoDataFrame):
                # Fail clearly instead of with an AttributeError further
                # down when the list could not be converted.
                raise ValueError(
                    "GeoPandasInterface only support geopandas DataFrames.")
        elif not isinstance(data, GeoDataFrame):
            raise ValueError(
                "GeoPandasInterface only support geopandas DataFrames.")
        elif 'geometry' not in data:
            # No extra positional argument: it would turn the message
            # into a tuple repr.
            raise ValueError(
                "GeoPandas dataframe must contain geometry column, "
                "to plot non-geographic data use pandas DataFrame.")
        if kdims is None:
            kdims = eltype.kdims

        if vdims is None:
            vdims = eltype.vdims

        index_names = data.index.names if isinstance(
            data, pd.DataFrame) else [data.index.name]
        if index_names == [None]:
            index_names = ['index']

        # If any declared dimension refers to the index rather than a
        # column, move the index into the columns (once).
        for kd in kdims + vdims:
            kd = dimension_name(kd)
            if kd in data.columns:
                continue
            if any(kd == ('index' if name is None else name)
                   for name in index_names):
                data = data.reset_index()
                break

        # 'MultiPolygon' -> 'Polygon' etc., so single and multi variants
        # of a geometry count as the same type.
        shp_types = {gt[5:] if 'Multi' in gt else gt for gt in data.geom_type}
        if len(shp_types) > 1:
            raise DataError(
                'The GeopandasInterface can only read dataframes which '
                'share a common geometry type, found %s types.' % shp_types,
                cls)

        return data, {'kdims': kdims, 'vdims': vdims}, {}
Example #11
0
    def iloc(cls, dataset, index):
        """Integer-index the dataset by rows and columns.

        For non-MultiPoint data the request is forwarded to the frame's
        ``.iloc``. For MultiPoint data rows refer to individual points
        across all geometries, so each geometry is sub-indexed using a
        running point offset.

        Args:
            dataset: Dataset whose ``data`` is the underlying frame.
            index: ``(rows, cols)`` tuple; rows may be a scalar, slice,
                list or set, cols a scalar, slice or iterable of
                dimensions.

        Returns:
            The selected scalar or frame.

        Raises:
            DataError: If the selected columns do not include all of the
                geometry dimensions.
        """
        from geopandas import GeoSeries
        from shapely.geometry import MultiPoint
        rows, cols = index
        geom_dims = cls.geom_dims(dataset)
        geom_col = cls.geo_column(dataset.data)
        scalar = False
        columns = list(dataset.data.columns)
        if isinstance(cols, slice):
            cols = [d.name for d in dataset.dimensions()][cols]
        elif np.isscalar(cols):
            scalar = np.isscalar(rows)
            cols = [dataset.get_dimension(cols).name]
        else:
            cols = [dataset.get_dimension(d).name for d in index[1]]
        if not all(d in cols for d in geom_dims):
            raise DataError(
                "Cannot index a dimension which is part of the "
                "geometry column of a spatialpandas DataFrame.", cls)
        # Geometry dimensions map onto the single geometry column.
        cols = list(
            unique_iterator([
                columns.index(geom_col) if c in geom_dims else columns.index(c)
                for c in cols
            ]))

        geom_type = dataset.data[geom_col].geom_type.iloc[0]
        if geom_type != 'MultiPoint':
            if scalar:
                # ``rows`` is a scalar here, so it must not be subscripted.
                return dataset.data.iloc[rows, cols[0]]
            elif isscalar(rows):
                rows = [rows]
            return dataset.data.iloc[rows, cols]

        geoms = dataset.data[geom_col]
        count = 0
        new_geoms, indexes = [], []
        for i, geom in enumerate(geoms):
            length = len(geom)
            # ``offset`` is the global index of this geometry's first
            # point. Advance the running total up front so skipping a
            # geometry with ``continue`` cannot stall the offset.
            offset = count
            count += length
            if np.isscalar(rows):
                if offset <= rows < (offset + length):
                    new_geoms.append(geom[rows - offset])
                    indexes.append(i)
                    break
            elif isinstance(rows, slice):
                # Inclusive comparisons avoid emitting empty geometries
                # when the slice begins or ends exactly on a boundary.
                if rows.start is not None and rows.start >= (offset + length):
                    continue
                elif rows.stop is not None and rows.stop <= offset:
                    break
                start = None if rows.start is None else max(
                    rows.start - offset, 0)
                stop = None if rows.stop is None else min(
                    rows.stop - offset, length)
                if rows.step is not None:
                    dataset.param.warning(
                        ".iloc step slicing currently not supported for "
                        "the multi-tabular data format.")
                indexes.append(i)
                new_geoms.append(geom[start:stop])
            elif isinstance(rows, (list, set)):
                sub_rows = [(r - offset) for r in rows
                            if offset <= r < (offset + length)]
                if not sub_rows:
                    continue
                indexes.append(i)
                new_geoms.append(MultiPoint([geom[r] for r in sub_rows]))

        new = dataset.data.iloc[indexes].copy()
        new[geom_col] = GeoSeries(new_geoms)
        return new
Example #12
0
 def validate(cls, dataset, vdims=True):
     """Reject datasets declaring more than one value dimension.

     Args:
         dataset: Object exposing a ``vdims`` sequence.
         vdims: When falsy, no check is performed.

     Raises:
         DataError: If ``vdims`` is truthy and ``dataset.vdims`` holds
             more than one entry.
     """
     if not vdims:
         return
     if len(dataset.vdims) > 1:
         raise DataError(
             "Iris cubes do not support more than one value dimension", cls)