Code example #1
File: ogr.py Project: ancatfi/pygeoapi
    def get(self, identifier):
        """
        Get Feature by id

        :param identifier: feature id

        :returns: feature collection
        """
        result = None
        try:
            LOGGER.debug('Fetching identifier {}'.format(identifier))
            layer = self._get_layer()

            layer.SetAttributeFilter("{field} = '{id}'".format(
                field=self.id_field, id=identifier))

            ogr_feature = self._get_next_feature(layer)
            result = self._ogr_feature_to_json(ogr_feature)

        except RuntimeError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
        except ProviderConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(err)
        except Exception as err:
            LOGGER.error(err)
            raise ProviderGenericError(err)

        finally:
            self._close()

        return result
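
The same attribute-filter lookup can be reproduced directly with the OGR Python bindings. Below is a minimal standalone sketch, assuming GDAL/OGR is installed; the datasource name and the id field are hypothetical:

from osgeo import ogr

ds = ogr.Open('countries.shp')           # any OGR-supported datasource
layer = ds.GetLayer(0)

# Restrict the layer to a single feature by id, as the provider does above
layer.SetAttributeFilter("id = '42'")

feature = layer.GetNextFeature()         # None if nothing matched
if feature is not None:
    print(feature.ExportToJson())        # one feature as a GeoJSON string

Note that the provider interpolates the identifier into the filter string, so identifiers containing quotes would need escaping before this is safe against injection.
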
Code example #2
File: postgresql.py Project: jorgejesus/GLOSIS
    def get(self, identifier):
        """
        Query the provider for a specific
        feature id, e.g. /collections/hotosm_bdi_waterways/items/13990765

        :param identifier: feature id

        :returns: GeoJSON FeatureCollection
        """

        LOGGER.debug('Get item from Postgis')
        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            sql_query = SQL("select {0},ST_AsGeoJSON({1}) \
            from {2} WHERE {3}=%s").format(db.columns,
                                           Identifier('geom'),
                                           Identifier(self.table),
                                           Identifier(self.id_field))

            LOGGER.debug('SQL Query:{}'.format(sql_query.as_string(db.conn)))
            LOGGER.debug('Identifier:{}'.format(identifier))
            try:
                cursor.execute(sql_query, (identifier, ))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error('Using public schema: {}'.format(db.schema))
                LOGGER.error(err)
                raise ProviderQueryError()

            self.dataDB = cursor.fetchall()
            feature_collection = self.__response_feature_collection()
            return feature_collection
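
The query composition relies on psycopg2's sql module: Identifier quotes column and table names safely, while the id value travels as a bound parameter. A minimal sketch under assumed connection parameters and a hypothetical table (PostGIS must be installed for ST_AsGeoJSON):

from psycopg2 import connect
from psycopg2.extras import RealDictCursor
from psycopg2.sql import SQL, Identifier

conn = connect(host='localhost', dbname='test', user='postgres')
cursor = conn.cursor(cursor_factory=RealDictCursor)

query = SQL('SELECT {}, ST_AsGeoJSON({}) FROM {} WHERE {} = %s').format(
    Identifier('name'), Identifier('geom'),
    Identifier('hotosm_bdi_waterways'), Identifier('osm_id'))

print(query.as_string(conn))          # inspect the composed SQL
cursor.execute(query, ('13990765',))
row = cursor.fetchone()               # dict-like row via RealDictCursor
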
Code example #3
    def get(self, identifier):
        """
        Query the provider for a specific
        feature id, e.g. /collections/hotosm_bdi_waterways/items/13990765

        :param identifier: feature id

        :returns: GeoJSON FeatureCollection
        """

        LOGGER.debug('Get item from Postgis')
        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            sql_query = SQL("select {},ST_AsGeoJSON({}) \
            from {} WHERE {}=%s").format(db.columns, Identifier(self.geom),
                                         Identifier(self.table),
                                         Identifier(self.id_field))

            LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(db.conn)))
            LOGGER.debug('Identifier: {}'.format(identifier))
            try:
                cursor.execute(sql_query, (identifier, ))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()[0]
            feature = self.__response_feature(row_data)

            feature['prev'] = self.get_previous(cursor, identifier)
            feature['next'] = self.get_next(cursor, identifier)
            return feature
Code example #4
    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
        """

        BaseProvider.__init__(self, provider_def)

        url_tokens = self.data.split('/')

        LOGGER.debug('Setting Elasticsearch properties')
        self.es_host = url_tokens[2]
        self.index_name = url_tokens[-2]
        self.type_name = url_tokens[-1]
        LOGGER.debug('host: {}'.format(self.es_host))
        LOGGER.debug('index: {}'.format(self.index_name))
        LOGGER.debug('type: {}'.format(self.type_name))

        LOGGER.debug('Connecting to Elasticsearch')
        self.es = Elasticsearch(self.es_host)
        if not self.es.ping():
            msg = 'Cannot connect to Elasticsearch'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Grabbing field information')
        try:
            self.fields = self.get_fields()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
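
The constructor derives its connection settings purely from splitting the data URL on '/'. An illustrative trace of that parsing:

data = 'http://localhost:9200/roads/FeatureCollection'  # illustrative URL

url_tokens = data.split('/')
# ['http:', '', 'localhost:9200', 'roads', 'FeatureCollection']
es_host = url_tokens[2]       # 'localhost:9200'
index_name = url_tokens[-2]   # 'roads'
type_name = url_tokens[-1]    # 'FeatureCollection'
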
Code example #5
    def get_months_number(self, possible_time, year, month, datetime_):
        """
        Get the difference in number of months between
        dim_reference_time (year, month) and datetime_

        :param possible_time: list of possible times from dim_reference_time
        :param year: year from dim_reference_time
        :param month: month from dim_reference_time
        :param datetime_: forecast time from the query

        :returns: number of months as integer
        """

        if datetime_ not in possible_time:
            err = 'Not a valid datetime'
            LOGGER.error(err)
            raise ProviderQueryError(err)
        else:
            # from dim_ref_time
            begin_date = datetime(int(year), int(month), 1)
            # from datetime_
            year2, month2 = datetime_.split('-')
            end_date = datetime(int(year2), int(month2), 1)
            num_months = (end_date.year - begin_date.year) \
                * 12 + (end_date.month - begin_date.month)
            return num_months
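
The month arithmetic is easiest to see with concrete values. A worked example with illustrative dates:

from datetime import datetime

begin_date = datetime(2021, 3, 1)        # dim_reference_time 2021-03
year2, month2 = '2022-01'.split('-')     # datetime_ from the query
end_date = datetime(int(year2), int(month2), 1)

num_months = (end_date.year - begin_date.year) * 12 \
    + (end_date.month - begin_date.month)
print(num_months)                        # 10
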
Code example #6
    def __init__(self, provider_def):
        """
        Initialize object
        :param provider_def: provider definition
        :returns: pygeoapi.provider.elasticsearch_.ElasticsearchProvider
        """

        super().__init__(provider_def)

        self.es_host, self.index_name = self.data.rsplit('/', 1)

        LOGGER.debug('Setting Elasticsearch properties')
        self.is_gdal = False

        LOGGER.debug('host: {}'.format(self.es_host))
        LOGGER.debug('index: {}'.format(self.index_name))

        self.type_name = 'FeatureCollection'
        self.url_parsed = urlparse(self.es_host)

        LOGGER.debug('Connecting to Elasticsearch')

        if self.url_parsed.port is None:  # proxy to default HTTP(S) port
            if self.url_parsed.scheme == 'https':
                port = 443
            else:
                port = 80
        else:  # was set explicitly
            port = self.url_parsed.port

        url_settings = {
            'scheme': self.url_parsed.scheme,
            'host': self.url_parsed.hostname,
            'port': port
        }

        if self.url_parsed.path:
            url_settings['url_prefix'] = self.url_parsed.path

        LOGGER.debug('URL settings: {}'.format(url_settings))
        LOGGER.debug('Connecting to Elasticsearch')
        self.es = Elasticsearch([url_settings])
        if not self.es.ping():
            msg = 'Cannot connect to Elasticsearch'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Determining ES version')
        v = self.es.info()['version']['number'][:3]
        if float(v) < 7:
            msg = 'only ES 7+ supported'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Grabbing field information')
        try:
            self.fields = self.get_fields()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
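
The port-defaulting logic can be exercised on its own with urllib.parse; the URL below is illustrative:

from urllib.parse import urlparse

url_parsed = urlparse('https://example.org/es')

if url_parsed.port is None:  # proxy to default HTTP(S) port
    port = 443 if url_parsed.scheme == 'https' else 80
else:
    port = url_parsed.port

url_settings = {
    'scheme': url_parsed.scheme,  # 'https'
    'host': url_parsed.hostname,  # 'example.org'
    'port': port                  # 443
}
if url_parsed.path:
    url_settings['url_prefix'] = url_parsed.path  # '/es'
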
Code example #7
    def get_band_datetime(self, datetime_, year, month):
        """
        generate list of bands from dim_reference_time and datetime_

        :param datetime_: forecast time from the query
        :param year: year from dim_reference_time
        :param month: month from dim_reference_time

        :returns: list of bands
        """

        # making a list of the datetime for the given dim_ref_time
        possible_time = []
        for i in range(1, 13):
            possible_time.append(self.get_time_from_dim(
                '{}-{}'.format(year, month), i))

        if '/' not in datetime_:
            if datetime_ not in possible_time:
                err = 'Not a valid datetime'
                LOGGER.error(err)
                raise ProviderQueryError(err)
            else:
                num_months = self.get_months_number(
                    possible_time, year, month, datetime_)
                return [num_months + 12 * (self.member[0] - 1)]

        else:
            datetime1, datetime2 = datetime_.split('/')
            if datetime1 not in possible_time or \
                    datetime2 not in possible_time:
                err = 'Not a valid datetime'
                LOGGER.error(err)
                raise ProviderQueryError(err)
            num_months_1 = self.get_months_number(
                possible_time, year, month, datetime1)
            num_months_2 = self.get_months_number(
                possible_time, year, month, datetime2)

            num_months_1 = num_months_1 + 12 * (self.member[0] - 1)
            num_months_2 = num_months_2 + 12 * (self.member[0] - 1)
            return (list(range(num_months_1, num_months_2 + 1)))
Code example #8
    def esdoc2geojson(self, doc):
        """
        generate GeoJSON `dict` from ES document

        :param doc: `dict` of ES document

        :returns: GeoJSON `dict`
        """

        feature_ = {}
        feature_thinned = {}

        if 'properties' not in doc['_source']:
            LOGGER.debug('Looks like a GDAL ES 7 document')
            id_ = doc['_source'][self.id_field]
            if 'type' not in doc['_source']:
                feature_['id'] = id_
                feature_['type'] = 'Feature'
            feature_['geometry'] = doc['_source'].get('geometry')
            feature_['properties'] = {}
            for key, value in doc['_source'].items():
                if key == 'geometry':
                    continue
                feature_['properties'][key] = value
        else:
            LOGGER.debug('Looks like true GeoJSON document')
            feature_ = doc['_source']
            id_ = doc['_source']['properties'][self.id_field]
            feature_['id'] = id_
            feature_['geometry'] = doc['_source'].get('geometry')

        if self.properties or self.select_properties:
            LOGGER.debug('Filtering properties')
            all_properties = self.get_properties()

            feature_thinned = {
                'id': id_,
                'type': feature_['type'],
                'geometry': feature_.get('geometry'),
                'properties': OrderedDict()
            }
            for p in all_properties:
                try:
                    feature_thinned['properties'][p] = feature_['properties'][
                        p]  # noqa
                except KeyError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError()

        if feature_thinned:
            return feature_thinned
        else:
            return feature_
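
The two document shapes the method distinguishes, with the first branch replayed standalone; both documents are hypothetical:

# GDAL-indexed documents keep properties at the top level of _source
gdal_style = {'_source': {
    'osm_id': 13990765,
    'name': 'Ruvubu',
    'geometry': {'type': 'Point', 'coordinates': [30.0, -3.0]}
}}

# True GeoJSON documents already carry type/geometry/properties
geojson_style = {'_source': {
    'type': 'Feature',
    'geometry': {'type': 'Point', 'coordinates': [30.0, -3.0]},
    'properties': {'osm_id': 13990765, 'name': 'Ruvubu'}
}}

# First branch: lift every top-level key except geometry into properties
src = gdal_style['_source']
feature = {
    'id': src['osm_id'],
    'type': 'Feature',
    'geometry': src.get('geometry'),
    'properties': {k: v for k, v in src.items() if k != 'geometry'}
}
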
Code example #9
    def _request_json(self, url, params):
        """ Performs a GET request on `url` and returns the JSON response. """
        response = None
        try:
            response = requests.get(url, params)
            response.raise_for_status()
        except requests.HTTPError as err:
            LOGGER.error(err)
            raise ProviderQueryError(
                f'failed to query {response.url if response else url}')
        except requests.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(
                f'failed to connect to {response.url if response else url}')

        return self._parse_json(response.text)
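
raise_for_status() converts 4xx/5xx responses into requests.HTTPError, while a refused or unreachable connection raises requests.ConnectionError before any response object exists, which is why the f-strings fall back to the raw url. Both failure modes, with illustrative URLs:

import requests

try:
    r = requests.get('https://httpbin.org/status/404')
    r.raise_for_status()                # -> requests.HTTPError
except requests.HTTPError as err:
    print('query failed:', err)

try:
    requests.get('http://localhost:1')  # nothing listening
except requests.ConnectionError as err:
    print('connect failed:', err)       # no response object was created
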
Code example #10
File: sensorthings.py Project: m-burgoyne/pygeoapi
    def _make_filter(self, properties, bbox=[], datetime_=None):
        """
        Private function: Make STA filter from query properties

        :param properties: list of tuples (name, value)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)

        :returns: STA $filter string of properties
        """
        ret = []
        for (name, value) in properties:
            if name in ENTITY:
                ret.append(f'{name}/@iot.id eq {value}')
            else:
                ret.append(f'{name} eq {value}')

        if bbox:
            minx, miny, maxx, maxy = bbox
            bbox_ = f'POLYGON (({minx} {miny}, {maxx} {miny}, \
                     {maxx} {maxy}, {minx} {maxy}, {minx} {miny}))'

            if self.entity == 'Things':
                loc = 'Locations/location'
            elif self.entity == 'Datastreams':
                loc = 'Thing/Locations/location'
            elif self.entity == 'Observations':
                loc = 'FeatureOfInterest/feature'
            ret.append(f"st_within({loc}, geography'{bbox_}')")

        if datetime_ is not None:
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            if '/' in datetime_:
                time_start, time_end = datetime_.split('/')
                if time_start != '..':
                    ret.append(f'{self.time_field} ge {time_start}')
                if time_end != '..':
                    ret.append(f'{self.time_field} le {time_end}')
            else:
                ret.append(f'{self.time_field} eq {datetime_}')

        return ' and '.join(ret)
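
A standalone trace of the $filter string for a Things query; the property, bbox, and time values (and the phenomenonTime field name) are illustrative:

properties = [('name', "'Temperature'")]
bbox = [29.0, -4.5, 31.0, -2.5]
datetime_ = '2021-01-01T00:00:00Z/2021-02-01T00:00:00Z'

ret = [f'{name} eq {value}' for name, value in properties]

minx, miny, maxx, maxy = bbox
bbox_ = (f'POLYGON (({minx} {miny}, {maxx} {miny}, '
         f'{maxx} {maxy}, {minx} {maxy}, {minx} {miny}))')
ret.append(f"st_within(Locations/location, geography'{bbox_}')")

time_start, time_end = datetime_.split('/')
ret.append(f'phenomenonTime ge {time_start}')
ret.append(f'phenomenonTime le {time_end}')

print(' and '.join(ret))
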
Code example #11
    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
        """

        BaseProvider.__init__(self, provider_def)

        url_tokens = self.data.split('/')

        LOGGER.debug('Setting Elasticsearch properties')
        self.es_host = url_tokens[2]
        self.index_name = url_tokens[-1]
        self.is_gdal = False

        LOGGER.debug('host: {}'.format(self.es_host))
        LOGGER.debug('index: {}'.format(self.index_name))

        LOGGER.debug('Connecting to Elasticsearch')
        self.es = Elasticsearch(self.es_host)
        if not self.es.ping():
            msg = 'Cannot connect to Elasticsearch'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Determining ES version')
        v = self.es.info()['version']['number'][:3]
        if float(v) < 7:
            msg = 'only ES 7+ supported'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Grabbing field information')
        try:
            self.fields = self.get_fields()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
Code example #12
    def _request_json(self, url, params):
        """ Performs a GET request on `url` and returns the JSON response. """
        response = None
        if 'lang' not in params and self.locale:
            # Add language parameter, if missing (geoCore wants ISO 639-1 codes)  # noqa
            LOGGER.debug(
                f"Requesting geoCore response in '{self.locale.language}'"
            )  # noqa
            params['lang'] = self.locale.language
        try:
            response = requests.get(url, params)
            response.raise_for_status()
        except requests.HTTPError as err:
            LOGGER.error(err)
            raise ProviderQueryError(
                f'failed to query {response.url if response else url}')
        except requests.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(
                f'failed to connect to {response.url if response else url}')

        return self._parse_json(response.text)
Code example #13
    def get_fields(self):
        """
        Get fields of STA Provider

        :returns: dict of fields
        """
        if not self.fields:
            p = {'$expand': EXPAND[self.entity], '$top': 1}
            r = get(self._url, params=p)
            try:
                results = r.json()['value'][0]
            except JSONDecodeError as err:
                LOGGER.error('Entity {} error: {}'.format(self.entity, err))
                LOGGER.error('Bad url response at {}'.format(r.url))
                raise ProviderQueryError(err)

            for (n, v) in results.items():
                if isinstance(v, (int, float)) or \
                   (isinstance(v, (dict, list)) and n in ENTITY):
                    self.fields[n] = {'type': 'number'}
                elif isinstance(v, str):
                    self.fields[n] = {'type': 'string'}

        return self.fields
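
The type sniffing runs over a single sample entity. Applied to a hypothetical record:

ENTITY = ['Things', 'Datastreams', 'Observations']
sample = {'@iot.id': 1, 'name': 'station-1', 'result': 21.5,
          'Datastreams': [{'@iot.id': 7}]}

fields = {}
for n, v in sample.items():
    if isinstance(v, (int, float)) or \
       (isinstance(v, (dict, list)) and n in ENTITY):
        fields[n] = {'type': 'number'}
    elif isinstance(v, str):
        fields[n] = {'type': 'string'}

# {'@iot.id': {'type': 'number'}, 'name': {'type': 'string'},
#  'result': {'type': 'number'}, 'Datastreams': {'type': 'number'}}
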
Code example #14
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False,
              q=None,
              filterq=None,
              **kwargs):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)
        :param filterq: filter object

        :returns: dict of 0..n GeoJSON features
        """

        query = {'track_total_hits': True, 'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {'type': 'FeatureCollection', 'features': []}

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            'coordinates': [[minx, maxy], [maxx, miny]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if datetime_ is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = self.mask_prop(self.time_field)

            if '/' in datetime_:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime_.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end
                        }
                    }
                }
                if time_begin == '..':
                    range_['range'][time_field].pop('gte')
                elif time_end == '..':
                    range_['range'][time_field].pop('lte')

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: datetime_}})

            LOGGER.debug(filter_)
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                prop_name = self.mask_prop(prop[0])
                pf = {'match': {prop_name: {'query': prop[1]}}}
                query['query']['bool']['filter'].append(pf)

                if '|' not in prop[1]:
                    pf['match'][prop_name]['minimum_should_match'] = '100%'

        if sortby:
            LOGGER.debug('processing sortby')
            query['sort'] = []
            for sort in sortby:
                LOGGER.debug('processing sort object: {}'.format(sort))

                sp = sort['property']

                if self.fields[sp]['type'] == 'string':
                    LOGGER.debug('setting ES .raw on property')
                    sort_property = '{}.raw'.format(self.mask_prop(sp))
                else:
                    sort_property = self.mask_prop(sp)

                sort_order = 'asc'
                if sort['order'] == '-':
                    sort_order = 'desc'

                sort_ = {sort_property: {'order': sort_order}}
                query['sort'].append(sort_)

        if q is not None:
            LOGGER.debug('Adding free-text search')
            query['query']['bool']['must'] = {'query_string': {'query': q}}

            query['_source'] = {
                'excludes': [
                    'properties._metadata-payload',
                    'properties._metadata-schema',
                    'properties._metadata-format'
                ]
            }

        if self.properties or select_properties:
            LOGGER.debug('including specified fields: {}'.format(
                self.properties))
            query['_source'] = {
                'includes':
                list(
                    map(self.mask_prop,
                        set(self.properties) | set(select_properties)))  # noqa
            }
            query['_source']['includes'].append(self.mask_prop(self.id_field))
            query['_source']['includes'].append('type')
            query['_source']['includes'].append('geometry')
        if skip_geometry:
            LOGGER.debug(
                'limiting to specified fields: {}'.format(select_properties))
            try:
                query['_source']['excludes'] = ['geometry']
            except KeyError:
                query['_source'] = {'excludes': ['geometry']}
        try:
            LOGGER.debug('querying Elasticsearch')
            if filterq:
                LOGGER.debug('adding cql object: {}'.format(filterq.json()))
                query = update_query(input_query=query, cql=filterq)
            LOGGER.debug(json.dumps(query, indent=4))

            LOGGER.debug('Setting ES paging zero-based')
            if startindex > 0:
                startindex2 = startindex - 1
            else:
                startindex2 = startindex

            if startindex2 + limit > 10000:
                gen = helpers.scan(client=self.es,
                                   query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex2 + limit):
                    try:
                        if i >= startindex2:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = \
                    len(results['hits']['hits']) + startindex2
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex2,
                                         size=limit,
                                         body=query)
                results['hits']['total'] = results['hits']['total']['value']

        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        feature_collection['numberMatched'] = results['hits']['total']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            feature_ = self.esdoc2geojson(feature)
            feature_collection['features'].append(feature_)

        return feature_collection
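
Elasticsearch rejects from_ + size beyond 10000 by default (index.max_result_window), which is what the helpers.scan branch works around: it scrolls through hits in order and discards everything before the requested offset. A minimal sketch, with hypothetical host and index names:

from elasticsearch import Elasticsearch, helpers

es = Elasticsearch(['http://localhost:9200'])
query = {'query': {'match_all': {}}}
startindex, limit = 10050, 10             # beyond the 10000 window

gen = helpers.scan(client=es, query=query, preserve_order=True,
                   index='roads')
hits = []
for i in range(startindex + limit):
    try:
        hit = next(gen)
    except StopIteration:
        break
    if i >= startindex:                   # skip everything before the offset
        hits.append(hit)
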
Code example #15
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], time=None, properties=[]):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param time: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)

        :returns: dict of 0..n GeoJSON features
        """

        query = {'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {
            'type': 'FeatureCollection',
            'features': []
        }

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            'coordinates': [[minx, miny], [maxx, maxy]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if time is not None:
            LOGGER.debug('processing time parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = 'properties.{}'.format(self.time_field)

            if '/' in time:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = time.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end,
                        }
                    }
                }

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: time}})

            LOGGER.debug(filter_)
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                pf = {
                    'match': {
                        'properties.{}'.format(prop[0]): prop[1]
                    }
                }
                query['query']['bool']['filter'].append(pf)

        try:
            LOGGER.debug('querying Elasticsearch')
            if startindex + limit > 10000:
                gen = helpers.scan(client=self.es, query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex + limit):
                    try:
                        if i >= startindex:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = len(results['hits']['hits'])
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex, size=limit,
                                         body=query)
        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        feature_collection['numberMatched'] = results['hits']['total']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            id_ = feature['_source']['properties'][self.id_field]
            LOGGER.debug('serializing id {}'.format(id_))
            feature['_source']['ID'] = id_
            feature_collection['features'].append(feature['_source'])

        return feature_collection
Code example #16
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: dict of 0..n GeoJSON features
        """

        query = {'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {'type': 'FeatureCollection', 'features': []}

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            'coordinates': [[minx, miny], [maxx, maxy]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if datetime is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = 'properties.{}'.format(self.time_field)

            if '/' in datetime:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end
                        }
                    }
                }
                if time_begin == '..':
                    range_['range'][time_field].pop('gte')
                elif time_end == '..':
                    range_['range'][time_field].pop('lte')

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: datetime}})

            LOGGER.debug(filter_)
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                pf = {'match': {'properties.{}'.format(prop[0]): prop[1]}}
                query['query']['bool']['filter'].append(pf)

        if sortby:
            LOGGER.debug('processing sortby')
            query['sort'] = []
            for sort in sortby:
                LOGGER.debug('processing sort object: {}'.format(sort))

                sp = sort['property']

                if self.fields[sp]['type'] == 'string':
                    LOGGER.debug('setting ES .raw on property')
                    sort_property = 'properties.{}.raw'.format(sp)
                else:
                    sort_property = 'properties.{}'.format(sp)

                sort_order = 'asc'
                if sort['order'] == 'D':
                    sort_order = 'desc'

                sort_ = {sort_property: {'order': sort_order}}
                query['sort'].append(sort_)

        if self.properties:
            LOGGER.debug('including specified fields: {}'.format(
                self.properties))
            query['_source'] = {
                'includes': list(map('properties.{}'.format, self.properties))
            }
            query['_source']['includes'].append('properties.{}'.format(
                self.id_field))
            query['_source']['includes'].append('type')
            query['_source']['includes'].append('geometry')
        try:
            LOGGER.debug('querying Elasticsearch')
            if startindex + limit > 10000:
                gen = helpers.scan(client=self.es,
                                   query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex + limit):
                    try:
                        if i >= startindex:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = \
                    len(results['hits']['hits']) + startindex
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex,
                                         size=limit,
                                         body=query)
        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        feature_collection['numberMatched'] = results['hits']['total']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            id_ = feature['_source']['properties'][self.id_field]
            LOGGER.debug('serializing id {}'.format(id_))
            feature['_source']['id'] = id_
            if self.properties:
                feature_thinned = {
                    'id': feature['_source']['properties'][self.id_field],
                    'type': feature['_source']['type'],
                    'geometry': feature['_source']['geometry'],
                    'properties': OrderedDict()
                }
                for p in self.properties:
                    try:
                        feature_thinned['properties'][p] = \
                            feature['_source']['properties'][p]
                    except KeyError as err:
                        LOGGER.error(err)
                        raise ProviderQueryError()

                feature_collection['features'].append(feature_thinned)
            else:
                feature_collection['features'].append(feature['_source'])

        return feature_collection
Code example #17
    def gen_covjson(self, metadata, data):
        """
        Generate coverage as CoverageJSON representation
        :param metadata: coverage metadata
        :param data: rasterio DatasetReader object
        :returns: dict of CoverageJSON representation
        """

        LOGGER.debug('Creating CoverageJSON domain')
        minx, miny, maxx, maxy = metadata['bbox']

        cj = {
            'type': 'Coverage',
            'domain': {
                'type': 'Domain',
                'domainType': 'Grid',
                'axes': {
                    'x': {
                        'start': minx,
                        'stop': maxx,
                        'num': metadata['width']
                    },
                    'y': {
                        'start': maxy,
                        'stop': miny,
                        'num': metadata['height']
                    }
                },
                'referencing': [{
                    'coordinates': ['x', 'y'],
                    'system': {
                        'type': self._coverage_properties['crs_type'],
                        'id': self._coverage_properties['bbox_crs']
                    }
                }]
            },
            'parameters': {},
            'ranges': {}
        }

        if metadata['bands'] is None:  # all bands
            bands_select = range(1, len(self._data.dtypes) + 1)
        else:
            bands_select = metadata['bands']

        LOGGER.debug('bands selected: {}'.format(bands_select))
        for bs in bands_select:
            pm = _get_parameter_metadata(self._data.profile['driver'],
                                         self._data.tags(bs))

            parameter = {
                'type': 'Parameter',
                'description': pm['description'],
                'unit': {
                    'symbol': pm['unit_label']
                },
                'observedProperty': {
                    'id': pm['observed_property_id'],
                    'label': {
                        'en': pm['observed_property_name']
                    }
                }
            }

            cj['parameters'][pm['id']] = parameter

        try:
            for key in cj['parameters'].keys():
                cj['ranges'][key] = {
                    'type': 'NdArray',
                    # 'dataType': metadata.dtypes[0],
                    'dataType': 'float',
                    'axisNames': ['y', 'x'],
                    'shape': [metadata['height'], metadata['width']],
                }
                # TODO: deal with multi-band value output
                cj['ranges'][key]['values'] = data.flatten().tolist()
        except IndexError as err:
            LOGGER.warning(err)
            raise ProviderQueryError('Invalid query parameter')

        return cj
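
The structure the method emits is easiest to see on a tiny hand-built example; the 2x2 values, band key, and CRS id below are illustrative:

values = [[1.0, 2.0], [3.0, 4.0]]
minx, miny, maxx, maxy = 0.0, 0.0, 1.0, 1.0

coverage = {
    'type': 'Coverage',
    'domain': {
        'type': 'Domain',
        'domainType': 'Grid',
        'axes': {
            'x': {'start': minx, 'stop': maxx, 'num': 2},
            'y': {'start': maxy, 'stop': miny, 'num': 2}  # y runs top-down
        },
        'referencing': [{
            'coordinates': ['x', 'y'],
            'system': {'type': 'GeographicCRS',
                       'id': 'http://www.opengis.net/def/crs/OGC/1.3/CRS84'}
        }]
    },
    'parameters': {},
    'ranges': {
        'B1': {'type': 'NdArray', 'dataType': 'float',
               'axisNames': ['y', 'x'], 'shape': [2, 2],
               'values': [v for row in values for v in row]}
    }
}
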
Code example #18
    def query(self,
              range_subset=[],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json'):
        """
        Extract data from a collection
        :param range_subset: list of bands
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        bands = range_subset
        LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

        args = {'indexes': None}
        shapes = []

        if all([not bands, not subsets, not bbox, format_ != 'json']):
            LOGGER.debug('No parameters specified, returning native data')
            return read_data(self.data)

        if all([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                len(bbox) > 0
        ]):
            msg = 'bbox and subsetting by coordinates are exclusive'
            LOGGER.warning(msg)
            raise ProviderQueryError(msg)

        if len(bbox) > 0:
            minx, miny, maxx, maxy = bbox

            crs_src = CRS.from_epsg(4326)

            if 'crs' in self.options:
                crs_dest = CRS.from_string(self.options['crs'])
            else:
                crs_dest = self._data.crs

            if crs_src == crs_dest:
                LOGGER.debug('source bbox CRS and data CRS are the same')
                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx, miny],
                        [minx, maxy],
                        [maxx, maxy],
                        [maxx, miny],
                        [minx, miny],
                    ]]
                }]
            else:
                LOGGER.debug('source bbox CRS and data CRS are different')
                LOGGER.debug('reprojecting bbox into native coordinates')

                t = Transformer.from_crs(crs_src, crs_dest, always_xy=True)
                minx2, miny2 = t.transform(minx, miny)
                maxx2, maxy2 = t.transform(maxx, maxy)

                LOGGER.debug('Source coordinates: {}'.format(
                    [minx, miny, maxx, maxy]))
                LOGGER.debug('Destination coordinates: {}'.format(
                    [minx2, miny2, maxx2, maxy2]))

                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx2, miny2],
                        [minx2, maxy2],
                        [maxx2, maxy2],
                        [maxx2, miny2],
                        [minx2, miny2],
                    ]]
                }]

        elif (self._coverage_properties['x_axis_label'] in subsets
              and self._coverage_properties['y_axis_label'] in subsets):
            LOGGER.debug('Creating spatial subset')

            x = self._coverage_properties['x_axis_label']
            y = self._coverage_properties['y_axis_label']

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[[subsets[x][0], subsets[y][0]],
                                 [subsets[x][0], subsets[y][1]],
                                 [subsets[x][1], subsets[y][1]],
                                 [subsets[x][1], subsets[y][0]],
                                 [subsets[x][0], subsets[y][0]]]]
            }]

        if bands:
            LOGGER.debug('Selecting bands')
            args['indexes'] = list(map(int, bands))

        with rasterio.open(self.data) as _data:
            LOGGER.debug('Creating output coverage metadata')
            out_meta = _data.meta

            if self.options is not None:
                LOGGER.debug('Adding dataset options')
                for key, value in self.options.items():
                    out_meta[key] = value

            if shapes:  # spatial subset
                try:
                    LOGGER.debug('Clipping data with bbox')
                    out_image, out_transform = rasterio.mask.mask(
                        _data,
                        filled=False,
                        shapes=shapes,
                        crop=True,
                        indexes=args['indexes'])
                except ValueError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError(err)

                out_meta.update({
                    'driver': self.native_format,
                    'height': out_image.shape[1],
                    'width': out_image.shape[2],
                    'transform': out_transform
                })
            else:  # no spatial subset
                LOGGER.debug('Creating data in memory with band selection')
                out_image = _data.read(indexes=args['indexes'])

            if bbox:
                out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
            elif shapes:
                out_meta['bbox'] = [
                    subsets[x][0], subsets[y][0], subsets[x][1], subsets[y][1]
                ]
            else:
                out_meta['bbox'] = [
                    _data.bounds.left, _data.bounds.bottom, _data.bounds.right,
                    _data.bounds.top
                ]

            out_meta['units'] = _data.units

            LOGGER.debug('Serializing data in memory')
            with MemoryFile() as memfile:
                with memfile.open(**out_meta) as dest:
                    dest.write(out_image)

                if format_ == 'json':
                    LOGGER.debug('Creating output in CoverageJSON')
                    out_meta['bands'] = args['indexes']
                    return self.gen_covjson(out_meta, out_image)

                else:  # return data in native format
                    LOGGER.debug('Returning data in native format')
                    return memfile.read()
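
The core clip-and-serialize path reduces to a few rasterio calls. A sketch with a hypothetical raster and bbox:

import rasterio
from rasterio.io import MemoryFile
from rasterio.mask import mask

shapes = [{  # the bbox expressed as a GeoJSON polygon
    'type': 'Polygon',
    'coordinates': [[[29.0, -4.5], [29.0, -2.5], [31.0, -2.5],
                     [31.0, -4.5], [29.0, -4.5]]]
}]

with rasterio.open('example.tif') as src:
    out_image, out_transform = mask(src, shapes=shapes, crop=True,
                                    filled=False)
    out_meta = src.meta
    out_meta.update({'height': out_image.shape[1],
                     'width': out_image.shape[2],
                     'transform': out_transform})

with MemoryFile() as memfile:  # serialize without touching disk
    with memfile.open(**out_meta) as dest:
        dest.write(out_image)
    data_bytes = memfile.read()
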
Code example #19
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False):
        """
        Query OGR source

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)

        :returns: dict of 0..n GeoJSON features
        """
        result = None
        try:
            if self.source_capabilities['paging']:
                self.source_helper.enable_paging(startindex, limit)

            layer = self._get_layer()

            if bbox:
                LOGGER.debug('processing bbox parameter')
                minx, miny, maxx, maxy = bbox

                wkt = "POLYGON (({minx} {miny},{minx} {maxy},{maxx} {maxy}," \
                      "{maxx} {miny},{minx} {miny}))".format(
                        minx=float(minx), miny=float(miny),
                        maxx=float(maxx), maxy=float(maxy))

                polygon = self.ogr.CreateGeometryFromWkt(wkt)
                if self.transform_in:
                    polygon.Transform(self.transform_in)

                layer.SetSpatialFilter(polygon)

                # layer.SetSpatialFilterRect(
                # float(minx), float(miny), float(maxx), float(maxy))

            if properties:
                LOGGER.debug('processing properties')

                attribute_filter = ' and '.join(
                    map(lambda x: '{} = \'{}\''.format(x[0], x[1]),
                        properties))

                LOGGER.debug(attribute_filter)

                layer.SetAttributeFilter(attribute_filter)

            # Make response based on resulttype specified
            if resulttype == 'hits':
                LOGGER.debug('hits only specified')
                result = self._response_feature_hits(layer)
            elif resulttype == 'results':
                LOGGER.debug('results specified')
                result = self._response_feature_collection(layer, limit)
            else:
                LOGGER.error('Invalid resulttype: %s' % resulttype)

        except RuntimeError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
        except ProviderConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(err)
        except Exception as err:
            LOGGER.error(err)
            raise ProviderGenericError(err)

        finally:
            self._close()

        return result
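
The spatial filter amounts to building a WKT polygon from the bbox and handing it to OGR. A standalone sketch; the datasource and field names are hypothetical:

from osgeo import ogr

ds = ogr.Open('waterways.shp')
layer = ds.GetLayer(0)

minx, miny, maxx, maxy = 29.0, -4.5, 31.0, -2.5
wkt = ('POLYGON (({minx} {miny},{minx} {maxy},{maxx} {maxy},'
       '{maxx} {miny},{minx} {miny}))').format(
           minx=minx, miny=miny, maxx=maxx, maxy=maxy)

polygon = ogr.CreateGeometryFromWkt(wkt)
layer.SetSpatialFilter(polygon)                  # keep intersecting features
layer.SetAttributeFilter("waterway = 'river'")   # both filters combine

for feature in layer:
    print(feature.GetFID())
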
Code example #20
File: xarray_.py Project: jukkatolonen/pygeoapi
    def query(self, range_subset=[], subsets={}, format_='json'):
        """
        Extract data from a collection

        :param range_subset: list of data variables to return (all if blank)
        :param subsets: dict of subset names with lists of ranges
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        if not range_subset and not subsets and format_ != 'json':
            LOGGER.debug('No parameters specified, returning native data')
            return read_data(self.data)

        if len(range_subset) < 1:
            range_subset = self.fields

        data = self._data[[*range_subset]]

        if (self._coverage_properties['x_axis_label'] in subsets
                or self._coverage_properties['y_axis_label'] in subsets
                or self._coverage_properties['time_axis_label'] in subsets):

            LOGGER.debug('Creating spatio-temporal subset')

            query_params = {}
            for key, val in subsets.items():
                if data.coords[key].values[0] > data.coords[key].values[-1]:
                    LOGGER.debug('Reversing slicing low/high')
                    query_params[key] = slice(val[1], val[0])
                else:
                    query_params[key] = slice(val[0], val[1])

            LOGGER.debug('Query parameters: {}'.format(query_params))
            try:
                data = data.sel(query_params)
            except Exception as err:
                LOGGER.warning(err)
                raise ProviderQueryError(err)

        if (any([
                data.coords[self.x_field].size == 0,
                data.coords[self.y_field].size == 0
        ])):
            msg = 'No data found'
            LOGGER.warning(msg)
            raise ProviderNoDataError(msg)

        out_meta = {
            'bbox': [
                data.coords[self.x_field].values[0],
                data.coords[self.y_field].values[0],
                data.coords[self.x_field].values[-1],
                data.coords[self.y_field].values[-1]
            ],
            "time": [
                _to_datetime_string(data.coords[self.time_field].values[0]),
                _to_datetime_string(data.coords[self.time_field].values[-1])
            ],
            "driver":
            "xarray",
            "height":
            data.dims[self.y_field],
            "width":
            data.dims[self.x_field],
            "time_steps":
            data.dims[self.time_field],
            "variables":
            {var_name: var.attrs
             for var_name, var in data.variables.items()}
        }

        LOGGER.debug('Serializing data in memory')
        if format_ == 'json':
            LOGGER.debug('Creating output in CoverageJSON')
            return self.gen_covjson(out_meta, data, range_subset)

        else:  # return data in native format
            with tempfile.TemporaryFile() as fp:
                LOGGER.debug('Returning data in native format')
                fp.write(data.to_netcdf())
                fp.seek(0)
                return fp.read()
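
The slice reversal matters because xarray's label-based slicing follows coordinate order: on a descending axis the bounds must come high-to-low. An illustrative dataset:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'tas': (('lat', 'lon'), np.arange(16.0).reshape(4, 4))},
    coords={'lat': [40.0, 30.0, 20.0, 10.0],   # descending axis
            'lon': [0.0, 10.0, 20.0, 30.0]})

subsets = {'lat': [15.0, 35.0], 'lon': [0.0, 20.0]}

query_params = {}
for key, val in subsets.items():
    if ds.coords[key].values[0] > ds.coords[key].values[-1]:
        query_params[key] = slice(val[1], val[0])  # reverse low/high
    else:
        query_params[key] = slice(val[0], val[1])

data = ds[['tas']].sel(query_params)
print(dict(data.dims))   # {'lat': 2, 'lon': 3}
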
Code example #21
    def _load(self, startindex=0, limit=10, resulttype='results',
              identifier=None, bbox=[], datetime_=None, properties=[],
              sortby=[], select_properties=[], skip_geometry=False, q=None):
        """
        Private function: Load STA data

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of GeoJSON FeatureCollection
        """
        feature_collection = {
            'type': 'FeatureCollection', 'features': []
        }
        # Make params
        params = {
            '$expand': EXPAND[self.entity],
            '$skip': str(startindex),
            '$top': str(limit),
            '$count': 'true'
        }
        if properties or bbox or datetime_:
            params['$filter'] = self._make_filter(properties, bbox, datetime_)
        if sortby:
            params['$orderby'] = self._make_orderby(sortby)

        # Form URL for GET request
        LOGGER.debug('Sending query')
        if identifier:
            r = get(f'{self._url}({identifier})', params=params)
        else:
            r = get(self._url, params=params)

        if r.status_code == codes.bad:
            LOGGER.error('Bad http response code')
            raise ProviderConnectionError('Bad http response code')

        response = r.json()
        # if hits, return count
        if resulttype == 'hits':
            LOGGER.debug('Returning hits')
            feature_collection['numberMatched'] = response.get('@iot.count')
            return feature_collection

        v = [response, ] if identifier else response.get('value')
        # if values are less than expected, query for more
        hits_ = 1 if identifier else min(limit, response.get('@iot.count'))
        while len(v) < hits_:
            LOGGER.debug('Fetching next set of values')
            r = get(response.get('@iot.nextLink'), params={'$skip': len(v)})
            response = r.json()
            v.extend(response.get('value'))

        # properties filter & display
        keys = (() if not self.properties and not select_properties else
                set(self.properties) | set(select_properties))

        for entity in v[:hits_]:
            # Make feature
            id = entity.pop(self.id_field)
            id = f"'{id}'" if isinstance(id, str) else str(id)
            f = {
                'type': 'Feature', 'properties': {},
                'geometry': None, 'id': id
            }

            # Make geometry
            if not skip_geometry:
                f['geometry'] = self._geometry(entity)

            # Fill properties block
            try:
                f['properties'] = self._expand_properties(entity, keys)
            except KeyError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            feature_collection['features'].append(f)

        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        if identifier:
            return f
        else:
            return feature_collection
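
The while loop covers servers that cap page size below $top: SensorThings responses carry an '@iot.count' total and an '@iot.nextLink' to the following page. A minimal paging sketch against a hypothetical endpoint:

from requests import get

url = 'https://example.org/FROST-Server/v1.1/Things'
params = {'$top': '100', '$count': 'true'}

response = get(url, params=params).json()
values = response.get('value', [])

# follow @iot.nextLink until enough entities have been collected
while '@iot.nextLink' in response and len(values) < 250:
    response = get(response['@iot.nextLink']).json()
    values.extend(response.get('value', []))
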
Code example #22
File: tinydb_.py Project: ldesousa/pygeoapi
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False,
              q=None):
        """
        query TinyDB document store

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of 0..n GeoJSON feature collection
        """

        Q = Query()
        LOGGER.debug('Query initiated: {}'.format(Q))

        QUERY = []

        feature_collection = {'type': 'FeatureCollection', 'features': []}

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            bbox_as_string = ','.join(str(s) for s in bbox)
            QUERY.append(
                "Q.properties.extent.spatial.bbox.test(bbox_intersects, '{}')".
                format(bbox_as_string))  # noqa

        if datetime_ is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            if '/' in datetime_:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime_.split('/')

                if time_begin != '..':
                    QUERY.append(
                        "(Q.properties[self.time_field]>='{}')".format(
                            time_begin))  # noqa
                if time_end != '..':
                    QUERY.append(
                        "(Q.properties[self.time_field]<='{}')".format(
                            time_end))  # noqa

            else:  # time instant
                LOGGER.debug('detected time instant')
                QUERY.append("(Q.properties[self.time_field]=='{}')".format(
                    datetime_))  # noqa

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                QUERY.append("(Q.properties['{}']=='{}')".format(*prop))

        if q is not None:
            QUERY.append(
                "(Q.properties['_metadata-anytext'].search('{}'))".format(
                    q))  # noqa

        QUERY_STRING = '&'.join(QUERY)
        LOGGER.debug('QUERY_STRING: {}'.format(QUERY_STRING))
        SEARCH_STRING = 'self.db.search({})'.format(QUERY_STRING)
        LOGGER.debug('SEARCH_STRING: {}'.format(SEARCH_STRING))

        LOGGER.debug('querying database')
        if len(QUERY) > 0:
            LOGGER.debug('running eval on {}'.format(SEARCH_STRING))
            results = eval(SEARCH_STRING)
        else:
            results = self.db.all()

        feature_collection['numberMatched'] = len(results)

        if resulttype == 'hits':
            return feature_collection

        for r in results:
            for e in self.excludes:
                del r['properties'][e]

        len_results = len(results)

        LOGGER.debug('Results found: {}'.format(len_results))

        if sortby:
            LOGGER.debug('Sorting results')
            sort_reverse = sortby[0]['order'] == '-'
            results.sort(key=lambda k: k['properties'][sortby[0]['property']],
                         reverse=sort_reverse)

        feature_collection['features'] = results[startindex:startindex + limit]
        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        return feature_collection
Code example #23
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: GeoJSON FeaturesCollection
        """
        LOGGER.debug('Querying PostGIS')

        if resulttype == 'hits':

            with DatabaseConnection(self.conn_dic, self.table,
                                    context="hits") as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)

                where_clause = self.__get_where_clauses(properties=properties,
                                                        bbox=bbox)
                sql_query = SQL("SELECT COUNT(*) as hits from {} {}").\
                    format(Identifier(self.table), where_clause)
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error(
                        'Error executing sql_query: {}: {}'.format(
                            sql_query.as_string(cursor), err))
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)

        end_index = startindex + limit

        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            where_clause = self.__get_where_clauses(properties=properties,
                                                    bbox=bbox)

            sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
             SELECT DISTINCT {},ST_AsGeoJSON({}) FROM {}{}"                                                           ).\
                format(db.columns,
                       Identifier(self.geom),
                       Identifier(self.table),
                       where_clause)

            LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
            LOGGER.debug('Start Index: {}'.format(startindex))
            LOGGER.debug('End Index: {}'.format(end_index))
            try:
                cursor.execute(sql_query)
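                # two-step pagination on the server-side cursor: the first
                # FETCH skips 'startindex' rows, the second pulls 'limit' rows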
                for index in [startindex, limit]:
                    cursor.execute(
                        "fetch forward {} from geo_cursor".format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()

            feature_collection = {'type': 'FeatureCollection', 'features': []}

            for rd in row_data:
                feature_collection['features'].append(
                    self.__response_feature(rd))

            return feature_collection
Code example #24
File: xarray_.py Project: ksonda/pygeoapi
    def gen_covjson(self, metadata, data, range_type):
        """
        Generate coverage as CoverageJSON representation

        :param metadata: coverage metadata
        :param data: xarray Dataset object
        :param range_type: range type list

        :returns: dict of CoverageJSON representation
        """

        LOGGER.debug('Creating CoverageJSON domain')
        minx, miny, maxx, maxy = metadata['bbox']
        mint, maxt = metadata['time']

        try:
            tmp_min = data.coords[self.y_field].values[0]
        except IndexError:
            tmp_min = data.coords[self.y_field].values
        try:
            tmp_max = data.coords[self.y_field].values[-1]
        except IndexError:
            tmp_max = data.coords[self.y_field].values

        if tmp_min > tmp_max:
            LOGGER.debug('Reversing direction of {}'.format(self.y_field))
            miny = tmp_max
            maxy = tmp_min

        cj = {
            'type': 'Coverage',
            'domain': {
                'type': 'Domain',
                'domainType': 'Grid',
                'axes': {
                    'x': {
                        'start': minx,
                        'stop': maxx,
                        'num': metadata['width']
                    },
                    'y': {
                        'start': maxy,
                        'stop': miny,
                        'num': metadata['height']
                    },
                    self.time_field: {
                        'start': mint,
                        'stop': maxt,
                        'num': metadata['time_steps']
                    }
                },
                'referencing': [{
                    'coordinates': ['x', 'y'],
                    'system': {
                        'type': self._coverage_properties['crs_type'],
                        'id': self._coverage_properties['bbox_crs']
                    }
                }]
            },
            'parameters': {},
            'ranges': {}
        }

        for variable in range_type:
            pm = self._get_parameter_metadata(variable,
                                              self._data[variable].attrs)

            parameter = {
                'type': 'Parameter',
                'description': pm['description'],
                'unit': {
                    'symbol': pm['unit_label']
                },
                'observedProperty': {
                    'id': pm['observed_property_id'],
                    'label': {
                        'en': pm['observed_property_name']
                    }
                }
            }

            cj['parameters'][pm['id']] = parameter

        try:
            for key in cj['parameters'].keys():
                cj['ranges'][key] = {
                    'type': 'NdArray',
                    'dataType': str(self._data[variable].dtype),
                    'axisNames': [
                        'y', 'x', self._coverage_properties['time_axis_label']
                    ],
                    'shape': [
                        metadata['height'], metadata['width'],
                        metadata['time_steps']
                    ]
                }

                data = data.fillna(None)
                cj['ranges'][key]['values'] = data[key].values.flatten(
                ).tolist()  # noqa
        except IndexError as err:
            LOGGER.warning(err)
            raise ProviderQueryError('Invalid query parameter')

        return cj
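
For orientation, a minimal sketch of the CoverageJSON layout the method assembles (all values invented; note the y axis runs from maxy down to miny, matching the reversal above):

coverage = {
    'type': 'Coverage',
    'domain': {
        'type': 'Domain',
        'domainType': 'Grid',
        'axes': {
            'x': {'start': -180.0, 'stop': 180.0, 'num': 720},
            'y': {'start': 90.0, 'stop': -90.0, 'num': 360},
            'time': {'start': '2000-01', 'stop': '2000-12', 'num': 12}
        },
        'referencing': []  # CRS entries elided
    },
    'parameters': {},  # one 'Parameter' object per variable
    'ranges': {}  # one 'NdArray' object per variable
}
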
Code example #25
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: GeoJSON FeaturesCollection
        """
        LOGGER.debug('Querying PostGIS')

        where_conditions = []
        if properties:
            property_clauses = \
                [SQL('{} = {}').format(
                    Identifier(self.properties_to_cols[k]),
                    Literal(v)) for k, v in properties]
            where_conditions += property_clauses
        if bbox:
            bbox_clause = SQL('{} && ST_MakeEnvelope({})').format(
                Identifier(self.geom),
                SQL(', ').join([Literal(bbox_coord) for bbox_coord in bbox]))
            where_conditions.append(bbox_clause)

        if where_conditions:
            where_clause = SQL(' WHERE {}').format(
                SQL(' AND ').join(where_conditions))
        else:
            where_clause = SQL('')

        if resulttype == 'hits':
            select_clause = SQL('SELECT count(*) as hits')
            with DatabaseConnection(self.conn_dic) as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)
                sql_query = SQL('{} FROM {}{}').format(select_clause,
                                                       SQL(self.table),
                                                       where_clause)
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error(
                        'Error executing sql_query: {}: {}'.format(
                            sql_query.as_string(cursor), err))
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)
        else:
            select_clause = SQL('SELECT {}, {}, ST_AsGeoJSON({})').format(
                self.columns, Identifier(self.id_field), Identifier(self.geom))

            with DatabaseConnection(self.conn_dic) as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)
                sql_query = \
                    SQL('DECLARE "geo_cursor" CURSOR FOR {} FROM {} {}'
                        ).format(select_clause,
                                 SQL(self.table),
                                 where_clause)

                LOGGER.debug('SQL Query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.debug('Start Index: {}'.format(startindex))
                LOGGER.debug('End Index: {}'.format(startindex + limit))
                try:
                    cursor.execute(sql_query)
                    for index in [startindex, limit]:
                        cursor.execute(
                            'fetch forward {} from geo_cursor'.format(index))
                except Exception as err:
                    LOGGER.error('Error executing sql_query: {}'.format(
                        sql_query.as_string(cursor)))
                    LOGGER.error(err)
                    raise ProviderQueryError()

                row_data = cursor.fetchall()

            return {
                'type': 'FeatureCollection',
                'features': [self.__response_feature(rd) for rd in row_data]
            }
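
A note on the psycopg2.sql composition used throughout these providers: Identifier quotes names and Literal safely embeds values, so user input never reaches the SQL string unescaped. A standalone sketch (table and column names taken from the example URL, purely illustrative):

from psycopg2.sql import SQL, Identifier, Literal

condition = SQL('{} = {}').format(Identifier('waterway'), Literal('river'))
query = SQL('SELECT * FROM {} WHERE {}').format(
    Identifier('hotosm_bdi_waterways'), condition)
# query.as_string(conn) renders the safely quoted statement, given a
# live psycopg2 connection 'conn'
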
Code example #26
    def query(self,
              range_subset=[1],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json',
              **kwargs):
        """
        Extract data from collection

        :param range_subset: variable
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        nbits = 16

        bands = range_subset
        LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

        args = {'indexes': None}
        shapes = []

        if all([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                len(bbox) > 0
        ]):
            msg = 'bbox and subsetting by coordinates are exclusive'
            LOGGER.warning(msg)
            raise ProviderQueryError(msg)

        if len(bbox) > 0:
            minx, miny, maxx, maxy = bbox

            crs_src = CRS.from_epsg(4326)
            crs_dest = CRS.from_string(self.crs)

            LOGGER.debug('source bbox CRS and data CRS are different')
            LOGGER.debug('reprojecting bbox into native coordinates')

            temp_geom_min = {"type": "Point", "coordinates": [minx, miny]}
            temp_geom_max = {"type": "Point", "coordinates": [maxx, maxy]}
            temp_geom_minup = {"type": "Point", "coordinates": [minx, maxy]}
            temp_geom_maxdown = {"type": "Point", "coordinates": [maxx, miny]}

            min_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                     temp_geom_min)
            minx2, miny2 = min_coord['coordinates']

            max_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                     temp_geom_max)
            maxx2, maxy2 = max_coord['coordinates']

            upleft_coord = rasterio.warp.transform_geom(
                crs_src, crs_dest, temp_geom_minup)
            minx2up, maxy2up = upleft_coord['coordinates']

            downright_coord = rasterio.warp.transform_geom(
                crs_src, crs_dest, temp_geom_maxdown)
            maxx2down, miny2down = downright_coord['coordinates']

            LOGGER.debug('Source coordinates: {}'.format(
                [minx, miny, maxx, maxy]))
            LOGGER.debug('Destination coordinates: {}'.format(
                [minx2, miny2, maxx2, maxy2]))

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx2, miny2],
                    [minx2up, maxy2up],
                    [maxx2, maxy2],
                    [maxx2down, miny2down],
                    [minx2, miny2],
                ]]
            }]

        elif (self._coverage_properties['x_axis_label'] in subsets
              and self._coverage_properties['y_axis_label'] in subsets):
            LOGGER.debug('Creating spatial subset')

            x = self._coverage_properties['x_axis_label']
            y = self._coverage_properties['y_axis_label']

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[[subsets[x][0], subsets[y][0]],
                                 [subsets[x][0], subsets[y][1]],
                                 [subsets[x][1], subsets[y][1]],
                                 [subsets[x][1], subsets[y][0]],
                                 [subsets[x][0], subsets[y][0]]]]
            }]

        date_file_list = False

        if datetime_:

            if '/' not in datetime_:
                try:
                    period = datetime.strptime(
                        datetime_, '%Y-%m-%dT%HZ').strftime('%Y%m%d%H')
                    self.data = [v for v in self.file_list if period in v][0]
                except IndexError as err:
                    msg = 'Datetime value invalid or out of time domain'
                    LOGGER.error(err)
                    raise ProviderQueryError(msg)

            else:
                self.get_file_list(self.var, datetime_)
                date_file_list = self.file_list

        if bands:
            LOGGER.debug('Selecting bands')
            args['indexes'] = list(map(int, bands))

        with rasterio.open(self.data) as _data:
            LOGGER.debug('Creating output coverage metadata')
            _data._crs = self.crs
            _data._transform = self.transform

            out_meta = _data.meta

            if self.options is not None:
                LOGGER.debug('Adding dataset options')
                for key, value in self.options.items():
                    out_meta[key] = value

            if shapes:  # spatial subset
                try:
                    LOGGER.debug('Clipping data with bbox')
                    out_image, out_transform = rasterio.mask.mask(
                        _data,
                        filled=False,
                        shapes=shapes,
                        crop=True,
                        indexes=args['indexes'])
                except ValueError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError(err)

                out_meta.update({
                    'driver': self.native_format,
                    'height': out_image.shape[1],
                    'width': out_image.shape[2],
                    'transform': out_transform
                })
            else:  # no spatial subset
                LOGGER.debug('Creating data in memory with band selection')
                out_image = _data.read(indexes=args['indexes'])

            if bbox:
                out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
            elif shapes:
                out_meta['bbox'] = [
                    subsets[x][0], subsets[y][0], subsets[x][1], subsets[y][1]
                ]
            else:
                out_meta['bbox'] = [
                    _data.bounds.left, _data.bounds.bottom, _data.bounds.right,
                    _data.bounds.top
                ]

            out_meta['units'] = _data.units

            self.filename = self.data.split('/')[-1].replace('*', '')

            # CovJSON output does not support multiple bands yet
            # Only the first timestep is returned
            if format_ == 'json':

                if date_file_list:
                    err = 'Date range not yet supported for CovJSON output'
                    LOGGER.error(err)
                    raise ProviderQueryError(err)
                else:
                    LOGGER.debug('Creating output in CoverageJSON')
                    out_meta['bands'] = [1]
                    return self.gen_covjson(out_meta, out_image)
            else:
                if date_file_list:
                    out_meta.update(count=len(date_file_list))

                    LOGGER.debug('Serializing data in memory')
                    with MemoryFile() as memfile:
                        with memfile.open(**out_meta, nbits=nbits) as dest:
                            for band_id, layer in enumerate(date_file_list,
                                                            start=1):
                                with rasterio.open(layer) as src1:
                                    src1._crs = self.crs
                                    src1._transform = self.transform
                                    if shapes:  # spatial subset
                                        try:
                                            LOGGER.debug('Clipping data')
                                            out_image, out_transform = \
                                                rasterio.mask.mask(
                                                    src1,
                                                    filled=False,
                                                    shapes=shapes,
                                                    crop=True,
                                                    indexes=args['indexes'])
                                        except ValueError as err:
                                            LOGGER.error(err)
                                            raise ProviderQueryError(err)
                                    else:
                                        out_image = src1.read(
                                            indexes=args['indexes'])

                                    dest.write_band(band_id, out_image[0])

                        # return data in native format
                        LOGGER.debug('Returning data in native format')
                        return memfile.read()
                else:
                    LOGGER.debug('Serializing data in memory')
                    out_meta.update(count=len(args['indexes']))
                    with MemoryFile() as memfile:
                        with memfile.open(**out_meta, nbits=nbits) as dest:
                            dest.write(out_image)

                        # return data in native format
                        LOGGER.debug('Returning data in native format')
                        return memfile.read()
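
Aside: the four corner-point reprojection above preserves the warped quadrilateral. When only the axis-aligned envelope is needed, rasterio offers transform_bounds, which does the same in one call; a sketch with illustrative coordinates and destination CRS:

from rasterio.crs import CRS
from rasterio.warp import transform_bounds

minx2, miny2, maxx2, maxy2 = transform_bounds(
    CRS.from_epsg(4326),           # source: WGS84 lon/lat
    CRS.from_string('EPSG:3857'),  # destination CRS (illustrative)
    -75.0, 45.0, -73.0, 46.0)      # minx, miny, maxx, maxy
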
Code example #27
File: csv_.py Project: tomkralidis/pygeoapi
    def _load(self, startindex=0, limit=10, resulttype='results',
              identifier=None, bbox=[], datetime_=None, properties=[],
              select_properties=[], skip_geometry=False, q=None):
        """
        Load CSV data

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param identifier: feature id (default None)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of GeoJSON FeatureCollection
        """

        found = False
        result = None
        feature_collection = {
            'type': 'FeatureCollection',
            'features': []
        }

        with open(self.data) as ff:
            LOGGER.debug('Serializing DictReader')
            data_ = csv.DictReader(ff)
            if resulttype == 'hits':
                LOGGER.debug('Returning hits only')
                feature_collection['numberMatched'] = len(list(data_))
                return feature_collection
            LOGGER.debug('Slicing CSV rows')
            for row in itertools.islice(data_, startindex, startindex+limit):
                feature = {'type': 'Feature'}
                feature['id'] = row.pop(self.id_field)
                if not skip_geometry:
                    feature['geometry'] = {
                        'type': 'Point',
                        'coordinates': [
                            float(row.pop(self.geometry_x)),
                            float(row.pop(self.geometry_y))
                        ]
                    }
                else:
                    feature['geometry'] = None
                if self.properties or select_properties:
                    feature['properties'] = OrderedDict()
                    for p in set(self.properties) | set(select_properties):
                        try:
                            feature['properties'][p] = row[p]
                        except KeyError as err:
                            LOGGER.error(err)
                            raise ProviderQueryError()
                else:
                    feature['properties'] = row

                if identifier is not None and feature['id'] == identifier:
                    found = True
                    result = feature
                feature_collection['features'].append(feature)

            feature_collection['numberMatched'] = \
                len(feature_collection['features'])

        if identifier is not None and not found:
            return None
        elif identifier is not None and found:
            return result

        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        return feature_collection
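
The paging here relies on itertools.islice consuming the DictReader lazily, so earlier rows are skipped without loading the whole file into memory. A standalone sketch (file name is hypothetical):

import csv
import itertools

with open('obs.csv') as fh:
    reader = csv.DictReader(fh)
    # startindex=20, limit=10
    page = list(itertools.islice(reader, 20, 20 + 10))
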
Code example #28
    def query(self,
              offset=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False,
              q=None,
              **kwargs):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param offset: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: GeoJSON FeaturesCollection
        """
        LOGGER.debug('Querying PostGIS')

        if resulttype == 'hits':

            with DatabaseConnection(self.conn_dic,
                                    self.table,
                                    properties=self.properties,
                                    context="hits") as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)

                where_clause = self.__get_where_clauses(properties=properties,
                                                        bbox=bbox)
                sql_query = SQL("SELECT COUNT(*) as hits from {} {}").\
                    format(Identifier(self.table), where_clause)
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error('Error executing sql_query: {}: {}'.format(
                        sql_query.as_string(cursor), err))
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)

        end_index = offset + limit

        with DatabaseConnection(self.conn_dic,
                                self.table,
                                properties=self.properties) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            props = db.columns if select_properties == [] else \
                SQL(', ').join([Identifier(p) for p in select_properties])

            geom = SQL('') if skip_geometry else \
                SQL(",ST_AsGeoJSON({})").format(Identifier(self.geom))

            where_clause = self.__get_where_clauses(properties=properties,
                                                    bbox=bbox)

            orderby = self._make_orderby(sortby) if sortby else SQL('')

            sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
             SELECT DISTINCT {} {} FROM {} {} {}"                                                 ).\
                format(props,
                       geom,
                       Identifier(self.table),
                       where_clause,
                       orderby)

            LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
            LOGGER.debug('Start Index: {}'.format(offset))
            LOGGER.debug('End Index: {}'.format(end_index))
            try:
                cursor.execute(sql_query)
                for index in [offset, limit]:
                    cursor.execute(
                        "fetch forward {} from geo_cursor".format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()

            feature_collection = {'type': 'FeatureCollection', 'features': []}

            for rd in row_data:
                feature_collection['features'].append(
                    self.__response_feature(rd))

            return feature_collection
Code example #29
File: xarray_.py Project: ksonda/pygeoapi
    def query(self,
              range_subset=[],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json'):
        """
        Extract data from collection

        :param range_subset: list of data variables to return (all if blank)
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        if not range_subset and not subsets and format_ != 'json':
            LOGGER.debug('No parameters specified, returning native data')
            if format_ == 'zarr':
                return _get_zarr_data(self._data)
            else:
                return read_data(self.data)

        if len(range_subset) < 1:
            range_subset = self.fields

        data = self._data[[*range_subset]]

        if any([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                self._coverage_properties['time_axis_label'] in subsets,
                bbox,
                datetime_ is not None
        ]):

            LOGGER.debug('Creating spatio-temporal subset')

            query_params = {}
            for key, val in subsets.items():
                LOGGER.debug('Processing subset: {}'.format(key))
                if data.coords[key].values[0] > data.coords[key].values[-1]:
                    LOGGER.debug('Reversing slicing from high to low')
                    query_params[key] = slice(val[1], val[0])
                else:
                    query_params[key] = slice(val[0], val[1])

            if bbox:
                if all([
                        self._coverage_properties['x_axis_label'] in subsets,
                        self._coverage_properties['y_axis_label'] in subsets,
                        len(bbox) > 0
                ]):
                    msg = 'bbox and subsetting by coordinates are exclusive'
                    LOGGER.warning(msg)
                    raise ProviderQueryError(msg)
                else:
                    query_params[self._coverage_properties['x_axis_label']] = \
                        slice(bbox[0], bbox[2])
                    query_params[self._coverage_properties['y_axis_label']] = \
                        slice(bbox[1], bbox[3])

            if datetime_ is not None:
                if self._coverage_properties['time_axis_label'] in subsets:
                    msg = 'datetime and temporal subsetting are exclusive'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                else:
                    if '/' in datetime_:
                        begin, end = datetime_.split('/')
                        if begin < end:
                            query_params[self.time_field] = slice(begin, end)
                        else:
                            LOGGER.debug('Reversing slicing from high to low')
                            query_params[self.time_field] = slice(end, begin)
                    else:
                        query_params[self.time_field] = datetime_

            LOGGER.debug('Query parameters: {}'.format(query_params))
            try:
                data = data.sel(query_params)
            except Exception as err:
                LOGGER.warning(err)
                raise ProviderQueryError(err)

        if (any([
                data.coords[self.x_field].size == 0,
                data.coords[self.y_field].size == 0,
                data.coords[self.time_field].size == 0
        ])):
            msg = 'No data found'
            LOGGER.warning(msg)
            raise ProviderNoDataError(msg)

        out_meta = {
            'bbox': [
                data.coords[self.x_field].values[0],
                data.coords[self.y_field].values[0],
                data.coords[self.x_field].values[-1],
                data.coords[self.y_field].values[-1]
            ],
            "time": [
                _to_datetime_string(data.coords[self.time_field].values[0]),
                _to_datetime_string(data.coords[self.time_field].values[-1])
            ],
            "driver":
            "xarray",
            "height":
            data.dims[self.y_field],
            "width":
            data.dims[self.x_field],
            "time_steps":
            data.dims[self.time_field],
            "variables":
            {var_name: var.attrs
             for var_name, var in data.variables.items()}
        }

        LOGGER.debug('Serializing data in memory')
        if format_ == 'json':
            LOGGER.debug('Creating output in CoverageJSON')
            return self.gen_covjson(out_meta, data, range_subset)
        elif format_ == 'zarr':
            LOGGER.debug('Returning data in native zarr format')
            return _get_zarr_data(data)
        else:  # return data in native format
            with tempfile.TemporaryFile() as fp:
                LOGGER.debug('Returning data in native NetCDF format')
                fp.write(data.to_netcdf())
                fp.seek(0)
                return fp.read()
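
The .sel() call above is plain xarray label-based selection; reversed coordinates simply need a reversed slice, as handled above. A self-contained sketch with an invented dataset:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'tas': (('time', 'lat', 'lon'), np.zeros((3, 4, 5)))},
    coords={'time': np.array(['2001-01-01', '2002-01-01', '2003-01-01'],
                             dtype='datetime64[ns]'),
            'lat': np.linspace(40.0, 43.0, 4),
            'lon': np.linspace(-75.0, -71.0, 5)})

subset = ds.sel(time=slice('2001', '2002'), lat=slice(40.0, 42.0))
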
Code example #30
    def query(self,
              range_subset=['spei'],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json'):
        """
        Extract data from collection

        :param range_subset: empty for SPEI
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        if 'scenario' in subsets:
            scenario = subsets['scenario']
            try:
                if len(scenario) > 1:
                    msg = 'multiple scenarios are not supported'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                elif scenario[0] not in ['RCP2.6', 'hist']:
                    scenario_value = scenario[0].replace('RCP', '')
                    self.data = self.data.replace('2.6', scenario_value)
            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            subsets.pop('scenario')

        if 'percentile' in subsets:
            percentile = subsets['percentile']

            try:
                if percentile != [50]:
                    pctl = str(percentile[0])
                    self.data = self.data.replace('pctl50',
                                                  'pctl{}'.format(pctl))

            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            subsets.pop('percentile')

        self._data = open_data(self.data)

        if not range_subset and not subsets and format_ != 'json':
            LOGGER.debug('No parameters specified, returning native data')
            if format_ == 'zarr':
                return _get_zarr_data(self._data)
            else:
                return read_data(self.data)

        data = self._data[[*range_subset]]

        if any([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                self._coverage_properties['time_axis_label'] in subsets, bbox,
                datetime_ is not None
        ]):

            LOGGER.debug('Creating spatio-temporal subset')

            query_params = {}
            for key, val in subsets.items():
                val_0 = self._data.coords[key].values[0]
                val_1 = self._data.coords[key].values[-1]
                if val_0 > val_1:
                    LOGGER.debug('Reversing slicing low/high')
                    query_params[key] = slice(val[1], val[0])
                else:
                    query_params[key] = slice(val[0], val[1])

            if bbox:
                if all([
                        self._coverage_properties['x_axis_label'] in subsets,
                        self._coverage_properties['y_axis_label'] in subsets,
                        len(bbox) > 0
                ]):
                    msg = 'bbox and subsetting by coordinates are exclusive'
                    LOGGER.warning(msg)
                    raise ProviderQueryError(msg)
                else:
                    query_params[self._coverage_properties['x_axis_label']] = \
                        slice(bbox[0], bbox[2])
                    query_params[self._coverage_properties['y_axis_label']] = \
                        slice(bbox[3], bbox[1])

            if datetime_ is not None:
                if self._coverage_properties['time_axis_label'] in subsets:
                    msg = 'datetime and temporal subsetting are exclusive'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                else:
                    if '/' in datetime_:

                        begin, end = datetime_.split('/')

                        if begin < end:
                            query_params[self.time_field] = slice(begin, end)
                        else:
                            LOGGER.debug('Reversing slicing from high to low')
                            query_params[self.time_field] = slice(end, begin)
                    else:
                        query_params[self.time_field] = datetime_

            LOGGER.debug('Query parameters: {}'.format(query_params))
            try:
                data = self._data.loc[query_params]
            except Exception as err:
                LOGGER.warning(err)
                raise ProviderQueryError(err)

        if (any([
                data.coords[self.x_field].size == 0,
                data.coords[self.y_field].size == 0
        ])):
            msg = 'No data found'
            LOGGER.warning(msg)
            raise ProviderNoDataError(msg)

        out_meta = {
            'bbox': [
                data.coords[self.x_field].values[0],
                data.coords[self.y_field].values[0],
                data.coords[self.x_field].values[-1],
                data.coords[self.y_field].values[-1]
            ],
            "time": [
                self._to_datetime_string(
                    data.coords[self.time_field].values[0]),
                self._to_datetime_string(
                    data.coords[self.time_field].values[-1])
            ],
            "driver":
            "xarray",
            "height":
            data.dims[self.y_field],
            "width":
            data.dims[self.x_field],
            "time_steps":
            data.dims[self.time_field],
            "variables":
            {var_name: var.attrs
             for var_name, var in data.variables.items()}
        }

        LOGGER.debug('Serializing data in memory')
        if format_ == 'json':
            LOGGER.debug('Creating output in CoverageJSON')
            return self.gen_covjson(out_meta, data, range_subset)
        elif format_ == 'zarr':
            LOGGER.debug('Returning data in native zarr format')
            return _get_zarr_data(data)
        else:  # return data in native format
            with tempfile.TemporaryFile() as fp:
                LOGGER.debug('Returning data in native NetCDF format')
                fp.write(data.to_netcdf())
                fp.seek(0)
                return fp.read()
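
The scenario/percentile handling above selects a dataset by rewriting the configured path. A sketch of that substitution pattern (paths are invented):

data_path = '/data/spei/RCP2.6/pctl50/spei.nc'
scenario = ['RCP8.5']
if scenario[0] not in ['RCP2.6', 'hist']:
    data_path = data_path.replace('2.6', scenario[0].replace('RCP', ''))
percentile = [90]
if percentile != [50]:
    data_path = data_path.replace('pctl50', 'pctl{}'.format(percentile[0]))
# data_path is now '/data/spei/RCP8.5/pctl90/spei.nc'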