Example #1
    def __init__(self, provider_def):
        """
        Initialize object

        :param provider_def: provider definition

        :returns: pygeoapi.provider.elasticsearch_.ElasticsearchProvider
        """

        super().__init__(provider_def)

        self.es_host, self.index_name = self.data.rsplit('/', 1)

        LOGGER.debug('Setting Elasticsearch properties')
        self.is_gdal = False

        LOGGER.debug('host: {}'.format(self.es_host))
        LOGGER.debug('index: {}'.format(self.index_name))

        self.type_name = 'FeatureCollection'
        self.url_parsed = urlparse(self.es_host)

        if self.url_parsed.port is None:  # proxy to default HTTP(S) port
            if self.url_parsed.scheme == 'https':
                port = 443
            else:
                port = 80
        else:  # was set explicitly
            port = self.url_parsed.port

        url_settings = {
            'scheme': self.url_parsed.scheme,
            'host': self.url_parsed.hostname,
            'port': port
        }

        if self.url_parsed.path:
            url_settings['url_prefix'] = self.url_parsed.path

        LOGGER.debug('URL settings: {}'.format(url_settings))
        LOGGER.debug('Connecting to Elasticsearch')
        self.es = Elasticsearch([url_settings])
        if not self.es.ping():
            msg = 'Cannot connect to Elasticsearch'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Determining ES version')
        v = self.es.info()['version']['number'][:3]
        if float(v) < 7:
            msg = 'only ES 7+ supported'
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)

        LOGGER.debug('Grabbing field information')
        try:
            self.fields = self.get_fields()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
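
For context, a minimal sketch of how this provider might be instantiated. The configuration keys follow pygeoapi's provider definition convention; the host, index and id_field values here are hypothetical:

    # hypothetical provider definition; the trailing path segment of 'data'
    # is split off as the index name by rsplit('/', 1) above
    provider_def = {
        'name': 'Elasticsearch',
        'type': 'feature',
        'data': 'http://localhost:9200/my_index',
        'id_field': 'id'
    }
    provider = ElasticsearchProvider(provider_def)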
Example #2
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], datetime_=None, properties=[], sortby=[],
              select_properties=[], skip_geometry=False):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)

        :returns: GeoJSON FeatureCollection
        """
        LOGGER.debug('Querying PostGIS')

        if resulttype == 'hits':

            with DatabaseConnection(self.conn_dic,
                                    self.table, context="hits") as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)

                where_clause = self.__get_where_clauses(
                    properties=properties, bbox=bbox)
                sql_query = SQL("SELECT COUNT(*) as hits from {} {}").\
                    format(Identifier(self.table), where_clause)
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error('Error executing sql_query: {}: {}'.format(
                        sql_query.as_string(cursor), err))
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)

        end_index = startindex + limit

        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            where_clause = self.__get_where_clauses(
                properties=properties, bbox=bbox)

            sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
             SELECT DISTINCT {},ST_AsGeoJSON({}) FROM {}{}").\
                format(db.columns,
                       Identifier(self.geom),
                       Identifier(self.table),
                       where_clause)

            LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
            LOGGER.debug('Start Index: {}'.format(startindex))
            LOGGER.debug('End Index: {}'.format(end_index))
            try:
                cursor.execute(sql_query)
                # the first FETCH FORWARD advances past the first startindex
                # rows; the second fetches the next limit rows, which
                # fetchall() below returns
                for index in [startindex, limit]:
                    cursor.execute("fetch forward {} from geo_cursor"
                                   .format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()

            feature_collection = {
                'type': 'FeatureCollection',
                'features': []
            }

            for rd in row_data:
                feature_collection['features'].append(
                    self.__response_feature(rd))

            return feature_collection
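
A condensed sketch of the kind of WHERE composition `__get_where_clauses` is assumed to perform (Example #7 below builds the same thing inline); the column name and value are hypothetical:

    from psycopg2.sql import SQL, Identifier, Literal

    # hypothetical: renders to  WHERE "waterway" = 'river'
    conditions = [SQL('{} = {}').format(Identifier('waterway'),
                                        Literal('river'))]
    where_clause = SQL(' WHERE {}').format(SQL(' AND ').join(conditions))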
Example #3
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], datetime_=None, properties=[], sortby=[],
              select_properties=[], skip_geometry=False, q=None,
              filterq=None, **kwargs):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)
        :param filterq: filter object

        :returns: dict of 0..n GeoJSON features
        """

        query = {'track_total_hits': True, 'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {
            'type': 'FeatureCollection',
            'features': []
        }

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            'coordinates': [[minx, maxy], [maxx, miny]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if datetime_ is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = self.mask_prop(self.time_field)

            if '/' in datetime_:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime_.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end
                        }
                    }
                }
                if time_begin == '..':
                    range_['range'][time_field].pop('gte')
                elif time_end == '..':
                    range_['range'][time_field].pop('lte')

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: datetime_}})

            LOGGER.debug(filter_)
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                prop_name = self.mask_prop(prop[0])
                pf = {
                    'match': {
                        prop_name: {
                            'query': prop[1]
                        }
                    }
                }
                if '|' not in prop[1]:
                    pf['match'][prop_name]['minimum_should_match'] = '100%'
                query['query']['bool']['filter'].append(pf)

        if sortby:
            LOGGER.debug('processing sortby')
            query['sort'] = []
            for sort in sortby:
                LOGGER.debug('processing sort object: {}'.format(sort))

                sp = sort['property']

                if self.fields[sp]['type'] == 'string':
                    LOGGER.debug('setting ES .raw on property')
                    sort_property = '{}.raw'.format(self.mask_prop(sp))
                else:
                    sort_property = self.mask_prop(sp)

                sort_order = 'asc'
                if sort['order'] == '-':
                    sort_order = 'desc'

                sort_ = {
                    sort_property: {
                        'order': sort_order
                    }
                }
                query['sort'].append(sort_)

        if q is not None:
            LOGGER.debug('Adding free-text search')
            query['query']['bool']['must'] = {'query_string': {'query': q}}

            query['_source'] = {
                'excludes': [
                    'properties._metadata-payload',
                    'properties._metadata-schema',
                    'properties._metadata-format'
                ]
            }

        if self.properties or select_properties:
            LOGGER.debug('including specified fields: {}'.format(
                self.properties))
            query['_source'] = {
                'includes': list(map(self.mask_prop,
                                 set(self.properties) | set(select_properties)))  # noqa
            }
            query['_source']['includes'].append(self.mask_prop(self.id_field))
            query['_source']['includes'].append('type')
            query['_source']['includes'].append('geometry')
        if skip_geometry:
            LOGGER.debug('excluding geometry')
            try:
                query['_source']['excludes'] = ['geometry']
            except KeyError:
                query['_source'] = {'excludes': ['geometry']}
        try:
            LOGGER.debug('querying Elasticsearch')
            if filterq:
                LOGGER.debug('adding cql object: {}'.format(filterq.json()))
                query = update_query(input_query=query, cql=filterq)
            LOGGER.debug(json.dumps(query, indent=4))

            LOGGER.debug('Testing for ES scrolling')
            if startindex + limit > 10000:
                gen = helpers.scan(client=self.es, query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex + limit):
                    try:
                        if i >= startindex:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = \
                    len(results['hits']['hits']) + startindex
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex, size=limit,
                                         body=query)
                results['hits']['total'] = results['hits']['total']['value']

        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        feature_collection['numberMatched'] = results['hits']['total']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            feature_ = self.esdoc2geojson(feature)
            feature_collection['features'].append(feature_)

        return feature_collection
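
To make the construction above concrete, a bbox plus time-range request would yield a body along these lines, assuming mask_prop() prefixes property names with 'properties.' (values hypothetical):

    # hypothetical body for bbox=[-75, 45, -74, 46] and
    # datetime_='2001-10-30/2007-10-30'
    {
        'track_total_hits': True,
        'query': {
            'bool': {
                'filter': [
                    {'geo_shape': {'geometry': {
                        'shape': {'type': 'envelope',
                                  'coordinates': [[-75, 46], [-74, 45]]},
                        'relation': 'intersects'}}},
                    {'range': {'properties.datetime': {'gte': '2001-10-30',
                                                       'lte': '2007-10-30'}}}
                ]
            }
        }
    }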
Example #4
    def query(self, range_subset=[], subsets={},
              bbox=[], datetime_=None, format_='json'):
        """
        Extract data from collection

        :param range_subset: list of data variables to return
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        if 'scenario' in subsets:
            scenario = subsets['scenario']
            try:
                if len(scenario) > 1:
                    msg = 'multiple scenarios are not supported'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                elif scenario[0] not in ['RCP2.6', 'hist']:
                    scenario_value = scenario[0].replace('RCP', '')
                    self.data = self.data.replace('2.6', scenario_value)
            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            subsets.pop('scenario')

        if 'percentile' in subsets:
            percentile = subsets['percentile']

            try:
                if percentile != [50]:
                    pctl = str(percentile[0])
                    self.data = self.data.replace('pctl50',
                                                  'pctl{}'.format(pctl))

            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            subsets.pop('percentile')

        if 'season' in subsets:
            seasonal = subsets['season']

            try:
                if len(seasonal) > 1:
                    msg = 'multiple seasons are not supported'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                elif seasonal != ['DJF']:
                    season = str(seasonal[0])
                    self.data = self.data.replace('DJF',
                                                  season)

            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            subsets.pop('season')

        self._data = open_data(self.data)

        if not range_subset and not subsets and format_ != 'json':
            LOGGER.debug('No parameters specified, returning native data')
            if format_ == 'zarr':
                return _get_zarr_data(self._data)
            else:
                return read_data(self.data)

        data = self._data[[*range_subset]]

        if any([self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                self._coverage_properties['time_axis_label'] in subsets,
                bbox,
                datetime_ is not None]):

            LOGGER.debug('Creating spatio-temporal subset')

            query_params = {}
            for key, val in subsets.items():
                if data.coords[key].values[0] > data.coords[key].values[-1]:
                    LOGGER.debug('Reversing slicing low/high')
                    query_params[key] = slice(val[1], val[0])
                else:
                    query_params[key] = slice(val[0], val[1])

            if bbox:
                if all([self._coverage_properties['x_axis_label'] in subsets,
                        self._coverage_properties['y_axis_label'] in subsets,
                        len(bbox) > 0]):
                    msg = 'bbox and subsetting by coordinates are exclusive'
                    LOGGER.warning(msg)
                    raise ProviderQueryError(msg)
                else:
                    query_params[self._coverage_properties['x_axis_label']] = \
                        slice(bbox[0], bbox[2])
                    query_params[self._coverage_properties['y_axis_label']] = \
                        slice(bbox[3], bbox[1])

            if datetime_ is not None:
                if 'avg_20years' in self.data:
                    msg = 'datetime not supported for 20-year average layers'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                elif self._coverage_properties['time_axis_label'] in subsets:
                    msg = 'datetime and temporal subsetting are exclusive'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                else:
                    if '/' in datetime_:

                        begin, end = datetime_.split('/')

                        if begin < end:
                            query_params[self.time_field] = slice(begin, end)
                        else:
                            LOGGER.debug('Reversing slicing from high to low')
                            query_params[self.time_field] = slice(end, begin)
                    else:
                        query_params[self.time_field] = datetime_

            LOGGER.debug('Query parameters: {}'.format(query_params))
            try:
                data = data.loc[query_params]
            except Exception as err:
                LOGGER.warning(err)
                raise ProviderQueryError(err)

        if (any([data.coords[self.x_field].size == 0,
                data.coords[self.y_field].size == 0])):
            msg = 'No data found'
            LOGGER.warning(msg)
            raise ProviderNoDataError(msg)

        out_meta = {
            'bbox': [
                data.coords[self.x_field].values[0],
                data.coords[self.y_field].values[0],
                data.coords[self.x_field].values[-1],
                data.coords[self.y_field].values[-1]
            ],
            "time": [
                self._to_datetime_string(
                    data.coords[self.time_field].values[0]),
                self._to_datetime_string(
                    data.coords[self.time_field].values[-1])
            ],
            "driver": "xarray",
            "height": data.dims[self.y_field],
            "width": data.dims[self.x_field],
            "time_steps": data.dims[self.time_field],
            "variables": {var_name: var.attrs
                          for var_name, var in data.variables.items()}
        }

        LOGGER.debug('Serializing data in memory')
        if format_ == 'json':
            LOGGER.debug('Creating output in CoverageJSON')
            return self.gen_covjson(out_meta, data, range_subset)
        elif format_ == 'zarr':
            LOGGER.debug('Returning data in native zarr format')
            return _get_zarr_data(data)
        # elif format_.lower() == 'geotiff':
        #     if len(range_subset) == 1:
        #         import rioxarray
        #         with tempfile.TemporaryFile() as fp:
        #             LOGGER.debug('Returning data in GeoTIFF format')
        #             data.rio.write_crs("epsg:4326", inplace=True)
        #             data[range_subset[0]].rio.to_raster('/tmp/tmp.tif')
        #             with open('/tmp/tmp.tif') as fp:
        #                 fp.seek(0)
        #                 return fp
        #     else:
        #         err = 'Only one range subset supported for GeoTIFF'
        #         LOGGER.error(err)
        #         raise ProviderQueryError(err)

        else:  # return data in native format
            with tempfile.TemporaryFile() as fp:
                LOGGER.debug('Returning data in native NetCDF format')
                fp.write(data.to_netcdf())
                fp.seek(0)
                return fp.read()
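
A hedged usage sketch: the scenario, percentile and season subsets are resolved by rewriting the dataset path before it is opened, so a call might look like this (the variable name and file naming are hypothetical, inferred from the replace() calls above):

    # hypothetical: self.data initially names the RCP2.6/pctl50/DJF file
    coverage = provider.query(
        range_subset=['tx_mean'],
        subsets={'scenario': ['RCP8.5'], 'percentile': [90],
                 'season': ['MAM']},
        format_='json')
    # path rewrites applied: '2.6' -> '8.5', 'pctl50' -> 'pctl90',
    # 'DJF' -> 'MAM'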
Example #5
    def _load(self,
              startindex=0,
              limit=10,
              resulttype='results',
              identifier=None,
              bbox=[],
              datetime_=None,
              properties=[],
              select_properties=[],
              skip_geometry=False):
        """
        Load CSV data

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param identifier: feature identifier (default None)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)

        :returns: dict of GeoJSON FeatureCollection
        """

        found = False
        result = None
        feature_collection = {'type': 'FeatureCollection', 'features': []}

        with open(self.data) as ff:
            LOGGER.debug('Serializing DictReader')
            data_ = csv.DictReader(ff)
            if resulttype == 'hits':
                LOGGER.debug('Returning hits only')
                feature_collection['numberMatched'] = len(list(data_))
                return feature_collection
            LOGGER.debug('Slicing CSV rows')
            for row in itertools.islice(data_, startindex, startindex + limit):
                feature = {'type': 'Feature'}
                feature['id'] = row.pop(self.id_field)
                if not skip_geometry:
                    feature['geometry'] = {
                        'type': 'Point',
                        'coordinates': [
                            float(row.pop(self.geometry_x)),
                            float(row.pop(self.geometry_y))
                        ]
                    }
                else:
                    feature['geometry'] = None
                if self.properties or select_properties:
                    feature['properties'] = OrderedDict()
                    for p in set(self.properties) | set(select_properties):
                        try:
                            feature['properties'][p] = row[p]
                        except KeyError as err:
                            LOGGER.error(err)
                            raise ProviderQueryError()
                else:
                    feature['properties'] = row

                if identifier is not None and feature['id'] == identifier:
                    found = True
                    result = feature
                feature_collection['features'].append(feature)
                feature_collection['numberMatched'] = \
                    len(feature_collection['features'])

        if identifier is not None and not found:
            return None
        elif identifier is not None and found:
            return result

        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        return feature_collection
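
For reference, the provider reads a flat CSV whose identifier and coordinate columns are named by the id_field, geometry_x and geometry_y settings; a minimal hypothetical input and call:

    # hypothetical CSV (id_field='id', geometry_x='long', geometry_y='lat'):
    #
    #   id,long,lat,stn_name
    #   371,-75.0,45.0,Ottawa
    #   377,-79.4,43.7,Toronto
    #
    # every column not consumed above becomes a feature property
    fc = provider._load(startindex=0, limit=10)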
Example #6
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: dict of 0..n GeoJSON features
        """

        query = {'track_total_hits': True, 'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {'type': 'FeatureCollection', 'features': []}

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            'coordinates': [[minx, maxy], [maxx, miny]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if datetime is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = self.mask_prop(self.time_field)

            if '/' in datetime:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end
                        }
                    }
                }
                if time_begin == '..':
                    range_['range'][time_field].pop('gte')
                elif time_end == '..':
                    range_['range'][time_field].pop('lte')

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: datetime}})

            LOGGER.debug(filter_)
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                pf = {'match': {self.mask_prop(prop[0]): prop[1]}}
                query['query']['bool']['filter'].append(pf)

        if sortby:
            LOGGER.debug('processing sortby')
            query['sort'] = []
            for sort in sortby:
                LOGGER.debug('processing sort object: {}'.format(sort))

                sp = sort['property']

                if self.fields[sp]['type'] == 'string':
                    LOGGER.debug('setting ES .raw on property')
                    sort_property = '{}.raw'.format(self.mask_prop(sp))
                else:
                    sort_property = self.mask_prop(sp)

                sort_order = 'asc'
                if sort['order'] == 'D':
                    sort_order = 'desc'

                sort_ = {sort_property: {'order': sort_order}}
                query['sort'].append(sort_)

        if self.properties:
            LOGGER.debug('including specified fields: {}'.format(
                self.properties))
            query['_source'] = {
                'includes': list(map(self.mask_prop, self.properties))
            }
            query['_source']['includes'].append(self.mask_prop(self.id_field))
            query['_source']['includes'].append('type')
            query['_source']['includes'].append('geometry')
        try:
            LOGGER.debug('querying Elasticsearch')

            LOGGER.debug('Setting ES paging zero-based')
            if startindex > 0:
                startindex2 = startindex - 1
            else:
                startindex2 = startindex

            if startindex2 + limit > 10000:
                gen = helpers.scan(client=self.es,
                                   query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex2 + limit):
                    try:
                        if i >= startindex2:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = \
                    len(results['hits']['hits']) + startindex2
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex2,
                                         size=limit,
                                         body=query)
                results['hits']['total'] = results['hits']['total']['value']

        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        feature_collection['numberMatched'] = results['hits']['total']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            feature_ = self.esdoc2geojson(feature)
            feature_collection['features'].append(feature_)

        return feature_collection
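
The 10,000 threshold in this and the other Elasticsearch examples corresponds to Elasticsearch's default index.max_result_window: a plain search rejects from_ + size beyond it, so the code falls back to the scroll-based helpers.scan generator and skips records manually. A condensed sketch of that fallback, assuming an existing client es and index_name:

    import itertools
    from elasticsearch import helpers

    # scan() streams every hit via the scroll API; a deep page is reached by
    # discarding the first startindex results instead of using from_/size
    gen = helpers.scan(client=es, query=query, index=index_name,
                       preserve_order=True)
    page = list(itertools.islice(gen, startindex, startindex + limit))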
Example #7
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: GeoJSON FeatureCollection
        """
        LOGGER.debug('Querying PostGIS')

        if resulttype == 'hits':

            with DatabaseConnection(self.conn_dic, self.table,
                                    context="hits") as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)
                sql_query = SQL("select count(*) as hits from {}").\
                    format(Identifier(self.table))
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error(
                        'Error executing sql_query: {}: {}'.format(
                            sql_query.as_string(cursor), err))
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)

        end_index = startindex + limit

        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)
            where_conditions = []
            if properties:
                property_clauses = \
                    [SQL('{} = {}').format(
                        Identifier(k), Literal(v)) for k, v in properties]
                where_conditions += property_clauses
            if bbox:
                bbox_clause = SQL('{} && ST_MakeEnvelope({})').format(
                    Identifier(self.geom),
                    SQL(', ').join(
                        [Literal(bbox_coord) for bbox_coord in bbox]))
                where_conditions.append(bbox_clause)

            if where_conditions:
                where_clause = SQL(' WHERE {}').format(
                    SQL(' AND ').join(where_conditions))
            else:
                where_clause = SQL('')
            sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
             SELECT {},ST_AsGeoJSON({}) FROM {}{}"                                                  ).\
                format(db.columns,
                       Identifier(self.geom),
                       Identifier(self.table),
                       where_clause)

            LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
            LOGGER.debug('Start Index: {}'.format(startindex))
            LOGGER.debug('End Index: {}'.format(end_index))
            try:
                cursor.execute(sql_query)
                # skip the first startindex rows, then fetch the next limit
                # rows for fetchall() below
                for index in [startindex, limit]:
                    cursor.execute(
                        "fetch forward {} from geo_cursor".format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()

            feature_collection = {'type': 'FeatureCollection', 'features': []}

            for rd in row_data:
                feature_collection['features'].append(
                    self.__response_feature(rd))

            return feature_collection
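
For a sense of what the composition renders to, a hypothetical call and the roughly corresponding sql_query.as_string(cursor) output (column names and bbox values are illustrative):

    fc = provider.query(startindex=0, limit=10,
                        properties=[('waterway', 'river')],
                        bbox=[29.0, -4.5, 31.0, -2.0])
    # DECLARE "geo_cursor" CURSOR FOR
    #  SELECT "osm_id","name",ST_AsGeoJSON("geom")
    #  FROM "hotosm_bdi_waterways"
    #  WHERE "waterway" = 'river'
    #  AND "geom" && ST_MakeEnvelope(29.0, -4.5, 31.0, -2.0)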
Example #8
    def gen_covjson(self, metadata, data):
        """
        Generate coverage as CoverageJSON representation
        :param metadata: coverage metadata
        :param data: rasterio DatasetReader object
        :returns: dict of CoverageJSON representation
        """

        LOGGER.debug('Creating CoverageJSON domain')
        minx, miny, maxx, maxy = metadata['bbox']

        cj = {
            'type': 'Coverage',
            'domain': {
                'type': 'Domain',
                'domainType': 'Grid',
                'axes': {
                    'x': {
                        'start': minx,
                        'stop': maxx,
                        'num': metadata['width']
                    },
                    'y': {
                        'start': maxy,
                        'stop': miny,
                        'num': metadata['height']
                    }
                },
                'referencing': [{
                    'coordinates': ['x', 'y'],
                    'system': {
                        'type': self._coverage_properties['crs_type'],
                        'id': self._coverage_properties['bbox_crs']
                    }
                }]
            },
            'parameters': {},
            'ranges': {}
        }

        if metadata['bands'] is None:  # all bands
            bands_select = range(1, len(self._data.dtypes) + 1)
        else:
            bands_select = metadata['bands']

        LOGGER.debug('bands selected: {}'.format(bands_select))
        for bs in bands_select:
            pm = _get_parameter_metadata(self._data.profile['driver'],
                                         self._data.tags(bs))

            parameter = {
                'type': 'Parameter',
                'description': pm['description'],
                'unit': {
                    'symbol': pm['unit_label']
                },
                'observedProperty': {
                    'id': pm['observed_property_id'],
                    'label': {
                        'en': pm['observed_property_name']
                    }
                }
            }

            cj['parameters'][pm['id']] = parameter

        try:
            for key in cj['parameters'].keys():
                cj['ranges'][key] = {
                    'type': 'NdArray',
                    # 'dataType': metadata.dtypes[0],
                    'dataType': 'float',
                    'axisNames': ['y', 'x'],
                    'shape': [metadata['height'], metadata['width']],
                }
                # TODO: deal with multi-band value output
                cj['ranges'][key]['values'] = data.flatten().tolist()
        except IndexError as err:
            LOGGER.warning(err)
            raise ProviderQueryError('Invalid query parameter')

        return cj
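
Trimmed to its skeleton, the resulting document looks like this (parameter name, extents and sizes hypothetical):

    # hypothetical gen_covjson() output, abridged
    {
        'type': 'Coverage',
        'domain': {
            'type': 'Domain',
            'domainType': 'Grid',
            'axes': {'x': {'start': -180, 'stop': 180, 'num': 720},
                     'y': {'start': 90, 'stop': -90, 'num': 360}},
            'referencing': [{...}]
        },
        'parameters': {'B1': {...}},
        'ranges': {'B1': {'type': 'NdArray', 'dataType': 'float',
                          'axisNames': ['y', 'x'], 'shape': [360, 720],
                          'values': [...]}}
    }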
Example #9
    def query(self,
              range_subset=[],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json',
              **kwargs):
        """
        Extract data from collection

        :param range_subset: list of data variables to return (all if blank)
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        if not range_subset and not subsets and format_ != 'json':
            LOGGER.debug('No parameters specified, returning native data')
            if format_ == 'zarr':
                return _get_zarr_data(self._data)
            else:
                return read_data(self.data)

        if len(range_subset) < 1:
            range_subset = self.fields

        data = self._data[[*range_subset]]

        if any([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                self._coverage_properties['time_axis_label'] in subsets,
                bbox,
                datetime_ is not None
        ]):

            LOGGER.debug('Creating spatio-temporal subset')

            query_params = {}
            for key, val in subsets.items():
                LOGGER.debug('Processing subset: {}'.format(key))
                if data.coords[key].values[0] > data.coords[key].values[-1]:
                    LOGGER.debug('Reversing slicing from high to low')
                    query_params[key] = slice(val[1], val[0])
                else:
                    query_params[key] = slice(val[0], val[1])

            if bbox:
                if all([
                        self._coverage_properties['x_axis_label'] in subsets,
                        self._coverage_properties['y_axis_label'] in subsets,
                        len(bbox) > 0
                ]):
                    msg = 'bbox and subsetting by coordinates are exclusive'
                    LOGGER.warning(msg)
                    raise ProviderQueryError(msg)
                else:
                    x_axis = self._coverage_properties['x_axis_label']
                    y_axis = self._coverage_properties['y_axis_label']
                    query_params[x_axis] = slice(bbox[0], bbox[2])
                    query_params[y_axis] = slice(bbox[1], bbox[3])

            if datetime_ is not None:
                if self._coverage_properties['time_axis_label'] in subsets:
                    msg = 'datetime and temporal subsetting are exclusive'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                else:
                    if '/' in datetime_:
                        begin, end = datetime_.split('/')
                        if begin < end:
                            query_params[self.time_field] = slice(begin, end)
                        else:
                            LOGGER.debug('Reversing slicing from high to low')
                            query_params[self.time_field] = slice(end, begin)
                    else:
                        query_params[self.time_field] = datetime_

            LOGGER.debug('Query parameters: {}'.format(query_params))
            try:
                data = data.sel(query_params)
            except Exception as err:
                LOGGER.warning(err)
                raise ProviderQueryError(err)

        if (any([
                data.coords[self.x_field].size == 0,
                data.coords[self.y_field].size == 0,
                data.coords[self.time_field].size == 0
        ])):
            msg = 'No data found'
            LOGGER.warning(msg)
            raise ProviderNoDataError(msg)

        out_meta = {
            'bbox': [
                data.coords[self.x_field].values[0],
                data.coords[self.y_field].values[0],
                data.coords[self.x_field].values[-1],
                data.coords[self.y_field].values[-1]
            ],
            "time": [
                _to_datetime_string(data.coords[self.time_field].values[0]),
                _to_datetime_string(data.coords[self.time_field].values[-1])
            ],
            "driver":
            "xarray",
            "height":
            data.dims[self.y_field],
            "width":
            data.dims[self.x_field],
            "time_steps":
            data.dims[self.time_field],
            "variables":
            {var_name: var.attrs
             for var_name, var in data.variables.items()}
        }

        LOGGER.debug('Serializing data in memory')
        if format_ == 'json':
            LOGGER.debug('Creating output in CoverageJSON')
            return self.gen_covjson(out_meta, data, range_subset)
        elif format_ == 'zarr':
            LOGGER.debug('Returning data in native zarr format')
            return _get_zarr_data(data)
        else:  # return data in native format
            with tempfile.TemporaryFile() as fp:
                LOGGER.debug('Returning data in native NetCDF format')
                fp.write(data.to_netcdf())
                fp.seek(0)
                return fp.read()
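
The subsetting relies on xarray's label-based selection, where slice ends are inclusive; a minimal self-contained illustration of the data.sel(query_params) step with synthetic data and hypothetical dimension names:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {'tas': (('time', 'lat', 'lon'), np.zeros((3, 4, 4)))},
        coords={'time': ['2001', '2002', '2003'],
                'lat': [40, 41, 42, 43],
                'lon': [-75, -74, -73, -72]})

    query_params = {'lat': slice(40, 42), 'lon': slice(-75, -73),
                    'time': slice('2001', '2002')}
    subset = ds[['tas']].sel(query_params)  # label-based, ends inclusive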
Example #10
    def _load(self, startindex=0, limit=10, resulttype='results',
              identifier=None, bbox=[], datetime_=None, properties=[],
              sortby=[], select_properties=[], skip_geometry=False, q=None):
        """
        Private function: Load STA data

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param identifier: feature identifier (default None)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of GeoJSON FeatureCollection
        """
        feature_collection = {
            'type': 'FeatureCollection', 'features': []
        }
        # Make params
        params = {
            '$expand': EXPAND[self.entity],
            '$skip': str(startindex),
            '$top': str(limit),
            '$count': 'true'
        }
        if properties or bbox or datetime_:
            params['$filter'] = self._make_filter(properties, bbox, datetime_)
        if sortby:
            params['$orderby'] = self._make_orderby(sortby)

        # Start session
        s = Session()

        # Form URL for GET request
        LOGGER.debug('Sending query')
        if identifier:
            r = s.get(f'{self._url}({identifier})', params=params)
        else:
            r = s.get(self._url, params=params)

        if r.status_code == codes.bad:
            LOGGER.error('Bad http response code')
            raise ProviderConnectionError('Bad http response code')
        response = r.json()

        # if hits, return count
        if resulttype == 'hits':
            LOGGER.debug('Returning hits')
            feature_collection['numberMatched'] = response.get('@iot.count')
            return feature_collection

        # Query if values are less than expected
        v = [response, ] if identifier else response.get('value')
        hits_ = 1 if identifier else min(limit, response.get('@iot.count'))
        while len(v) < hits_:
            LOGGER.debug('Fetching next set of values')
            next_ = response.get('@iot.nextLink', None)
            if next_ is None:
                break
            else:
                with s.get(next_) as r:
                    response = r.json()
                    v.extend(response.get('value'))

        # End session
        s.close()

        # Properties filter & display
        keys = (() if not self.properties and not select_properties else
                set(self.properties) | set(select_properties))

        for entity in v[:hits_]:
            # Make feature
            id = entity.pop(self.id_field)
            id = f"'{id}'" if isinstance(id, str) else str(id)
            f = {
                'type': 'Feature', 'properties': {},
                'geometry': None, 'id': id
            }

            # Make geometry
            if not skip_geometry:
                f['geometry'] = self._geometry(entity)

            # Fill properties block
            try:
                f['properties'] = self._expand_properties(entity, keys)
            except KeyError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            feature_collection['features'].append(f)

        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        if identifier:
            return f
        else:
            return feature_collection
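
The params dict assembled above is standard SensorThings/OData paging; a hedged sketch of the equivalent raw request (the endpoint and the $expand value looked up from EXPAND are hypothetical):

    from requests import Session

    s = Session()
    # hypothetical: EXPAND['Things'] == 'Locations'
    r = s.get('https://example.org/v1.1/Things',
              params={'$expand': 'Locations', '$skip': '0', '$top': '10',
                      '$count': 'true'})
    response = r.json()  # {'@iot.count': ..., 'value': [...],
                         #  '@iot.nextLink': ...}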
Example #11
    def query(self,
              offset=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime_=None,
              properties=[],
              sortby=[],
              select_properties=[],
              skip_geometry=False,
              q=None,
              **kwargs):
        """
        query TinyDB document store

        :param offset: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)
        :param select_properties: list of property names
        :param skip_geometry: bool of whether to skip geometry (default False)
        :param q: full-text search term(s)

        :returns: dict of 0..n GeoJSON feature collection
        """

        Q = Query()
        LOGGER.debug('Query initiated: {}'.format(Q))

        QUERY = []

        feature_collection = {'type': 'FeatureCollection', 'features': []}

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            bbox_as_string = ','.join(str(s) for s in bbox)
            QUERY.append(
                "Q.properties.extent.spatial.bbox.test(bbox_intersects, '{}')".
                format(bbox_as_string))  # noqa

        if datetime_ is not None:
            LOGGER.debug('processing datetime parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            if '/' in datetime_:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = datetime_.split('/')

                if time_begin != '..':
                    QUERY.append(
                        "(Q.properties[self.time_field]>='{}')".format(
                            time_begin))  # noqa
                if time_end != '..':
                    QUERY.append(
                        "(Q.properties[self.time_field]<='{}')".format(
                            time_end))  # noqa

            else:  # time instant
                LOGGER.debug('detected time instant')
                QUERY.append("(Q.properties[self.time_field]=='{}')".format(
                    datetime_))  # noqa

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                QUERY.append("(Q.properties['{}']=='{}')".format(*prop))

        if q is not None:
            for t in q.split():
                QUERY.append(
                    "(Q.properties['_metadata-anytext'].search('{}', flags=re.IGNORECASE))"
                    .format(t))  # noqa

        QUERY_STRING = '&'.join(QUERY)
        LOGGER.debug('QUERY_STRING: {}'.format(QUERY_STRING))
        SEARCH_STRING = 'self.db.search({})'.format(QUERY_STRING)
        LOGGER.debug('SEARCH_STRING: {}'.format(SEARCH_STRING))

        LOGGER.debug('querying database')
        if len(QUERY) > 0:
            LOGGER.debug('running eval on {}'.format(SEARCH_STRING))
            results = eval(SEARCH_STRING)
        else:
            results = self.db.all()

        feature_collection['numberMatched'] = len(results)

        if resulttype == 'hits':
            return feature_collection

        for r in results:
            for e in self.excludes:
                del r['properties'][e]

        LOGGER.debug('Results found: {}'.format(len(results)))

        if sortby:
            LOGGER.debug('Sorting results')
            sort_reverse = sortby[0]['order'] == '-'

            results.sort(key=lambda k: k['properties'][sortby[0]['property']],
                         reverse=sort_reverse)

        feature_collection['features'] = results[offset:offset + limit]
        feature_collection['numberReturned'] = len(
            feature_collection['features'])

        return feature_collection
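
Since the clauses are joined with '&' (TinyDB's logical AND on Query objects), the eval'd SEARCH_STRING is an ordinary TinyDB search; for one property plus a time instant it is equivalent to (store path and values hypothetical):

    from tinydb import TinyDB, Query

    Q = Query()
    db = TinyDB('catalogue.tinydb')  # hypothetical store

    results = db.search((Q.properties['stn_id'] == '35')
                        & (Q.properties['datetime'] == '2001-10-30'))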
Example #12
    def query(self,
              range_subset=[],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json'):
        """
        Extract data from collection collection
        :param range_subset: list of bands
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output

        :returns: coverage data as dict of CoverageJSON or native format
        """

        bands = range_subset
        LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

        args = {'indexes': None}
        shapes = []

        if all([not bands, not subsets, not bbox, format_ != 'json']):
            LOGGER.debug('No parameters specified, returning native data')
            return read_data(self.data)

        if all([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                len(bbox) > 0
        ]):
            msg = 'bbox and subsetting by coordinates are exclusive'
            LOGGER.warning(msg)
            raise ProviderQueryError(msg)

        if len(bbox) > 0:
            minx, miny, maxx, maxy = bbox

            crs_src = CRS.from_epsg(4326)

            if 'crs' in self.options:
                crs_dest = CRS.from_string(self.options['crs'])
            else:
                crs_dest = self._data.crs

            if crs_src == crs_dest:
                LOGGER.debug('source bbox CRS and data CRS are the same')
                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx, miny],
                        [minx, maxy],
                        [maxx, maxy],
                        [maxx, miny],
                        [minx, miny],
                    ]]
                }]
            else:
                LOGGER.debug('source bbox CRS and data CRS are different')
                LOGGER.debug('reprojecting bbox into native coordinates')

                t = Transformer.from_crs(crs_src, crs_dest, always_xy=True)
                minx2, miny2 = t.transform(minx, miny)
                maxx2, maxy2 = t.transform(maxx, maxy)

                LOGGER.debug('Source coordinates: {}'.format(
                    [minx, miny, maxx, maxy]))
                LOGGER.debug('Destination coordinates: {}'.format(
                    [minx2, miny2, maxx2, maxy2]))

                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx2, miny2],
                        [minx2, maxy2],
                        [maxx2, maxy2],
                        [maxx2, miny2],
                        [minx2, miny2],
                    ]]
                }]

        elif (self._coverage_properties['x_axis_label'] in subsets
              and self._coverage_properties['y_axis_label'] in subsets):
            LOGGER.debug('Creating spatial subset')

            x = self._coverage_properties['x_axis_label']
            y = self._coverage_properties['y_axis_label']

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[[subsets[x][0], subsets[y][0]],
                                 [subsets[x][0], subsets[y][1]],
                                 [subsets[x][1], subsets[y][1]],
                                 [subsets[x][1], subsets[y][0]],
                                 [subsets[x][0], subsets[y][0]]]]
            }]

        if bands:
            LOGGER.debug('Selecting bands')
            args['indexes'] = list(map(int, bands))

        with rasterio.open(self.data) as _data:
            LOGGER.debug('Creating output coverage metadata')
            out_meta = _data.meta

            if self.options is not None:
                LOGGER.debug('Adding dataset options')
                for key, value in self.options.items():
                    out_meta[key] = value

            if shapes:  # spatial subset
                try:
                    LOGGER.debug('Clipping data with bbox')
                    out_image, out_transform = rasterio.mask.mask(
                        _data,
                        filled=False,
                        shapes=shapes,
                        crop=True,
                        indexes=args['indexes'])
                except ValueError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError(err)

                out_meta.update({
                    'driver': self.native_format,
                    'height': out_image.shape[1],
                    'width': out_image.shape[2],
                    'transform': out_transform
                })
            else:  # no spatial subset
                LOGGER.debug('Creating data in memory with band selection')
                out_image = _data.read(indexes=args['indexes'])

            if bbox:
                out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
            elif shapes:
                out_meta['bbox'] = [
                    subsets[x][0], subsets[y][0], subsets[x][1], subsets[y][1]
                ]
            else:
                out_meta['bbox'] = [
                    _data.bounds.left, _data.bounds.bottom, _data.bounds.right,
                    _data.bounds.top
                ]

            out_meta['units'] = _data.units

            LOGGER.debug('Serializing data in memory')
            with MemoryFile() as memfile:
                with memfile.open(**out_meta) as dest:
                    dest.write(out_image)

                if format_ == 'json':
                    LOGGER.debug('Creating output in CoverageJSON')
                    out_meta['bands'] = args['indexes']
                    return self.gen_covjson(out_meta, out_image)

                else:  # return data in native format
                    LOGGER.debug('Returning data in native format')
                    return memfile.read()
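
A hedged usage sketch: selecting a single band and clipping to a WGS84 bbox (band index and extent hypothetical); the bbox is reprojected inside query() when the data's CRS differs from EPSG:4326:

    coverage = provider.query(range_subset=[1],
                              bbox=[-75.0, 45.0, -74.0, 46.0],
                              format_='json')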
Example #13
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], time=None, properties=[]):
        """
        query Elasticsearch index

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param time: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)

        :returns: dict of 0..n GeoJSON features
        """

        query = {'query': {'bool': {'filter': []}}}
        filter_ = []

        feature_collection = {
            'type': 'FeatureCollection',
            'features': []
        }

        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            limit = 0

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox
            bbox_filter = {
                'geo_shape': {
                    'geometry': {
                        'shape': {
                            'type': 'envelope',
                            # ES envelope is [upper left, lower right]
                            'coordinates': [[minx, maxy], [maxx, miny]]
                        },
                        'relation': 'intersects'
                    }
                }
            }

            query['query']['bool']['filter'].append(bbox_filter)

        if time is not None:
            LOGGER.debug('processing time parameter')
            if self.time_field is None:
                LOGGER.error('time_field not enabled for collection')
                raise ProviderQueryError()

            time_field = 'properties.{}'.format(self.time_field)

            if '/' in time:  # envelope
                LOGGER.debug('detected time range')
                time_begin, time_end = time.split('/')

                range_ = {
                    'range': {
                        time_field: {
                            'gte': time_begin,
                            'lte': time_end,
                        }
                    }
                }

                filter_.append(range_)

            else:  # time instant
                LOGGER.debug('detected time instant')
                filter_.append({'match': {time_field: time}})

            LOGGER.debug(filter_)
            # filter_ is a list of clauses; extend (not append) keeps the
            # bool filter a flat list of clause objects
            query['query']['bool']['filter'].extend(filter_)

        if properties:
            LOGGER.debug('processing properties')
            for prop in properties:
                pf = {
                    'match': {
                        'properties.{}'.format(prop[0]): prop[1]
                    }
                }
                query['query']['bool']['filter'].append(pf)

        try:
            LOGGER.debug('querying Elasticsearch')
            if startindex + limit > 10000:
                # from/size pagination is capped at 10000 hits by default
                # (index.max_result_window), so scan beyond that window
                gen = helpers.scan(client=self.es, query=query,
                                   preserve_order=True,
                                   index=self.index_name)
                results = {'hits': {'total': limit, 'hits': []}}
                for i in range(startindex + limit):
                    try:
                        if i >= startindex:
                            results['hits']['hits'].append(next(gen))
                        else:
                            next(gen)
                    except StopIteration:
                        break
                results['hits']['total'] = {
                    'value': len(results['hits']['hits'])}
            else:
                results = self.es.search(index=self.index_name,
                                         from_=startindex, size=limit,
                                         body=query)
        except exceptions.ConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError()
        except exceptions.RequestError as err:
            LOGGER.error(err)
            raise ProviderQueryError()
        except exceptions.NotFoundError as err:
            LOGGER.error(err)
            raise ProviderQueryError()

        # under ES 7+, hits.total is an object with a 'value' key
        feature_collection['numberMatched'] = \
            results['hits']['total']['value']

        if resulttype == 'hits':
            return feature_collection

        feature_collection['numberReturned'] = len(results['hits']['hits'])

        LOGGER.debug('serializing features')
        for feature in results['hits']['hits']:
            id_ = feature['_source']['properties'][self.id_field]
            LOGGER.debug('serializing id {}'.format(id_))
            feature['_source']['ID'] = id_
            feature_collection['features'].append(feature['_source'])

        return feature_collection
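A hedged usage sketch for this query method; the host, index name, and field names below are illustrative assumptions:

from pygeoapi.provider.elasticsearch_ import ElasticsearchProvider

# assumed provider definition (host/index/fields are placeholders)
es_provider = ElasticsearchProvider({
    'name': 'Elasticsearch',
    'type': 'feature',
    'data': 'http://localhost:9200/ne_110m_populated_places',
    'id_field': 'geonameid',
    'time_field': 'date'  # required for time-based queries
})

fc = es_provider.query(startindex=0, limit=10,
                       bbox=[-80, 40, -70, 50],
                       time='2000-01-01/2010-12-31',
                       properties=[('nameascii', 'Ottawa')])
print(fc['numberMatched'], fc['numberReturned'])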
Example #14
    def query(self, range_subset=[1], subsets={'member': [1]}, bbox=[],
              datetime_=None, format_='json', **kwargs):
        """
        Extract data from the collection
        :param range_subset: variable
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output
        :returns: coverage data as dict of CoverageJSON or native format
        """

        nbits = 20

        if len(range_subset) > 1:
            err = 'Only one range-subset value is supported'
            LOGGER.error(err)
            raise ProviderQueryError(err)

        range_subset[0] = int(range_subset[0])
        try:
            var_list = self.var_list[range_subset[0] - 1]
        except IndexError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)

        self.get_file_list(var_list)
        self.member = subsets['member']

        args = {
            'indexes': None
        }
        shapes = []

        if all([self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                len(bbox) > 0]):
            msg = 'bbox and subsetting by coordinates are exclusive'
            LOGGER.warning(msg)
            raise ProviderQueryError(msg)

        if len(bbox) > 0:
            minx, miny, maxx, maxy = bbox

            LOGGER.debug('Source coordinates: {}'.format(
                [minx, miny, maxx, maxy]))

            # CanSIPS longitudes run from 0 to 360, so shift the x range
            minx += 180
            maxx += 180

            LOGGER.debug('Destination coordinates: {}'.format(
                [minx, miny, maxx, maxy]))

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx, miny],
                    [minx, maxy],
                    [maxx, maxy],
                    [maxx, miny],
                    [minx, miny],
                ]]
            }]

        elif (self._coverage_properties['x_axis_label'] in subsets and
                self._coverage_properties['y_axis_label'] in subsets):
            LOGGER.debug('Creating spatial subset')

            x = self._coverage_properties['x_axis_label']
            y = self._coverage_properties['y_axis_label']

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [subsets[x][0], subsets[y][0]],
                    [subsets[x][0], subsets[y][1]],
                    [subsets[x][1], subsets[y][1]],
                    [subsets[x][1], subsets[y][0]],
                    [subsets[x][0], subsets[y][0]]
                ]]
            }]

        bands = []

        if 'dim_reference_time' in subsets:
            year, month = subsets['dim_reference_time'][0].split('-')
        else:
            year, month = self.get_latest_dim_reference_time()

        # substitute the default date tokens in the template file path
        self.data = self.data.replace('2013', year)
        self.data = self.data.replace('04', month)

        if datetime_:
            bands = self.get_band_datetime(datetime_, year, month)
        else:
            num_months_1 = 1 + 12 * (self.member[0] - 1)
            num_months_2 = 12 + 12 * (self.member[0] - 1)
            bands = list(range(num_months_1, num_months_2 + 1))

        LOGGER.debug('Selecting bands')
        args['indexes'] = bands

        var = self.var_list[range_subset[0] - 1]
        self.data = self.data.replace(
            'cansips_forecast_raw_latlon2.5x2.5_TMP_TGL_2m', var)

        if not os.path.isfile(self.data):
            msg = 'No such file'
            LOGGER.error(msg)
            raise ProviderQueryError(msg)

        with rasterio.open(self.data) as self._data:
            LOGGER.debug('Creating output coverage metadata')

            out_meta = self._data.meta

            if self.options is not None:
                LOGGER.debug('Adding dataset options')
                for key, value in self.options.items():
                    out_meta[key] = value

            if shapes:  # spatial subset
                try:
                    LOGGER.debug('Clipping data with bbox')
                    out_image, out_transform = rasterio.mask.mask(
                        self._data,
                        filled=False,
                        shapes=shapes,
                        crop=True,
                        indexes=args['indexes'])
                except ValueError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError(err)

                out_meta.update({'driver': self.native_format,
                                 'height': out_image.shape[1],
                                 'width': out_image.shape[2],
                                 'transform': out_transform})
            else:  # no spatial subset
                LOGGER.debug('Creating data in memory with band selection')
                out_image = self._data.read(indexes=args['indexes'])

            if bbox:
                out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
            elif shapes:
                out_meta['bbox'] = [
                    subsets[x][0], subsets[y][0],
                    subsets[x][1], subsets[y][1]
                ]
            else:
                out_meta['bbox'] = [
                    self._data.bounds.left,
                    self._data.bounds.bottom,
                    self._data.bounds.right,
                    self._data.bounds.top
                ]

            out_meta['units'] = self._data.units

            self.filename = self.data.split('/')[-1]

            # CovJSON output does not support multiple bands yet
            # Only the first timestep is returned
            if format_ == 'json':

                if datetime_ and '/' in datetime_:
                    err = 'Date range not yet supported for CovJSON output'
                    LOGGER.error(err)
                    raise ProviderQueryError(err)
                else:
                    LOGGER.debug('Creating output in CoverageJSON')
                    out_meta['bands'] = [1]
                    return self.gen_covjson(out_meta, out_image)
            else:
                LOGGER.debug('Serializing data in memory')
                out_meta.update(count=len(args['indexes']))
                with MemoryFile() as memfile:
                    with memfile.open(**out_meta, nbits=nbits) as dest:
                        dest.write(out_image)

                    # return data in native format
                    LOGGER.debug('Returning data in native format')
                    return memfile.read()
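The band arithmetic in this example assumes each ensemble member occupies twelve consecutive monthly bands; a standalone worked example (not provider code):

# member 2 maps to bands 13..24 when no datetime_ filter is given
member = 2
num_months_1 = 1 + 12 * (member - 1)   # first band: 13
num_months_2 = 12 + 12 * (member - 1)  # last band: 24
bands = list(range(num_months_1, num_months_2 + 1))
assert bands[0] == 13 and bands[-1] == 24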
Example #15
    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], time=None, properties=[], sortby=[]):
        """
        Query PostGIS for all the content,
        e.g.: http://localhost:5000/collections/hotosm_bdi_waterways/items?
        limit=1&resulttype=results

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param time: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: GeoJSON FeaturesCollection
        """
        LOGGER.debug('Querying PostGIS')

        if resulttype == 'hits':

            with DatabaseConnection(self.conn_dic,
                                    self.table, context="hits") as db:
                cursor = db.conn.cursor(cursor_factory=RealDictCursor)
                sql_query = SQL("select count(*) as hits from {}").\
                    format(Identifier(self.table))
                try:
                    cursor.execute(sql_query)
                except Exception as err:
                    LOGGER.error('Error executing sql_query: {}'.format(
                        sql_query.as_string(cursor)))
                    LOGGER.error('Using public schema: {}'.format(db.schema))
                    LOGGER.error(err)
                    raise ProviderQueryError()

                hits = cursor.fetchone()["hits"]

            return self.__response_feature_hits(hits)

        end_index = startindex + limit

        with DatabaseConnection(self.conn_dic, self.table) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)
            sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
             SELECT {0},ST_AsGeoJSON({1}) FROM {2}").\
                format(db.columns,
                       Identifier('geom'),
                       Identifier(self.table))

            LOGGER.debug('SQL Query: {}'.format(sql_query))
            LOGGER.debug('Start Index: {}'.format(startindex))
            LOGGER.debug('End Index: {}'.format(end_index))
            try:
                cursor.execute(sql_query)
                # the first FETCH FORWARD skips the first startindex rows;
                # the second pulls the page (fetchall below only returns
                # rows from the most recent FETCH)
                for index in [startindex, limit]:
                    cursor.execute("fetch forward {} from geo_cursor"
                                   .format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error('Using public schema: {}'.format(db.schema))
                LOGGER.error(err)
                raise ProviderQueryError()

            self.dataDB = cursor.fetchall()
            feature_collection = self.__response_feature_collection()
            return feature_collection
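The server-side cursor above pages with two FETCH FORWARD statements: the first discards startindex rows, the second pulls the page. A standalone psycopg2 sketch of the same pattern (the DSN and table are illustrative assumptions):

import psycopg2
from psycopg2.extras import RealDictCursor

conn = psycopg2.connect('dbname=test user=postgres')  # assumed DSN
with conn.cursor(cursor_factory=RealDictCursor) as cursor:
    cursor.execute(
        'DECLARE "geo_cursor" CURSOR FOR '
        'SELECT osm_id, ST_AsGeoJSON(geom) FROM hotosm_bdi_waterways')
    cursor.execute('FETCH FORWARD 20 FROM geo_cursor')  # skip startindex
    cursor.execute('FETCH FORWARD 10 FROM geo_cursor')  # the page itself
    rows = cursor.fetchall()  # only the second FETCH's rows are returned
conn.rollback()  # the named cursor lives inside this transaction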
Example #16
    def gen_covjson(self, metadata, data, range_type):
        """
        Generate coverage as CoverageJSON representation

        :param metadata: coverage metadata
        :param data: xarray Dataset object
        :param range_type: list of data variable names to include

        :returns: dict of CoverageJSON representation
        """

        LOGGER.debug('Creating CoverageJSON domain')
        minx, miny, maxx, maxy = metadata['bbox']
        mint, maxt = metadata['time']

        try:
            tmp_min = data.coords[self.y_field].values[0]
        except IndexError:
            tmp_min = data.coords[self.y_field].values
        try:
            tmp_max = data.coords[self.y_field].values[-1]
        except IndexError:
            tmp_max = data.coords[self.y_field].values

        if tmp_min > tmp_max:
            LOGGER.debug('Reversing direction of {}'.format(self.y_field))
            miny = tmp_max
            maxy = tmp_min

        cj = {
            'type': 'Coverage',
            'domain': {
                'type': 'Domain',
                'domainType': 'Grid',
                'axes': {
                    'x': {
                        'start': minx,
                        'stop': maxx,
                        'num': metadata['width']
                    },
                    'y': {
                        'start': maxy,
                        'stop': miny,
                        'num': metadata['height']
                    },
                    self.time_field: {
                        'start': mint,
                        'stop': maxt,
                        'num': metadata['time_steps']
                    }
                },
                'referencing': [{
                    'coordinates': ['x', 'y'],
                    'system': {
                        'type': self._coverage_properties['crs_type'],
                        'id': self._coverage_properties['bbox_crs']
                    }
                }]
            },
            'parameters': {},
            'ranges': {}
        }

        for variable in range_type:
            pm = self._get_parameter_metadata(variable,
                                              self._data[variable].attrs)

            parameter = {
                'type': 'Parameter',
                'description': pm['description'],
                'unit': {
                    'symbol': pm['unit_label']
                },
                'observedProperty': {
                    'id': pm['observed_property_id'],
                    'label': {
                        'en': pm['observed_property_name']
                    }
                }
            }

            cj['parameters'][pm['id']] = parameter

        try:
            for key in cj['parameters'].keys():
                cj['ranges'][key] = {
                    'type': 'NdArray',
                    # index the dataset by this range key
                    'dataType': str(self._data[key].dtype),
                    'axisNames': [
                        'y', 'x', self._coverage_properties['time_axis_label']
                    ],
                    'shape': [
                        metadata['height'], metadata['width'],
                        metadata['time_steps']
                    ]
                }

                data = data.fillna(None)
                cj['ranges'][key]['values'] = \
                    data[key].values.flatten().tolist()
        except IndexError as err:
            LOGGER.warning(err)
            raise ProviderQueryError('Invalid query parameter')

        return cj
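For orientation, the returned CoverageJSON has roughly this shape for a 2x2 grid with a single time step (all values are illustrative):

example_cj = {
    'type': 'Coverage',
    'domain': {
        'type': 'Domain',
        'domainType': 'Grid',
        'axes': {
            'x': {'start': -75.0, 'stop': -74.0, 'num': 2},
            'y': {'start': 46.0, 'stop': 45.0, 'num': 2},
            'time': {'start': '2000-01-01', 'stop': '2000-01-01', 'num': 1}
        },
        'referencing': [{
            'coordinates': ['x', 'y'],
            'system': {'type': 'GeographicCRS', 'id': 'EPSG:4326'}
        }]
    },
    'parameters': {},  # one 'Parameter' object per variable, as built above
    'ranges': {
        'TMEAN': {  # assumed variable name
            'type': 'NdArray',
            'dataType': 'float32',
            'axisNames': ['y', 'x', 'time'],
            'shape': [2, 2, 1],
            'values': [1.0, 2.0, 3.0, 4.0]
        }
    }
}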
Example #17
    def query(self,
              startindex=0,
              limit=10,
              resulttype='results',
              bbox=[],
              datetime=None,
              properties=[],
              sortby=[]):
        """
        Query OGR source

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: dict of 0..n GeoJSON features
        """
        result = None
        try:
            if self.source_capabilities['paging']:
                self.source_helper.enable_paging(startindex, limit)

            layer = self._get_layer()

            if bbox:
                LOGGER.debug('processing bbox parameter')
                minx, miny, maxx, maxy = bbox

                wkt = "POLYGON (({minx} {miny},{minx} {maxy},{maxx} {maxy}," \
                      "{maxx} {miny},{minx} {miny}))".format(
                        minx=float(minx), miny=float(miny),
                        maxx=float(maxx), maxy=float(maxy))

                polygon = self.ogr.CreateGeometryFromWkt(wkt)
                if self.transform_in:
                    polygon.Transform(self.transform_in)

                layer.SetSpatialFilter(polygon)

                # layer.SetSpatialFilterRect(
                # float(minx), float(miny), float(maxx), float(maxy))

            if properties:
                LOGGER.debug('processing properties')

                attribute_filter = ' and '.join(
                    map(lambda x: '{} = \'{}\''.format(x[0], x[1]),
                        properties))

                LOGGER.debug(attribute_filter)

                layer.SetAttributeFilter(attribute_filter)

            # Make response based on resulttype specified
            if resulttype == 'hits':
                LOGGER.debug('hits only specified')
                result = self._response_feature_hits(layer)
            elif resulttype == 'results':
                LOGGER.debug('results specified')
                result = self._response_feature_collection(layer, limit)
            else:
                LOGGER.error('Invalid resulttype: %s' % resulttype)

        except RuntimeError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)
        except ProviderConnectionError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(err)
        except Exception as err:
            LOGGER.error(err)
            raise ProviderGenericError(err)

        finally:
            self._close()

        return result
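A hedged usage sketch for the OGR provider above; the GeoPackage path, layer, and field names are illustrative assumptions:

from pygeoapi.provider.ogr import OGRProvider

provider = OGRProvider({
    'name': 'OGR',
    'type': 'feature',
    'data': {
        'source_type': 'GPKG',
        'source': '/tmp/observations.gpkg',       # assumed file
        'source_capabilities': {'paging': True}
    },
    'id_field': 'fid',                            # assumed id column
    'layer': 'observations'                       # assumed layer name
})

fc = provider.query(startindex=0, limit=10,
                    bbox=[5.6, 52.1, 5.8, 52.3],
                    properties=[('station_type', 'rain gauge')])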
Example #18
    def query(self,
              range_subset=['TMEAN'],
              subsets={},
              bbox=[],
              datetime_=None,
              format_='json',
              **kwargs):
        """
        Extract data from the collection
        :param range_subset: variable
        :param subsets: dict of subset names with lists of ranges
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime_: temporal (datestamp or extent)
        :param format_: data format of output
        :returns: coverage data as dict of CoverageJSON or native format
        """

        args = {'indexes': None}
        shapes = []

        if all([
                self._coverage_properties['x_axis_label'] in subsets,
                self._coverage_properties['y_axis_label'] in subsets,
                len(bbox) > 0
        ]):
            msg = 'bbox and subsetting by coordinates are exclusive'
            LOGGER.warning(msg)
            raise ProviderQueryError(msg)

        if len(bbox) > 0:
            minx, miny, maxx, maxy = bbox

            crs_src = CRS.from_epsg(4326)
            crs_dest = self._data.crs

            if crs_src == crs_dest:
                LOGGER.debug('source bbox CRS and data CRS are the same')
                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx, miny],
                        [minx, maxy],
                        [maxx, maxy],
                        [maxx, miny],
                        [minx, miny],
                    ]]
                }]
            else:
                LOGGER.debug('source bbox CRS and data CRS are different')
                LOGGER.debug('reprojecting bbox into native coordinates')

                temp_geom_min = {"type": "Point", "coordinates": [minx, miny]}
                temp_geom_max = {"type": "Point", "coordinates": [maxx, maxy]}

                min_coord = rasterio.warp.transform_geom(
                    crs_src, crs_dest, temp_geom_min)
                minx2, miny2 = min_coord['coordinates']

                max_coord = rasterio.warp.transform_geom(
                    crs_src, crs_dest, temp_geom_max)
                maxx2, maxy2 = max_coord['coordinates']

                LOGGER.debug('Source coordinates: {}'.format(
                    [minx, miny, maxx, maxy]))
                LOGGER.debug('Destination coordinates: {}'.format(
                    [minx2, miny2, maxx2, maxy2]))

                shapes = [{
                    'type': 'Polygon',
                    'coordinates': [[
                        [minx2, miny2],
                        [minx2, maxy2],
                        [maxx2, maxy2],
                        [maxx2, miny2],
                        [minx2, miny2],
                    ]]
                }]

        elif (self._coverage_properties['x_axis_label'] in subsets
              and self._coverage_properties['y_axis_label'] in subsets):
            LOGGER.debug('Creating spatial subset')

            x = self._coverage_properties['x_axis_label']
            y = self._coverage_properties['y_axis_label']

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[[subsets[x][0], subsets[y][0]],
                                 [subsets[x][0], subsets[y][1]],
                                 [subsets[x][1], subsets[y][1]],
                                 [subsets[x][1], subsets[y][0]],
                                 [subsets[x][0], subsets[y][0]]]]
            }]

        if range_subset[0].upper() != 'TMEAN':
            var = range_subset[0].upper()
            try:
                self.data = self.get_file_list(var)[-1]
            except IndexError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

        if 'season' in subsets:
            seasonal = subsets['season']

            try:
                if len(seasonal) > 1:
                    msg = 'multiple seasons are not supported'
                    LOGGER.error(msg)
                    raise ProviderQueryError(msg)
                elif seasonal != ['DJF']:
                    season = str(seasonal[0])
                    self.data = self.data.replace('DJF', season)

            except Exception as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

        if datetime_ and 'trend' in self.data:
            msg = 'Datetime is not supported for trend'
            LOGGER.error(msg)
            raise ProviderQueryError(msg)

        date_file_list = False

        if datetime_:
            if '/' not in datetime_:
                if 'month' in self.data:
                    # parse.search extracts the current period from the
                    # file name, e.g. '_2020-01.tif' -> (2020, 1)
                    month = search('_{:d}-{:d}.tif', self.data)
                    period = '{}-{}'.format(month[0], str(month[1]).zfill(2))
                else:
                    period = search('_{:d}.tif', self.data)[0]
                self.data = self.data.replace(str(period), str(datetime_))
            else:
                date_file_list = self.get_file_list(range_subset[0].upper(),
                                                    datetime_)
                args['indexes'] = list(range(1, len(date_file_list) + 1))

        if not os.path.isfile(self.data):
            msg = 'No such file'
            LOGGER.error(msg)
            raise ProviderQueryError(msg)

        with rasterio.open(self.data) as _data:
            LOGGER.debug('Creating output coverage metadata')
            out_meta = _data.meta

            if self.options is not None:
                LOGGER.debug('Adding dataset options')
                for key, value in self.options.items():
                    out_meta[key] = value

            if shapes:  # spatial subset
                try:
                    LOGGER.debug('Clipping data with bbox')
                    out_image, out_transform = rasterio.mask.mask(
                        _data,
                        filled=False,
                        shapes=shapes,
                        crop=True,
                        indexes=None)
                except ValueError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError(err)

                out_meta.update({
                    'driver': self.native_format,
                    'height': out_image.shape[1],
                    'width': out_image.shape[2],
                    'transform': out_transform
                })
            else:  # no spatial subset
                LOGGER.debug('Creating data in memory with band selection')
                out_image = _data.read(indexes=[1])

            if bbox:
                out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
            elif shapes:
                out_meta['bbox'] = [
                    subsets[x][0], subsets[y][0], subsets[x][1], subsets[y][1]
                ]
            else:
                out_meta['bbox'] = [
                    _data.bounds.left, _data.bounds.bottom, _data.bounds.right,
                    _data.bounds.top
                ]

            out_meta['units'] = _data.units

            self.filename = self.data.split('/')[-1]
            if 'trend' not in self.data and datetime_:
                self.filename = self.filename.split('_')
                self.filename[-1] = '{}.tif'.format(datetime_.replace(
                    '/', '-'))
                self.filename = '_'.join(self.filename)

            # CovJSON output does not support multiple bands yet
            # Only the first timestep is returned
            if format_ == 'json':
                if date_file_list:
                    err = 'Date range not yet supported for CovJSON output'
                    LOGGER.error(err)
                    raise ProviderQueryError(err)
                else:
                    LOGGER.debug('Creating output in CoverageJSON')
                    out_meta['bands'] = args['indexes']
                    return self.gen_covjson(out_meta, out_image)
            else:
                if date_file_list:
                    LOGGER.debug('Serializing data in memory')
                    with MemoryFile() as memfile:

                        out_meta.update(count=len(date_file_list))

                        with memfile.open(**out_meta) as dest:
                            for band_id, layer in enumerate(
                                    date_file_list, start=1):
                                with rasterio.open(layer) as src1:
                                    if shapes:  # spatial subset
                                        try:
                                            LOGGER.debug('Clipping data')
                                            out_image, out_transform = \
                                                rasterio.mask.mask(
                                                    src1,
                                                    filled=False,
                                                    shapes=shapes,
                                                    crop=True,
                                                    indexes=1)
                                        except ValueError as err:
                                            LOGGER.error(err)
                                            raise ProviderQueryError(err)
                                    else:
                                        out_image = src1.read(indexes=1)
                                    dest.write_band(band_id, out_image)

                        # return data in native format
                        LOGGER.debug('Returning data in native format')
                        return memfile.read()
                else:
                    LOGGER.debug('Serializing data in memory')
                    with MemoryFile() as memfile:
                        with memfile.open(**out_meta) as dest:
                            dest.write(out_image)

                        # return data in native format
                        LOGGER.debug('Returning data in native format')
                        return memfile.read()
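Finally, a sketch of how this climate coverage query might be driven; ClimateProvider and the provider definition below are assumptions standing in for whatever class hosts the method above:

# all names and paths below are illustrative assumptions
provider = ClimateProvider({
    'name': 'climate',
    'type': 'coverage',
    'data': '/data/tmean_DJF_2020.tif',  # assumed template path
    'format': {'name': 'GTiff', 'mimetype': 'image/tiff'}
})

# a single period as CoverageJSON
cj = provider.query(range_subset=['TMEAN'], datetime_='2030', format_='json')

# a date range as a multi-band raster in the native format
raster_bytes = provider.query(range_subset=['TMEAN'],
                              subsets={'season': ['MAM']},
                              datetime_='2020/2040',
                              format_='GTiff')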