def get(self, identifier):
    """
    Get Feature by id

    :param identifier: feature id

    :returns: feature collection
    """

    result = None
    try:
        LOGGER.debug('Fetching identifier {}'.format(identifier))
        layer = self._get_layer()

        layer.SetAttributeFilter("{field} = '{id}'".format(
            field=self.id_field, id=identifier))

        ogr_feature = self._get_next_feature(layer)
        result = self._ogr_feature_to_json(ogr_feature)

    except RuntimeError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
    except ProviderConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(err)
    except Exception as err:
        LOGGER.error(err)
        raise ProviderGenericError(err)
    finally:
        self._close()

    return result
def get(self, identifier):
    """
    Query the provider for a specific feature id
    e.g. /collections/hotosm_bdi_waterways/items/13990765

    :param identifier: feature id

    :returns: GeoJSON FeatureCollection
    """

    LOGGER.debug('Get item from Postgis')
    with DatabaseConnection(self.conn_dic, self.table) as db:
        cursor = db.conn.cursor(cursor_factory=RealDictCursor)

        sql_query = SQL("select {0},ST_AsGeoJSON({1}) \
            from {2} WHERE {3}=%s").format(db.columns,
                                           Identifier('geom'),
                                           Identifier(self.table),
                                           Identifier(self.id_field))

        LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(db.conn)))
        LOGGER.debug('Identifier: {}'.format(identifier))
        try:
            cursor.execute(sql_query, (identifier, ))
        except Exception as err:
            LOGGER.error('Error executing sql_query: {}'.format(
                sql_query.as_string(cursor)))
            LOGGER.error('Using public schema: {}'.format(db.schema))
            LOGGER.error(err)
            raise ProviderQueryError()

        self.dataDB = cursor.fetchall()
        feature_collection = self.__response_feature_collection()
        return feature_collection
def get(self, identifier):
    """
    Query the provider for a specific feature id
    e.g. /collections/hotosm_bdi_waterways/items/13990765

    :param identifier: feature id

    :returns: GeoJSON Feature
    """

    LOGGER.debug('Get item from Postgis')
    with DatabaseConnection(self.conn_dic, self.table) as db:
        cursor = db.conn.cursor(cursor_factory=RealDictCursor)

        sql_query = SQL("select {},ST_AsGeoJSON({}) \
            from {} WHERE {}=%s").format(db.columns,
                                         Identifier(self.geom),
                                         Identifier(self.table),
                                         Identifier(self.id_field))

        LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(db.conn)))
        LOGGER.debug('Identifier: {}'.format(identifier))

        try:
            cursor.execute(sql_query, (identifier, ))
        except Exception as err:
            LOGGER.error('Error executing sql_query: {}'.format(
                sql_query.as_string(cursor)))
            LOGGER.error(err)
            raise ProviderQueryError()

        row_data = cursor.fetchall()[0]
        feature = self.__response_feature(row_data)
        feature['prev'] = self.get_previous(cursor, identifier)
        feature['next'] = self.get_next(cursor, identifier)
        return feature
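# Hypothetical usage of the item query above (class name, table and id are
# illustrative, not taken from the source):
#
#   provider = PostgreSQLProvider(provider_def)
#   feature = provider.get(13990765)
#   feature['prev'], feature['next']  # neighbouring ids for paging links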
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
    """

    BaseProvider.__init__(self, provider_def)

    url_tokens = self.data.split('/')

    LOGGER.debug('Setting Elasticsearch properties')
    self.es_host = url_tokens[2]
    self.index_name = url_tokens[-2]
    self.type_name = url_tokens[-1]
    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))
    LOGGER.debug('type: {}'.format(self.type_name))

    LOGGER.debug('Connecting to Elasticsearch')
    self.es = Elasticsearch(self.es_host)
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def get_months_number(self, possible_time, year, month, datetime_):
    """
    Get the difference in number of months between
    dim_reference_time (year, month) and datetime_

    :param possible_time: list of possible time from dim_reference_time
    :param year: year from dim_reference_time
    :param month: month from dim_reference_time
    :param datetime_: forecast time from the query

    :returns: number of months as integer
    """

    if datetime_ not in possible_time:
        err = 'Not a valid datetime'
        LOGGER.error(err)
        raise ProviderQueryError(err)
    else:
        # from dim_ref_time
        begin_date = datetime(int(year), int(month), 1)
        # from datetime_
        year2, month2 = datetime_.split('-')
        end_date = datetime(int(year2), int(month2), 1)
        num_months = (end_date.year - begin_date.year) \
            * 12 + (end_date.month - begin_date.month)
        return num_months
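# Worked example of the month arithmetic above (illustrative values only):
# with dim_reference_time 2021-01 and datetime_ '2021-04', the offset is
# (2021 - 2021) * 12 + (4 - 1) = 3 months.
from datetime import datetime

begin_date = datetime(2021, 1, 1)  # from dim_reference_time
end_date = datetime(2021, 4, 1)    # from datetime_
assert (end_date.year - begin_date.year) * 12 \
    + (end_date.month - begin_date.month) == 3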
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.elasticsearch_.ElasticsearchProvider
    """

    super().__init__(provider_def)

    self.es_host, self.index_name = self.data.rsplit('/', 1)

    LOGGER.debug('Setting Elasticsearch properties')
    self.is_gdal = False

    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))

    self.type_name = 'FeatureCollection'
    self.url_parsed = urlparse(self.es_host)

    LOGGER.debug('Connecting to Elasticsearch')
    if self.url_parsed.port is None:  # proxy to default HTTP(S) port
        if self.url_parsed.scheme == 'https':
            port = 443
        else:
            port = 80
    else:  # was set explicitly
        port = self.url_parsed.port

    url_settings = {
        'scheme': self.url_parsed.scheme,
        'host': self.url_parsed.hostname,
        'port': port
    }

    if self.url_parsed.path:
        url_settings['url_prefix'] = self.url_parsed.path

    LOGGER.debug('URL settings: {}'.format(url_settings))

    self.es = Elasticsearch([url_settings])
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Determining ES version')
    v = self.es.info()['version']['number'][:3]
    if float(v) < 7:
        msg = 'only ES 7+ supported'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def get_band_datetime(self, datetime_, year, month):
    """
    Generate list of bands from dim_reference_time and datetime_

    :param datetime_: forecast time from the query
    :param year: year from dim_reference_time
    :param month: month from dim_reference_time

    :returns: list of bands
    """

    # making a list of the datetime for the given dim_ref_time
    possible_time = []
    for i in range(1, 13):
        possible_time.append(self.get_time_from_dim(
            '{}-{}'.format(year, month), i))

    if '/' not in datetime_:
        if datetime_ not in possible_time:
            err = 'Not a valid datetime'
            LOGGER.error(err)
            raise ProviderQueryError(err)
        else:
            num_months = self.get_months_number(
                possible_time, year, month, datetime_)
            return [num_months + 12 * (self.member[0] - 1)]
    else:
        datetime1, datetime2 = datetime_.split('/')
        if datetime1 not in possible_time or \
                datetime2 not in possible_time:
            err = 'Not a valid datetime'
            LOGGER.error(err)
            raise ProviderQueryError(err)

        num_months_1 = self.get_months_number(
            possible_time, year, month, datetime1)
        num_months_2 = self.get_months_number(
            possible_time, year, month, datetime2)
        num_months_1 = num_months_1 + 12 * (self.member[0] - 1)
        num_months_2 = num_months_2 + 12 * (self.member[0] - 1)
        return list(range(num_months_1, num_months_2 + 1))
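# Illustrative check of the band arithmetic above (hypothetical values): for
# member [2], raw month offsets 3 through 5 shift by 12 * (2 - 1) bands, so
# a datetime range maps to the contiguous band list [15, 16, 17].
member = [2]
num_months_1 = 3 + 12 * (member[0] - 1)
num_months_2 = 5 + 12 * (member[0] - 1)
assert list(range(num_months_1, num_months_2 + 1)) == [15, 16, 17]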
def esdoc2geojson(self, doc):
    """
    Generate GeoJSON `dict` from ES document

    :param doc: `dict` of ES document

    :returns: GeoJSON `dict`
    """

    feature_ = {}
    feature_thinned = {}

    if 'properties' not in doc['_source']:
        LOGGER.debug('Looks like a GDAL ES 7 document')
        id_ = doc['_source'][self.id_field]
        if 'type' not in doc['_source']:
            feature_['id'] = id_
            feature_['type'] = 'Feature'
        feature_['geometry'] = doc['_source'].get('geometry')
        feature_['properties'] = {}
        for key, value in doc['_source'].items():
            if key == 'geometry':
                continue
            feature_['properties'][key] = value
    else:
        LOGGER.debug('Looks like true GeoJSON document')
        feature_ = doc['_source']
        id_ = doc['_source']['properties'][self.id_field]
        feature_['id'] = id_
        feature_['geometry'] = doc['_source'].get('geometry')

    if self.properties or self.select_properties:
        LOGGER.debug('Filtering properties')
        all_properties = self.get_properties()

        feature_thinned = {
            'id': id_,
            'type': feature_['type'],
            'geometry': feature_.get('geometry'),
            'properties': OrderedDict()
        }
        for p in all_properties:
            try:
                feature_thinned['properties'][p] = feature_['properties'][p]  # noqa
            except KeyError as err:
                LOGGER.error(err)
                raise ProviderQueryError()

    if feature_thinned:
        return feature_thinned
    else:
        return feature_
def _request_json(self, url, params):
    """
    Performs a GET request on `url` and returns the JSON response.
    """

    response = None
    try:
        response = requests.get(url, params)
        response.raise_for_status()
    except requests.HTTPError as err:
        LOGGER.error(err)
        raise ProviderQueryError(
            f'failed to query {response.url if response else url}')
    except requests.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(
            f'failed to connect to {response.url if response else url}')
    return self._parse_json(response.text)
def _make_filter(self, properties, bbox=[], datetime_=None):
    """
    Private function: Make STA filter from query properties

    :param properties: list of tuples (name, value)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)

    :returns: STA $filter string of properties
    """

    ret = []
    for (name, value) in properties:
        if name in ENTITY:
            ret.append(f'{name}/@iot.id eq {value}')
        else:
            ret.append(f'{name} eq {value}')

    if bbox:
        minx, miny, maxx, maxy = bbox
        bbox_ = f'POLYGON (({minx} {miny}, {maxx} {miny}, ' \
                f'{maxx} {maxy}, {minx} {maxy}, {minx} {miny}))'
        if self.entity == 'Things':
            loc = 'Locations/location'
        elif self.entity == 'Datastreams':
            loc = 'Thing/Locations/location'
        elif self.entity == 'Observations':
            loc = 'FeatureOfInterest/feature'
        ret.append(f"st_within({loc}, geography'{bbox_}')")

    if datetime_ is not None:
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        if '/' in datetime_:
            time_start, time_end = datetime_.split('/')
            if time_start != '..':
                ret.append(f'{self.time_field} ge {time_start}')
            if time_end != '..':
                ret.append(f'{self.time_field} le {time_end}')
        else:
            ret.append(f'{self.time_field} eq {datetime_}')

    return ' and '.join(ret)
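# A standalone sketch of the $filter string built above, assuming entity
# 'Things', time_field 'resultTime' and illustrative values:
parts = [
    'Datastream/@iot.id eq 42',
    "st_within(Locations/location, geography'POLYGON ((-180 -90, "
    "180 -90, 180 90, -180 90, -180 -90))')",
    'resultTime ge 2020-01-01',
    'resultTime le 2020-12-31',
]
sta_filter = ' and '.join(parts)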
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
    """

    BaseProvider.__init__(self, provider_def)

    url_tokens = self.data.split('/')

    LOGGER.debug('Setting Elasticsearch properties')
    self.es_host = url_tokens[2]
    self.index_name = url_tokens[-1]
    self.is_gdal = False

    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))

    LOGGER.debug('Connecting to Elasticsearch')
    self.es = Elasticsearch(self.es_host)
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Determining ES version')
    v = self.es.info()['version']['number'][:3]
    if float(v) < 7:
        msg = 'only ES 7+ supported'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def _request_json(self, url, params):
    """
    Performs a GET request on `url` and returns the JSON response.
    """

    response = None
    if 'lang' not in params and self.locale:
        # Add language parameter, if missing (geoCore wants ISO 639-1 codes)
        LOGGER.debug(
            f"Requesting geoCore response in '{self.locale.language}'")
        params['lang'] = self.locale.language
    try:
        response = requests.get(url, params)
        response.raise_for_status()
    except requests.HTTPError as err:
        LOGGER.error(err)
        raise ProviderQueryError(
            f'failed to query {response.url if response else url}')
    except requests.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(
            f'failed to connect to {response.url if response else url}')
    return self._parse_json(response.text)
def get_fields(self):
    """
    Get fields of STA Provider

    :returns: dict of fields
    """

    if not self.fields:
        p = {'$expand': EXPAND[self.entity], '$top': 1}
        r = get(self._url, params=p)
        try:
            results = r.json()['value'][0]
        except JSONDecodeError as err:
            LOGGER.error('Entity {} error: {}'.format(self.entity, err))
            LOGGER.error('Bad url response at {}'.format(r.url))
            raise ProviderQueryError(err)

        for (n, v) in results.items():
            if isinstance(v, (int, float)) or \
                    (isinstance(v, (dict, list)) and n in ENTITY):
                self.fields[n] = {'type': 'number'}
            elif isinstance(v, str):
                self.fields[n] = {'type': 'string'}

    return self.fields
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False, q=None,
          filterq=None, **kwargs):
    """
    query Elasticsearch index

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)
    :param filterq: filter object

    :returns: dict of 0..n GeoJSON features
    """

    query = {'track_total_hits': True, 'query': {'bool': {'filter': []}}}
    filter_ = []

    feature_collection = {'type': 'FeatureCollection', 'features': []}

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        minx, miny, maxx, maxy = bbox
        bbox_filter = {
            'geo_shape': {
                'geometry': {
                    'shape': {
                        'type': 'envelope',
                        'coordinates': [[minx, maxy], [maxx, miny]]
                    },
                    'relation': 'intersects'
                }
            }
        }

        query['query']['bool']['filter'].append(bbox_filter)

    if datetime_ is not None:
        LOGGER.debug('processing datetime parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        time_field = self.mask_prop(self.time_field)

        if '/' in datetime_:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = datetime_.split('/')

            range_ = {
                'range': {
                    time_field: {
                        'gte': time_begin,
                        'lte': time_end
                    }
                }
            }
            if time_begin == '..':
                range_['range'][time_field].pop('gte')
            elif time_end == '..':
                range_['range'][time_field].pop('lte')

            filter_.append(range_)

        else:  # time instant
            LOGGER.debug('detected time instant')
            filter_.append({'match': {time_field: datetime_}})

        LOGGER.debug(filter_)
        query['query']['bool']['filter'].append(*filter_)

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            prop_name = self.mask_prop(prop[0])
            pf = {'match': {prop_name: {'query': prop[1]}}}
            query['query']['bool']['filter'].append(pf)

            if '|' not in prop[1]:
                pf['match'][prop_name]['minimum_should_match'] = '100%'

    if sortby:
        LOGGER.debug('processing sortby')
        query['sort'] = []
        for sort in sortby:
            LOGGER.debug('processing sort object: {}'.format(sort))

            sp = sort['property']

            if self.fields[sp]['type'] == 'string':
                LOGGER.debug('setting ES .raw on property')
                sort_property = '{}.raw'.format(self.mask_prop(sp))
            else:
                sort_property = self.mask_prop(sp)

            sort_order = 'asc'
            if sort['order'] == '-':
                sort_order = 'desc'

            sort_ = {sort_property: {'order': sort_order}}
            query['sort'].append(sort_)

    if q is not None:
        LOGGER.debug('Adding free-text search')
        query['query']['bool']['must'] = {'query_string': {'query': q}}

        query['_source'] = {
            'excludes': [
                'properties._metadata-payload',
                'properties._metadata-schema',
                'properties._metadata-format'
            ]
        }

    if self.properties or select_properties:
        LOGGER.debug('including specified fields: {}'.format(
            self.properties))
        query['_source'] = {
            'includes': list(map(self.mask_prop,
                                 set(self.properties) |
                                 set(select_properties)))
        }
        query['_source']['includes'].append(self.mask_prop(self.id_field))
        query['_source']['includes'].append('type')
        query['_source']['includes'].append('geometry')

    if skip_geometry:
        LOGGER.debug('excluding geometry')
        try:
            query['_source']['excludes'] = ['geometry']
        except KeyError:
            query['_source'] = {'excludes': ['geometry']}

    try:
        LOGGER.debug('querying Elasticsearch')
        if filterq:
            LOGGER.debug('adding cql object: {}'.format(filterq.json()))
            query = update_query(input_query=query, cql=filterq)
        LOGGER.debug(json.dumps(query, indent=4))

        LOGGER.debug('Setting ES paging zero-based')
        if startindex > 0:
            startindex2 = startindex - 1
        else:
            startindex2 = startindex

        if startindex2 + limit > 10000:
            gen = helpers.scan(client=self.es, query=query,
                               preserve_order=True,
                               index=self.index_name)
            results = {'hits': {'total': limit, 'hits': []}}
            for i in range(startindex2 + limit):
                try:
                    if i >= startindex2:
                        results['hits']['hits'].append(next(gen))
                    else:
                        next(gen)
                except StopIteration:
                    break

            results['hits']['total'] = \
                len(results['hits']['hits']) + startindex2
        else:
            results = self.es.search(index=self.index_name,
                                     from_=startindex2, size=limit,
                                     body=query)
            results['hits']['total'] = results['hits']['total']['value']

    except exceptions.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError()
    except exceptions.RequestError as err:
        LOGGER.error(err)
        raise ProviderQueryError()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError()

    feature_collection['numberMatched'] = results['hits']['total']

    if resulttype == 'hits':
        return feature_collection

    feature_collection['numberReturned'] = len(results['hits']['hits'])

    LOGGER.debug('serializing features')
    for feature in results['hits']['hits']:
        feature_ = self.esdoc2geojson(feature)
        feature_collection['features'].append(feature_)

    return feature_collection
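# Shape of the Elasticsearch body assembled above for a bbox plus time-range
# query (illustrative values; field names assume the default 'properties.'
# masking applied by mask_prop):
example_query = {
    'track_total_hits': True,
    'query': {
        'bool': {
            'filter': [
                {'geo_shape': {'geometry': {
                    'shape': {'type': 'envelope',
                              'coordinates': [[-75, 46], [-73, 44]]},
                    'relation': 'intersects'}}},
                {'range': {'properties.datetime': {'gte': '2020-01-01',
                                                   'lte': '2020-12-31'}}}
            ]
        }
    }
}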
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], time=None, properties=[]):
    """
    query Elasticsearch index

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param time: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)

    :returns: dict of 0..n GeoJSON features
    """

    query = {'query': {'bool': {'filter': []}}}
    filter_ = []

    feature_collection = {
        'type': 'FeatureCollection',
        'features': []
    }

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        minx, miny, maxx, maxy = bbox
        bbox_filter = {
            'geo_shape': {
                'geometry': {
                    'shape': {
                        'type': 'envelope',
                        # envelope coordinates are [upper-left, lower-right]
                        'coordinates': [[minx, maxy], [maxx, miny]]
                    },
                    'relation': 'intersects'
                }
            }
        }

        query['query']['bool']['filter'].append(bbox_filter)

    if time is not None:
        LOGGER.debug('processing time parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        time_field = 'properties.{}'.format(self.time_field)

        if '/' in time:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = time.split('/')

            range_ = {
                'range': {
                    time_field: {
                        'gte': time_begin,
                        'lte': time_end
                    }
                }
            }
            filter_.append(range_)

        else:  # time instant
            LOGGER.debug('detected time instant')
            filter_.append({'match': {time_field: time}})

        LOGGER.debug(filter_)
        # filter_ holds individual clause dicts; extend keeps the
        # bool filter array flat
        query['query']['bool']['filter'].extend(filter_)

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            pf = {
                'match': {
                    'properties.{}'.format(prop[0]): prop[1]
                }
            }
            query['query']['bool']['filter'].append(pf)

    try:
        LOGGER.debug('querying Elasticsearch')
        if startindex + limit > 10000:
            gen = helpers.scan(client=self.es, query=query,
                               preserve_order=True,
                               index=self.index_name)
            results = {'hits': {'total': limit, 'hits': []}}
            for i in range(startindex + limit):
                try:
                    if i >= startindex:
                        results['hits']['hits'].append(next(gen))
                    else:
                        next(gen)
                except StopIteration:
                    break

            results['hits']['total'] = len(results['hits']['hits'])
        else:
            results = self.es.search(index=self.index_name,
                                     from_=startindex, size=limit,
                                     body=query)
    except exceptions.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError()
    except exceptions.RequestError as err:
        LOGGER.error(err)
        raise ProviderQueryError()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError()

    feature_collection['numberMatched'] = results['hits']['total']

    if resulttype == 'hits':
        return feature_collection

    feature_collection['numberReturned'] = len(results['hits']['hits'])

    LOGGER.debug('serializing features')
    for feature in results['hits']['hits']:
        id_ = feature['_source']['properties'][self.id_field]
        LOGGER.debug('serializing id {}'.format(id_))
        feature['_source']['ID'] = id_
        feature_collection['features'].append(feature['_source'])

    return feature_collection
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime=None, properties=[], sortby=[]):
    """
    query Elasticsearch index

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)

    :returns: dict of 0..n GeoJSON features
    """

    query = {'query': {'bool': {'filter': []}}}
    filter_ = []

    feature_collection = {'type': 'FeatureCollection', 'features': []}

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        minx, miny, maxx, maxy = bbox
        bbox_filter = {
            'geo_shape': {
                'geometry': {
                    'shape': {
                        'type': 'envelope',
                        # envelope coordinates are [upper-left, lower-right]
                        'coordinates': [[minx, maxy], [maxx, miny]]
                    },
                    'relation': 'intersects'
                }
            }
        }

        query['query']['bool']['filter'].append(bbox_filter)

    if datetime is not None:
        LOGGER.debug('processing datetime parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        time_field = 'properties.{}'.format(self.time_field)

        if '/' in datetime:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = datetime.split('/')

            range_ = {
                'range': {
                    time_field: {
                        'gte': time_begin,
                        'lte': time_end
                    }
                }
            }
            if time_begin == '..':
                range_['range'][time_field].pop('gte')
            elif time_end == '..':
                range_['range'][time_field].pop('lte')

            filter_.append(range_)

        else:  # time instant
            LOGGER.debug('detected time instant')
            filter_.append({'match': {time_field: datetime}})

        LOGGER.debug(filter_)
        # filter_ holds individual clause dicts; extend keeps the
        # bool filter array flat
        query['query']['bool']['filter'].extend(filter_)

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            pf = {'match': {'properties.{}'.format(prop[0]): prop[1]}}
            query['query']['bool']['filter'].append(pf)

    if sortby:
        LOGGER.debug('processing sortby')
        query['sort'] = []
        for sort in sortby:
            LOGGER.debug('processing sort object: {}'.format(sort))

            sp = sort['property']

            if self.fields[sp]['type'] == 'string':
                LOGGER.debug('setting ES .raw on property')
                sort_property = 'properties.{}.raw'.format(sp)
            else:
                sort_property = 'properties.{}'.format(sp)

            sort_order = 'asc'
            if sort['order'] == 'D':
                sort_order = 'desc'

            sort_ = {sort_property: {'order': sort_order}}
            query['sort'].append(sort_)

    if self.properties:
        LOGGER.debug('including specified fields: {}'.format(
            self.properties))
        query['_source'] = {
            'includes': list(map('properties.{}'.format, self.properties))
        }
        query['_source']['includes'].append('properties.{}'.format(
            self.id_field))
        query['_source']['includes'].append('type')
        query['_source']['includes'].append('geometry')

    try:
        LOGGER.debug('querying Elasticsearch')
        if startindex + limit > 10000:
            gen = helpers.scan(client=self.es, query=query,
                               preserve_order=True,
                               index=self.index_name)
            results = {'hits': {'total': limit, 'hits': []}}
            for i in range(startindex + limit):
                try:
                    if i >= startindex:
                        results['hits']['hits'].append(next(gen))
                    else:
                        next(gen)
                except StopIteration:
                    break

            results['hits']['total'] = \
                len(results['hits']['hits']) + startindex
        else:
            results = self.es.search(index=self.index_name,
                                     from_=startindex, size=limit,
                                     body=query)
    except exceptions.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError()
    except exceptions.RequestError as err:
        LOGGER.error(err)
        raise ProviderQueryError()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError()

    feature_collection['numberMatched'] = results['hits']['total']

    if resulttype == 'hits':
        return feature_collection

    feature_collection['numberReturned'] = len(results['hits']['hits'])

    LOGGER.debug('serializing features')
    for feature in results['hits']['hits']:
        id_ = feature['_source']['properties'][self.id_field]
        LOGGER.debug('serializing id {}'.format(id_))
        feature['_source']['id'] = id_
        if self.properties:
            feature_thinned = {
                'id': feature['_source']['properties'][self.id_field],
                'type': feature['_source']['type'],
                'geometry': feature['_source']['geometry'],
                'properties': OrderedDict()
            }
            for p in self.properties:
                try:
                    feature_thinned['properties'][p] = \
                        feature['_source']['properties'][p]
                except KeyError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError()

            feature_collection['features'].append(feature_thinned)
        else:
            feature_collection['features'].append(feature['_source'])

    return feature_collection
def gen_covjson(self, metadata, data):
    """
    Generate coverage as CoverageJSON representation

    :param metadata: coverage metadata
    :param data: rasterio DatasetReader object

    :returns: dict of CoverageJSON representation
    """

    LOGGER.debug('Creating CoverageJSON domain')
    minx, miny, maxx, maxy = metadata['bbox']

    cj = {
        'type': 'Coverage',
        'domain': {
            'type': 'Domain',
            'domainType': 'Grid',
            'axes': {
                'x': {
                    'start': minx,
                    'stop': maxx,
                    'num': metadata['width']
                },
                'y': {
                    'start': maxy,
                    'stop': miny,
                    'num': metadata['height']
                }
            },
            'referencing': [{
                'coordinates': ['x', 'y'],
                'system': {
                    'type': self._coverage_properties['crs_type'],
                    'id': self._coverage_properties['bbox_crs']
                }
            }]
        },
        'parameters': {},
        'ranges': {}
    }

    if metadata['bands'] is None:  # all bands
        bands_select = range(1, len(self._data.dtypes) + 1)
    else:
        bands_select = metadata['bands']

    LOGGER.debug('bands selected: {}'.format(bands_select))
    for bs in bands_select:
        pm = _get_parameter_metadata(
            self._data.profile['driver'], self._data.tags(bs))

        parameter = {
            'type': 'Parameter',
            'description': pm['description'],
            'unit': {
                'symbol': pm['unit_label']
            },
            'observedProperty': {
                'id': pm['observed_property_id'],
                'label': {
                    'en': pm['observed_property_name']
                }
            }
        }

        cj['parameters'][pm['id']] = parameter

    try:
        for key in cj['parameters'].keys():
            cj['ranges'][key] = {
                'type': 'NdArray',
                # 'dataType': metadata.dtypes[0],
                'dataType': 'float',
                'axisNames': ['y', 'x'],
                'shape': [metadata['height'], metadata['width']]
            }
            # TODO: deal with multi-band value output
            cj['ranges'][key]['values'] = data.flatten().tolist()
    except IndexError as err:
        LOGGER.warning(err)
        raise ProviderQueryError('Invalid query parameter')

    return cj
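# Shape of the CoverageJSON domain the method above produces for a 10x5 grid
# over bbox [-75, 44, -73, 46] (illustrative values; note the y axis runs
# from maxy down to miny):
example_domain_axes = {
    'x': {'start': -75, 'stop': -73, 'num': 10},
    'y': {'start': 46, 'stop': 44, 'num': 5}
}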
def query(self, range_subset=[], subsets={}, bbox=[], datetime_=None,
          format_='json'):
    """
    Extract data from collection

    :param range_subset: list of bands
    :param subsets: dict of subset names with lists of ranges
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """

    bands = range_subset
    LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

    args = {'indexes': None}
    shapes = []

    if all([not bands, not subsets, not bbox, format_ != 'json']):
        LOGGER.debug('No parameters specified, returning native data')
        return read_data(self.data)

    if all([self._coverage_properties['x_axis_label'] in subsets,
            self._coverage_properties['y_axis_label'] in subsets,
            len(bbox) > 0]):
        msg = 'bbox and subsetting by coordinates are exclusive'
        LOGGER.warning(msg)
        raise ProviderQueryError(msg)

    if len(bbox) > 0:
        minx, miny, maxx, maxy = bbox

        crs_src = CRS.from_epsg(4326)

        if 'crs' in self.options:
            crs_dest = CRS.from_string(self.options['crs'])
        else:
            crs_dest = self._data.crs

        if crs_src == crs_dest:
            LOGGER.debug('source bbox CRS and data CRS are the same')
            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx, miny],
                    [minx, maxy],
                    [maxx, maxy],
                    [maxx, miny],
                    [minx, miny],
                ]]
            }]
        else:
            LOGGER.debug('source bbox CRS and data CRS are different')
            LOGGER.debug('reprojecting bbox into native coordinates')

            t = Transformer.from_crs(crs_src, crs_dest, always_xy=True)
            minx2, miny2 = t.transform(minx, miny)
            maxx2, maxy2 = t.transform(maxx, maxy)

            LOGGER.debug('Source coordinates: {}'.format(
                [minx, miny, maxx, maxy]))
            LOGGER.debug('Destination coordinates: {}'.format(
                [minx2, miny2, maxx2, maxy2]))

            shapes = [{
                'type': 'Polygon',
                'coordinates': [[
                    [minx2, miny2],
                    [minx2, maxy2],
                    [maxx2, maxy2],
                    [maxx2, miny2],
                    [minx2, miny2],
                ]]
            }]

    elif (self._coverage_properties['x_axis_label'] in subsets and
            self._coverage_properties['y_axis_label'] in subsets):
        LOGGER.debug('Creating spatial subset')

        x = self._coverage_properties['x_axis_label']
        y = self._coverage_properties['y_axis_label']

        shapes = [{
            'type': 'Polygon',
            'coordinates': [[
                [subsets[x][0], subsets[y][0]],
                [subsets[x][0], subsets[y][1]],
                [subsets[x][1], subsets[y][1]],
                [subsets[x][1], subsets[y][0]],
                [subsets[x][0], subsets[y][0]]
            ]]
        }]

    if bands:
        LOGGER.debug('Selecting bands')
        args['indexes'] = list(map(int, bands))

    with rasterio.open(self.data) as _data:
        LOGGER.debug('Creating output coverage metadata')
        out_meta = _data.meta

        if self.options is not None:
            LOGGER.debug('Adding dataset options')
            for key, value in self.options.items():
                out_meta[key] = value

        if shapes:  # spatial subset
            try:
                LOGGER.debug('Clipping data with bbox')
                out_image, out_transform = rasterio.mask.mask(
                    _data,
                    filled=False,
                    shapes=shapes,
                    crop=True,
                    indexes=args['indexes'])
            except ValueError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            out_meta.update({'driver': self.native_format,
                             'height': out_image.shape[1],
                             'width': out_image.shape[2],
                             'transform': out_transform})
        else:  # no spatial subset
            LOGGER.debug('Creating data in memory with band selection')
            out_image = _data.read(indexes=args['indexes'])

        if bbox:
            out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
        elif shapes:
            out_meta['bbox'] = [
                subsets[x][0], subsets[y][0],
                subsets[x][1], subsets[y][1]
            ]
        else:
            out_meta['bbox'] = [
                _data.bounds.left,
                _data.bounds.bottom,
                _data.bounds.right,
                _data.bounds.top
            ]

        out_meta['units'] = _data.units

        LOGGER.debug('Serializing data in memory')
        with MemoryFile() as memfile:
            with memfile.open(**out_meta) as dest:
                dest.write(out_image)

            if format_ == 'json':
                LOGGER.debug('Creating output in CoverageJSON')
                out_meta['bands'] = args['indexes']
                return self.gen_covjson(out_meta, out_image)
            else:  # return data in native format
                LOGGER.debug('Returning data in native format')
                return memfile.read()
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False):
    """
    Query OGR source

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)

    :returns: dict of 0..n GeoJSON features
    """

    result = None
    try:
        if self.source_capabilities['paging']:
            self.source_helper.enable_paging(startindex, limit)

        layer = self._get_layer()

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox

            wkt = "POLYGON (({minx} {miny},{minx} {maxy},{maxx} {maxy}," \
                  "{maxx} {miny},{minx} {miny}))".format(
                      minx=float(minx), miny=float(miny),
                      maxx=float(maxx), maxy=float(maxy))

            polygon = self.ogr.CreateGeometryFromWkt(wkt)
            if self.transform_in:
                polygon.Transform(self.transform_in)

            layer.SetSpatialFilter(polygon)

            # layer.SetSpatialFilterRect(
            #     float(minx), float(miny), float(maxx), float(maxy))

        if properties:
            LOGGER.debug('processing properties')

            attribute_filter = ' and '.join(
                map(lambda x: '{} = \'{}\''.format(x[0], x[1]),
                    properties))

            LOGGER.debug(attribute_filter)
            layer.SetAttributeFilter(attribute_filter)

        # Make response based on resulttype specified
        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            result = self._response_feature_hits(layer)
        elif resulttype == 'results':
            LOGGER.debug('results specified')
            result = self._response_feature_collection(layer, limit)
        else:
            LOGGER.error('Invalid resulttype: %s' % resulttype)

    except RuntimeError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
    except ProviderConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(err)
    except Exception as err:
        LOGGER.error(err)
        raise ProviderGenericError(err)
    finally:
        self._close()

    return result
def query(self, range_subset=[], subsets={}, format_='json'):
    """
    Extract data from collection

    :param range_subset: list of data variables to return (all if blank)
    :param subsets: dict of subset names with lists of ranges
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """

    if not range_subset and not subsets and format_ != 'json':
        LOGGER.debug('No parameters specified, returning native data')
        return read_data(self.data)

    if len(range_subset) < 1:
        range_subset = self.fields

    data = self._data[[*range_subset]]

    if (self._coverage_properties['x_axis_label'] in subsets or
            self._coverage_properties['y_axis_label'] in subsets or
            self._coverage_properties['time_axis_label'] in subsets):
        LOGGER.debug('Creating spatio-temporal subset')

        query_params = {}
        for key, val in subsets.items():
            if data.coords[key].values[0] > data.coords[key].values[-1]:
                LOGGER.debug('Reversing slicing low/high')
                query_params[key] = slice(val[1], val[0])
            else:
                query_params[key] = slice(val[0], val[1])

        LOGGER.debug('Query parameters: {}'.format(query_params))
        try:
            data = data.sel(query_params)
        except Exception as err:
            LOGGER.warning(err)
            raise ProviderQueryError(err)

    if (any([data.coords[self.x_field].size == 0,
             data.coords[self.y_field].size == 0])):
        msg = 'No data found'
        LOGGER.warning(msg)
        raise ProviderNoDataError(msg)

    out_meta = {
        'bbox': [
            data.coords[self.x_field].values[0],
            data.coords[self.y_field].values[0],
            data.coords[self.x_field].values[-1],
            data.coords[self.y_field].values[-1]
        ],
        'time': [
            _to_datetime_string(data.coords[self.time_field].values[0]),
            _to_datetime_string(data.coords[self.time_field].values[-1])
        ],
        'driver': 'xarray',
        'height': data.dims[self.y_field],
        'width': data.dims[self.x_field],
        'time_steps': data.dims[self.time_field],
        'variables': {var_name: var.attrs
                      for var_name, var in data.variables.items()}
    }

    LOGGER.debug('Serializing data in memory')
    if format_ == 'json':
        LOGGER.debug('Creating output in CoverageJSON')
        return self.gen_covjson(out_meta, data, range_subset)
    else:  # return data in native format
        with tempfile.TemporaryFile() as fp:
            LOGGER.debug('Returning data in native format')
            fp.write(data.to_netcdf())
            fp.seek(0)
            return fp.read()
def _load(self, startindex=0, limit=10, resulttype='results',
          identifier=None, bbox=[], datetime_=None, properties=[],
          sortby=[], select_properties=[], skip_geometry=False, q=None):
    """
    Private function: Load STA data

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)

    :returns: dict of GeoJSON FeatureCollection
    """

    feature_collection = {
        'type': 'FeatureCollection',
        'features': []
    }

    # Make params
    params = {
        '$expand': EXPAND[self.entity],
        '$skip': str(startindex),
        '$top': str(limit),
        '$count': 'true'
    }
    if properties or bbox or datetime_:
        params['$filter'] = self._make_filter(properties, bbox, datetime_)
    if sortby:
        params['$orderby'] = self._make_orderby(sortby)

    # Form URL for GET request
    LOGGER.debug('Sending query')
    if identifier:
        r = get(f'{self._url}({identifier})', params=params)
    else:
        r = get(self._url, params=params)

    if r.status_code == codes.bad:
        LOGGER.error('Bad http response code')
        raise ProviderConnectionError('Bad http response code')
    response = r.json()

    # if hits, return count
    if resulttype == 'hits':
        LOGGER.debug('Returning hits')
        feature_collection['numberMatched'] = response.get('@iot.count')
        return feature_collection

    v = [response, ] if identifier else response.get('value')

    # if values are less than expected, query for more
    hits_ = 1 if identifier else min(limit, response.get('@iot.count'))
    while len(v) < hits_:
        LOGGER.debug('Fetching next set of values')
        r = get(response.get('@iot.nextLink'), params={'$skip': len(v)})
        response = r.json()
        v.extend(response.get('value'))

    # properties filter & display
    keys = (() if not self.properties and not select_properties
            else set(self.properties) | set(select_properties))

    for entity in v[:hits_]:
        # Make feature
        id = entity.pop(self.id_field)
        id = f"'{id}'" if isinstance(id, str) else str(id)
        f = {
            'type': 'Feature', 'properties': {},
            'geometry': None, 'id': id
        }

        # Make geometry
        if not skip_geometry:
            f['geometry'] = self._geometry(entity)

        # Fill properties block
        try:
            f['properties'] = self._expand_properties(entity, keys)
        except KeyError as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)

        feature_collection['features'].append(f)

    feature_collection['numberReturned'] = len(
        feature_collection['features'])

    if identifier:
        return f
    else:
        return feature_collection
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False, q=None):
    """
    query TinyDB document store

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)

    :returns: dict of 0..n GeoJSON feature collection
    """

    Q = Query()
    LOGGER.debug('Query initiated: {}'.format(Q))

    QUERY = []

    feature_collection = {'type': 'FeatureCollection', 'features': []}

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        bbox_as_string = ','.join(str(s) for s in bbox)
        QUERY.append("Q.properties.extent.spatial.bbox.test(bbox_intersects, '{}')".format(bbox_as_string))  # noqa

    if datetime_ is not None:
        LOGGER.debug('processing datetime parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        if '/' in datetime_:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = datetime_.split('/')

            if time_begin != '..':
                QUERY.append("(Q.properties[self.time_field]>='{}')".format(time_begin))  # noqa
            if time_end != '..':
                QUERY.append("(Q.properties[self.time_field]<='{}')".format(time_end))  # noqa

        else:  # time instant
            LOGGER.debug('detected time instant')
            QUERY.append("(Q.properties[self.time_field]=='{}')".format(datetime_))  # noqa

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            QUERY.append("(Q.properties['{}']=='{}')".format(*prop))

    if q is not None:
        QUERY.append("(Q.properties['_metadata-anytext'].search('{}'))".format(q))  # noqa

    QUERY_STRING = '&'.join(QUERY)
    LOGGER.debug('QUERY_STRING: {}'.format(QUERY_STRING))
    SEARCH_STRING = 'self.db.search({})'.format(QUERY_STRING)
    LOGGER.debug('SEARCH_STRING: {}'.format(SEARCH_STRING))

    LOGGER.debug('querying database')
    if len(QUERY) > 0:
        LOGGER.debug('running eval on {}'.format(SEARCH_STRING))
        results = eval(SEARCH_STRING)
    else:
        results = self.db.all()

    feature_collection['numberMatched'] = len(results)

    if resulttype == 'hits':
        return feature_collection

    for r in results:
        for e in self.excludes:
            del r['properties'][e]

    len_results = len(results)
    LOGGER.debug('Results found: {}'.format(len_results))

    if len_results > limit:
        returned = limit
    else:
        returned = len_results

    feature_collection['numberReturned'] = returned

    if sortby:
        LOGGER.debug('Sorting results')
        if sortby[0]['order'] == '-':
            sort_reverse = True
        else:
            sort_reverse = False

        results.sort(key=lambda k: k['properties'][sortby[0]['property']],
                     reverse=sort_reverse)

    feature_collection['features'] = \
        results[startindex:startindex + limit]

    return feature_collection
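# A standalone sketch of the TinyDB expression the eval above produces,
# assuming a time_field of 'datetime' and an illustrative property filter;
# '&' combines clauses with logical AND:
from tinydb import Query

Q = Query()
expr = (Q.properties['stn_id'] == '123') & \
    (Q.properties['datetime'] == '2020-01-01')
# db.search(expr) would return the matching documents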
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime=None, properties=[], sortby=[]):
    """
    Query Postgis for all the content.
    e.g. http://localhost:5000/collections/hotosm_bdi_waterways/items?
    limit=1&resulttype=results

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)

    :returns: GeoJSON FeatureCollection
    """

    LOGGER.debug('Querying PostGIS')

    if resulttype == 'hits':
        with DatabaseConnection(self.conn_dic, self.table,
                                context="hits") as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            where_clause = self.__get_where_clauses(
                properties=properties, bbox=bbox)
            sql_query = SQL("SELECT COUNT(*) as hits from {} {}").\
                format(Identifier(self.table), where_clause)
            try:
                cursor.execute(sql_query)
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}: {}'.format(
                    sql_query.as_string(cursor), err))
                raise ProviderQueryError()

            hits = cursor.fetchone()["hits"]

        return self.__response_feature_hits(hits)

    end_index = startindex + limit

    with DatabaseConnection(self.conn_dic, self.table) as db:
        cursor = db.conn.cursor(cursor_factory=RealDictCursor)

        where_clause = self.__get_where_clauses(
            properties=properties, bbox=bbox)

        sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
            SELECT DISTINCT {},ST_AsGeoJSON({}) FROM {}{}").\
            format(db.columns,
                   Identifier(self.geom),
                   Identifier(self.table),
                   where_clause)

        LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
        LOGGER.debug('Start Index: {}'.format(startindex))
        LOGGER.debug('End Index: {}'.format(end_index))

        try:
            cursor.execute(sql_query)
            for index in [startindex, limit]:
                cursor.execute(
                    "fetch forward {} from geo_cursor".format(index))
        except Exception as err:
            LOGGER.error('Error executing sql_query: {}'.format(
                sql_query.as_string(cursor)))
            LOGGER.error(err)
            raise ProviderQueryError()

        row_data = cursor.fetchall()
        feature_collection = {'type': 'FeatureCollection', 'features': []}

        for rd in row_data:
            feature_collection['features'].append(
                self.__response_feature(rd))

        return feature_collection
def gen_covjson(self, metadata, data, range_type):
    """
    Generate coverage as CoverageJSON representation

    :param metadata: coverage metadata
    :param data: rasterio DatasetReader object
    :param range_type: range type list

    :returns: dict of CoverageJSON representation
    """

    LOGGER.debug('Creating CoverageJSON domain')
    minx, miny, maxx, maxy = metadata['bbox']
    mint, maxt = metadata['time']

    try:
        tmp_min = data.coords[self.y_field].values[0]
    except IndexError:
        tmp_min = data.coords[self.y_field].values
    try:
        tmp_max = data.coords[self.y_field].values[-1]
    except IndexError:
        tmp_max = data.coords[self.y_field].values

    if tmp_min > tmp_max:
        LOGGER.debug('Reversing direction of {}'.format(self.y_field))
        miny = tmp_max
        maxy = tmp_min

    cj = {
        'type': 'Coverage',
        'domain': {
            'type': 'Domain',
            'domainType': 'Grid',
            'axes': {
                'x': {
                    'start': minx,
                    'stop': maxx,
                    'num': metadata['width']
                },
                'y': {
                    'start': maxy,
                    'stop': miny,
                    'num': metadata['height']
                },
                self.time_field: {
                    'start': mint,
                    'stop': maxt,
                    'num': metadata['time_steps']
                }
            },
            'referencing': [{
                'coordinates': ['x', 'y'],
                'system': {
                    'type': self._coverage_properties['crs_type'],
                    'id': self._coverage_properties['bbox_crs']
                }
            }]
        },
        'parameters': {},
        'ranges': {}
    }

    for variable in range_type:
        pm = self._get_parameter_metadata(
            variable, self._data[variable].attrs)

        parameter = {
            'type': 'Parameter',
            'description': pm['description'],
            'unit': {
                'symbol': pm['unit_label']
            },
            'observedProperty': {
                'id': pm['observed_property_id'],
                'label': {
                    'en': pm['observed_property_name']
                }
            }
        }

        cj['parameters'][pm['id']] = parameter

    try:
        for key in cj['parameters'].keys():
            cj['ranges'][key] = {
                'type': 'NdArray',
                'dataType': str(self._data[variable].dtype),
                'axisNames': [
                    'y', 'x', self._coverage_properties['time_axis_label']
                ],
                'shape': [
                    metadata['height'],
                    metadata['width'],
                    metadata['time_steps']
                ]
            }
            data = data.fillna(None)
            cj['ranges'][key]['values'] = data[key].values.flatten().tolist()  # noqa
    except IndexError as err:
        LOGGER.warning(err)
        raise ProviderQueryError('Invalid query parameter')

    return cj
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime=None, properties=[], sortby=[]):
    """
    Query Postgis for all the content.
    e.g. http://localhost:5000/collections/hotosm_bdi_waterways/items?
    limit=1&resulttype=results

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)

    :returns: GeoJSON FeatureCollection
    """

    LOGGER.debug('Querying PostGIS')

    where_conditions = []
    if properties:
        property_clauses = [
            SQL('{} = {}').format(
                Identifier(self.properties_to_cols[k]), Literal(v))
            for k, v in properties]
        where_conditions += property_clauses
    if bbox:
        bbox_clause = SQL('{} && ST_MakeEnvelope({})').format(
            Identifier(self.geom),
            SQL(', ').join([Literal(bbox_coord) for bbox_coord in bbox]))
        where_conditions.append(bbox_clause)

    if where_conditions:
        where_clause = SQL(' WHERE {}').format(
            SQL(' AND ').join(where_conditions))
    else:
        where_clause = SQL('')

    if resulttype == 'hits':
        select_clause = SQL('SELECT count(*) as hits')

        with DatabaseConnection(self.conn_dic) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            sql_query = SQL('{} FROM {}{}').format(select_clause,
                                                   SQL(self.table),
                                                   where_clause)
            try:
                cursor.execute(sql_query)
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}: {}'.format(
                    sql_query.as_string(cursor), err))
                raise ProviderQueryError()

            hits = cursor.fetchone()["hits"]

        return self.__response_feature_hits(hits)
    else:
        select_clause = SQL('SELECT {}, {}, ST_AsGeoJSON({})').format(
            self.columns,
            Identifier(self.id_field),
            Identifier(self.geom))

        with DatabaseConnection(self.conn_dic) as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            sql_query = SQL(
                'DECLARE "geo_cursor" CURSOR FOR {} FROM {} {}').format(
                    select_clause, SQL(self.table), where_clause)

            LOGGER.debug('SQL Query: {}'.format(
                sql_query.as_string(cursor)))
            LOGGER.debug('Start Index: {}'.format(startindex))
            LOGGER.debug('End Index: {}'.format(startindex + limit))

            try:
                cursor.execute(sql_query)
                for index in [startindex, limit]:
                    cursor.execute(
                        'fetch forward {} from geo_cursor'.format(index))
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}'.format(
                    sql_query.as_string(cursor)))
                LOGGER.error(err)
                raise ProviderQueryError()

            row_data = cursor.fetchall()

            return {
                'type': 'FeatureCollection',
                'features': [self.__response_feature(rd)
                             for rd in row_data]
            }
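# A standalone sketch of the psycopg2.sql composition used above (column and
# table names are illustrative); the Composed object can be built without a
# connection, though rendering it with as_string() requires one:
from psycopg2.sql import SQL, Identifier, Literal

where_conditions = [
    SQL('{} = {}').format(Identifier('waterway'), Literal('river')),
    SQL('{} && ST_MakeEnvelope({})').format(
        Identifier('geom'),
        SQL(', ').join(Literal(c) for c in [29.0, -4.5, 30.9, -2.3]))
]
where_clause = SQL(' WHERE {}').format(SQL(' AND ').join(where_conditions))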
def query(self, range_subset=[1], subsets={}, bbox=[], datetime_=None,
          format_='json', **kwargs):
    """
    Extract data from collection

    :param range_subset: variable
    :param subsets: dict of subset names with lists of ranges
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """

    nbits = 16

    bands = range_subset
    LOGGER.debug('Bands: {}, subsets: {}'.format(bands, subsets))

    args = {'indexes': None}
    shapes = []

    if all([self._coverage_properties['x_axis_label'] in subsets,
            self._coverage_properties['y_axis_label'] in subsets,
            len(bbox) > 0]):
        msg = 'bbox and subsetting by coordinates are exclusive'
        LOGGER.warning(msg)
        raise ProviderQueryError(msg)

    if len(bbox) > 0:
        minx, miny, maxx, maxy = bbox

        crs_src = CRS.from_epsg(4326)
        crs_dest = CRS.from_string(self.crs)

        LOGGER.debug('source bbox CRS and data CRS are different')
        LOGGER.debug('reprojecting bbox into native coordinates')

        temp_geom_min = {"type": "Point", "coordinates": [minx, miny]}
        temp_geom_max = {"type": "Point", "coordinates": [maxx, maxy]}
        temp_geom_minup = {"type": "Point", "coordinates": [minx, maxy]}
        temp_geom_maxdown = {"type": "Point", "coordinates": [maxx, miny]}

        min_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                 temp_geom_min)
        minx2, miny2 = min_coord['coordinates']

        max_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                 temp_geom_max)
        maxx2, maxy2 = max_coord['coordinates']

        upleft_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                    temp_geom_minup)
        minx2up, maxy2up = upleft_coord['coordinates']

        downright_coord = rasterio.warp.transform_geom(crs_src, crs_dest,
                                                       temp_geom_maxdown)
        maxx2down, miny2down = downright_coord['coordinates']

        LOGGER.debug('Source coordinates: {}'.format(
            [minx, miny, maxx, maxy]))
        LOGGER.debug('Destination coordinates: {}'.format(
            [minx2, miny2, maxx2, maxy2]))

        shapes = [{
            'type': 'Polygon',
            'coordinates': [[
                [minx2, miny2],
                [minx2up, maxy2up],
                [maxx2, maxy2],
                [maxx2down, miny2down],
                [minx2, miny2],
            ]]
        }]

    elif (self._coverage_properties['x_axis_label'] in subsets and
            self._coverage_properties['y_axis_label'] in subsets):
        LOGGER.debug('Creating spatial subset')

        x = self._coverage_properties['x_axis_label']
        y = self._coverage_properties['y_axis_label']

        shapes = [{
            'type': 'Polygon',
            'coordinates': [[
                [subsets[x][0], subsets[y][0]],
                [subsets[x][0], subsets[y][1]],
                [subsets[x][1], subsets[y][1]],
                [subsets[x][1], subsets[y][0]],
                [subsets[x][0], subsets[y][0]]
            ]]
        }]

    date_file_list = False

    if datetime_:
        if '/' not in datetime_:
            try:
                period = datetime.strptime(
                    datetime_, '%Y-%m-%dT%HZ').strftime('%Y%m%d%H')
                self.data = [v for v in self.file_list if period in v][0]
            except IndexError as err:
                msg = 'Datetime value invalid or out of time domain'
                LOGGER.error(err)
                raise ProviderQueryError(msg)
        else:
            self.get_file_list(self.var, datetime_)
            date_file_list = self.file_list

    if bands:
        LOGGER.debug('Selecting bands')
        args['indexes'] = list(map(int, bands))

    with rasterio.open(self.data) as _data:
        LOGGER.debug('Creating output coverage metadata')
        _data._crs = self.crs
        _data._transform = self.transform
        out_meta = _data.meta

        if self.options is not None:
            LOGGER.debug('Adding dataset options')
            for key, value in self.options.items():
                out_meta[key] = value

        if shapes:  # spatial subset
            try:
                LOGGER.debug('Clipping data with bbox')
                out_image, out_transform = rasterio.mask.mask(
                    _data,
                    filled=False,
                    shapes=shapes,
                    crop=True,
                    indexes=args['indexes'])
            except ValueError as err:
                LOGGER.error(err)
                raise ProviderQueryError(err)

            out_meta.update({'driver': self.native_format,
                             'height': out_image.shape[1],
                             'width': out_image.shape[2],
                             'transform': out_transform})
        else:  # no spatial subset
            LOGGER.debug('Creating data in memory with band selection')
            out_image = _data.read(indexes=args['indexes'])

        if bbox:
            out_meta['bbox'] = [bbox[0], bbox[1], bbox[2], bbox[3]]
        elif shapes:
            out_meta['bbox'] = [
                subsets[x][0], subsets[y][0],
                subsets[x][1], subsets[y][1]
            ]
        else:
            out_meta['bbox'] = [
                _data.bounds.left,
                _data.bounds.bottom,
                _data.bounds.right,
                _data.bounds.top
            ]

        out_meta['units'] = _data.units

        self.filename = self.data.split('/')[-1].replace('*', '')

        # CovJSON output does not support multiple bands yet
        # Only the first timestep is returned
        if format_ == 'json':
            if date_file_list:
                err = 'Date range not yet supported for CovJSON output'
                LOGGER.error(err)
                raise ProviderQueryError(err)
            else:
                LOGGER.debug('Creating output in CoverageJSON')
                out_meta['bands'] = [1]
                return self.gen_covjson(out_meta, out_image)
        else:
            if date_file_list:
                out_meta.update(count=len(date_file_list))

                LOGGER.debug('Serializing data in memory')
                with MemoryFile() as memfile:
                    with memfile.open(**out_meta, nbits=nbits) as dest:
                        for id, layer in enumerate(date_file_list,
                                                   start=1):
                            with rasterio.open(layer) as src1:
                                src1._crs = self.crs
                                src1._transform = self.transform
                                if shapes:  # spatial subset
                                    try:
                                        LOGGER.debug('Clipping data')
                                        out_image, out_transform = \
                                            rasterio.mask.mask(
                                                src1,
                                                filled=False,
                                                shapes=shapes,
                                                crop=True,
                                                indexes=args['indexes'])
                                    except ValueError as err:
                                        LOGGER.error(err)
                                        raise ProviderQueryError(err)
                                else:
                                    out_image = src1.read(
                                        indexes=args['indexes'])
                                dest.write_band(id, out_image[0])

                    # return data in native format
                    LOGGER.debug('Returning data in native format')
                    return memfile.read()
            else:
                LOGGER.debug('Serializing data in memory')
                out_meta.update(count=len(args['indexes']))
                with MemoryFile() as memfile:
                    with memfile.open(**out_meta, nbits=nbits) as dest:
                        dest.write(out_image)

                    # return data in native format
                    LOGGER.debug('Returning data in native format')
                    return memfile.read()
def _load(self, startindex=0, limit=10, resulttype='results',
          identifier=None, bbox=[], datetime_=None, properties=[],
          select_properties=[], skip_geometry=False, q=None):
    """
    Load CSV data

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param datetime_: temporal (datestamp or extent)
    :param resulttype: return results or hit limit (default results)
    :param properties: list of tuples (name, value)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)

    :returns: dict of GeoJSON FeatureCollection
    """

    found = False
    result = None
    feature_collection = {
        'type': 'FeatureCollection',
        'features': []
    }

    with open(self.data) as ff:
        LOGGER.debug('Serializing DictReader')
        data_ = csv.DictReader(ff)
        if resulttype == 'hits':
            LOGGER.debug('Returning hits only')
            feature_collection['numberMatched'] = len(list(data_))
            return feature_collection

        LOGGER.debug('Slicing CSV rows')
        for row in itertools.islice(data_, startindex,
                                    startindex + limit):
            feature = {'type': 'Feature'}
            feature['id'] = row.pop(self.id_field)
            if not skip_geometry:
                feature['geometry'] = {
                    'type': 'Point',
                    'coordinates': [
                        float(row.pop(self.geometry_x)),
                        float(row.pop(self.geometry_y))
                    ]
                }
            else:
                feature['geometry'] = None
            if self.properties or select_properties:
                feature['properties'] = OrderedDict()
                for p in set(self.properties) | set(select_properties):
                    try:
                        feature['properties'][p] = row[p]
                    except KeyError as err:
                        LOGGER.error(err)
                        raise ProviderQueryError()
            else:
                feature['properties'] = row

            if identifier is not None and feature['id'] == identifier:
                found = True
                result = feature
            feature_collection['features'].append(feature)

        feature_collection['numberMatched'] = \
            len(feature_collection['features'])

    if identifier is not None and not found:
        return None
    elif identifier is not None and found:
        return result

    feature_collection['numberReturned'] = len(
        feature_collection['features'])

    return feature_collection
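# Sketch of the row-to-feature mapping above for a hypothetical CSV with
# columns id, long, lat, value (id_field='id', geometry_x='long',
# geometry_y='lat'):
row = {'id': '371', 'long': '-75.0', 'lat': '45.0', 'value': '8'}
feature = {
    'type': 'Feature',
    'id': row.pop('id'),
    'geometry': {
        'type': 'Point',
        'coordinates': [float(row.pop('long')), float(row.pop('lat'))]
    },
    'properties': row  # remaining columns become feature properties
}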
def query(self, offset=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False, q=None, **kwargs):
    """
    Query Postgis for all the content.
    e.g. http://localhost:5000/collections/hotosm_bdi_waterways/items?
    limit=1&resulttype=results

    :param offset: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)

    :returns: GeoJSON FeatureCollection
    """

    LOGGER.debug('Querying PostGIS')

    if resulttype == 'hits':
        with DatabaseConnection(self.conn_dic, self.table,
                                properties=self.properties,
                                context="hits") as db:
            cursor = db.conn.cursor(cursor_factory=RealDictCursor)

            where_clause = self.__get_where_clauses(
                properties=properties, bbox=bbox)
            sql_query = SQL("SELECT COUNT(*) as hits from {} {}").\
                format(Identifier(self.table), where_clause)
            try:
                cursor.execute(sql_query)
            except Exception as err:
                LOGGER.error('Error executing sql_query: {}: {}'.format(
                    sql_query.as_string(cursor), err))
                raise ProviderQueryError()

            hits = cursor.fetchone()["hits"]

        return self.__response_feature_hits(hits)

    end_index = offset + limit

    with DatabaseConnection(self.conn_dic, self.table,
                            properties=self.properties) as db:
        cursor = db.conn.cursor(cursor_factory=RealDictCursor)

        props = db.columns if select_properties == [] else \
            SQL(', ').join([Identifier(p) for p in select_properties])

        geom = SQL('') if skip_geometry else \
            SQL(",ST_AsGeoJSON({})").format(Identifier(self.geom))

        where_clause = self.__get_where_clauses(
            properties=properties, bbox=bbox)

        orderby = self._make_orderby(sortby) if sortby else SQL('')

        sql_query = SQL("DECLARE \"geo_cursor\" CURSOR FOR \
            SELECT DISTINCT {} {} FROM {} {} {}").\
            format(props, geom, Identifier(self.table),
                   where_clause, orderby)

        LOGGER.debug('SQL Query: {}'.format(sql_query.as_string(cursor)))
        LOGGER.debug('Start Index: {}'.format(offset))
        LOGGER.debug('End Index: {}'.format(end_index))

        try:
            cursor.execute(sql_query)
            for index in [offset, limit]:
                cursor.execute(
                    "fetch forward {} from geo_cursor".format(index))
        except Exception as err:
            LOGGER.error('Error executing sql_query: {}'.format(
                sql_query.as_string(cursor)))
            LOGGER.error(err)
            raise ProviderQueryError()

        row_data = cursor.fetchall()
        feature_collection = {'type': 'FeatureCollection', 'features': []}

        for rd in row_data:
            feature_collection['features'].append(
                self.__response_feature(rd))

        return feature_collection
def query(self, range_subset=[], subsets={}, bbox=[], datetime_=None,
          format_='json'):
    """
    Extract data from collection

    :param range_subset: list of data variables to return (all if blank)
    :param subsets: dict of subset names with lists of ranges
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """

    if not range_subset and not subsets and format_ != 'json':
        LOGGER.debug('No parameters specified, returning native data')
        if format_ == 'zarr':
            return _get_zarr_data(self._data)
        else:
            return read_data(self.data)

    if len(range_subset) < 1:
        range_subset = self.fields

    data = self._data[[*range_subset]]

    if any([self._coverage_properties['x_axis_label'] in subsets,
            self._coverage_properties['y_axis_label'] in subsets,
            self._coverage_properties['time_axis_label'] in subsets,
            datetime_ is not None]):

        LOGGER.debug('Creating spatio-temporal subset')

        query_params = {}
        for key, val in subsets.items():
            LOGGER.debug('Processing subset: {}'.format(key))
            if data.coords[key].values[0] > data.coords[key].values[-1]:
                LOGGER.debug('Reversing slicing from high to low')
                query_params[key] = slice(val[1], val[0])
            else:
                query_params[key] = slice(val[0], val[1])

        if bbox:
            if all([self._coverage_properties['x_axis_label'] in subsets,
                    self._coverage_properties['y_axis_label'] in subsets,
                    len(bbox) > 0]):
                msg = 'bbox and subsetting by coordinates are exclusive'
                LOGGER.warning(msg)
                raise ProviderQueryError(msg)
            else:
                query_params[self._coverage_properties['x_axis_label']] = \
                    slice(bbox[0], bbox[2])
                query_params[self._coverage_properties['y_axis_label']] = \
                    slice(bbox[1], bbox[3])

        if datetime_ is not None:
            if self._coverage_properties['time_axis_label'] in subsets:
                msg = 'datetime and temporal subsetting are exclusive'
                LOGGER.error(msg)
                raise ProviderQueryError(msg)
            else:
                if '/' in datetime_:
                    begin, end = datetime_.split('/')
                    if begin < end:
                        query_params[self.time_field] = slice(begin, end)
                    else:
                        LOGGER.debug('Reversing slicing from high to low')
                        query_params[self.time_field] = slice(end, begin)
                else:
                    query_params[self.time_field] = datetime_

        LOGGER.debug('Query parameters: {}'.format(query_params))
        try:
            data = data.sel(query_params)
        except Exception as err:
            LOGGER.warning(err)
            raise ProviderQueryError(err)

    if (any([data.coords[self.x_field].size == 0,
             data.coords[self.y_field].size == 0,
             data.coords[self.time_field].size == 0])):
        msg = 'No data found'
        LOGGER.warning(msg)
        raise ProviderNoDataError(msg)

    out_meta = {
        'bbox': [
            data.coords[self.x_field].values[0],
            data.coords[self.y_field].values[0],
            data.coords[self.x_field].values[-1],
            data.coords[self.y_field].values[-1]
        ],
        "time": [
            _to_datetime_string(data.coords[self.time_field].values[0]),
            _to_datetime_string(data.coords[self.time_field].values[-1])
        ],
        "driver": "xarray",
        "height": data.dims[self.y_field],
        "width": data.dims[self.x_field],
        "time_steps": data.dims[self.time_field],
        "variables": {var_name: var.attrs
                      for var_name, var in data.variables.items()}
    }

    LOGGER.debug('Serializing data in memory')
    if format_ == 'json':
        LOGGER.debug('Creating output in CoverageJSON')
        return self.gen_covjson(out_meta, data, range_subset)
    elif format_ == 'zarr':
        LOGGER.debug('Returning data in native zarr format')
        return _get_zarr_data(data)
    else:  # return data in native format
        with tempfile.TemporaryFile() as fp:
            LOGGER.debug('Returning data in native NetCDF format')
            fp.write(data.to_netcdf())
            fp.seek(0)
            return fp.read()
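# Minimal sketch of the xarray subsetting pattern above, outside the
# provider. The variable and coordinate names ('tas', 'lon', 'lat', 'time')
# are hypothetical. A dict mapping coordinate names to slices is passed to
# Dataset.sel(); note that slice bounds must follow the coordinate's storage
# order, which is why the provider reverses high/low for descending axes.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'tas': (('time', 'lat', 'lon'), np.zeros((4, 3, 3)))},
    coords={
        'time': np.array(['2001-01-01', '2001-02-01', '2001-03-01',
                          '2001-04-01'], dtype='datetime64[ns]'),
        'lat': [50.0, 45.0, 40.0],   # descending, as on many model grids
        'lon': [0.0, 5.0, 10.0]
    }
)

query_params = {
    'lon': slice(0.0, 5.0),
    'lat': slice(50.0, 40.0),            # reversed to match descending axis
    'time': slice('2001-01-01', '2001-02-01')
}
subset = ds[['tas']].sel(query_params)   # label-based spatio-temporal subset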
def query(self, range_subset=['spei'], subsets={}, bbox=[],
          datetime_=None, format_='json'):
    """
    Extract data from collection

    :param range_subset: list of data variables to return
                         (default ['spei'])
    :param subsets: dict of subset names with lists of ranges
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param format_: data format of output

    :returns: coverage data as dict of CoverageJSON or native format
    """

    if 'scenario' in subsets:
        scenario = subsets['scenario']
        try:
            if len(scenario) > 1:
                msg = 'multiple scenarios are not supported'
                LOGGER.error(msg)
                raise ProviderQueryError(msg)
            elif scenario[0] not in ['RCP2.6', 'hist']:
                scenario_value = scenario[0].replace('RCP', '')
                self.data = self.data.replace('2.6', scenario_value)
        except Exception as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)

        subsets.pop('scenario')

    if 'percentile' in subsets:
        percentile = subsets['percentile']
        try:
            if percentile != [50]:
                pctl = str(percentile[0])
                self.data = self.data.replace('pctl50',
                                              'pctl{}'.format(pctl))
        except Exception as err:
            LOGGER.error(err)
            raise ProviderQueryError(err)

        subsets.pop('percentile')

    self._data = open_data(self.data)

    if not range_subset and not subsets and format_ != 'json':
        LOGGER.debug('No parameters specified, returning native data')
        if format_ == 'zarr':
            return _get_zarr_data(self._data)
        else:
            return read_data(self.data)

    data = self._data[[*range_subset]]

    if any([self._coverage_properties['x_axis_label'] in subsets,
            self._coverage_properties['y_axis_label'] in subsets,
            self._coverage_properties['time_axis_label'] in subsets,
            bbox,
            datetime_ is not None]):

        LOGGER.debug('Creating spatio-temporal subset')

        query_params = {}
        for key, val in subsets.items():
            val_0 = self._data.coords[key].values[0]
            val_1 = self._data.coords[key].values[-1]
            if val_0 > val_1:
                LOGGER.debug('Reversing slicing low/high')
                query_params[key] = slice(val[1], val[0])
            else:
                query_params[key] = slice(val[0], val[1])

        if bbox:
            if all([self._coverage_properties['x_axis_label'] in subsets,
                    self._coverage_properties['y_axis_label'] in subsets,
                    len(bbox) > 0]):
                msg = 'bbox and subsetting by coordinates are exclusive'
                LOGGER.warning(msg)
                raise ProviderQueryError(msg)
            else:
                query_params[self._coverage_properties['x_axis_label']] = \
                    slice(bbox[0], bbox[2])
                # the y axis is stored descending, so slice from maxy to miny
                query_params[self._coverage_properties['y_axis_label']] = \
                    slice(bbox[3], bbox[1])

        if datetime_ is not None:
            if self._coverage_properties['time_axis_label'] in subsets:
                msg = 'datetime and temporal subsetting are exclusive'
                LOGGER.error(msg)
                raise ProviderQueryError(msg)
            else:
                if '/' in datetime_:
                    begin, end = datetime_.split('/')
                    if begin < end:
                        query_params[self.time_field] = slice(begin, end)
                    else:
                        LOGGER.debug('Reversing slicing from high to low')
                        query_params[self.time_field] = slice(end, begin)
                else:
                    query_params[self.time_field] = datetime_

        LOGGER.debug('Query parameters: {}'.format(query_params))
        try:
            data = self._data.loc[query_params]
        except Exception as err:
            LOGGER.warning(err)
            raise ProviderQueryError(err)

    if (any([data.coords[self.x_field].size == 0,
             data.coords[self.y_field].size == 0])):
        msg = 'No data found'
        LOGGER.warning(msg)
        raise ProviderNoDataError(msg)

    out_meta = {
        'bbox': [
            data.coords[self.x_field].values[0],
            data.coords[self.y_field].values[0],
            data.coords[self.x_field].values[-1],
            data.coords[self.y_field].values[-1]
        ],
        "time": [
            self._to_datetime_string(
                data.coords[self.time_field].values[0]),
            self._to_datetime_string(
                data.coords[self.time_field].values[-1])
        ],
        "driver": "xarray",
        "height": data.dims[self.y_field],
        "width": data.dims[self.x_field],
        "time_steps": data.dims[self.time_field],
        "variables": {var_name: var.attrs
                      for var_name, var in data.variables.items()}
    }

    LOGGER.debug('Serializing data in memory')
    if format_ == 'json':
        LOGGER.debug('Creating output in CoverageJSON')
        return self.gen_covjson(out_meta, data, range_subset)
    elif format_ == 'zarr':
        LOGGER.debug('Returning data in native zarr format')
        return _get_zarr_data(data)
    else:  # return data in native format
        with tempfile.TemporaryFile() as fp:
            LOGGER.debug('Returning data in native NetCDF format')
            fp.write(data.to_netcdf())
            fp.seek(0)
            return fp.read()
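# Sketch of the scenario/percentile file-selection logic above, in
# isolation: the provider encodes scenario and percentile in the configured
# file path and rewrites that path before opening the data. The base path
# below is a hypothetical example of the naming scheme this assumes.
def select_member(base_path, scenario='RCP2.6', percentile=50):
    """Rewrite a SPEI file path for the requested scenario/percentile."""
    path = base_path
    if scenario not in ('RCP2.6', 'hist'):
        # e.g. 'RCP8.5' -> '8.5', substituted for the default '2.6'
        path = path.replace('2.6', scenario.replace('RCP', ''))
    if percentile != 50:
        path = path.replace('pctl50', 'pctl{}'.format(percentile))
    return path

# select_member('spei-2.6-pctl50.nc', 'RCP8.5', 90) -> 'spei-8.5-pctl90.nc'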