def make_fes_filter(self):
    '''
    Generates the filter for querying the IOOS database.

    Reads ``self.start`` / ``self.stop`` (date range), ``self.target``
    (non-empty sequence of search terms), ``self.bbox_crs`` (an already
    built bounding-box filter) and ``self.models``.  The result is stored in
    ``self.filter_list`` as a one-element list holding a single ``fes.And``;
    nothing is returned.

    Raises:
        IndexError: if ``self.target`` is empty.
    '''
    begin, end = fes_date_filter(self.start, self.stop)
    kw = dict(wildCard='*',
              escapeChar='\\',
              singleChar='?',
              propertyname='apiso:AnyText')

    # Every term is matched as a substring ('*term*').  Previously a single
    # term was passed through without wildcards, so it behaved differently
    # from the multi-term case.
    term_filters = [
        fes.PropertyIsLike(literal=('*%s*' % val), **kw)
        for val in self.target
    ]
    if len(term_filters) > 1:
        prop_filt = fes.Or(term_filters)
    else:
        # An empty target raises IndexError here, as it did before.
        prop_filt = term_filters[0]

    conditions = [self.bbox_crs, begin, end, prop_filt]
    # Strict comparison kept on purpose: only values equal to True (not any
    # truthy value) switch on the forecast restriction.
    if self.models == True:  # noqa: E712
        conditions.append(fes.PropertyIsLike(literal='*forecast*', **kw))
    # Exclusions applied to every query.
    conditions.append(fes.Not([fes.PropertyIsLike(literal='*cdip', **kw)]))
    conditions.append(fes.Not([fes.PropertyIsLike(literal='*grib*', **kw)]))

    self.filter_list = [fes.And(conditions)]
def make_filter(config):
    """Assemble the CSW filter list described by ``config``.

    ``config`` is indexed with ``"cf_names"`` (non-empty sequence of names),
    ``"date"`` (with ``"start"`` and ``"stop"``) and ``"region"`` (with
    ``"bbox"`` and ``"crs"``).

    Returns a one-element list holding a single ``fes.And`` that combines
    the bounding box, the two date filters, the name filter (searched in
    ``apiso:Subject``) and an exclusion of ``*cdip*`` in ``apiso:AnyText``.
    """
    like_opts = {"wildCard": "*", "escapeChar": "\\", "singleChar": "?"}

    def subject_like(name):
        # Substring match of one name against the record subject.
        return fes.PropertyIsLike(
            literal=("*%s*" % name), propertyname="apiso:Subject", **like_opts
        )

    cf_names = config["cf_names"]
    if len(cf_names) > 1:
        or_filt = fes.Or([subject_like(name) for name in cf_names])
    else:
        or_filt = subject_like(cf_names[0])

    cdip_like = fes.PropertyIsLike(
        literal="*cdip*", propertyname="apiso:AnyText", **like_opts
    )
    not_filt = fes.Not([cdip_like])

    date_cfg = config["date"]
    begin, end = fes_date_filter(date_cfg["start"], date_cfg["stop"])

    bbox_crs = fes.BBox(config["region"]["bbox"], crs=config["region"]["crs"])

    return [fes.And([bbox_crs, begin, end, or_filt, not_filt])]
def fes_date_filter(start_date='1900-01-01', stop_date='2100-01-01',
                    constraint='overlaps'):
    """Build a temporal-extent filter for a CSW query.

    Args:
        start_date: literal used as the lower bound.
        stop_date: literal used as the upper bound.
        constraint: ``'overlaps'`` compares ``apiso:TempExtent_end`` against
            ``start_date`` and ``apiso:TempExtent_begin`` against
            ``stop_date``; ``'within'`` compares ``apiso:TempExtent_begin``
            against ``start_date`` and ``apiso:TempExtent_end`` against
            ``stop_date``.

    Returns:
        A ``fes.And`` combining the two comparisons.

    Raises:
        NameError: if ``constraint`` is neither ``'overlaps'`` nor
            ``'within'``.
    """
    if constraint == 'overlaps':
        start_prop = 'apiso:TempExtent_end'
        stop_prop = 'apiso:TempExtent_begin'
    elif constraint == 'within':
        start_prop = 'apiso:TempExtent_begin'
        stop_prop = 'apiso:TempExtent_end'
    else:
        # Fail with an explicit message instead of the UnboundLocalError the
        # fall-through used to produce (UnboundLocalError is a NameError
        # subclass, so existing handlers keep working).
        raise NameError('Unrecognized constraint {}'.format(constraint))

    start = fes.PropertyIsGreaterThanOrEqualTo(propertyname=start_prop,
                                               literal=start_date)
    stop = fes.PropertyIsLessThanOrEqualTo(propertyname=stop_prop,
                                           literal=stop_date)
    return fes.And([start, stop])
def csw_query(endpoint, bbox=None, start=None, stop=None, kw_names=None,
              crs="urn:ogc:def:crs:OGC:1.3:CRS84", max_retries=None):
    """Query a CSW endpoint and print the matching records.

    Args:
        endpoint: URL handed to ``CatalogueServiceWeb``.
        bbox: optional bounding box passed to ``fes.BBox``.
        start, stop: optional date range; used only when both are given.
        kw_names: optional iterable of terms; a record matches when any term
            occurs in ``apiso:AnyText``.
        crs: accepted for interface compatibility but currently overridden
            (see the note in the body).
        max_retries: maximum number of connection attempts; ``None`` (the
            default) keeps retrying until the connection succeeds, which is
            the historical behaviour.

    Prints the number of records, then title, id and references of each.
    Returns ``None``.
    """
    # NOTE: the caller-supplied ``crs`` is deliberately replaced, see
    # https://github.com/qgis/QGIS/issues/40778
    crs = "urn:ogc:def:crs:::EPSG:4326"

    csw = None
    attempts = 0
    while csw is None:
        try:
            csw = CatalogueServiceWeb(endpoint, timeout=60)
        except Exception:
            # Catch Exception only (not a bare except) so Ctrl-C and
            # SystemExit can still interrupt the retry loop.
            attempts += 1
            if max_retries is not None and attempts >= max_retries:
                raise

    constraints = []
    if kw_names:
        kw = dict(wildCard="*", escapeChar="\\", singleChar="?",
                  propertyname="apiso:AnyText")
        or_filt = fes.Or([
            fes.PropertyIsLike(literal=("*%s*" % val), **kw)
            for val in kw_names
        ])
        constraints.append(or_filt)

    if start is not None and stop is not None:
        begin, end = fes_date_filter(start, stop)
        constraints.append(begin)
        constraints.append(end)

    if bbox:
        constraints.append(fes.BBox(bbox, crs=crs))

    # Two or more constraints are AND-ed together; zero or one constraint is
    # passed through as a plain list.
    if len(constraints) >= 2:
        filter_list = [fes.And(constraints)]
    else:
        filter_list = constraints

    # Presumably fills ``csw.records``, which is read below -- TODO confirm
    # against get_csw_records.
    get_csw_records(csw, filter_list, pagesize=10, maxrecords=10)

    print("Found {} records.\n".format(len(csw.records)))
    msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
    for key, value in csw.records.items():
        print(u"Title: [{}]\nID: {}\n".format(value.title, key))
        for ref in value.references:
            print(msg(geolink=sniff_link(ref["url"]), **ref))
        # Visual separator between records.
        print("#########################################################", '\n')
def make_filter(config):
    """Build the CSW filter list for the search described by ``config``.

    ``config`` is indexed with ``"cf_names"`` (iterable of names), ``"date"``
    (with ``"start"`` and ``"stop"``) and ``"region"`` (with ``"bbox"`` and
    ``"crs"``).

    Returns a one-element list holding a ``fes.And`` of the bounding box,
    the two date filters, an OR over the names and a NOT on the literal
    ``GRIB-2``; all text matching is done against ``apiso:AnyText``.
    """
    from owslib import fes
    from ioos_tools.ioos import fes_date_filter

    like_opts = {
        "wildCard": "*",
        "escapeChar": "\\",
        "singleChar": "?",
        "propertyname": "apiso:AnyText",
    }

    # One substring filter per CF name, OR-ed together.
    name_filters = []
    for cf_name in config["cf_names"]:
        name_filters.append(
            fes.PropertyIsLike(literal=("*%s*" % cf_name), **like_opts)
        )
    or_filt = fes.Or(name_filters)

    grib_like = fes.PropertyIsLike(literal="GRIB-2", **like_opts)
    not_filt = fes.Not([grib_like])

    dates = config["date"]
    begin, end = fes_date_filter(dates["start"], dates["stop"])

    region = config["region"]
    bbox_crs = fes.BBox(region["bbox"], crs=region["crs"])

    combined = fes.And([bbox_crs, begin, end, or_filt, not_filt])
    return [combined]
# <codecell> val = 'sea_water_salinity' any_filt = fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val), escapeChar='\\',wildCard='*',singleChar='?') # <codecell> val = 'Averages' not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val), escapeChar='\\',wildCard='*',singleChar='?')]) # <codecell> filter_list = [fes.And([ bbox, start, stop, any_filt]) ] # <codecell> # try request using multiple filters "and" syntax: [[filter1,filter2]] csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full') print len(csw.records.keys()) # <codecell> csw.request # <markdowncell> # Now print out some titles
'roms', 'selfe', 'adcirc', 'ncom', 'hycom', 'fvcom', 'wrf', 'wrams' ] for model in model_strings: title_filter = fes.PropertyIsLike(propertyname='apiso:Title', literal='*%s*' % model, wildCard='*') name_filters.append(title_filter) subject_filter = fes.PropertyIsLike(propertyname='apiso:Subject', literal='*%s*' % model, wildCard='*') name_filters.append(subject_filter) # Or all of the name filters together name_filters = fes.Or(name_filters) # Final filters filters = fes.And([polygon_filters, name_filters]) # <markdowncell> # ##### The actual CSW filters look like this # <codecell> from owslib.etree import etree print etree.tostring(filters.toXML(), pretty_print=True) # <markdowncell> # ##### Find all models contain in all CSW endpoints # <codecell>
# In[8]: csw.request # In[9]: csw.records[choice].xml # Add bounding box constraint. To specify lon,lat order for bbox (which we want to do so that we can use the same bbox with either geoportal server or pycsw requests), we need to request the bounding box specifying the CRS84 coordinate reference system. The CRS84 option is available in `pycsw 1.1.10`+. The ability to specify the `crs` in the bounding box request is available in `owslib 0.8.12`+. For more info on the bounding box problem and how it was solved, see this [pycsw issue](https://github.com/geopython/pycsw/issues/287), this [geoportal server issue](https://github.com/Esri/geoportal-server/issues/124), and this [owslib issue](https://github.com/geopython/OWSLib/issues/201) # In[10]: bbox = [-87.40, 34.25, -63.70, 66.70] # [lon_min, lat_min, lon_max, lat_max] bbox_filter = fes.BBox(bbox, crs='urn:ogc:def:crs:OGC:1.3:CRS84') filter_list = [fes.And([filter1, bbox_filter])] csw.getrecords2(constraints=filter_list, maxrecords=1000) # In[11]: print(len(csw.records.keys())) for rec in list(csw.records.keys()): print('title:' + csw.records[rec].title) print('identifier:' + csw.records[rec].identifier) print('modified:' + csw.records[rec].modified) print(' ') # In[12]: val = 'WMS' filter2 = fes.PropertyIsLike(propertyname='apiso:ServiceType',
filt = fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), escapeChar='\\', wildCard='*', singleChar='?') filter3 = fes.Not([filt]) val = 'espresso' filt = fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), escapeChar='\\', wildCard='*', singleChar='?') filter4 = fes.Not([filt]) filter_list = [fes.And([filter1, filter2, filter3, filter4])] # In[ ]: csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full') len(csw.records.keys()) # In[ ]: choice = random.choice(list(csw.records.keys())) print choice csw.records[choice].references # In[ ]: try:
# Options shared by every PropertyIsLike filter below: '*' is the wildcard
# and matching is done against apiso:AnyText.
kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:AnyText')

# One substring filter per entry of name_list, OR-ed together.
or_filt = fes.Or([fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in name_list])

# Exclude ROMS Averages and History files.
# NOTE(review): only '*Averages*' is excluded below; no History filter is
# built here -- confirm.
not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)])

bbox_filter = fes.BBox(bbox,crs='urn:ogc:def:crs:OGC:1.3:CRS84')
begin, end = fes_date_filter(start, stop)
filter_list = [fes.And([bbox_filter, begin, end, or_filt, not_filt])]


# In[ ]:

bbox = [-87.40, 34.25, -63.70, 66.70]  # [lon_min, lat_min, lon_max, lat_max]
# This cell rebinds bbox_filter and filter_list, replacing the values built
# in the cell above; filter1 is defined outside this section.
bbox_filter = fes.BBox(bbox,crs='urn:ogc:def:crs:OGC:1.3:CRS84')
filter_list = [fes.And([filter1, bbox_filter])]
csw.getrecords2(constraints=filter_list, maxrecords=1000)


# In[141]:

csw.get_operation_by_name('GetRecords').constraints
literal=('*%s*' % val), escapeChar='\\', wildCard='*', singleChar='?') for val in data_dict['currents']['names'] ]) val = 'Averages' not_filt = fes.Not([ fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), escapeChar='\\', wildCard='*', singleChar='?') ]) filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])] # connect to CSW, explore it's properties # try request using multiple filters "and" syntax: [[filter1,filter2]] csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full') print str(len(csw.records)) + " csw records found" for rec, item in csw.records.items(): print(item.title) # <markdowncell> # #### List end points available # <codecell> dap_urls = service_urls(csw.records) #remove duplicates and organize
from owslib import fes

# Options shared by the PropertyIsLike filters below ('*' wildcard, matched
# against apiso:AnyText).
kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:AnyText')

# One substring filter per entry of name_list, OR-ed together.
or_filt = fes.Or(
    [fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in name_list])

# Exclude ROMS Averages and History files.
# NOTE(review): only '*Averages*' is excluded below -- confirm.
not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)])

begin, end = fes_date_filter(start, stop)
# The bounding box is built without an explicit crs argument here.
filter_list = [fes.And([fes.BBox(bbox), begin, end, or_filt, not_filt])]


# In[6]:

from owslib.csw import CatalogueServiceWeb

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')

# Log a short summary of the catalogue; log and fmt are defined outside
# this section.
log.info(fmt(' Catalog information '))
log.info("URL: {}".format(endpoint))
log.info("CSW version: {}".format(csw.version))
log.info("Number of datasets available: {}".format(len(csw.records.keys())))


# In[7]:
propertyname = "apiso:TempExtent_end" end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop) else: raise NameError("Unrecognized constraint {}".format(constraint)) return begin, end kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText") or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names]) # Exclude GNOME returns. not_filt = fes.Not([fes.PropertyIsLike(literal="*GNOME*", **kw)]) begin, end = fes_date_filter(start, stop) bbox_crs = fes.BBox(bbox, crs=crs) filter_list = [fes.And([bbox_crs, begin, end, or_filt, not_filt])] It is possible to use the same filter to search multiple catalogs. The cell below loops over 3 catalogs hoping to find which one is more up-to-date and returns the near real time data. def get_csw_records(csw, filter_list, pagesize=10, maxrecords=1000): """Iterate `maxrecords`/`pagesize` times until the requested value in `maxrecords` is reached. """ from owslib.fes import SortBy, SortProperty # Iterate over sorted results. sortby = SortBy([SortProperty("dc:title", "ASC")]) csw_records = {} startposition = 0 nextrecord = getattr(csw, "results", 1) while nextrecord != 0:
def retrieve(self, variable=None, dataframe=False, count=False, bbox=None,
             or_filter=False, *searchArgs, **kwargs):
    """Retrieve features from the WFS service.

    Args:
        variable: type name to request; defaults to the first entry of
            ``self._client.items()``.
        dataframe: when true, return a ``geopandas.GeoDataFrame`` instead of
            a list of OGR features.
        count: when true, request ``resulttype='hits'``.
        bbox: optional bounding box forwarded to ``getfeature``.
        or_filter: combine the property filters with OR instead of AND.
        *searchArgs: accepted but not used.
        **kwargs: ``property=value`` pairs turned into ``PropertyIsLike``
            filters.

    Returns:
        A list of OGR features, each given a shapely ``geom`` attribute, or
        a ``GeoDataFrame`` when ``dataframe`` is true.

    Raises:
        Exception: if the service is not valid (message is ``self._status``)
            or the downloaded response cannot be opened by OGR.
    """
    if not self._valid:
        raise Exception(self._status)
    if not variable:
        variable = self._client.items()[0][0]

    query_params = dict(typename=variable, propertyname=None)
    if count:
        query_params['resulttype'] = 'hits'
    if bbox:
        query_params['bbox'] = bbox

    if kwargs:
        import owslib.fes as fes
        from owslib.etree import etree
        filter_components = [
            fes.PropertyIsLike(propertyname=k, literal=str(v))
            for k, v in kwargs.items()
        ]
        if len(filter_components) == 1:
            query_filter = filter_components[0]
        elif or_filter:
            query_filter = fes.Or(filter_components)
        else:
            query_filter = fes.And(filter_components)
        filterxml = etree.tostring(query_filter.toXML()).decode('utf-8')
        query_params['filter'] = '<Filter>%s</Filter>' % filterxml

    # If the request with a filter fails, retry without it and apply the
    # filter locally afterwards.
    filter_result = False
    try:
        results = self._client.getfeature(**query_params)
    except Exception:
        if 'filter' not in query_params:
            raise
        query_params.pop('filter')
        filter_result = True
        results = self._client.getfeature(**query_params)

    feature_text = results.read()
    if not isinstance(feature_text, str):
        feature_text = feature_text.decode('utf-8')

    # Write through a context manager so the file is flushed and closed
    # before OGR opens it.
    tmp_fn = 'tmp.gml'
    with open(tmp_fn, 'w') as tmp_file:
        tmp_file.write(feature_text)

    from osgeo import ogr
    ds = ogr.Open(tmp_fn)
    if ds is None:
        raise Exception('Unable to read WFS response from %s' % tmp_fn)
    # NOTE: the temporary file is left on disk, as before.
    layer = ds.GetLayer()
    features = [
        layer.GetNextFeature() for _ in range(layer.GetFeatureCount())
    ]
    if filter_result:
        features = self.filter_features(features, or_filter, kwargs)

    from shapely.wkt import loads
    for f in features:
        wkt = f.geometry().ExportToWkt()
        try:
            f.geom = loads(wkt)
        except Exception:
            # Retry with MULTISURFACE rewritten as MULTIPOLYGON when the
            # first parse fails.
            f.geom = loads(wkt.replace('MULTISURFACE', 'MULTIPOLYGON'))

    if dataframe:
        from geopandas import GeoDataFrame
        feature_dicts = [
            dict([('geometry', f.geom)] + [(p, f[p]) for p in f.keys()])
            for f in features
        ]
        return GeoDataFrame(feature_dicts)
    return features
# <codecell> # Standard Name filters cf_name_filters = [] for cf_name in variables_to_query: text_filter = fes.PropertyIsLike(propertyname='apiso:AnyText', literal="*%s*" % cf_name, wildCard='*') cf_name_filters.append(text_filter) cf_name_filters = fes.Or(cf_name_filters) # Geographic filters geographic_filter = fes.BBox(bbox=bounding_box) # Temporal filters temporal_filter = fes_date_filter(start_date_string, end_date_string) filters = fes.And([cf_name_filters, geographic_filter, temporal_filter]) # <markdowncell> # ##### The actual CSW filter POST envelope looks like this # <codecell> from owslib.etree import etree print etree.tostring(filters.toXML(), pretty_print=True) # <markdowncell> # ##### Filter out CSW servers that do not support a BBOX query # <codecell>
start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date) stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date) elif constraint == 'within': start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date) stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date) return fes.And([start, stop]) # <codecell> # Geographic filters geographic_filter = fes.BBox(bbox=bounding_box) # Temporal filters temporal_filter = fes_date_filter(start_date_string, end_date_string) filters = fes.And([geographic_filter, temporal_filter]) # <markdowncell> # ##### The actual CSW filter POST envelope looks like this # <codecell> from owslib.etree import etree print etree.tostring(filters.toXML(), pretty_print=True) # <markdowncell> # ##### Filter out CSW servers that do not support a BBOX query # <codecell>
# In[19]: val = 'coawst' filter1 = fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val), escapeChar='\\',wildCard='*',singleChar='?') filter_list = [ filter1 ] # In[20]: val = 'experimental' filter2 = fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val), escapeChar='\\',wildCard='*',singleChar='?') filter_list = [fes.And([filter1, filter2])] # In[21]: csw.getrecords2(constraints=filter_list,maxrecords=100,esn='full') print len(csw.records.keys()) for rec in list(csw.records.keys()): print csw.records[rec].title # In[22]: choice=np.random.choice(list(csw.records.keys())) print(csw.records[choice].title) csw.records[choice].references
else: raise NameError("Unrecognized constraint {}".format(constraint)) return begin, end kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText") or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names]) begin, end = fes_date_filter(start, stop) bbox_crs = fes.BBox(bbox, crs=crs) filter_list = [ fes.And( [ bbox_crs, # bounding box begin, end, # start and end date or_filt, # or conditions (CF variable names) ] ) ] from owslib.csw import CatalogueServiceWeb endpoint = "https://data.ioos.us/csw" csw = CatalogueServiceWeb(endpoint, timeout=60) We have created a `csw` object, but nothing has been searched yet. Below we create a `get_csw_records` function that calls the OWSLib method `getrecords2` iteratively to retrieve all the records matching the search criteria specified by the `filter_list`.
# Parse the date strings ('%Y-%m-%d %H:%M') into datetime objects.
jd_start = dt.datetime.strptime(start_date,'%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date,'%Y-%m-%d %H:%M')

print start_date,'to',stop_date

# <codecell>

# Build the individual filters; dateRange, val and box are defined outside
# this section.
start,stop = dateRange(start_date,stop_date)
filter1 = fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                        escapeChar='\\',wildCard='*',singleChar='?')
bbox = fes.BBox(box,crs='urn:ogc:def:crs:OGC:1.3:CRS84')
#filter_list = [fes.And([ bbox, filter1, start,stop]) ]

# <codecell>

# Match count with bounding box + text filter.
filter_list = [fes.And([ bbox, filter1]) ]
csw.getrecords2(constraints=filter_list)
csw.results['matches']

# <codecell>

# Match count with bounding box + text filter + date range.
filter_list = [fes.And([ bbox, filter1, start,stop]) ]
csw.getrecords2(constraints=filter_list)
csw.results['matches']

# <codecell>

# Match count with the text filter alone.
filter_list = [filter1]
csw.getrecords2(constraints=filter_list)
csw.results['matches']
# Substring match of service_type against apiso:ServiceType.
kw = dict(wildCard='*', escapeChar='\\', singleChar='?', propertyname='apiso:ServiceType')
serviceType = fes.PropertyIsLike(literal=('*%s*' % service_type), **kw)

begin, end = fes_date_filter(start, stop)
bbox_crs = fes.BBox(bbox, crs=crs)

# or_filt is defined outside this section.
filter_list = [
    fes.And([
        bbox_crs,  # bounding box
        begin, end,  # start and end date
        or_filt,  # or conditions (CF variable names)
        serviceType  # search only for datasets that have WMS services
    ])
]


# In[9]:

from owslib.csw import CatalogueServiceWeb

endpoint = 'https://data.ioos.us/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)

# The `csw` object created from `CatalogueServiceWeb` has not fetched anything yet.
# It is the method `getrecords2` that uses the filter for the search. However, even though there is a `maxrecords` option, the search is always limited on the server side, so we need to iterate over multiple calls of `getrecords2` to actually retrieve all records.
# <codecell> # convert User Input into FES filters start, stop = dateRange(start_date, stop_date) bbox = fes.BBox(box) any_text = fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), escapeChar='\\', wildCard='*', singleChar='?') # <codecell> # combine filters into a list filter_list = [fes.And([start, stop, bbox, any_text])] # <codecell> csw.getrecords2(constraints=filter_list, maxrecords=100, esn='full') len(csw.records.keys()) # <codecell> #scheme='urn:x-esri:specification:ServiceType:odp:url' scheme = 'OPeNDAP:OPeNDAP' urls = service_urls(csw.records, service_string=scheme) print "\n".join(urls) # <headingcell level=2>