def test_csw_ngdc():
    "rewritten doctest/cws_ngdc.txt"
    c = csw.CatalogueServiceWeb(SERVICE_URL, timeout=120)
    assert c.identification.title == 'ArcGIS Server Geoportal Extension 10 - OGC CSW 2.0.2 ISO AP'
    assert c.identification.version == '2.0.2'
    assert sorted(c.identification.keywords) == [
        'Geophysical Metadata',
        'NGDC',
        'Ocean Metadata',
        'Space Weather Metadata'
    ]
    assert c.provider.name == 'NOAA NGDC'

    # Get some records
    sos_urn = 'urn:x-esri:specification:ServiceType:sos:url'
    aoos_uuid = '1706F520-2647-4A33-B7BF-592FAFDE4B45'
    uuid_filter = fes.PropertyIsEqualTo(propertyname='sys.siteuuid',
                                        literal="{%s}" % aoos_uuid)
    c.getrecords2([uuid_filter], esn='full', maxrecords=999999)

    assert len(c.records) > 40
    assert 'AOOS SOS' in c.records

    aoos_sos = c.records['AOOS SOS']
    assert aoos_sos.abstract == 'Alaska Ocean Observing System SOS'
    assert sorted([
        x['url'] for x in aoos_sos.references if x['scheme'] == sos_urn
    ]) == [
        'http://sos.aoos.org/sos/sos/kvp?service=SOS&request=GetCapabilities&acceptVersions=1.0.0',
        'http://sos.aoos.org/sos/sos/kvp?service=SOS&request=GetCapabilities&acceptVersions=1.0.0',
        'http://sos.aoos.org/sos/sos/kvp?service=SOS&request=GetCapabilities&acceptVersions=1.0.0'
    ]
    assert c.getService_urls(sos_urn) == [
        'http://sos.aoos.org/sos/sos/kvp?service=SOS&request=GetCapabilities&acceptVersions=1.0.0'
    ]
def csw_query_metadata_by_id(csw_url, identifier, username=None, password=None):
    csw = CatalogueServiceWeb(csw_url, username=username, password=password)
    result = csw.identification.type
    record = None
    if result == 'CSW':
        constraints = [fes.PropertyIsEqualTo('dc:identifier', identifier)]
        csw.getrecords2(
            typenames='gmd:MD_Metadata',
            esn='full',
            outputschema='http://www.isotc211.org/2005/gmd',
            constraints=constraints)
        for key in csw.records:
            record = csw.records[key]
    return record
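# A minimal usage sketch for csw_query_metadata_by_id(); the endpoint URL and
# record identifier below are hypothetical placeholders, and CatalogueServiceWeb
# / fes are assumed to come from OWSLib as in the function above.
#
#     from owslib.csw import CatalogueServiceWeb
#     from owslib import fes
#
record = csw_query_metadata_by_id(
    'http://example.org/csw',                      # hypothetical CSW endpoint
    'a1b2c3d4-e5f6-7890-abcd-ef1234567890')        # hypothetical dc:identifier
if record is not None:
    print(record.title)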
def parseFilter(self, s):
    """Translate inspirobot filter syntax into fes constraints.

    For example:
    'OrganisationName = DREAL Bretagne && Type = dataset || OrganisationName ~ DDTM 29 && Type = dataset'
    """
    filters = []
    for f_or in [x.split('&&') for x in s.split('||')]:
        andgroup = []
        for f_and in f_or:
            if '=' in f_and:
                a = [part.strip() for part in f_and.split('=')]
                andgroup.append(fes.PropertyIsEqualTo(propertyname=a[0], literal=a[1]))
            elif '~' in f_and:
                a = [part.strip() for part in f_and.split('~')]
                andgroup.append(fes.PropertyIsLike(propertyname=a[0], literal=a[1]))
        filters.append(andgroup)
    return filters
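# A short sketch of how parseFilter() might be used, assuming an instance of the
# (unshown) owning class. The nested result follows OWSLib's OR-of-ANDs
# convention for constraint lists: [[a, b], [c, d]] reads as (a AND b) OR (c AND d).
expr = ('OrganisationName = DREAL Bretagne && Type = dataset || '
        'OrganisationName ~ DDTM 29 && Type = dataset')
# groups = inspirobot.parseFilter(expr)   # hypothetical instance
#
# One AND-group per '||' branch:
#   groups[0] -> [PropertyIsEqualTo('OrganisationName', 'DREAL Bretagne'),
#                 PropertyIsEqualTo('Type', 'dataset')]
#   groups[1] -> [PropertyIsLike('OrganisationName', 'DDTM 29'),
#                 PropertyIsEqualTo('Type', 'dataset')]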
def GetWFSLayerFilter(u, l, pwd, n, d, a, fe, s):
    start = datetime.now()
    idList = fe.split(",")
    chemin = d
    if not exists(chemin):
        filterList = [fes.PropertyIsEqualTo(a, i) for i in idList]
        fr = fes.FilterRequest()
        filter_fes = fr.setConstraintList(filterList, tostring=True)

        # Get the vector layer using OGC WFS standard version 1.0.0
        wfs = WebFeatureService(u, version='1.0.0', username=l, password=pwd, timeout=10)

        # Supported outputFormat: GML2, GML3, shape-zip, application/json
        getFeature = wfs.getfeature(typename=(n, ),
                                    filter=filter_fes,
                                    outputFormat="application/json",
                                    srsname=s)  # maxfeatures=200)

        # Download the zipped shapefile
        data = getFeature.read()
        f = open(chemin, 'wb')
        f.write(data)
        f.close()

        # Calculate elapsed time
        delta = datetime.now() - start
        print "\n{0} Downloaded in : {1}\n".format(n, delta)
    else:
        print "\n{0} exists\n".format(n)
    return
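# A minimal call sketch for GetWFSLayerFilter(); every value below is a
# hypothetical placeholder for the terse positional parameters
# (u=WFS url, l=login, pwd=password, n=typename, d=destination file,
#  a=filter attribute, fe=comma-separated ids, s=srsname).
GetWFSLayerFilter(
    u='http://example.org/geoserver/wfs',   # hypothetical WFS endpoint
    l='user', pwd='secret',                  # hypothetical credentials
    n='workspace:communes',                  # hypothetical layer typename
    d='/tmp/communes.json',                  # destination file
    a='code_insee',                           # attribute to filter on
    fe='29019,29232',                          # ids joined with commas
    s='EPSG:2154')                             # srsname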
def reindex_services():
    region_map = {
        'AOOS':     '1706F520-2647-4A33-B7BF-592FAFDE4B45',
        'CARICOOS': '117F1684-A5E3-400E-98D8-A270BDBA1603',
        'CENCOOS':  '4BA5624D-A61F-4C7E-BAEE-7F8BDDB8D9C4',
        'GCOOS':    '003747E7-4818-43CD-937D-44D5B8E2F4E9',
        'GLOS':     'B664427E-6953-4517-A874-78DDBBD3893E',
        'MARACOOS': 'C664F631-6E53-4108-B8DD-EFADF558E408',
        'NANOOS':   '254CCFC0-E408-4E13-BD62-87567E7586BB',
        'NERACOOS': 'E41F4FCD-0297-415D-AC53-967B970C3A3E',
        'PacIOOS':  '68FF11D8-D66B-45EE-B33A-21919BB26421',
        'SCCOOS':   'B70B3E3C-3851-4BA9-8E9B-C9F195DCEAC7',
        'SECOORA':  'B3EA8869-B726-4E39-898A-299E53ABBC98'}
        #'NOS/CO-OPS': '72E748DF-23B1-4E80-A2C4-81E70783094A',
        #'USACE':      '73019DFF-2E01-4800-91CD-0B3F812256A7',
        #'NAVY':       '3B94DAAE-B7E9-4789-993B-0045AD9149D9',
        #'NDBC':       '828981B0-0039-4360-9788-E788FA6B0875',
        #'USGS/CMGP':  'C6F11F00-C2BD-4AC6-8E2C-013E16F4932E'}

    services = {
        'SOS': 'urn:x-esri:specification:ServiceType:sos:url',
        'WMS': 'urn:x-esri:specification:ServiceType:wms:url',
        'WCS': 'urn:x-esri:specification:ServiceType:wcs:url',
        'DAP': 'urn:x-esri:specification:ServiceType:odp:url'
    }

    endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'  # NGDC Geoportal

    c = csw.CatalogueServiceWeb(endpoint, timeout=120)
    ns = Namespaces()

    with app.app_context():
        for region, uuid in region_map.iteritems():
            # Setup uuid filter
            uuid_filter = fes.PropertyIsEqualTo(propertyname='sys.siteuuid',
                                                literal="{%s}" % uuid)

            # Make CSW request
            c.getrecords2([uuid_filter], esn='full', maxrecords=999999)

            for name, record in c.records.iteritems():
                # @TODO: unfortunately CSW does not provide us with contact info, so
                # we must request it manually
                contact_email = ""
                metadata_url = None

                iso_ref = [x['url'] for x in record.references
                           if x['scheme'] == 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document']
                if len(iso_ref):
                    metadata_url = iso_ref[0]

                    # Don't query for contact info right now. It takes WAY too long.
                    #r = requests.get(iso_ref[0])
                    #r.raise_for_status()
                    #node = ET.fromstring(r.content)
                    #safe = nspath_eval("gmd:CI_ResponsibleParty/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString", ns.get_namespaces())
                    #contact_node = node.find(".//" + safe)
                    #if contact_node is not None and contact_node.text != "":
                    #    contact_email = contact_node.text
                    #    if " or " in contact_email:
                    #        contact_email = ",".join(contact_email.split(" or "))

                for ref in record.references:
                    # We are only interested in the 'services'
                    if ref["scheme"] in services.values():
                        url = unicode(ref["url"])
                        s = db.Service.find_one({'data_provider': unicode(region),
                                                 'url': url})
                        if s is None:
                            s = db.Service()
                            s.url = url
                            s.data_provider = unicode(region)

                        s.service_id = unicode(name)
                        s.name = unicode(record.title)
                        s.service_type = unicode(next((k for k, v in services.items()
                                                       if v == ref["scheme"])))
                        s.interval = 3600  # 1 hour
                        s.tld = unicode(urlparse(url).netloc)
                        s.updated = datetime.utcnow()
                        s.contact = unicode(contact_email)
                        s.metadata_url = unicode(metadata_url)
                        s.save()

                        s.schedule_harvest()
def show_add_layer_dialog(request, *args, **kwargs):
    if request.method == 'POST':
        csw_url = request.session['csw_url']
        user = request.session['user']
        password = request.session['password']
        layer_id = request.POST['layer_id']
        try:
            csw = CatalogueServiceWeb(csw_url, username=user, password=password)
            constraints = [fes.PropertyIsEqualTo('dc:identifier', layer_id)]
            result = csw.identification.type
            if result == 'CSW':
                csw.getrecords2(constraints=constraints)
                record = None
                for key in csw.records:
                    rec = csw.records[key]
                    for ref in rec.references:
                        if 'OGC:WCS' in ref['scheme']:
                            rec.type = 'WCS'
                            rec.endpoint = ref['url']
                            record = rec
                            break
                        if 'OGC:WFS' in ref['scheme']:
                            rec.type = 'WFS'
                            rec.endpoint = ref['url']
                            record = rec
                            break
                    if record:
                        break

                if record.type == 'WCS':
                    # get describe coverage
                    # find coverage id from references
                    coverage_id = None
                    version = None
                    for ref in record.references:
                        if 'service=WCS' in ref['url']:
                            url = ref['url']
                            parse_result = urlparse.urlparse(url)
                            query = parse_result.query
                            query_dict = urlparse.parse_qs(query)
                            coverage_id = query_dict['coverageid'][0]
                            version = query_dict['version'][0]
                            if coverage_id and version:
                                break
                    record.service_id = coverage_id
                    record.service_version = version
                elif record.type == 'WFS':
                    typename = None
                    version = None
                    for ref in record.references:
                        if 'service=WFS' in ref['url']:
                            url = ref['url']
                            parse_result = urlparse.urlparse(url)
                            query = parse_result.query
                            query_dict = urlparse.parse_qs(query)
                            typename = query_dict['typename'][0]
                            version = query_dict['version'][0]
                            if typename and version:
                                break
                    record.service_id = typename
                    record.service_version = version

                # wcs = WebCoverageService(record.endpoint)
                # result = wcs.getDescribeCoverage(coverage_id)
                context = {'record': record}
                return render(request, 'geosafe/metasearch/modal/add_layer.html', context)
        except:
            return HttpResponseServerError()
    return HttpResponseServerError()
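# A standalone sketch of the query-string parsing the view above relies on to
# pull the coverage id and version out of a WCS reference URL; the sample URL
# and coverage name are hypothetical.
try:
    from urllib.parse import urlparse, parse_qs    # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs        # Python 2, as in the view above

sample_ref = ('http://example.org/geoserver/wcs?service=WCS&version=2.0.1'
              '&request=DescribeCoverage&coverageid=ws__dem')
query_dict = parse_qs(urlparse(sample_ref).query)
coverage_id = query_dict['coverageid'][0]   # 'ws__dem'
version = query_dict['version'][0]          # '2.0.1'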
def get_data(self, typename, **kwargs):
    """
    Download WOUDC observations

    :param bbox: a list representing a bounding box spatial filter
                 (`minx, miny, maxx, maxy`)
    :param temporal: a list of two elements representing a time period
                     (start, end) which accepts the following types:

                     - :py:class:`datetime.date`
                     - :py:class:`datetime.datetime`
                     - string date (e.g. ``2012-10-30``)
                     - string datetime (e.g. ``2012-10-30 11:11:11``)

    :param property_name: a string representing the property name to apply
                          as filter against
    :param property_value: a string representing the value which filters
                           against `property_name`
    :param sort_property: a string representing the property on which to sort
                          results (default ``instance_datetime``)
    :param sort_descending: a boolean of whether to sort descending
                            (default is ``False``).
                            Applied if `sort_property` is specified

    :returns: list of WOUDC observations GeoJSON payload
    """

    constraints = []
    variables = []
    filter_string = None
    bbox = None
    temporal = None
    property_name = None
    property_value = None
    sort_property = None
    sort_descending = False
    startindex = 0
    output = []

    LOGGER.info('Downloading dataset %s', typename)

    LOGGER.debug('Assembling query parameters')
    for key, value in kwargs.iteritems():
        if key == 'bbox':
            bbox = value
        if key == 'temporal':
            temporal = value
        if key == 'property_name':
            property_name = value
        if key == 'property_value':
            property_value = str(value)
        if key == 'variables':
            variables = value
        if key == 'sortby':
            sort_property = value
        if key == 'sort_descending':
            sort_descending = value

    LOGGER.debug('Assembling constraints')
    if property_name is not None and property_value is not None:
        constraints.append(fes.PropertyIsEqualTo(property_name,
                                                 property_value))
    if bbox is not None:
        if not isinstance(bbox, list) or len(bbox) != 4:
            raise ValueError('bbox must be list of minx, miny, maxx, maxy')

        LOGGER.debug('Setting spatial constraint')
        constraints.append(fes.BBox(bbox))

    if temporal is not None:
        if not isinstance(temporal, list) or len(temporal) != 2:
            msg = 'temporal must be list of start date, end date'
            raise ValueError(msg)

        LOGGER.info('Setting temporal constraint')
        temporal_start = date2string(temporal[0], 'begin')
        temporal_end = date2string(temporal[1], 'end')

        constraints.append(fes.PropertyIsBetween(
            'instance_datetime', temporal_start, temporal_end))

    if sort_descending is not None:
        if not isinstance(sort_descending, bool):
            raise ValueError('sort_descending must be boolean')

    if constraints:
        LOGGER.debug('Combining constraints')
        flt = fes.FilterRequest()
        if len(constraints) == 1:
            LOGGER.debug('Single constraint')
            filter_string = flt.setConstraint(constraints[0], tostring=True)
        if len(constraints) > 1:
            LOGGER.debug('Multiple constraints')
            filter_string = flt.setConstraintList([constraints],
                                                  tostring=True)

    LOGGER.info('Fetching observations')
    LOGGER.info('Filters:')
    LOGGER.info('bbox: %r', bbox)
    LOGGER.info('temporal: %r', temporal)
    LOGGER.info('attribute query: %r = %r', property_name, property_value)

    # page download and assemble single list of JSON features
    while True:
        LOGGER.debug('Fetching features %d - %d',
                     startindex, startindex + self.maxfeatures)

        payload = self.server.getfeature(
            typename=typename,
            startindex=startindex,
            propertyname=variables,
            maxfeatures=self.maxfeatures,
            filter=filter_string,
            outputFormat=self.outputformat).read()

        LOGGER.debug('Processing response')
        if payload.isspace():
            LOGGER.debug('Empty response. Exiting')
            break

        try:
            features = json.loads(payload)['features']
        except ValueError:
            msg = 'Query produced no results'
            LOGGER.info(msg)
            return None

        len_features = len(features)
        LOGGER.debug('Found %d features', len_features)

        output.extend(features)

        if len_features < self.maxfeatures:
            break

        startindex = startindex + self.maxfeatures

    LOGGER.info('Found %d features', len(output))

    if sort_property is not None:
        LOGGER.info('Sorting response by %s', sort_property)
        output.sort(key=lambda e: e['properties'][sort_property],
                    reverse=sort_descending)

    return output
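# A usage sketch for get_data(), assuming `client` is an instance of the class
# the method above belongs to (not shown in this snippet); the typename and
# attribute filter values are illustrative only.
data = client.get_data(
    'totalozone',                              # hypothetical typename
    bbox=[-142, 42, -53, 84],                  # minx, miny, maxx, maxy
    temporal=['2004-01-01', '2004-12-31'],     # start, end
    property_name='platform_name',             # hypothetical property
    property_value='Resolute',
    sortby='instance_datetime',
    sort_descending=True)
if data is not None:
    print('%d observations' % len(data))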
def reindex_services(filter_regions=None, filter_service_types=None):
    c = csw.CatalogueServiceWeb(endpoint, timeout=120)
    ns = Namespaces()

    filter_regions = filter_regions or region_map.keys()
    filter_service_types = filter_service_types or services.keys()

    with app.app_context():
        new_services = []
        update_services = []

        # get a set of all non-manual, active services for possible deactivation later
        current_services = set((s._id for s in db.Service.find(
            {'manual': False,
             'active': True,
             'data_provider': {'$in': filter_regions}},
            {'_id': True})))

        # FIXME: find a more robust mechanism for detecting ERDDAP instances
        # this would fail if behind a url rewriting/proxying mechanism which
        # remove the 'erddap' portion from the URL.  May want to have GeoPortal
        # use a separate 'scheme' dedicated to ERDDAP for CSW record
        # 'references'

        # workaround for matching ERDDAP endpoints
        # match griddap or tabledap endpoints with html or graph
        # discarding any query string parameters (i.e. some datasets on PacIOOS)
        re_string = r'(^.*erddap/(?:grid|table)dap.*)\.(?:html|graph)(:?\?.*)?$'
        erddap_re = re.compile(re_string)
        erddap_all_re = re.compile(r'(^.*erddap/(?:(?:grid|table|)dap|wms).*)'
                                   r'\.(?:html|graph)(:?\?.*)?$')

        for region, uuid in region_map.iteritems():
            if region not in filter_regions:
                app.logger.info("Skipping region %s due to filter", region)
                continue

            app.logger.info("Requesting region %s", region)

            # Setup uuid filter
            uuid_filter = fes.PropertyIsEqualTo(propertyname='sys.siteuuid',
                                                literal="{%s}" % uuid)

            # Make CSW request
            c.getrecords2([uuid_filter], esn='full', maxrecords=999999)

            for name, record in c.records.iteritems():
                try:
                    # @TODO: unfortunately CSW does not provide us with contact info, so
                    # we must request it manually
                    contact_email = ""
                    metadata_url = None

                    for ref in record.references:
                        try:
                            # TODO: Use a more robust mechanism for detecting
                            # ERDDAP instances aside from relying on the url
                            erddap_match = erddap_re.search(ref['url'])

                            # We are only interested in the 'services'
                            if ref["scheme"] in services.values():
                                metadata_url = next((
                                    r['url'] for r in record.references
                                    if r['scheme'] == 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document'
                                ), None)
                                # strip extension if erddap endpoint
                                url = unicode(ref['url'])
                            elif erddap_match:
                                test_url = (erddap_match.group(1) + '.iso19115')
                                req = requests.get(test_url)
                                # if we have a valid ERDDAP metadata endpoint,
                                # store it.
                                if req.status_code == 200:
                                    metadata_url = unicode(test_url)
                                else:
                                    app.logger.error('Invalid service URL %s', ref['url'])
                                    continue

                                url = get_erddap_url_from_iso(req.content)
                                if url is None:
                                    app.logger.error(ref['url'])
                                    app.logger.error("Failed to parse Erddap ISO for %s",
                                                     test_url)
                                    continue

                            # Either not a valid ISO or there's not a valid endpoint
                            # next record if not one of the previously mentioned
                            else:
                                continue
                            # end metadata find block

                            s = db.Service.find_one({'data_provider': unicode(region),
                                                     'url': url})
                            if s is None:
                                s = db.Service()
                                s.url = unicode(url)
                                s.data_provider = unicode(region)
                                s.manual = False
                                s.active = True
                                new_services.append(s)
                            else:
                                # will run twice if erddap services have
                                # both .html and .graph, but resultant
                                # data should be the same
                                update_services.append(s)

                            s.service_id = unicode(name)
                            s.name = unicode(record.title)
                            s.service_type = unicode('DAP' if erddap_match else
                                                     next((k for k, v in services.items()
                                                           if v == ref["scheme"])))
                            s.interval = 3600  # 1 hour
                            s.tld = unicode(urlparse(url).netloc)
                            s.updated = datetime.utcnow()
                            s.contact = unicode(contact_email)
                            s.metadata_url = metadata_url

                            # grab opendap form url if present
                            if s.service_type == 'DAP':
                                possible_refs = [r['url'] for r in record.references
                                                 if r['scheme'] == opendap_form_schema]
                                if len(possible_refs):
                                    # this is bad, it can grab any associated
                                    # record from the dataset
                                    s.extra_url = unicode(possible_refs[0])

                            # if we see the service, this is "Active", unless we've set manual (then we don't touch)
                            if not s.manual:
                                s.active = True

                            s.save()

                        except Exception as e:
                            app.logger.warn("Could not save service: %s", e)

                except Exception as e:
                    app.logger.warn("Could not save region info: %s", e)

        # DEACTIVATE KNOWN SERVICES
        updated_ids = set((s._id for s in update_services))
        deactivate = list(current_services.difference(updated_ids))

        # bulk update (using pymongo syntax)
        db.services.update({'_id': {'$in': deactivate}},
                           {'$set': {'active': False,
                                     'updated': datetime.utcnow()}},
                           multi=True,
                           upsert=False)

    return "New services: %s, updated services: %s, deactivated services: %s" % (
        len(new_services), len(update_services), len(deactivate))
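# A small illustration of what the ERDDAP workaround regex above captures:
# group(1) is the endpoint with the '.html'/'.graph' suffix (and any trailing
# query string) stripped, so '.iso19115' can be appended to probe for metadata.
# The sample URL is hypothetical.
import re

erddap_re = re.compile(
    r'(^.*erddap/(?:grid|table)dap.*)\.(?:html|graph)(:?\?.*)?$')

sample = 'http://example.org/erddap/tabledap/some_dataset.html?station="ABC"'
match = erddap_re.search(sample)
if match:
    print(match.group(1) + '.iso19115')
    # -> http://example.org/erddap/tabledap/some_dataset.iso19115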
def get_data(self, typename, **kwargs):
    """
    Download WOUDC observations

    :param bbox: a list representing a bounding box spatial filter
                 (`minx, miny, maxx, maxy`)
    :param temporal: a list of two elements representing a time period
                     (start, end) which accepts the following types:

                     - :py:class:`datetime.date`
                     - :py:class:`datetime.datetime`
                     - string date (e.g. ``2012-10-30``)
                     - string datetime (e.g. ``2012-10-30 11:11:11``)

    :param filters: `dict` of key-value pairs of property names and values.
                    Constructs exclusive search
    :param variables: a list of variables to return as part of the response
                      (default returns all)
    :param sort_property: a string representing the property on which to sort
                          results (default ``instance_datetime``)
    :param sort_order: a string representing sort order of response
                       (``asc`` or ``desc``).  Default is ``asc``.
                       Applied if `sort_property` is specified

    :returns: list of WOUDC observations GeoJSON payload
    """

    constraints = []
    filters = []
    variables = '*'
    filter_string = None
    bbox = None
    temporal = None
    sort_property = None
    sort_order = 'asc'
    startindex = 0
    features = None
    feature_collection = None
    sort_descending = False

    LOGGER.info('Downloading dataset %s', typename)

    LOGGER.debug('Assembling query parameters')
    for key, value in kwargs.items():
        if key == 'bbox':
            bbox = value
        if key == 'temporal':
            temporal = value
        if key == 'filters':
            filters = value
        if key == 'variables':
            variables = value
        if key == 'sortby':
            sort_property = value
        if key == 'sort_order':
            sort_order = value

    LOGGER.debug('Assembling constraints')
    if filters:
        for key, value in filters.items():
            constraints.append(fes.PropertyIsEqualTo(key, value))

    if bbox is not None:
        if not isinstance(bbox, list) or len(bbox) != 4:
            raise ValueError('bbox must be list of minx, miny, maxx, maxy')

        LOGGER.debug('Setting spatial constraint')
        constraints.append(fes.BBox(bbox))

    if temporal is not None:
        if not isinstance(temporal, list) or len(temporal) != 2:
            msg = 'temporal must be list of start date, end date'
            raise ValueError(msg)

        LOGGER.info('Setting temporal constraint')
        temporal_start = date2string(temporal[0], 'begin')
        temporal_end = date2string(temporal[1], 'end')

        constraints.append(fes.PropertyIsBetween(
            'instance_datetime', temporal_start, temporal_end))

    if sort_order not in ['asc', 'desc']:
        raise ValueError('sort_order must be asc or desc')
    else:
        if sort_order == 'desc':
            sort_descending = True

    if variables != '*':
        if not isinstance(variables, list):
            raise ValueError('variables must be list')

    if constraints:
        LOGGER.debug('Combining constraints')
        flt = fes.FilterRequest()
        if len(constraints) == 1:
            LOGGER.debug('Single constraint')
            filter_string = flt.setConstraint(constraints[0], tostring=True)
        if len(constraints) > 1:
            LOGGER.debug('Multiple constraints')
            filter_string = flt.setConstraintList([constraints],
                                                  tostring=True)

    LOGGER.info('Fetching observations')
    LOGGER.info('Filters:')
    LOGGER.info('bbox: %r', bbox)
    LOGGER.info('temporal: %r', temporal)
    LOGGER.info('attribute queries: %r', filters)

    # page download and assemble single list of JSON features
    while True:
        LOGGER.debug('Fetching features %d - %d',
                     startindex, startindex + self.maxfeatures)

        payload = self.server.getfeature(
            typename=typename,
            startindex=startindex,
            propertyname=variables,
            maxfeatures=self.maxfeatures,
            filter=filter_string,
            outputFormat=self.outputformat).read()

        LOGGER.debug('Processing response')
        if payload.isspace():
            LOGGER.debug('Empty response. Exiting')
            break

        try:
            features = json.loads(payload)
        except ValueError:
            msg = 'Query produced no results'
            LOGGER.info(msg)
            return None

        len_features = len(features['features'])
        LOGGER.debug('Found %d features', len_features)

        if feature_collection is None:
            feature_collection = features
        else:
            feature_collection['features'].extend(features['features'])

        if len_features < self.maxfeatures:
            break

        startindex = startindex + self.maxfeatures

    len_feature_collection = len(feature_collection['features'])
    LOGGER.info('Found %d total features', len_feature_collection)

    if sort_property is not None:
        LOGGER.info('Sorting response by %s', sort_property)
        feature_collection['features'].sort(
            key=lambda e: e['properties'][sort_property],
            reverse=sort_descending)

    return feature_collection
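# A usage sketch for the revised get_data() signature, again assuming `client`
# is an instance of the owning class; the typename, filter, and variable names
# are illustrative placeholders.
feature_collection = client.get_data(
    'totalozone',                                    # hypothetical typename
    bbox=[-142, 42, -53, 84],
    temporal=['2004-01-01', '2004-12-31'],
    filters={'platform_name': 'Resolute'},           # exclusive (ANDed) filters
    variables=['instance_datetime', 'daily_total'],  # hypothetical properties
    sortby='instance_datetime',
    sort_order='desc')
if feature_collection is not None:
    print('%d features' % len(feature_collection['features']))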