class CSWTest(TestCase):
    """Checks the capabilities advertised by the registry's CSW endpoint."""

    def setUp(self):
        """Connect an OWSLib CSW client to the URL from REGISTRY_PYCSW."""
        endpoint_url = settings.REGISTRY_PYCSW['server']['url']
        self.csw = CatalogueServiceWeb(endpoint_url)

    def tearDown(self):
        """Delete all Service objects."""
        Service.objects.all().delete()

    def test_capabilities(self):
        """Check advertised URLs, CSW version and harvestable resource types."""
        # every method of every advertised operation must point at the
        # URL defined in settings
        for operation in self.csw.operations:
            for binding in operation.methods:
                self.assertEqual(
                    settings.REGISTRY_PYCSW['server']['url'],
                    binding['url'],
                    'Expected URL equality')

        # OGC:CSW 2.0.2 must be the negotiated version
        self.assertEqual(
            self.csw.version,
            '2.0.2',
            'Expected "2.0.2" as a supported version')

        # look up the transactional operations
        transaction = self.csw.get_operation_by_name('Transaction')
        harvest = self.csw.get_operation_by_name('Harvest')

        # each HHypermap service type must be listed by both operations
        harvestable_types = [
            'http://www.opengis.net/wms',
            'http://www.opengis.net/wmts/1.0',
            'urn:x-esri:serviceType:ArcGIS:MapServer',
            'urn:x-esri:serviceType:ArcGIS:ImageServer',
        ]
        for resource_type in harvestable_types:
            self.assertIn(
                resource_type,
                harvest.parameters['ResourceType']['values'])
            self.assertIn(
                resource_type,
                transaction.parameters['TransactionSchemas']['values'])
def test_csw_skgeodsy():
    """Check the operation list and a basic GetRecords query on SERVICE_URL."""
    expected_operations = [
        'DescribeRecord',
        'GetCapabilities',
        'GetRecordById',
        'GetRecords',
        'Transaction'
    ]

    c = CatalogueServiceWeb(SERVICE_URL)

    # the advertised operations, in alphabetical order
    advertised = sorted(op.name for op in c.operations)
    assert advertised == expected_operations

    grop = c.get_operation_by_name('GetRecords')
    assert grop.name == 'GetRecords'

    # query both Dublin Core and ISO records; every counter must be positive
    c.getrecords2(typenames='csw:Record gmd:MD_Metadata')
    for counter in ('returned', 'nextrecord', 'matches'):
        assert c.results.get(counter) > 0
import numpy as np


# In[2]:

# Catalogue endpoint to query; the alternatives are kept for quick switching.
#endpoint = 'http://geoport.whoi.edu/csw'
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
endpoint = 'http://catalog.data.gov/csw-all'
#endpoint = 'http://data.doi.gov/csw'

csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() with a single argument behaves the same on Python 2 and 3
print(csw.version)


# In[3]:

# Constraints advertised for GetRecords. Presumably displayed as cell output
# in the originating notebook; as a plain script this line prints nothing.
csw.get_operation_by_name('GetRecords').constraints


# In[4]:

# Probe GetDomain; any failure along the way is reported as "not supported".
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # Exception instead of a bare except, so that KeyboardInterrupt and
    # SystemExit still propagate.
    print('GetDomain not supported')


# In[5]:

val = 'laterite'
class TestCSW(LiveServerTestCase):
    """End-to-end test of the registry CSW endpoint on the live test server.

    ``setUp`` loads records by POSTing ``data/cswt_insert.xml`` to the
    endpoint; ``test_csw`` then queries them through CSW 2.0.2 (OWSLib client)
    and through the CSW 3.0.0 OpenSearch mode (Django test client).
    """

    def setUp(self):
        """Create a logged-in user, reset the data and insert the fixture."""
        self.script_name = '/registry/{}/csw'.format(catalog_test_slug)
        self.url = '{}{}'.format(self.live_server_url, self.script_name)
        self.username = '******'
        self.password = '******'
        self.client = Client()

        user = User.objects.create(username=self.username)
        user.set_password(self.password)
        user.save()
        self.client.login(username=self.username, password=self.password)

        # NOTE: this mutates the global settings dict and is not restored
        # afterwards.
        settings.REGISTRY_PYCSW['server']['url'] = self.url

        Catalog.objects.get_or_create(name=catalog_test_slug)

        Layer.objects.all().delete()
        Service.objects.all().delete()

        # print() with a single argument behaves the same on Python 2 and 3
        print("")
        print(">>> with env:")
        print("REGISTRY_SKIP_CELERY: %s" % settings.REGISTRY_SKIP_CELERY)
        print("REGISTRY_LIMIT_LAYERS: %s" % settings.REGISTRY_LIMIT_LAYERS)
        print("REGISTRY_CHECK_PERIOD: %s" % settings.REGISTRY_CHECK_PERIOD)
        print("REGISTRY_SEARCH_URL: %s" % settings.REGISTRY_SEARCH_URL)
        print("REGISTRY_HARVEST_SERVICES: %s" % settings.REGISTRY_HARVEST_SERVICES)
        print("")

        # Post the 10 Layers contained in this file: data/cswt_insert.xml
        path = os.path.join(settings.PROJECT_DIR, "..", "data", "cswt_insert.xml")
        with open(path, 'rb') as ff:
            payload = ff.read()
        content_type = "application/xml"

        res = self.client.post(self.url, data=payload, content_type=content_type)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(Layer.objects.all().count(), 10)

    def _opensearch_total(self, params):
        """GET the endpoint with ``params`` and return ``os:totalResults``."""
        res = json.loads(self.client.get(self.script_name, params).content)
        return res['atom:feed']['os:totalResults']

    def test_csw(self):
        """Query the inserted records via CSW 2.0.2 and 3.0.0 OpenSearch."""
        # test 2.0.2 Basic Service Profile
        self.csw = CatalogueServiceWeb(self.url, version='2.0.2',
                                       username=self.username,
                                       password=self.password)
        self.assertEqual(self.csw.version, '2.0.2')
        self.assertIn('2.0.2', self.csw.parameters['version'].values)
        self.assertIn('3.0.0', self.csw.parameters['version'].values)

        for op in self.csw.operations:
            for method in op.methods:
                self.assertEqual(self.csw.url, method['url'])

        # assertIn reports the actual operation list when the check fails
        operation_names = [o.name for o in self.csw.operations]
        self.assertIn('Transaction', operation_names)
        self.assertIn('Harvest', operation_names)

        get_records_op = self.csw.get_operation_by_name('GetRecords')
        self.assertIn('application/json',
                      get_records_op.parameters['outputFormat']['values'])

        # test basic search, no predicates
        self.csw.getrecords2()
        self.assertEqual(Layer.objects.all().count(), self.csw.results['matches'])

        # test csw:AnyText
        anytext = PropertyIsLike('csw:AnyText', 'Brasilia')
        self.csw.getrecords2(constraints=[anytext])
        self.assertEqual(self.csw.results['matches'], 1)

        anytext = PropertyIsLike('csw:AnyText', 'roads')
        self.csw.getrecords2(constraints=[anytext])
        self.assertEqual(self.csw.results['matches'], 4)

        # test ogc:BBOX
        bbox = BBox(['-13', '-80', '15', '-30'])
        self.csw.getrecords2(constraints=[bbox])
        self.assertEqual(self.csw.results['matches'], 2)

        # test csw:AnyText OR ogc:BBOX
        self.csw.getrecords2(constraints=[anytext, bbox])
        self.assertEqual(self.csw.results['matches'], 5)

        # test csw:AnyText && ogc:BBOX
        self.csw.getrecords2(constraints=[[anytext, bbox]])
        self.assertEqual(self.csw.results['matches'], 1)

        # test that ElementSetName=full stores full metadata record as inserted
        self.csw.getrecords2(esn='full')
        self.assertIn(
            'xmlns:registry="http://gis.harvard.edu/HHypermap/registry/0.1"',
            self.csw.response)

        # test JSON output
        # TODO: fix owslib.csw.CatalogueServiceWeb.getrecords2 to handle non-XML request/response
        # The client fails to parse the JSON body as XML, but the raw
        # response is still available on self.csw.response afterwards.
        with self.assertRaises(XMLSyntaxError):
            self.csw.getrecords2(constraints=[anytext, bbox],
                                 format='application/json')

        records_json = json.loads(self.csw.response)
        self.assertEqual(
            records_json['csw:GetRecordsResponse']['csw:SearchResults']
            ['@numberOfRecordsMatched'], '5')

        # test 3.0.0 OpenSearch
        bsp = {
            'mode': 'opensearch',
            'service': 'CSW',
            'version': '3.0.0',
            'request': 'GetRecords',
            'typenames': 'csw:Record',
            'elementsetname': 'full',
            'outputformat': 'application/json'
        }

        # test basic search, no predicates
        self.assertEqual(self._opensearch_total(bsp), '10')

        # test q
        bsp['q'] = 'Brasilia'
        self.assertEqual(self._opensearch_total(bsp), '1')
        bsp.pop('q')

        # test bbox
        bsp['bbox'] = '-80,-13,-30,15'
        self.assertEqual(self._opensearch_total(bsp), '2')
        bsp.pop('bbox')

        # test time
        bsp['time'] = '2014-09-23T12:04:31.102243+00:00/'
        self.assertEqual(self._opensearch_total(bsp), '10')
        bsp.pop('time')

        # test q and bbox
        bsp['q'] = 'roads'
        bsp['bbox'] = '-80,-13,-30,15'
        self.assertEqual(self._opensearch_total(bsp), '1')

        # test q and bbox and time
        bsp['time'] = '2014-09-23T12:04:31.102243+00:00/'
        self.assertEqual(self._opensearch_total(bsp), '1')

    @classmethod
    def tearDownClass(cls):
        """Run the parent teardown, then force-close database sessions."""
        # Workaround for https://code.djangoproject.com/ticket/22414
        # Persistent connections not closed by LiveServerTestCase, preventing dropping test databases
        # https://github.com/cjerdonek/django/commit/b07fbca02688a0f8eb159f0dde132e7498aa40cc
        def close_sessions(conn):
            close_sessions_query = """
                SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE
                    datname = current_database() AND
                    pid <> pg_backend_pid();
            """
            with conn.cursor() as cursor:
                try:
                    cursor.execute(close_sessions_query)
                except OperationalError:
                    # We get kicked out after closing.
                    pass

        try:
            # The parent implementation must run as well; the override
            # previously skipped it entirely.
            super(TestCSW, cls).tearDownClass()
        finally:
            # Still apply the workaround if the parent teardown raised.
            for alias in connections:
                connections[alias].close()
                close_sessions(connections[alias])

            print("Forcefully closed database connections.")
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import numpy as np


# In[2]:

# Catalogue endpoint to query; the alternatives are kept for quick switching.
endpoint = 'http://geoport.whoi.edu/csw'
#endpoint = 'http://catalog.data.gov/csw-all'
#endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'
#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'

csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() with a single argument behaves the same on Python 2 and 3
print(csw.version)


# In[3]:

# Constraints advertised for GetRecords. Presumably displayed as cell output
# in the originating notebook; as a plain script this line prints nothing.
csw.get_operation_by_name('GetRecords').constraints


# In[4]:

# Probe GetDomain; any failure along the way is reported as "not supported".
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # Exception instead of a bare except, so that KeyboardInterrupt and
    # SystemExit still propagate.
    print('GetDomain not supported')


# ## Query for all COAWST datasets

# In[5]:

val = 'COAWST'
def _parse_csw(context, repos, record, identifier, pagesize=10):
    """Harvest a remote CSW: describe the service, then parse its records.

    :param context: passed through to ``_set``, ``_parse_iso``, ``_parse_dc``
    :param repos: repository object; ``repos.dataset()`` creates the record
                  that describes the service itself
    :param record: URL of the CSW endpoint
    :param identifier: identifier assigned to the service record
    :param pagesize: maximum number of records requested per GetRecords call

    :returns: list holding the service record first, followed by one parsed
              record per harvested catalogue entry
    :raises RuntimeError: if the server rejects a GetRecords request; the
                          message is the raw server response
    """

    from owslib.csw import CatalogueServiceWeb

    recobjs = []  # records
    serviceobj = repos.dataset()

    # if init raises error, this might not be a CSW
    md = CatalogueServiceWeb(record)

    LOGGER.debug('Setting CSW service metadata')

    # generate record of service instance
    _set(context, serviceobj, 'pycsw:Identifier', identifier)
    _set(context, serviceobj, 'pycsw:Typename', 'csw:Record')
    _set(context, serviceobj, 'pycsw:Schema', 'http://www.opengis.net/cat/csw/2.0.2')
    _set(context, serviceobj, 'pycsw:MdSource', record)
    _set(context, serviceobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, serviceobj, 'pycsw:XML', md.response)
    _set(context, serviceobj, 'pycsw:AnyText', util.get_anytext(md._exml))
    _set(context, serviceobj, 'pycsw:Type', 'service')
    _set(context, serviceobj, 'pycsw:Title', md.identification.title)
    _set(context, serviceobj, 'pycsw:Abstract', md.identification.abstract)
    _set(context, serviceobj, 'pycsw:Keywords', ','.join(md.identification.keywords))
    _set(context, serviceobj, 'pycsw:Creator', md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:Publisher', md.provider.name)
    _set(context, serviceobj, 'pycsw:Contributor', md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:OrganizationName', md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:AccessConstraints', md.identification.accessconstraints)
    _set(context, serviceobj, 'pycsw:OtherConstraints', md.identification.fees)
    _set(context, serviceobj, 'pycsw:Source', record)
    _set(context, serviceobj, 'pycsw:Format', md.identification.type)
    _set(context, serviceobj, 'pycsw:ServiceType', md.identification.type)
    _set(context, serviceobj, 'pycsw:ServiceTypeVersion', md.identification.version)
    _set(context, serviceobj, 'pycsw:Operation', ','.join([d.name for d in md.operations]))
    _set(context, serviceobj, 'pycsw:CouplingType', 'tight')

    links = [
        '%s,OGC-CSW Catalogue Service for the Web,OGC:CSW,%s' % (identifier, md.url)
    ]

    _set(context, serviceobj, 'pycsw:Links', '^'.join(links))

    recobjs.append(serviceobj)

    # get all supported typenames of metadata
    # so we can harvest the entire CSW
    # try for ISO, settle for Dublin Core
    csw_typenames = 'csw:Record'
    csw_outputschema = 'http://www.opengis.net/cat/csw/2.0.2'

    grop = md.get_operation_by_name('GetRecords')
    if all(['gmd:MD_Metadata' in grop.parameters['typeNames']['values'],
            'http://www.isotc211.org/2005/gmd' in grop.parameters['outputSchema']['values']]):
        LOGGER.info('CSW supports ISO')
        csw_typenames = 'gmd:MD_Metadata'
        csw_outputschema = 'http://www.isotc211.org/2005/gmd'

    # now get all records
    # get total number of records to loop against
    try:
        md.getrecords2(typenames=csw_typenames, resulttype='hits',
                       outputschema=csw_outputschema)
        matches = md.results['matches']
    except Exception:
        # this is a CSW, but server rejects query
        # (Exception, not a bare except, so KeyboardInterrupt/SystemExit
        # are not turned into RuntimeError)
        raise RuntimeError(md.response)

    LOGGER.debug('Harvesting %d CSW records', matches)

    if matches < 1:
        # Empty catalogue: nothing to page through. Without this guard the
        # page size would be clamped to 0 and range() would reject the step.
        return recobjs

    if pagesize > matches:
        pagesize = matches

    # pick the record parser once; it does not change between pages
    if csw_typenames == 'gmd:MD_Metadata':
        parse_record = _parse_iso
    else:
        parse_record = _parse_dc

    # loop over all catalogue records incrementally
    # startposition is 1-based, so the stop value has to be matches + 1;
    # stopping at matches skips the last page whenever (matches - 1) is a
    # multiple of pagesize, e.g. a catalogue holding a single record.
    for r in range(1, matches + 1, pagesize):
        try:
            md.getrecords2(typenames=csw_typenames, startposition=r,
                           maxrecords=pagesize, outputschema=csw_outputschema)
        except Exception:
            # this is a CSW, but server rejects query
            raise RuntimeError(md.response)
        for v in md.records.values():
            recobjs.append(parse_record(context, repos, etree.fromstring(v.xml)))

    return recobjs
import numpy as np


# In[2]:

# Catalogue endpoint to query; the alternatives are kept for quick switching.
# endpoint = 'http://geoport.whoi.edu/csw'
# endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
endpoint = "http://catalog.data.gov/csw-all"
# endpoint = 'http://geonode.wfp.org/catalogue/csw'

csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() with a single argument behaves the same on Python 2 and 3
print(csw.version)


# In[3]:

# Constraints advertised for GetRecords. Presumably displayed as cell output
# in the originating notebook; as a plain script this line prints nothing.
csw.get_operation_by_name("GetRecords").constraints


# In[4]:

# Probe GetDomain; any failure along the way is reported as "not supported".
try:
    csw.get_operation_by_name("GetDomain")
    csw.getdomain("apiso:ServiceType", "property")
    print(csw.results["values"])
except Exception:
    # Exception instead of a bare except, so that KeyboardInterrupt and
    # SystemExit still propagate.
    print("GetDomain not supported")


# In[5]:

val = "COAWST"
recobjs = []  # records
pagesize = 10

# if init raises error, this might not be a CSW
csw = CatalogueServiceWeb(gogeo, timeout=60)
outPutDir = "/Users/murrayking/Documents/Gogeo2"

# get all supported typenames of metadata
# so we can harvest the entire CSW
# try for ISO, settle for Dublin Core
csw_typenames = 'csw:Record'
csw_outputschema = 'http://www.opengis.net/cat/csw/2.0.2'

grop = csw.get_operation_by_name('GetRecords')

# both checks are evaluated up front, as before
iso_typename_offered = (
    'gmd:MD_Metadata' in grop.parameters['typeNames']['values'])
iso_schema_offered = (
    'http://www.isotc211.org/2005/gmd'
    in grop.parameters['outputSchema']['values'])

if iso_typename_offered and iso_schema_offered:
    csw_typenames = 'gmd:MD_Metadata'
    csw_outputschema = 'http://www.isotc211.org/2005/gmd'

# now get all records
# get total number of records to loop against
try:
    csw.getrecords2(typenames=csw_typenames, resulttype='hits',
                    outputschema=csw_outputschema)
    matches = csw.results['matches']
except Exception:
    # this is a CSW, but server rejects query
    # (Exception, not a bare except, so KeyboardInterrupt/SystemExit are
    # not turned into RuntimeError)
    raise RuntimeError(csw.response)
def _parse_csw(context, repos, record, identifier, pagesize=10):
    """Harvest a remote CSW: describe the service, then parse its records.

    :param context: passed through to ``_set``, ``_parse_iso``, ``_parse_dc``
    :param repos: repository object; ``repos.dataset()`` creates the record
                  that describes the service itself
    :param record: URL of the CSW endpoint
    :param identifier: identifier assigned to the service record
    :param pagesize: maximum number of records requested per GetRecords call

    :returns: list holding the service record first, followed by one parsed
              record per harvested catalogue entry
    :raises RuntimeError: if the server rejects a GetRecords request; the
                          message is the raw server response
    """

    from owslib.csw import CatalogueServiceWeb

    recobjs = []  # records
    serviceobj = repos.dataset()

    # if init raises error, this might not be a CSW
    md = CatalogueServiceWeb(record)

    LOGGER.debug('Setting CSW service metadata')

    # generate record of service instance
    _set(context, serviceobj, 'pycsw:Identifier', identifier)
    _set(context, serviceobj, 'pycsw:Typename', 'csw:Record')
    _set(context, serviceobj, 'pycsw:Schema',
         'http://www.opengis.net/cat/csw/2.0.2')
    _set(context, serviceobj, 'pycsw:MdSource', record)
    _set(context, serviceobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, serviceobj, 'pycsw:XML', md.response)
    _set(context, serviceobj, 'pycsw:AnyText', util.get_anytext(md._exml))
    _set(context, serviceobj, 'pycsw:Type', 'service')
    _set(context, serviceobj, 'pycsw:Title', md.identification.title)
    _set(context, serviceobj, 'pycsw:Abstract', md.identification.abstract)
    _set(context, serviceobj, 'pycsw:Keywords',
         ','.join(md.identification.keywords))
    _set(context, serviceobj, 'pycsw:Creator', md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:Publisher', md.provider.name)
    _set(context, serviceobj, 'pycsw:Contributor', md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:OrganizationName',
         md.provider.contact.name)
    _set(context, serviceobj, 'pycsw:AccessConstraints',
         md.identification.accessconstraints)
    _set(context, serviceobj, 'pycsw:OtherConstraints', md.identification.fees)
    _set(context, serviceobj, 'pycsw:Source', record)
    _set(context, serviceobj, 'pycsw:Format', md.identification.type)
    _set(context, serviceobj, 'pycsw:ServiceType', md.identification.type)
    _set(context, serviceobj, 'pycsw:ServiceTypeVersion',
         md.identification.version)
    _set(context, serviceobj, 'pycsw:Operation',
         ','.join([d.name for d in md.operations]))
    _set(context, serviceobj, 'pycsw:CouplingType', 'tight')

    links = [
        '%s,OGC-CSW Catalogue Service for the Web,OGC:CSW,%s' % (identifier, md.url)
    ]

    _set(context, serviceobj, 'pycsw:Links', '^'.join(links))

    recobjs.append(serviceobj)

    # get all supported typenames of metadata
    # so we can harvest the entire CSW
    # try for ISO, settle for Dublin Core
    csw_typenames = 'csw:Record'
    csw_outputschema = 'http://www.opengis.net/cat/csw/2.0.2'

    grop = md.get_operation_by_name('GetRecords')
    if all([
            'gmd:MD_Metadata' in grop.parameters['typeNames']['values'],
            'http://www.isotc211.org/2005/gmd' in
            grop.parameters['outputSchema']['values']
    ]):
        LOGGER.info('CSW supports ISO')
        csw_typenames = 'gmd:MD_Metadata'
        csw_outputschema = 'http://www.isotc211.org/2005/gmd'

    # now get all records
    # get total number of records to loop against
    try:
        md.getrecords2(typenames=csw_typenames,
                       resulttype='hits',
                       outputschema=csw_outputschema)
        matches = md.results['matches']
    except Exception:
        # this is a CSW, but server rejects query
        # (Exception, not a bare except, so KeyboardInterrupt/SystemExit
        # are not turned into RuntimeError)
        raise RuntimeError(md.response)

    LOGGER.debug('Harvesting %d CSW records', matches)

    if matches < 1:
        # Empty catalogue: nothing to page through. Without this guard the
        # page size would be clamped to 0 and range() would reject the step.
        return recobjs

    if pagesize > matches:
        pagesize = matches

    # pick the record parser once; it does not change between pages
    if csw_typenames == 'gmd:MD_Metadata':
        parse_record = _parse_iso
    else:
        parse_record = _parse_dc

    # loop over all catalogue records incrementally
    # startposition is 1-based, so the stop value has to be matches + 1;
    # stopping at matches skips the last page whenever (matches - 1) is a
    # multiple of pagesize, e.g. a catalogue holding a single record.
    for r in range(1, matches + 1, pagesize):
        try:
            md.getrecords2(typenames=csw_typenames,
                           startposition=r,
                           maxrecords=pagesize,
                           outputschema=csw_outputschema)
        except Exception:
            # this is a CSW, but server rejects query
            raise RuntimeError(md.response)
        for v in md.records.values():
            recobjs.append(
                parse_record(context, repos, etree.fromstring(v.xml)))

    return recobjs
class TestCSW(LiveServerTestCase):
    """End-to-end test of the registry CSW endpoint on the live test server.

    ``setUp`` loads records by POSTing ``data/cswt_insert.xml`` to the
    endpoint; ``test_csw`` then queries them through CSW 2.0.2 (OWSLib client)
    and through the CSW 3.0.0 OpenSearch mode (Django test client).
    """

    def setUp(self):
        """Create a logged-in user, reset the data and insert the fixture."""
        self.script_name = '/registry/{}/csw'.format(catalog_test_slug)
        self.url = '{}{}'.format(self.live_server_url, self.script_name)
        self.username = '******'
        self.password = '******'
        self.client = Client()

        user = User.objects.create(username=self.username)
        user.set_password(self.password)
        user.save()
        self.client.login(username=self.username, password=self.password)

        # NOTE: this mutates the global settings dict and is not restored
        # afterwards.
        settings.REGISTRY_PYCSW['server']['url'] = self.url

        Catalog.objects.get_or_create(
            name=catalog_test_slug
        )

        Layer.objects.all().delete()
        Service.objects.all().delete()

        # print() with a single argument behaves the same on Python 2 and 3
        print("")
        print(">>> with env:")
        print("REGISTRY_SKIP_CELERY: %s" % settings.REGISTRY_SKIP_CELERY)
        print("REGISTRY_LIMIT_LAYERS: %s" % settings.REGISTRY_LIMIT_LAYERS)
        print("REGISTRY_SEARCH_URL: %s" % settings.REGISTRY_SEARCH_URL)
        print("REGISTRY_HARVEST_SERVICES: %s" % settings.REGISTRY_HARVEST_SERVICES)
        print("")

        # Post the 10 Layers contained in this file: data/cswt_insert.xml
        path = os.path.join(settings.PROJECT_DIR, "..", "data", "cswt_insert.xml")
        with open(path, 'rb') as ff:
            payload = ff.read()
        content_type = "application/xml"

        res = self.client.post(self.url, data=payload, content_type=content_type)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(Layer.objects.all().count(), 10)

    def _opensearch_total(self, params):
        """GET the endpoint with ``params`` and return ``os:totalResults``."""
        res = json.loads(self.client.get(self.script_name, params).content)
        return res['atom:feed']['os:totalResults']

    def test_csw(self):
        """Query the inserted records via CSW 2.0.2 and 3.0.0 OpenSearch."""
        # test 2.0.2 Basic Service Profile
        self.csw = CatalogueServiceWeb(self.url, version='2.0.2',
                                       username=self.username,
                                       password=self.password)
        self.assertEqual(self.csw.version, '2.0.2')
        self.assertIn('2.0.2', self.csw.parameters['version'].values)
        self.assertIn('3.0.0', self.csw.parameters['version'].values)

        for op in self.csw.operations:
            for method in op.methods:
                self.assertEqual(self.csw.url, method['url'])

        # assertIn reports the actual operation list when the check fails
        operation_names = [o.name for o in self.csw.operations]
        self.assertIn('Transaction', operation_names)
        self.assertIn('Harvest', operation_names)

        get_records_op = self.csw.get_operation_by_name('GetRecords')
        self.assertIn('application/json',
                      get_records_op.parameters['outputFormat']['values'])

        # test basic search, no predicates
        self.csw.getrecords2()
        self.assertEqual(Layer.objects.all().count(), self.csw.results['matches'])

        # test csw:AnyText
        anytext = PropertyIsLike('csw:AnyText', 'Brasilia')
        self.csw.getrecords2(constraints=[anytext])
        self.assertEqual(self.csw.results['matches'], 1)

        anytext = PropertyIsLike('csw:AnyText', 'roads')
        self.csw.getrecords2(constraints=[anytext])
        self.assertEqual(self.csw.results['matches'], 4)

        # test ogc:BBOX
        bbox = BBox(['-13', '-80', '15', '-30'])
        self.csw.getrecords2(constraints=[bbox])
        self.assertEqual(self.csw.results['matches'], 2)

        # test csw:AnyText OR ogc:BBOX
        self.csw.getrecords2(constraints=[anytext, bbox])
        self.assertEqual(self.csw.results['matches'], 5)

        # test csw:AnyText && ogc:BBOX
        self.csw.getrecords2(constraints=[[anytext, bbox]])
        self.assertEqual(self.csw.results['matches'], 1)

        # test that ElementSetName=full stores full metadata record as inserted
        self.csw.getrecords2(esn='full')
        self.assertIn('xmlns:registry="http://gis.harvard.edu/HHypermap/registry/0.1"', self.csw.response)

        # test JSON output
        # TODO: fix owslib.csw.CatalogueServiceWeb.getrecords2 to handle non-XML request/response
        # The client fails to parse the JSON body as XML, but the raw
        # response is still available on self.csw.response afterwards.
        with self.assertRaises(XMLSyntaxError):
            self.csw.getrecords2(constraints=[anytext, bbox],
                                 format='application/json')

        records_json = json.loads(self.csw.response)
        self.assertEqual(records_json['csw:GetRecordsResponse']['csw:SearchResults']['@numberOfRecordsMatched'], '5')

        # test 3.0.0 OpenSearch
        bsp = {
            'mode': 'opensearch',
            'service': 'CSW',
            'version': '3.0.0',
            'request': 'GetRecords',
            'typenames': 'csw:Record',
            'elementsetname': 'full',
            'outputformat': 'application/json'
        }

        # test basic search, no predicates
        self.assertEqual(self._opensearch_total(bsp), '10')

        # test q
        bsp['q'] = 'Brasilia'
        self.assertEqual(self._opensearch_total(bsp), '1')
        bsp.pop('q')

        # test bbox
        bsp['bbox'] = '-80,-13,-30,15'
        self.assertEqual(self._opensearch_total(bsp), '2')
        bsp.pop('bbox')

        # test time
        bsp['time'] = '2014-09-23T12:04:31.102243+00:00/'
        self.assertEqual(self._opensearch_total(bsp), '10')
        bsp.pop('time')

        # test q and bbox
        bsp['q'] = 'roads'
        bsp['bbox'] = '-80,-13,-30,15'
        self.assertEqual(self._opensearch_total(bsp), '1')

        # test q and bbox and time
        bsp['time'] = '2014-09-23T12:04:31.102243+00:00/'
        self.assertEqual(self._opensearch_total(bsp), '1')

    @classmethod
    def tearDownClass(cls):
        """Run the parent teardown, then force-close database sessions."""
        # Workaround for https://code.djangoproject.com/ticket/22414
        # Persistent connections not closed by LiveServerTestCase, preventing dropping test databases
        # https://github.com/cjerdonek/django/commit/b07fbca02688a0f8eb159f0dde132e7498aa40cc
        def close_sessions(conn):
            close_sessions_query = """
                SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE
                    datname = current_database() AND
                    pid <> pg_backend_pid();
            """
            with conn.cursor() as cursor:
                try:
                    cursor.execute(close_sessions_query)
                except OperationalError:
                    # We get kicked out after closing.
                    pass

        try:
            # The parent implementation must run as well; the override
            # previously skipped it entirely.
            super(TestCSW, cls).tearDownClass()
        finally:
            # Still apply the workaround if the parent teardown raised.
            for alias in connections:
                connections[alias].close()
                close_sessions(connections[alias])

            print("Forcefully closed database connections.")