def setUp(self):
    """Create the CSW and ISO identifiers used by this test case.

    Both identifiers are built from the OGC and ISO identifier configs and
    are handed a pre-built ``Parser``. ``identify()`` is not called here.
    """
    configs = [
        'lib/configs/ogc_identifier.yaml',
        'lib/configs/iso_identifier.yaml'
    ]

    # --- known CSW GetCapabilities response ---
    # NOTE(review): the URL says SERVICE=WCS although the fixture is a CSW
    # response -- confirm this is intended.
    csw_url = 'http://www.mapserver.com/cgi?SERVICE=WCS&VERSION=2.0.2&REQUEST=GETCAPABILITIES'
    with open('tests/test_data/cwic_csw_v2_0_2.xml', 'r') as csw_file:
        csw_content = csw_file.read()
    # removes the two-character sequence backslash + "n", not real newlines
    csw_content = csw_content.replace('\\n', '')
    self.csw_identifier = Identify(
        configs, csw_content, csw_url, parser=Parser(csw_content))

    # --- GeoNetwork ISO record with the mismatched-namespace issue ---
    iso_url = 'http://catalog.data.gov/harvest/object/d5de6dde-3042-4daf-b4ba-95e21e3ab343'
    with open('tests/test_data/geonetwork_iso_NOT_csw.xml', 'r') as iso_file:
        iso_content = iso_file.read()
    iso_content = iso_content.replace('\\n', '')
    self.iso_identifier = Identify(
        configs, iso_content, iso_url, parser=Parser(iso_content))
def setUp(self):
    """Run identification on a small inline OpenSearch document.

    The identified ``Identify`` instance, built from the simple identifier
    test config, is stored on ``self.identifier``.
    """
    opensearch_xml = '''<OpenSearch xmlns="http://a9.com/-/spec/opensearch/1.1/"> <element>OpenSearchDescription</element></OpenSearch>'''
    self.identifier = Identify(
        ['tests/test_data/simple_identifier_test.yaml'],
        opensearch_xml,
        'http://www.opensearch.com'
    )
    self.identifier.identify()
def setUp(self):
    """Identify the WMS exception fixture with the complex test config.

    The fixture text is passed to ``Identify`` exactly as read and no
    parser is supplied; ``identify()`` is invoked before the tests run.
    """
    with open('tests/test_data/wms_exception.xml', 'r') as fixture:
        response_text = fixture.read()

    self.identifier = Identify(
        ['tests/test_data/complex_identifier_test.yaml'],
        response_text,
        'http://www.mapserver.com/cgi?SERVICE=WMS&VERSION=1.3.0&REQUEST=GETCAPABILITIES'
    )
    self.identifier.identify()
def setUp(self):
    """Prepare an identifier for the Stellwagen THREDDS catalog fixture.

    Stores an ``Identify`` instance (THREDDS config, pre-built parser) on
    ``self.identifier``; ``identify()`` is left to the individual tests.
    """
    catalog_url = 'http://stellwagen.er.usgs.gov/thredds/catalog/TSdata/catalog.xml'
    with open('tests/test_data/mod_stellwagen.xml', 'r') as fixture:
        catalog_text = fixture.read()
    # removes the two-character sequence backslash + "n", not real newlines
    catalog_text = catalog_text.replace('\\n', '')

    self.identifier = Identify(
        ['lib/configs/thredds_identifier.yaml'],
        catalog_text,
        catalog_url,
        parser=Parser(catalog_text)
    )
def setUp(self):
    """Prepare an identifier for the WFS 1.1.0 capabilities fixture.

    Stores an ``Identify`` instance (complex test config, pre-built parser)
    on ``self.identifier``; ``identify()`` is left to the individual tests.
    """
    wfs_url = 'http://www.mapserver.com/cgi?SERVICE=WFS&VERSION=1.1.0&REQUEST=GETCAPABILITIES'
    with open('tests/test_data/wfs_v1_1_0.xml', 'r') as fixture:
        wfs_text = fixture.read()
    # removes the two-character sequence backslash + "n", not real newlines
    wfs_text = wfs_text.replace('\\n', '')

    self.identifier = Identify(
        ['tests/test_data/complex_identifier_test.yaml'],
        wfs_text,
        wfs_url,
        parser=Parser(wfs_text)
    )
def test_if_returning_iso_protocol_for_chunk(self):
    """The invalid_iso_chunk.xml fixture must not identify as ISO-19115.

    Builds an ``Identify`` from ``self.yaml_file`` with a pre-built parser,
    runs ``identify()`` and checks the resulting protocol.
    """
    with open('tests/test_data/invalid_iso_chunk.xml', 'r') as f:
        content = f.read()
    url = 'http://www.mapserver.com/some_iso'

    # removes the two-character sequence backslash + "n", not real newlines
    content = content.replace('\\n', '')
    parser = Parser(content)

    identifier = Identify([self.yaml_file], content, url, parser=parser)
    identifier.identify()

    # assertNotEqual (rather than assertFalse(a == b)) shows the offending
    # value in the failure message.
    self.assertNotEqual(identifier.protocol, 'ISO-19115')
def setUp(self):
    """Prepare an identifier for the ESRI WMS response fixture.

    Stores an ``Identify`` instance (complex test config, pre-built parser)
    on ``self.identifier``; ``identify()`` is left to the individual tests.
    """
    wms_url = 'http://www.mapserver.com/cgi?SERVICE=WMS&VERSION=1.3.0&REQUEST=GETCAPABILITIES'
    fixture_path = 'tests/test_data/esri_wms_35bd4e2ce8cd13e8697b03976ffe1ee6.txt'
    with open(fixture_path, 'r') as fixture:
        wms_text = fixture.read()
    # removes the two-character sequence backslash + "n", not real newlines
    wms_text = wms_text.replace('\\n', '')

    self.identifier = Identify(
        ['tests/test_data/complex_identifier_test.yaml'],
        wms_text,
        wms_url,
        parser=Parser(wms_text)
    )
def setUp(self):
    """Identify an inline THREDDS catalog with the combined-version config.

    A ``Parser`` for the same document is kept on ``self.parser``; it is
    not passed to ``Identify``. The identified instance is stored on
    ``self.identifier``.
    """
    catalog_xml = '''<catalog xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0 http://www.unidata.ucar.edu/schemas/thredds/InvCatalog.1.0.2.xsd" version="1.0.2" name="Actinic Flux measurements during OASIS Barrow field intensive Spring 2009"></catalog>'''

    self.parser = Parser(catalog_xml)
    self.identifier = Identify(
        ['tests/test_data/combined_version_identifier_test.yaml'],
        catalog_xml,
        'http://www.unidata.com/hyrax/thredds'
    )
    self.identifier.identify()
def test_if_returning_iso_protocol_for_mi(self):
    """The iso-19115_mi.xml fixture identifies as ISO-19115.

    Checked twice: once with ``self.yaml_file`` alone, and once with the
    ISO, OGC, OAI-PMH and RDF configs loaded together, to make sure the
    additional configs do not change the result.
    """
    with open('tests/test_data/iso-19115_mi.xml', 'r') as f:
        content = f.read()
    url = 'http://www.mapserver.com/some_iso'

    # removes the two-character sequence backslash + "n", not real newlines
    content = content.replace('\\n', '')
    parser = Parser(content)

    identifier = Identify([self.yaml_file], content, url, parser=parser)
    identifier.identify()
    # assertEqual (rather than assertTrue(a == b)) shows both values when
    # the check fails.
    self.assertEqual(identifier.protocol, 'ISO-19115')

    # and now make sure it's not csw or rdf or oai-pmh
    identifier = Identify(
        [
            'lib/configs/iso_identifier.yaml',
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/oaipmh_identifier.yaml',
            'lib/configs/rdf_identifier.yaml'
        ],
        content,
        url,
        parser=parser
    )
    identifier.identify()
    self.assertEqual(identifier.protocol, 'ISO-19115')
def test_if_returning_iso_protocol_for_ds(self):
    """The iso-19115_ds.xml fixture identifies as ISO-19115 DS.

    Also checks the reported version string and that the identifier flags
    the document as having metadata.
    """
    with open('tests/test_data/iso-19115_ds.xml', 'r') as f:
        content = f.read()
    url = 'http://www.mapserver.com/some_iso'

    # removes the two-character sequence backslash + "n", not real newlines
    content = content.replace('\\n', '')
    parser = Parser(content)

    identifier = Identify([self.yaml_file], content, url, parser=parser)
    identifier.identify()

    # Parenthesised form is valid on both Python 2 and Python 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(identifier.to_json())

    # assertEqual shows both values when the check fails.
    self.assertEqual(identifier.protocol, 'ISO-19115 DS')
    self.assertEqual(identifier.version, 'ISO19115 2003/Cor.1:2006')
    self.assertTrue(identifier.has_metadata)
def setUp(self):
    """Prepare an identifier for the data.gov RDF fixture.

    Stores an ``Identify`` instance with a pre-built parser on
    ``self.identifier``; ``identify()`` is left to the individual tests.
    """
    # All four identifier configs are loaded together here, not only
    # lib/configs/rdf_identifier.yaml.
    configs = [
        'lib/configs/iso_identifier.yaml',
        'lib/configs/ogc_identifier.yaml',
        'lib/configs/oaipmh_identifier.yaml',
        'lib/configs/rdf_identifier.yaml'
    ]
    rdf_url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
    fixture_path = 'tests/test_data/datagov_9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'

    with open(fixture_path, 'r') as fixture:
        rdf_text = fixture.read()
    # removes the two-character sequence backslash + "n", not real newlines
    rdf_text = rdf_text.replace('\\n', '')

    self.identifier = Identify(
        configs, rdf_text, rdf_url, parser=Parser(rdf_text))
def test_rdf_language(self):
    """The French RDF fixture identifies as RDF with language ``fr``.

    Runs ``identify()`` with the ISO, OGC, OAI-PMH and RDF configs loaded
    together and checks the resulting protocol and language.
    """
    with open(
            'tests/test_data/rdf_french_ed14b44e96042ad56c11cc0ca3768979.xml',
            'r') as f:
        content = f.read()
    url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'

    # removes the two-character sequence backslash + "n", not real newlines
    content = content.replace('\\n', '')
    parser = Parser(content)

    identifier = Identify(
        [
            'lib/configs/iso_identifier.yaml',
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/oaipmh_identifier.yaml',
            'lib/configs/rdf_identifier.yaml'
        ],
        content,
        url,
        parser=parser
    )
    identifier.identify()

    # Parenthesised form is valid on both Python 2 and Python 3; the bare
    # print statement is a SyntaxError on Python 3.
    print(identifier.to_json())

    # assertEqual shows both values when the check fails.
    self.assertEqual(identifier.protocol, 'RDF')
    self.assertEqual(identifier.language, 'fr')
raw_content = data['raw_content'] url = data['url'] rr = RawResponse(url.upper(), raw_content, digest, **{}) cleaned_text = rr.clean_raw_content() cleaned_text = cleaned_text.strip() try: parser = Parser(cleaned_text) except Exception as ex: logger.debug('xml parsing error: %s' % digest, exc_info=1) continue print digest identifier = Identify(YAML_FILE, cleaned_text, url, **{'parser': parser, 'ignore_case': True}) identifier.identify() protocol = identifier.protocol subtype = identifier.subtype service = identifier.service has_dataset = identifier.has_dataset has_metadata = identifier.has_metadata version = identifier.version is_error = identifier.is_error # if not protocol: # continue with open('testdata/second_harvest/priority_identification_all.csv', 'a') as f: f.write('|'.join([digest, url.replace(',', ';').replace('|', ';'), protocol, str(subtype), service, str(has_dataset), str(has_metadata),