def setUp(self):
        """Build the CSW and ISO identifiers used by the tests.

        Both identifiers share the OGC and ISO configuration files and each
        receives a parser built from its own fixture text. ``identify()`` is
        not called here.
        """
        config_paths = [
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/iso_identifier.yaml'
        ]

        def load_fixture(path):
            # Read the fixture, then drop every literal backslash + "n"
            # two-character sequence from the text.
            with open(path, 'r') as handle:
                raw_text = handle.read()
            return raw_text.replace('\\n', '')

        # set up for the known csw getcapabilities
        csw_url = 'http://www.mapserver.com/cgi?SERVICE=WCS&VERSION=2.0.2&REQUEST=GETCAPABILITIES'
        csw_text = load_fixture('tests/test_data/cwic_csw_v2_0_2.xml')
        self.csw_identifier = Identify(
            config_paths, csw_text, csw_url, parser=Parser(csw_text))

        # set up for the geonetwork mismatched namespacing iso issue
        iso_url = 'http://catalog.data.gov/harvest/object/d5de6dde-3042-4daf-b4ba-95e21e3ab343'
        iso_text = load_fixture('tests/test_data/geonetwork_iso_NOT_csw.xml')
        self.iso_identifier = Identify(
            config_paths, iso_text, iso_url, parser=Parser(iso_text))
    def setUp(self):
        """Identify a small inline OpenSearch document with the simple test config."""
        source_url = 'http://www.opensearch.com'
        document = '''<OpenSearch xmlns="http://a9.com/-/spec/opensearch/1.1/">
                        <element>OpenSearchDescription</element></OpenSearch>'''
        config_paths = ['tests/test_data/simple_identifier_test.yaml']

        # No parser is supplied here; Identify only receives the raw text.
        self.identifier = Identify(config_paths, document, source_url)
        self.identifier.identify()
    def setUp(self):
        """Identify the WMS exception fixture with the complex test config."""
        source_url = 'http://www.mapserver.com/cgi?SERVICE=WMS&VERSION=1.3.0&REQUEST=GETCAPABILITIES'
        config_paths = ['tests/test_data/complex_identifier_test.yaml']

        # The fixture text is passed through untouched and without a parser.
        with open('tests/test_data/wms_exception.xml', 'r') as handle:
            response_text = handle.read()

        self.identifier = Identify(config_paths, response_text, source_url)
        self.identifier.identify()
    def setUp(self):
        """Build (without running) an identifier for the THREDDS catalog fixture."""
        source_url = 'http://stellwagen.er.usgs.gov/thredds/catalog/TSdata/catalog.xml'
        config_paths = ['lib/configs/thredds_identifier.yaml']

        with open('tests/test_data/mod_stellwagen.xml', 'r') as handle:
            catalog_text = handle.read()

        # Drop literal backslash + "n" sequences before parsing.
        catalog_text = catalog_text.replace('\\n', '')

        self.identifier = Identify(
            config_paths, catalog_text, source_url,
            parser=Parser(catalog_text))
    def setUp(self):
        """Build (without running) an identifier for the WFS 1.1.0 fixture."""
        source_url = 'http://www.mapserver.com/cgi?SERVICE=WFS&VERSION=1.1.0&REQUEST=GETCAPABILITIES'
        config_paths = ['tests/test_data/complex_identifier_test.yaml']

        with open('tests/test_data/wfs_v1_1_0.xml', 'r') as handle:
            capabilities_text = handle.read()

        # Drop literal backslash + "n" sequences before parsing.
        capabilities_text = capabilities_text.replace('\\n', '')

        self.identifier = Identify(
            config_paths, capabilities_text, source_url,
            parser=Parser(capabilities_text))
    def test_if_returning_iso_protocol_for_chunk(self):
        """The ``invalid_iso_chunk.xml`` fixture must not come back as ISO-19115.

        Reads ``self.yaml_file``, which must be set on the test case outside
        this method.
        """
        with open('tests/test_data/invalid_iso_chunk.xml', 'r') as f:
            content = f.read()
        url = 'http://www.mapserver.com/some_iso'

        # Drop literal backslash + "n" sequences before parsing.
        content = content.replace('\\n', '')
        parser = Parser(content)

        identifier = Identify([self.yaml_file], content, url, parser=parser)
        identifier.identify()

        # assertNotEqual shows the offending value when the check fails,
        # unlike assertFalse(a == b), which only reports "True is not false".
        self.assertNotEqual(identifier.protocol, 'ISO-19115')
    def setUp(self):
        """Build (without running) an identifier for the ESRI WMS fixture."""
        source_url = 'http://www.mapserver.com/cgi?SERVICE=WMS&VERSION=1.3.0&REQUEST=GETCAPABILITIES'
        config_paths = ['tests/test_data/complex_identifier_test.yaml']
        fixture_path = 'tests/test_data/esri_wms_35bd4e2ce8cd13e8697b03976ffe1ee6.txt'

        with open(fixture_path, 'r') as handle:
            response_text = handle.read()

        # Drop literal backslash + "n" sequences before parsing.
        response_text = response_text.replace('\\n', '')

        self.identifier = Identify(
            config_paths, response_text, source_url,
            parser=Parser(response_text))
    def setUp(self):
        """Identify an inline THREDDS catalog with the combined-version config.

        A parser for the same text is kept on ``self.parser``; it is not
        passed to ``Identify``.
        """
        source_url = 'http://www.unidata.com/hyrax/thredds'
        config_paths = ['tests/test_data/combined_version_identifier_test.yaml']

        catalog_xml = '''<catalog xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0"
    xmlns:xlink="http://www.w3.org/1999/xlink"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0 http://www.unidata.ucar.edu/schemas/thredds/InvCatalog.1.0.2.xsd"
    version="1.0.2" name="Actinic Flux measurements during OASIS Barrow field intensive Spring 2009"></catalog>'''

        self.parser = Parser(catalog_xml)

        self.identifier = Identify(config_paths, catalog_xml, source_url)
        self.identifier.identify()
    def test_if_returning_iso_protocol_for_mi(self):
        """The ``iso-19115_mi.xml`` fixture must be identified as ISO-19115.

        The check runs twice: once with ``self.yaml_file`` (set on the test
        case outside this method) and once with the ISO, OGC, OAI-PMH and RDF
        configurations together.
        """
        with open('tests/test_data/iso-19115_mi.xml', 'r') as f:
            content = f.read()
        url = 'http://www.mapserver.com/some_iso'

        # Drop literal backslash + "n" sequences before parsing.
        content = content.replace('\\n', '')
        parser = Parser(content)

        identifier = Identify([self.yaml_file], content, url, parser=parser)
        identifier.identify()

        # assertEqual prints both values when they differ.
        self.assertEqual(identifier.protocol, 'ISO-19115')

        # and now make sure it's not csw or rdf or oai-pmh
        identifier = Identify([
            'lib/configs/iso_identifier.yaml',
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/oaipmh_identifier.yaml',
            'lib/configs/rdf_identifier.yaml'
        ], content, url, parser=parser)
        identifier.identify()

        self.assertEqual(identifier.protocol, 'ISO-19115')
    def test_if_returning_iso_protocol_for_ds(self):
        """Check protocol, version and metadata flag for ``iso-19115_ds.xml``.

        Reads ``self.yaml_file``, which must be set on the test case outside
        this method.
        """
        with open('tests/test_data/iso-19115_ds.xml', 'r') as f:
            content = f.read()
        url = 'http://www.mapserver.com/some_iso'

        # Drop literal backslash + "n" sequences before parsing.
        content = content.replace('\\n', '')
        parser = Parser(content)

        identifier = Identify([self.yaml_file], content, url, parser=parser)
        identifier.identify()

        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and is valid as the Python 3 print function.
        print(identifier.to_json())

        # assertEqual prints both values when they differ.
        self.assertEqual(identifier.protocol, 'ISO-19115 DS')
        self.assertEqual(identifier.version, 'ISO19115 2003/Cor.1:2006')
        self.assertTrue(identifier.has_metadata)
    def setUp(self):
        """Build (without running) an identifier for the data.gov RDF fixture.

        The ISO, OGC, OAI-PMH and RDF configurations are all supplied.
        """
        source_url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
        fixture_path = 'tests/test_data/datagov_9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
        config_paths = [
            'lib/configs/iso_identifier.yaml',
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/oaipmh_identifier.yaml',
            'lib/configs/rdf_identifier.yaml'
        ]

        with open(fixture_path, 'r') as handle:
            rdf_text = handle.read()

        # Drop literal backslash + "n" sequences before parsing.
        rdf_text = rdf_text.replace('\\n', '')

        self.identifier = Identify(
            config_paths, rdf_text, source_url, parser=Parser(rdf_text))
    def test_rdf_language(self):
        """The French RDF fixture must be identified as RDF with language ``fr``."""
        with open(
                'tests/test_data/rdf_french_ed14b44e96042ad56c11cc0ca3768979.xml',
                'r') as f:
            content = f.read()
        url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'

        # Drop literal backslash + "n" sequences before parsing.
        content = content.replace('\\n', '')
        parser = Parser(content)

        identifier = Identify([
            'lib/configs/iso_identifier.yaml',
            'lib/configs/ogc_identifier.yaml',
            'lib/configs/oaipmh_identifier.yaml',
            'lib/configs/rdf_identifier.yaml'
        ], content, url, parser=parser)
        identifier.identify()

        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and is valid as the Python 3 print function.
        print(identifier.to_json())

        # assertEqual prints both values when they differ.
        self.assertEqual(identifier.protocol, 'RDF')
        self.assertEqual(identifier.language, 'fr')
# Example no. 13
# 0
    raw_content = data['raw_content']
    url = data['url']

    rr = RawResponse(url.upper(), raw_content, digest, **{})
    cleaned_text = rr.clean_raw_content()
    cleaned_text = cleaned_text.strip()

    try:
        parser = Parser(cleaned_text)
    except Exception as ex:
        logger.debug('xml parsing error: %s' % digest, exc_info=1)
        continue

    print digest

    identifier = Identify(YAML_FILE, cleaned_text, url, **{'parser': parser, 'ignore_case': True})
    identifier.identify()
    protocol = identifier.protocol
    subtype = identifier.subtype
    service = identifier.service
    has_dataset = identifier.has_dataset
    has_metadata = identifier.has_metadata
    version = identifier.version
    is_error = identifier.is_error

    # if not protocol:
    #     continue

    with open('testdata/second_harvest/priority_identification_all.csv', 'a') as f:
        f.write('|'.join([digest, url.replace(',', ';').replace('|', ';'), protocol,
                str(subtype), service, str(has_dataset), str(has_metadata),