Beispiel #1
0
class CSWTest(TestCase):
    """
    Checks against the CSW endpoint configured in the registry settings
    """

    def setUp(self):
        """create the CSW client for the configured endpoint URL"""

        endpoint = settings.REGISTRY_PYCSW['server']['url']
        self.csw = CatalogueServiceWeb(endpoint)

    def tearDown(self):
        """delete every Service object once a test has finished"""

        Service.objects.all().delete()

    def test_capabilities(self):
        """check the capabilities reported by HHypermap's CSW"""

        expected_url = settings.REGISTRY_PYCSW['server']['url']
        harvestable_types = (
            'http://www.opengis.net/wms',
            'http://www.opengis.net/wmts/1.0',
            'urn:x-esri:serviceType:ArcGIS:MapServer',
            'urn:x-esri:serviceType:ArcGIS:ImageServer',
        )

        # every URL advertised for an operation must equal the one in settings
        for operation in self.csw.operations:
            for binding in operation.methods:
                self.assertEqual(expected_url, binding['url'], 'Expected URL equality')

        # OGC:CSW 2.0.2 must be the reported version
        self.assertEqual(self.csw.version, '2.0.2', 'Expected "2.0.2" as a supported version')

        # look up the two transactional operations
        transaction_op = self.csw.get_operation_by_name('Transaction')
        harvest_op = self.csw.get_operation_by_name('Harvest')

        # each HHypermap service type must be listed by both operations
        for resource_type in harvestable_types:
            self.assertIn(resource_type, harvest_op.parameters['ResourceType']['values'])
            self.assertIn(resource_type, transaction_op.parameters['TransactionSchemas']['values'])
def get_datasets_from_csw(csw_endpoint, extended):
    """Fetch up to 1000 records from a CSW endpoint.

    Args:
        csw_endpoint: URL of the CSW service to query.
        extended: when true, the records are requested a second time using
            the ``gmd`` output schema; every XPath fragment in the
            ``attribute_to_xpath_fragment`` mapping is evaluated against that
            raw response for each record, and the first match (or ``""`` when
            nothing matches) is set as an attribute on the record.

    Returns:
        A dict with the single key ``'datasets'`` holding the list of records.
    """
    # print() with one pre-formatted argument behaves the same on Python 2 and 3
    print('Retrieving data sets from %s' % csw_endpoint)

    csw = CatalogueServiceWeb(csw_endpoint)
    csw.getrecords2(esn='full', maxrecords=1000)
    # list() keeps the result a real list on Python 3, where values() is a view
    parsed_records = list(csw.records.values())
    if extended:
        # Request the data a second time in an encoding that carries different
        # information, parse that response by hand and merge the extra values
        # into the records parsed above.
        csw.getrecords2(esn='full', maxrecords=1000, outputschema=namespaces['gmd'])
        root = etree.XML(csw.response)

        for record in parsed_records:
            xpath_record_fragment = record_xpath_template.format(record.identifier)
            # items() works on both Python 2 and 3 (iteritems() is Python 2 only)
            for attribute, xpath_fragment in attribute_to_xpath_fragment.items():
                matches = root.xpath(xpath_record_fragment + xpath_fragment,
                                     namespaces=namespaces)
                # keep only the first match; fall back to an empty string
                setattr(record, attribute, matches[0] if matches else "")

            print(vars(record))

    return {
        'datasets': parsed_records,
    }
Beispiel #3
0
def send_transaction_request(**kwargs):
    """Render the CSW transaction template and insert it into the catalogue.

    The keyword arguments are passed unchanged to the template renderer.
    Every step reports a failure through ``log_error_msg`` and then stops, so
    a later step never runs with the result of a failed earlier one.
    Always returns ``None``.
    """
    pycsw_url = "http://meta.iguess.list.lu/"

    try:
        csw = CatalogueServiceWeb(pycsw_url)
    except Exception:
        log_error_msg("Unable to create Catalogue object")
        return

    try:
        template_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "iguess", "csw_template.xml"))
        with open(template_path, "r") as template_file:
            text = template_file.read()
    except Exception:
        log_error_msg("problem reading the xml template")
        return

    template = Template(text)
    try:
        result = template.render(**kwargs)
    except Exception:
        log_error_msg("error rendering xml transaction template")
        return

    try:
        csw.transaction(ttype='insert', typename='gmd:MD_Metadata', record=result)
    except Exception:
        # NOTE(review): any failure of the insert is reported with this
        # message, not only a duplicate record - confirm that is intended.
        log_error_msg("catalogue record already present")
 def test_GetRecords_summary(self):
     """Request five summary records in the GMD output schema."""
     catalogue = CatalogueServiceWeb(service)
     request_args = {
         "outputschema": GMD,
         "startposition": 1,
         "maxrecords": 5,
         "esn": "summary",
     }
     catalogue.getrecords2(**request_args)
     # the count is computed but nothing is asserted about it
     record_count = len(catalogue.records)
Beispiel #5
0
def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    :param context: object whose ``md_core_model['mappings']`` gives the
        record attribute names for ``pycsw:Source``, ``pycsw:Schema`` and
        ``pycsw:Identifier``; also passed to ``repository.Repository``
    :param database: passed to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL of the CSW endpoint that is asked to harvest each record
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) <= 0:
        return

    LOGGER.info('Refreshing %s harvested records', count)
    csw = CatalogueServiceWeb(url)

    # the attribute names are the same for every record: resolve them once
    mappings = context.md_core_model['mappings']
    source_attr = mappings['pycsw:Source']
    schema_attr = mappings['pycsw:Schema']
    identifier_attr = mappings['pycsw:Identifier']

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        LOGGER.info('Harvesting %s (identifier = %s) ...', source,
                    identifier)
        # TODO: find a smarter way of catching this
        if schema == 'http://www.isotc211.org/2005/gmd':
            schema = 'http://www.isotc211.org/schemas/2005/gmd/'
        try:
            csw.harvest(source, schema)
            LOGGER.info(csw.response)
        except Exception as err:  # 'as' form is valid on Python 2.6+ and 3
            # a failed harvest is logged and the next record is tried;
            # warning() is the non-deprecated spelling of warn() on standard
            # logging loggers
            LOGGER.warning(err)
def test_ows_interfaces_csw():
    """Check that a CSW client exposes the common OWSLib service interface."""
    service = CatalogueServiceWeb(CSW_SERVICE_URL)
    # Check each service instance conforms to OWSLib interface
    service.alias = 'CSW'
    # the isinstance() result has to be asserted, otherwise the check is a no-op
    assert isinstance(service, owslib.catalogue.csw2.CatalogueServiceWeb)
    # URL attribute
    assert service.url == CSW_SERVICE_URL
    # version attribute
    assert service.version == '2.0.2'
    # Identification object
    assert hasattr(service, 'identification')
    # Check all ServiceIdentification attributes
    assert service.identification.type == 'CSW'
    for attribute in [
            'type', 'version', 'title', 'abstract', 'keywords',
            'accessconstraints', 'fees'
    ]:
        assert hasattr(service.identification, attribute)
    # Check all ServiceProvider attributes
    for attribute in ['name', 'url', 'contact']:
        assert hasattr(service.provider, attribute)
    # Check all operations implement IOperationMetadata
    for op in service.operations:
        for attribute in ['name', 'formatOptions', 'methods']:
            assert hasattr(op, attribute)
    # Check all contents implement IContentMetadata as a dictionary
    # CSW does not work in this way so use dummy
    service.contents = {'dummy': '1'}
    assert isinstance(service.contents, dict)
Beispiel #7
0
def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    :param context: object whose ``md_core_model['mappings']`` gives the
        record attribute names for ``pycsw:Source``, ``pycsw:Schema`` and
        ``pycsw:Identifier``; also passed to ``repository.Repository``
    :param database: passed to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL of the CSW endpoint that is asked to harvest each record
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) <= 0:
        return

    LOGGER.info('Refreshing %s harvested records', count)
    csw = CatalogueServiceWeb(url)

    # resolve the record attribute names once instead of on every iteration
    mappings = context.md_core_model['mappings']
    source_attr = mappings['pycsw:Source']
    schema_attr = mappings['pycsw:Schema']
    identifier_attr = mappings['pycsw:Identifier']

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        LOGGER.info('Harvesting %s (identifier = %s) ...',
                    source, identifier)
        # TODO: find a smarter way of catching this
        if schema == 'http://www.isotc211.org/2005/gmd':
            schema = 'http://www.isotc211.org/schemas/2005/gmd/'
        try:
            csw.harvest(source, schema)
            LOGGER.info(csw.response)
        except Exception as err:  # 'as' form is valid on Python 2.6+ and 3
            # log the failure and continue with the next record; warning()
            # replaces the deprecated warn() alias of standard logging loggers
            LOGGER.warning(err)
Beispiel #8
0
def refresh_harvested_records(database, table, url):
    ''' refresh / harvest all non-local records in repository

    database and table are passed to repository.Repository together with the
    CONTEXT object; url is the CSW endpoint that is asked to harvest each
    record. Progress, responses and errors are printed.
    '''
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, CONTEXT, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) <= 0:
        return

    # print() with a single pre-formatted argument prints the same text on
    # Python 2 and Python 3
    print('Refreshing %s harvested records' % count)
    csw = CatalogueServiceWeb(url)

    # resolve the record attribute names once instead of on every iteration
    mappings = CONTEXT.md_core_model['mappings']
    source_attr = mappings['pycsw:Source']
    schema_attr = mappings['pycsw:Schema']
    identifier_attr = mappings['pycsw:Identifier']

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        print('Harvesting %s (identifier = %s) ...' % (source, identifier))
        # TODO: find a smarter way of catching this
        if schema == 'http://www.isotc211.org/2005/gmd':
            schema = 'http://www.isotc211.org/schemas/2005/gmd/'
        try:
            csw.harvest(source, schema)
            print(csw.response)
        except Exception as err:  # 'as' form is valid on Python 2.6+ and 3
            # a failed harvest is reported and the next record is tried
            print(err)
 def test_GetRecords_dataset(self):
     """Request five records of type "dataset" in the GMD output schema."""
     catalogue = CatalogueServiceWeb(service)
     request_args = {
         "qtype": "dataset",
         "outputschema": GMD,
         "startposition": 1,
         "maxrecords": 5,
     }
     catalogue.getrecords(**request_args)
     # the count is computed but nothing is asserted about it
     record_count = len(catalogue.records)
 def test_GetRecords_brief(self):
     """Request five brief records in the GMD output schema."""
     catalogue = CatalogueServiceWeb(service)
     request_args = {
         "outputschema": GMD,
         "startposition": 1,
         "maxrecords": 5,
         "esn": "brief",
     }
     catalogue.getrecords(**request_args)
     # the count is computed but nothing is asserted about it
     record_count = len(catalogue.records)
def test_ows_interfaces_csw():
    """Check that a CSW client exposes the common OWSLib service interface."""
    service = CatalogueServiceWeb(CSW_SERVICE_URL)
    # Check each service instance conforms to OWSLib interface
    service.alias = 'CSW'
    # the isinstance() result has to be asserted, otherwise the check is a no-op
    assert isinstance(service, owslib.csw.CatalogueServiceWeb)
    # URL attribute
    assert service.url == CSW_SERVICE_URL
    # version attribute
    assert service.version == '2.0.2'
    # Identification object
    assert hasattr(service, 'identification')
    # Check all ServiceIdentification attributes
    assert service.identification.type == 'CSW'
    for attribute in ['type', 'version', 'title', 'abstract', 'keywords', 'accessconstraints', 'fees']:
        assert hasattr(service.identification, attribute)
    # Check all ServiceProvider attributes
    for attribute in ['name', 'url', 'contact']:
        assert hasattr(service.provider, attribute)
    # Check all operations implement IOperationMetadata
    for op in service.operations:
        for attribute in ['name', 'formatOptions', 'methods']:
            assert hasattr(op, attribute)
    # Check all contents implement IContentMetadata as a dictionary
    # CSW does not work in this way so use dummy
    service.contents = {'dummy': '1'}
    assert isinstance(service.contents, dict)
Beispiel #12
0
def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    :param context: object whose ``md_core_model["mappings"]`` gives the
        record attribute names for ``pycsw:Source``, ``pycsw:Schema`` and
        ``pycsw:Identifier``; also passed to ``repository.Repository``
    :param database: passed to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL of the CSW endpoint that is asked to harvest each record
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={"where": 'mdsource != "local"', "values": []})

    if int(count) > 0:
        LOGGER.info("Refreshing %s harvested records", count)
        csw = CatalogueServiceWeb(url)

        # resolve the record attribute names once instead of on every iteration
        mappings = context.md_core_model["mappings"]
        source_attr = mappings["pycsw:Source"]
        schema_attr = mappings["pycsw:Schema"]
        identifier_attr = mappings["pycsw:Identifier"]

        for rec in records:
            source = getattr(rec, source_attr)
            schema = getattr(rec, schema_attr)
            identifier = getattr(rec, identifier_attr)

            LOGGER.info("Harvesting %s (identifier = %s) ...", source, identifier)
            # TODO: find a smarter way of catching this
            if schema == "http://www.isotc211.org/2005/gmd":
                schema = "http://www.isotc211.org/schemas/2005/gmd/"
            try:
                csw.harvest(source, schema)
                LOGGER.info(csw.response)
            except Exception as err:
                # log the failure and continue with the next record; warning()
                # replaces the deprecated warn() alias of standard logging
                # loggers
                LOGGER.warning(err)
    else:
        LOGGER.info("No harvested records")
Beispiel #13
0
 def mdcount(self, cswurl, constraints=None, startrecord=0, maxharvest=10):
     """Queries the csw and count md matching constraints

     :param cswurl: URL of the CSW endpoint
     :param constraints: list of constraints passed to ``getrecords2``;
         ``None`` (the default) means an empty list
     :param startrecord: passed to ``getrecords2`` as ``startposition``
     :param maxharvest: passed to ``getrecords2`` as ``maxrecords``
     :return: the client's ``results`` attribute after a ``hits`` request
     """
     # a None sentinel avoids sharing one default list object between calls
     if constraints is None:
         constraints = []
     csw = CatalogueServiceWeb(cswurl, skip_caps=True)
     csw.getrecords2(
         esn="brief", constraints=constraints, startposition=startrecord, maxrecords=maxharvest, resulttype="hits"
     )
     return csw.results
Beispiel #14
0
def test_csw_sends_headers():
    """
    Test that if headers are provided in the CSW class they are sent
    when performing HTTP requests (in this case for GetCapabilities)

    The HTTP layer is patched to raise RuntimeError, so no request leaves the
    process. The assertions run whether or not that error reaches the test,
    so a constructor that never issues a request fails the test instead of
    passing it silently.
    """
    expectations = [
        ('2.0.2', {'User-agent': 'my-app/1.0'}),
        ('3.0.0', {'Accept': 'application/xml',
                   'User-agent': 'my-app/1.0'}),
    ]

    for version, expected_headers in expectations:
        with mock.patch('owslib.util.requests.request',
                        side_effect=RuntimeError) as mock_request:
            try:
                # a fresh dict per call, so one run cannot affect the next
                CatalogueServiceWeb('http://example.com/csw',
                                    version=version,
                                    headers={'User-agent': 'my-app/1.0'})
            except RuntimeError:
                # raised by the patched request function
                pass

            assert mock_request.called
            assert mock_request.call_args[1]['headers'] == expected_headers
class GeocatCatalogueServiceWeb(object):
    """Wrapper around a CSW client for looking up record ids and records."""

    def __init__(self, url):
        """Create the CSW client for ``url`` and select the CHE schema."""
        self.csw = CatalogueServiceWeb(url)
        self.schema = CHE_SCHEMA

    def get_geocat_id_from_csw(self,
                               cqlquery=CQL_QUERY_DEFAULT,
                               cqlterm=CQL_SEARCH_TERM_DEFAULT):
        """Return the ids of all records where ``cqlquery`` equals ``cqlterm``.

        Records are requested in pages of 50 until the service stops
        reporting a next record.

        Raises:
            CswNotFoundError: when a request yields no response or the
                service reports zero matches.
        """
        harvest_query = PropertyIsEqualTo(cqlquery, cqlterm)
        nextrecord = 0
        record_ids = []
        while nextrecord is not None:
            self.csw.getrecords2(constraints=[harvest_query],
                                 maxrecords=50,
                                 startposition=nextrecord)
            results = self.csw.results
            if self.csw.response is None or results['matches'] == 0:
                raise CswNotFoundError(
                    "No dataset found for harvest query {}".format(
                        harvest_query))
            if results['returned'] <= 0:
                # An empty page leaves the start position unchanged, so
                # asking again would repeat the same request forever.
                break
            record_ids.extend(self.csw.records.keys())
            if results['nextrecord'] > 0:
                nextrecord = results['nextrecord']
            else:
                nextrecord = None
        return record_ids

    def get_record_by_id(self, geocat_id):
        """Return the raw response for one record, or None when it is empty."""
        self.csw.getrecordbyid(id=[geocat_id], outputschema=self.schema)
        return self.csw.response or None
 def test_GetRecords_dataset(self):
     """Request five GMD records whose dc:type equals "dataset"."""
     catalogue = CatalogueServiceWeb(service)
     type_filter = PropertyIsEqualTo("dc:type", "dataset")
     request_args = {
         "constraints": [type_filter],
         "outputschema": GMD,
         "startposition": 1,
         "maxrecords": 5,
     }
     catalogue.getrecords2(**request_args)
     # the count is computed but nothing is asserted about it
     record_count = len(catalogue.records)
Beispiel #17
0
 def setRepoParams(self, repoParams):
     """Apply the repository parameters and try to create the CSW client.

     ``self.cswrepo`` is set to ``None`` when the client cannot be created.
     """
     self.metadataprefix = "csw"
     super(CSWRepository, self).setRepoParams(repoParams)
     try:
         self.cswrepo = CatalogueServiceWeb(self.url)
     except Exception:
         # best effort: an unreachable or invalid endpoint must not abort the
         # setup; KeyboardInterrupt and SystemExit are no longer swallowed
         self.cswrepo = None
     self.domain_metadata = []
Beispiel #18
0
 def test_GetRecords(self):
     """Fetch five GMD records and check each one is an MD_Metadata object."""
     catalogue = CatalogueServiceWeb(service)
     catalogue.getrecords(outputschema=GMD, startposition=1, maxrecords=5)
     found = len(catalogue.records)
     assert found == 5, found
     for key in catalogue.records:
         # remember the identifier in the `identifiers` list defined outside
         # this method
         identifiers.append(key)
         assert isinstance(catalogue.records[key], MD_Metadata), (key, catalogue.records[key])
Beispiel #19
0
def _harvest_csw(csw, maxrecords=10, totalrecords=float("inf")):
    """
    Step through CSW results, and if one seems to be a WMS or Arc REST service then register it

    :param csw: object whose ``base_url`` is the catalogue to page through;
        it is also passed as ``parent`` to the registration helpers
    :param maxrecords: number of records requested per page
    :param totalrecords: paging stops once the next record position reported
        by the catalogue exceeds ``min(matches, totalrecords)``
    """
    # unquote lives in urllib.parse on Python 3 and in urllib on Python 2
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    src = CatalogueServiceWeb(csw.base_url)

    # first run, start from 0
    startposition = 0

    while True:
        src.getrecords(esn="summary", startposition=startposition, maxrecords=maxrecords)

        limit = min(src.results["matches"], totalrecords)
        nextrecord = src.results["nextrecord"]

        # end the loop, exhausted all records or max records to process
        # NOTE(review): the records of the page that triggers this condition
        # are not processed - confirm that skipping them is intended.
        if nextrecord == 0 or src.results["returned"] == 0 or nextrecord > limit:
            break

        # harvest each record to destination CSW
        for identifier in list(src.records):
            record = src.records[identifier]
            known_types = {}
            for ref in record.references:
                if (
                    ref["scheme"] == "OGC:WMS"
                    or "service=wms&request=getcapabilities" in unquote(ref["url"]).lower()
                ):
                    print("WMS:%s" % ref["url"])
                    known_types["WMS"] = ref["url"]
                if (
                    ref["scheme"] == "OGC:WFS"
                    or "service=wfs&request=getcapabilities" in unquote(ref["url"]).lower()
                ):
                    print("WFS:%s" % ref["url"])
                    known_types["WFS"] = ref["url"]
                if ref["scheme"] == "ESRI":
                    print("ESRI:%s" % ref["url"])
                    known_types["REST"] = ref["url"]

            if "WMS" in known_types:
                service_type = "OWS" if "WFS" in known_types else "WMS"
                try:
                    _process_wms_service(known_types["WMS"], service_type, None, None, parent=csw)
                except Exception as e:
                    logger.error("Error registering %s:%s", known_types["WMS"], e)
            elif "REST" in known_types:
                try:
                    # register the ESRI URL that was recorded, not whichever
                    # reference the loop above happened to visit last
                    _register_arcgis_url(known_types["REST"], None, None, None, parent=csw)
                except Exception as e:
                    logger.error("Error registering %s:%s", known_types["REST"], e)

        # subsequent run, startposition is now paged; without this step every
        # request would start from 0 again and the loop would never advance
        startposition = nextrecord
Beispiel #20
0
def _harvest_csw(csw, maxrecords=10, totalrecords=float('inf')):
    """
    Step through CSW results, and if one seems to be a WMS or Arc REST service then register it

    :param csw: object whose ``base_url`` is the catalogue to page through;
        it is also passed as ``parent`` to the registration helpers
    :param maxrecords: number of records requested per page
    :param totalrecords: paging stops once the next record position reported
        by the catalogue exceeds ``min(matches, totalrecords)``
    """
    # unquote lives in urllib.parse on Python 3 and in urllib on Python 2
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    src = CatalogueServiceWeb(csw.base_url)

    # first run, start from 0
    startposition = 0

    while True:
        src.getrecords(
            esn='summary', startposition=startposition, maxrecords=maxrecords)

        limit = min(src.results['matches'], totalrecords)
        nextrecord = src.results['nextrecord']

        # end the loop, exhausted all records or max records to process
        # NOTE(review): the records of the page that triggers this condition
        # are not processed - confirm that skipping them is intended.
        if nextrecord == 0 \
                or src.results['returned'] == 0 \
                or nextrecord > limit:
            break

        # harvest each record to destination CSW
        for identifier in list(src.records):
            record = src.records[identifier]
            known_types = {}
            for ref in record.references:
                if ref["scheme"] == "OGC:WMS" or \
                        "service=wms&request=getcapabilities" in unquote(ref["url"]).lower():
                    print("WMS:%s" % ref["url"])
                    known_types["WMS"] = ref["url"]
                if ref["scheme"] == "OGC:WFS" or \
                        "service=wfs&request=getcapabilities" in unquote(ref["url"]).lower():
                    print("WFS:%s" % ref["url"])
                    known_types["WFS"] = ref["url"]
                if ref["scheme"] == "ESRI":
                    print("ESRI:%s" % ref["url"])
                    known_types["REST"] = ref["url"]

            if "WMS" in known_types:
                service_type = "OWS" if "WFS" in known_types else "WMS"
                try:
                    _process_wms_service(
                        known_types["WMS"], service_type, None, None, parent=csw)
                except Exception as e:
                    logger.error("Error registering %s:%s",
                                 known_types["WMS"], e)
            elif "REST" in known_types:
                try:
                    # register the ESRI URL that was recorded, not whichever
                    # reference the loop above happened to visit last
                    _register_arcgis_url(
                        known_types["REST"], None, None, None, parent=csw)
                except Exception as e:
                    logger.error("Error registering %s:%s",
                                 known_types["REST"], e)

        # subsequent run, startposition is now paged; without this step every
        # request would start from 0 again and the loop would never advance
        startposition = nextrecord
Beispiel #21
0
class HarvestNode(NgdsDataObject):
    """Stores information about harvest endpoints"""
    # owslib CSW client used for the requests; stays None until setup_csw()
    # creates it
    csw = None

    def __init__(self, url, **kwargs):
        """Store the endpoint URL and the optional bookkeeping fields.

        Recognised keyword arguments: ``frequency``, ``title`` and
        ``node_admin_id``.
        """
        # A URL must be given
        p = urlparse(url)
        # Keep only scheme, host and path (params, query and fragment dropped)
        self.url = urlunparse((p.scheme, p.netloc, p.path, "", "", ""))
        # frequency should be one of manual|daily|weekly|monthly
        self.frequency = kwargs.get('frequency', 'manual')
        # A title for bookkeeping
        self.title = kwargs.get('title', 'No Title Was Given')
        # Foreign Key to a responsible_party who maintains the remote node
        self.node_admin_id = kwargs.get('node_admin_id', None)

    def setup_csw(self):
        """Create the CSW client for this node's URL"""
        self.csw = CatalogueServiceWeb(self.url)

    def do_harvest(self):
        """Perform a harvest from another CSW server"""
        if self.csw is None:
            self.setup_csw()
        self.get_records()  # Do the first GetRecords request
        # Start a list to house all of the ids; list() is required because
        # keys() is a view that cannot be extended on Python 3
        ids = list(self.csw.records.keys())
        print("next: %s, total: %s" % (self.csw.results["nextrecord"], self.csw.results["matches"]))

        # Once next_record > number_matched, we've gotten everything
        while self.csw.results["nextrecord"] < self.csw.results["matches"] and self.csw.results["nextrecord"] != 0:
            # Get another set, starting from next_record from previous response
            self.get_records(self.csw.results["nextrecord"], self.csw.results["returned"])
            ids.extend(self.csw.records.keys())  # Add new ids to the list
            print("next: %s, total: %s" % (self.csw.results["nextrecord"], self.csw.results["matches"]))

        self.parse_records(ids)  # Gather the records themselves

    def parse_records(self, ids):
        """Perform as many GetRecordById requests as needed"""
        print("Gathered %s IDs" % len(ids))
        for record_id in ids:
            self.get_record_by_id(record_id)
            # the returned object is not used here
            HarvestedRecord.from_md_metadata(self.csw.records[record_id], self)

    def get_record_by_id(self, record_id):
        """Get a single record, by ID"""
        params = {
            "id": [record_id],
            "outputschema": "http://www.isotc211.org/2005/gmd"
        }
        self.csw.getrecordbyid(**params)  # Puts response in self.csw.records

    def get_records(self, start_position=1, max_records=1000):
        """Perform a GetRecords request"""
        params = {
            "typenames": "gmd:MD_Metadata",
            "outputschema": "http://www.isotc211.org/2005/gmd",
            "startposition": start_position,
            "maxrecords": max_records,
            "esn": "brief"
        }
        self.csw.getrecords(**params)  # Puts results in self.csw.records
Beispiel #22
0
 def __init__(self, community, url, schema, fromdate, clean, limit, outdir,
              verify):
     """Initialise the base class, then create the CSW client.

     ``schema`` is kept as ``_schema_type``; ``_schema`` and ``_constraints``
     start out as ``None``.
     """
     base_args = (community, url, fromdate, clean, limit, outdir, verify)
     super().__init__(*base_args)
     # route messages from the warnings module through logging
     logging.captureWarnings(True)
     tls_auth = Authentication(verify=self.verify)
     self.csw = CatalogueServiceWeb(self.url, auth=tls_auth)
     self._schema_type = schema
     self._schema = self._constraints = None
Beispiel #23
0
 def __init__(self, url, username=None, password=None):
     """Store the connection details and create the remote CSW client.

     Raises CswReadError when the client cannot be created.
     """
     self.url = url
     self.username = username
     self.password = password
     try:
         client_options = {
             'timeout': 3600,
             'lang': 'fr-FR',
             'version': '2.0.2',
             'skip_caps': True,
             'username': self.username,
             'password': self.password,
         }
         self.remote = CatalogueServiceWeb(self.url, **client_options)
     except Exception:
         raise CswReadError()
 def test_GetRecords(self):
     """Skipped test; the statements after the raise never run."""
     # NB: This test fails because no records have been setup...
     raise SkipTest()  # therefore skip
     catalogue = CatalogueServiceWeb(service)
     catalogue.getrecords(outputschema=GMD, startposition=1, maxrecords=5)
     found = len(catalogue.records)
     assert found == 5, found
     for key in catalogue.records:
         identifiers.append(key)
         assert isinstance(catalogue.records[key], MD_Metadata), (key, catalogue.records[key])
    def __init__(self, config):
        """Read the CSW settings from ``config`` and create the client.

        The settings live in the section named after this class: ``csw_url``
        is always read, ``csw_usr`` and ``csw_pwd`` only when present
        (otherwise they are stored as ``None``).
        """
        section = self.__class__.__name__
        self._csw_url = config.get(section, 'csw_url')

        if config.has_option(section, 'csw_usr'):
            self._csw_usr = config.get(section, 'csw_usr')
        else:
            self._csw_usr = None

        if config.has_option(section, 'csw_pwd'):
            self._csw_pwd = config.get(section, 'csw_pwd')
        else:
            self._csw_pwd = None

        from owslib.csw import CatalogueServiceWeb
        self._csw = CatalogueServiceWeb(url=self._csw_url, skip_caps=True)
 def __init__(self):
     """Create the CSW publication client from the GEONETWORK_* settings."""
     self.username = GEONETWORK_LOGIN
     self.password = GEONETWORK_PASSWORD
     publication_url = urljoin(GEONETWORK_URL, 'srv/fre/csw-publication')
     client_options = {
         'timeout': GEONETWORK_TIMEOUT,
         'lang': 'fr-FR',
         'version': '2.0.2',
         'skip_caps': True,
         'username': self.username,
         'password': self.password,
     }
     self.remote = CatalogueServiceWeb(publication_url, **client_options)
    def __init__(self, csw_url=None, timeout=None, debug=False):
        '''
        Set up the CSW client used by this CSWUtils instance
        @param csw_url: URL for CSW service. A false value selects CSWUtils.DEFAULT_CSW_URL
        @param timeout: Timeout in seconds. A false value selects CSWUtils.DEFAULT_TIMEOUT
        @param debug: Stored unchanged as self.debug
        '''
        if not csw_url:
            csw_url = CSWUtils.DEFAULT_CSW_URL
        if not timeout:
            timeout = CSWUtils.DEFAULT_TIMEOUT
        self.debug = debug

        self.csw = CatalogueServiceWeb(csw_url, timeout=timeout)
Beispiel #28
0
def getDataSetURI(anyText, CSWURL, BBox):
    """
    Searches a given CSW server and returns metadata content for the datasets found.

    Arguments
    ---------

    - anyText - A string that is submitted to the CSW search as a keyword.
    - CSWURL - A base URL for the CSW server to be searched.
    - BBox - Accepted for interface compatibility; this function does not use it.

    Returns
    -------

    A list whose first entry is the header ``['title', 'abstract', ['urls']]``
    followed by one ``[title, abstract, urls]`` entry per record. In every URL
    that contains "/dodsC/", "http" is replaced by "dods".
    """

    csw = CatalogueServiceWeb(CSWURL, skip_caps=True)
    # Works with owslib version 0.8.6.
    csw.getrecords(keywords=[anyText],
                   outputschema='http://www.isotc211.org/2005/gmd',
                   esn='full',
                   maxrecords=100)
    dataSetURIs = [['title', 'abstract', ['urls']]]
    for record in csw.records.values():
        title = record.identification.title
        abstract = record.identification.abstract
        urls = []
        try:
            for online in record.distribution.online:
                urls.append(online.url)
        except AttributeError:
            # no usable distribution information on this record
            pass
        for info in record.identificationinfo:
            try:
                # take the first connect point of every operation, not the
                # first operation's connect point once per operation
                for operation in info.operations:
                    connect_points = operation['connectpoint']
                    if connect_points:
                        urls.append(connect_points[0].url)
            except AttributeError:
                # this identification block exposes no service operations
                pass
        dataSetURIs.append([title, abstract, urls])
    for entry in dataSetURIs:
        entry[2] = [
            uri.replace("http", "dods") if "/dodsC/" in uri else uri
            for uri in entry[2]
        ]
    return dataSetURIs
Beispiel #29
0
    def fetch(self, clean_url=True, timeout=120):
        """Connect to the CSW source and read its information.

        With ``clean_url`` true the result of ``get_cleaned_url()`` is used,
        otherwise ``self.url``. A connection error is appended to
        ``self.errors``, logged and re-raised.
        """
        if clean_url:
            target = self.get_cleaned_url()
        else:
            target = self.url

        try:
            self.csw = CatalogueServiceWeb(target, timeout=timeout)
        except Exception as exc:
            message = f'Error connection CSW: {exc}'
            self.errors.append(message)
            logger.error(message)
            raise

        self.read_csw_info()
 def check_for_property(self,p):
     """
     Check that the specified dataset is available for the current URL.
     Returns boolean- false if the dataset is not found
     | set service url             | ${CSWSERVICE_URL}        |
     | ${dataset_exists}           | Check for property       | free parking             |
     | Should be True              | ${dataset_exists}        |
     """

     csw = CatalogueServiceWeb(self._url)
     csw.getdomain('Title', dtype='property')
     # Read the value list by key: taking the "first" entry of the results
     # dict depends on dict ordering and values() cannot be indexed on
     # Python 3.
     return p in csw.results['values']
 def test_GetRecords(self):
     """Skipped test; the statements after the raise never run."""
     # NB: This test fails because no records have been setup...
     raise SkipTest()  # therefore skip
     catalogue = CatalogueServiceWeb(service)
     catalogue.getrecords2(outputschema=GMD, startposition=1, maxrecords=5)
     found = len(catalogue.records)
     assert found == 5, found
     for key in catalogue.records:
         identifiers.append(key)
         assert isinstance(catalogue.records[key],
                           MD_Metadata), (key, catalogue.records[key])
Beispiel #32
0
    def show_metadata(self):
        """fetch the current record by id and show its metadata in a dialog"""

        # nothing to do without a selection or a current item
        if not self.treeRecords.selectedItems():
            return
        current = self.treeRecords.currentItem()
        if not current:
            return

        identifier = get_item_data(current, 'identifier')

        self.disable_ssl_verification = self.disableSSLVerification.isChecked()

        auth = None
        if self.disable_ssl_verification:
            try:
                auth = Authentication(verify=False)
            except NameError:
                # Authentication is not defined; continue without it
                pass

        try:
            with OverrideCursor(Qt.WaitCursor):
                client_options = {
                    'timeout': self.timeout,  # spellok
                    'username': self.catalog_username,
                    'password': self.catalog_password,
                    'auth': auth,
                }
                cat = CatalogueServiceWeb(self.catalog_url, **client_options)
                record_id = self.catalog.records[identifier].identifier
                cat.getrecordbyid([record_id])
        except ExceptionReport as err:
            QMessageBox.warning(
                self, self.tr('GetRecords error'),
                self.tr('Error getting response: {0}').format(err))
            return
        except KeyError:
            QMessageBox.warning(self, self.tr('Record parsing error'),
                                self.tr('Unable to locate record identifier'))
            return

        record = cat.records[identifier]
        record.xml_url = cat.request

        dialog = RecordDialog()
        html = render_template('en', self.context, record,
                               'record_metadata_dc.html')

        stylesheet = QgsApplication.reportStyleSheet()
        text_widget = dialog.textMetadata
        text_widget.document().setDefaultStyleSheet(stylesheet)
        dialog.textMetadata.setHtml(html)
        dialog.exec_()
Beispiel #33
0
def csw_ajax(request, *args, **kwargs):
    """Return one page of CSW search results as a JSON response.

    Only GET requests are served; every other method, a missing CSW URL in
    the session, or any exception raised while querying produces an
    ``HttpResponseServerError``.  Connection details and the search keywords
    are read from ``request.session``; paging comes from the ``offset`` and
    ``perPage`` query parameters.
    """
    if request.method != 'GET':
        return HttpResponseServerError()

    session = request.session
    csw_url = session['csw_url']
    user = session['user']
    password = session['password']
    keywords = session['keywords']
    keywords_query = [
        fes.PropertyIsLike('csw:AnyText', '%%%s%%' % keywords)]
    if not csw_url:
        return HttpResponseServerError()

    try:
        csw = CatalogueServiceWeb(csw_url, username=user, password=password)
        result = csw.identification.type
        if result == 'CSW':
            offset = int(request.GET['offset'])
            per_page = int(request.GET['perPage'])
            csw.getrecords2(
                typenames='gmd:MD_Metadata',
                esn='full',
                outputschema='http://www.isotc211.org/2005/gmd',
                constraints=keywords_query,
                startposition=offset,
                maxrecords=per_page)
            result = []
            for rec in csw.records.values():
                # Only ISO metadata records are reported.
                if not isinstance(rec, MD_Metadata):
                    continue
                res = {
                    'id': rec.identifier,
                    'title': rec.identification.title,
                    'inasafe_keywords':
                        rec.identification.supplementalinformation,
                }
                if res['inasafe_keywords']:
                    res['inasafe_layer'] = (
                        '<inasafe_keywords/>' in res['inasafe_keywords'])
                result.append(res)
        matches = csw.results['matches']
        json_result = {
            'records': result,
            'queryRecordCount': matches,
            'totalRecordCount': matches
        }
        return JsonResponse(json_result, safe=False)
    except Exception as e:
        LOGGER.exception(e)
        return HttpResponseServerError()
Beispiel #34
0
    def test_GetRecordById(self):
        """Fetch two known ids as GMD records, then check an unknown id returns none."""
        catalogue = CatalogueServiceWeb(service)
        wanted = identifiers[:2]
        catalogue.getrecordbyid(wanted, outputschema=GMD)
        found = len(catalogue.records)
        assert found == len(wanted), found
        for ident, record in catalogue.records.items():
            identifiers.append(ident)
            assert isinstance(record, MD_Metadata), (ident, record)

        catalogue.getrecordbyid(["nonexistent"], outputschema=GMD)
        found = len(catalogue.records)
        assert found == 0, found
Beispiel #35
0
    def mdsearch(self, cswurl, esn="summary", constraints=None, startrecord=0, maxrecords=10, maxharvest=20):
        """Queries a csw to retrieve md ids matching constraints

        Pages through GetRecords responses until the service reports no
        further records or ``maxharvest`` is reached.

        :param cswurl: URL of the CSW endpoint to query.
        :param esn: element set name passed to ``getrecords2``.
        :param constraints: list of filter constraints passed to
            ``getrecords2``; ``None`` (the default) means no constraints.
        :param startrecord: start position of the first request.
        :param maxrecords: page size of each request.
        :param maxharvest: upper bound on the number of records retrieved.
        :returns: dict mapping record id to the parsed record.
        """
        tstart = datetime.datetime.now()
        # A fresh list per call avoids sharing one mutable default object.
        if constraints is None:
            constraints = []
        records = {}

        logging.info("searching max %s md from %s" % (maxharvest, cswurl))
        csw = CatalogueServiceWeb(cswurl, skip_caps=True)
        first = True
        nextrecord = startrecord
        count = 0

        while True:
            if not first:
                nextrecord = csw.results["nextrecord"]

            if count + maxrecords > maxharvest:
                maxrecords = maxharvest - count  # retrieve exactly maxharvest md

            csw.getrecords2(
                esn=esn,
                constraints=constraints,
                startposition=nextrecord,
                maxrecords=maxrecords,
                outputschema=self.OUTPUTSCHEMA,
            )

            if csw.results["matches"] == 0:
                logging.info("0 md found from %s" % cswurl)
                break
            else:
                first = False
                # fetch records (items() works on both Python 2 and 3)
                for rec_id, rec in csw.records.items():
                    count += 1
                    percent = int(float(count) / min(maxharvest, csw.results["matches"]) * 100)
                    logging.debug("%s%% %s" % (percent, rec_id))
                    records[rec_id] = rec

                # get out if no records or beyond maxrecords
                if (
                    csw.results["nextrecord"] == 0
                    or csw.results["returned"] == 0
                    or csw.results["nextrecord"] > csw.results["matches"]
                    or csw.results["nextrecord"] > maxharvest
                ):
                    d = (datetime.datetime.now() - tstart).total_seconds()
                    logging.info("%s md found from %s in %d s" % (count, cswurl, d))
                    break

        return records
def test_bbox(endpoints,bbox):
    """Print, for each endpoint, how many datasets a BBOX query returns.

    :param endpoints: mapping of display title to CSW endpoint URL.
    :param bbox: bounding box handed to ``fes.BBox``.

    Every outcome is reported on stdout; nothing is returned.  Any exception
    raised while connecting or reading the capabilities is reported as
    "Timed out", and any exception raised by the query as "BBOX Query FAILS".
    """
    # items() and single-argument print() behave the same on Python 2 and 3.
    for title, url in endpoints.items():
        try:
            csw = CatalogueServiceWeb(url, timeout=40)
            if "BBOX" in csw.filters.spatial_operators:
                filter_list = [fes.BBox(bbox)]
                try:
                    csw.getrecords2(constraints=filter_list, maxrecords=1000)
                    print("%s : Datasets = %d" % (title, len(csw.records)))
                except Exception:
                    print("%s : BBOX Query FAILS" % title)
            else:
                print("%s - BBOX Query NOT supported" % title)
        except Exception:
            print("%s - Timed out" % title)
Beispiel #37
0
def test_csw_skgeodsy():
    """Check the advertised operations and that a GetRecords query returns results."""
    catalogue = CatalogueServiceWeb(SERVICE_URL)

    expected_operations = [
        'DescribeRecord', 'GetCapabilities', 'GetRecordById', 'GetRecords',
        'Transaction'
    ]
    advertised = sorted(operation.name for operation in catalogue.operations)
    assert advertised == expected_operations

    get_records = catalogue.get_operation_by_name('GetRecords')
    assert get_records.name == 'GetRecords'

    catalogue.getrecords2(typenames='csw:Record gmd:MD_Metadata')
    # Each paging counter of the response must be positive.
    for counter in ('returned', 'nextrecord', 'matches'):
        assert catalogue.results.get(counter) > 0
Beispiel #38
0
def test_default_csw_connections():
    """test that the default CSW connections work

    Parses ``resources/connections-default.xml`` below the plugin base path
    and, for every ``csw`` element, connects to its ``url`` and issues a
    GetRecords request.

    :raises ValueError: for the first connection that raises any exception;
        the message contains the original error text.
    """

    relpath = 'resources%sconnections-default.xml' % os.sep
    csw_connections_xml = options.base.plugin / relpath

    conns = etree.parse(csw_connections_xml)

    for conn in conns.findall('csw'):
        try:
            csw = CatalogueServiceWeb(conn.attrib.get('url'))
            info('Success: %s', csw.identification.title)
            csw.getrecords2()
        except Exception as err:
            # Exceptions do not interpolate their arguments the way logging
            # calls do, so format the message explicitly.
            raise ValueError('ERROR: %s' % err)
Beispiel #39
0
def test_default_csw_connections():
    """test that the default CSW connections work

    Parses ``resources/connections-default.xml`` below the plugin base path
    and, for every ``csw`` element, connects to its ``url`` and issues a
    GetRecords request.

    :raises ValueError: for the first connection that raises any exception;
        the message contains the original error text.
    """

    relpath = 'resources%sconnections-default.xml' % os.sep
    csw_connections_xml = options.base.plugin / relpath

    conns = etree.parse(csw_connections_xml)

    for conn in conns.findall('csw'):
        try:
            csw = CatalogueServiceWeb(conn.attrib.get('url'))
            info('Success: %s', csw.identification.title)
            csw.getrecords2()
        except Exception as err:
            # Exceptions do not interpolate their arguments the way logging
            # calls do, so format the message explicitly.
            raise ValueError('ERROR: %s' % err)
Beispiel #40
0
def test_bbox(endpoints, bbox):
    """Print, for each endpoint, how many datasets a BBOX query returns.

    :param endpoints: mapping of display title to CSW endpoint URL.
    :param bbox: bounding box handed to ``fes.BBox``.

    Every outcome is reported on stdout; nothing is returned.  Any exception
    raised while connecting or reading the capabilities is reported as
    "Timed out", and any exception raised by the query as "BBOX Query FAILS".
    """
    # items() and single-argument print() behave the same on Python 2 and 3.
    for title, url in endpoints.items():
        try:
            csw = CatalogueServiceWeb(url, timeout=40)
            if "BBOX" in csw.filters.spatial_operators:
                filter_list = [fes.BBox(bbox)]
                try:
                    csw.getrecords2(constraints=filter_list, maxrecords=1000)
                    print("%s : Datasets = %d" %
                          (title, len(csw.records)))
                except Exception:
                    print("%s : BBOX Query FAILS" % title)
            else:
                print("%s - BBOX Query NOT supported" % title)
        except Exception:
            print("%s - Timed out" % title)
    def get_csw_record_by_id(self, csw_url, identifier):
        '''
        Function to return the OWSLib CSW record from the specified CSW URL using UUID as the search criterion

        @param csw_url: URL of the CSW service to query
        @param identifier: record identifier requested with GetRecordById
        @return: the single record found
        @raise AssertionError: if the service is not a CSW, or if zero or more than one record is returned
        '''
        csw = CatalogueServiceWeb(csw_url)
        assert csw.identification.type == 'CSW', '%s is not a valid CSW service' % csw_url

        csw.getrecordbyid(id=[identifier], esn='full', outputschema='own')

        # Ensure there is exactly one record found
        assert len(
            csw.records) > 0, 'No CSW records found for ID "%s"' % identifier
        assert len(
            csw.records) == 1, 'Multiple CSW records found for ID "%s"' % identifier

        # values() is a non-indexable view on Python 3, so materialise it first
        return list(csw.records.values())[0]
Beispiel #42
0
def download_csw(db, harvest, csw_url, dest):
    '''
    Harvest every record of a CSW endpoint into ``dest``.

    Records stored for this harvest by a previous run are removed first.

    :param db: Mongo DB Client
    :param dict harvest: A dictionary returned from the mongo collection for
                         harvests.
    :param url csw_url: URL to the CSW endpoint
    :param str dest: Folder to download to (created when missing)
    :return: tuple ``(count, errors)``: records handled and records whose
             parsing reported failure
    '''
    if not os.path.exists(dest):
        os.makedirs(dest)

    catalogue = CatalogueServiceWeb(csw_url)
    # remove any records from past run
    db.Records.remove({"harvest_id": harvest['_id']})

    handled = 0
    failed = 0
    for page in get_records(catalogue):
        for record_name, raw_record in page.records.items():
            parsed_ok = parse_csw_record(db, harvest, csw_url, dest,
                                         record_name, raw_record)
            handled += 1
            if not parsed_ok:
                failed += 1

    return handled, failed
Beispiel #43
0
def csw_query_metadata_by_id(csw_url,
                             identifier,
                             username=None,
                             password=None):
    """Look up an ISO metadata record by its ``dc:identifier``.

    Returns ``None`` when the service does not identify itself as ``CSW`` or
    when no record matches; if several records match, the last one iterated
    is returned.
    """
    csw = CatalogueServiceWeb(csw_url, username=username, password=password)
    if csw.identification.type != 'CSW':
        return None

    identifier_filter = fes.PropertyIsEqualTo('dc:identifier', identifier)
    csw.getrecords2(typenames='gmd:MD_Metadata',
                    esn='full',
                    outputschema='http://www.isotc211.org/2005/gmd',
                    constraints=[identifier_filter])

    record = None
    for candidate in csw.records.values():
        record = candidate
    return record
Beispiel #44
0
def generate_csw_connections_file():
    """generate a CSW connections file from a flat file of CSW URLs

    Reads one URL per line from the file named by the ``filename`` option,
    connects to each and writes ``<filename>.xml`` with one ``csw`` element
    (``name`` = service title, ``url``) per reachable service.  Services
    that raise are logged through ``error`` and skipped.

    :raises ValueError: if the ``filename`` option is not set.
    """

    filename = options.get('filename', False)

    if not filename:
        raise ValueError('path to file of CSW URLs required')

    conns = etree.Element('qgsCSWConnections')
    conns.attrib['version'] = '1.0'

    with open(filename) as connsfh:
        for line in connsfh:
            url = line.strip()
            if not url:  # blank line
                continue
            try:
                csw = CatalogueServiceWeb(url)
                title = str(csw.identification.title)
                etree.SubElement(conns, 'csw', name=title, url=url)
            except Exception as err:
                error('ERROR on CSW %s: %s', url, err)

    # tostring() with an explicit encoding yields bytes, so the file has to
    # be opened in binary mode (a text-mode handle rejects bytes on Python 3).
    with open('%s.xml' % filename, 'wb') as connsxmlfh:
        connsxmlfh.write(etree.tostring(conns, encoding='utf-8'))
Beispiel #45
0
 def run(self, csw=None, offset=0):
     """Crawl one page of catalogue records and queue the follow-up work.

     :param csw: an already connected catalogue; when ``None`` a new
         connection is opened to ``self.config['endpoint']``.
     :param offset: start position of the page to request.

     Every record of the page is queued for ``CSWDatasetCrawler``.  When the
     service reports a next record, this crawler is queued again with that
     position as the new offset.
     """
     if csw is None:
         endpoint = self.config['endpoint']
         csw = CatalogueServiceWeb(endpoint)
     outputschema = self.config.get('outputschema', csw_ns['gmd'])
     typenames = self.config.get('typenames', 'csw:dataset')
     csw.getrecords(esn="full", outputschema=outputschema,
                    typenames=typenames, startposition=offset,
                    maxrecords=PAGE_SIZE)
     for record in csw.records.values():
         self.queue(CSWDatasetCrawler, record=record)

     next_record = csw.results.get('nextrecord') or 0
     matches = csw.results.get('matches') or 0
     # A next record of 0 signals the end of the result set; treating it as
     # a valid position would restart the crawl from the beginning forever.
     if 0 < next_record <= matches:
         print("OFFSET %s" % offset)
         self.queue(CSWCatalogCrawler, csw=csw,
                    offset=next_record)
 def test_good_post(self):
     """The service has a title and advertises the three core operations."""
     catalogue = CatalogueServiceWeb(service)
     title = catalogue.identification.title
     assert title, title
     methods_by_operation = {}
     for operation in catalogue.operations:
         methods_by_operation[operation.name] = operation.methods
     for required in ("GetCapabilities", "GetRecords", "GetRecordById"):
         assert required in methods_by_operation
Beispiel #47
0
    def _get_csw(self):
        """connect ``self.catalog`` to the configured service via owslib.csw.CatalogueServiceWeb

        Returns True on success; on any failure a warning box with the
        error text is shown and False is returned.
        """  # spellok

        skip_verification = self.disableSSLVerification.isChecked()
        self.disable_ssl_verification = skip_verification

        auth = None
        if skip_verification:
            try:
                auth = Authentication(verify=False)
            except NameError:
                pass

        # connect to the server; the wait cursor is shown only while doing so
        with OverrideCursor(Qt.WaitCursor):
            try:
                self.catalog = CatalogueServiceWeb(
                    self.catalog_url,  # spellok
                    timeout=self.timeout,
                    username=self.catalog_username,
                    password=self.catalog_password,
                    auth=auth)
            except ExceptionReport as err:
                msg = self.tr('Error connecting to service: {0}').format(err)
            except ValueError as err:
                msg = self.tr('Value Error: {0}').format(err)
            except Exception as err:
                msg = self.tr('Unknown Error: {0}').format(err)
            else:
                return True

        QMessageBox.warning(self, self.tr('CSW Connection error'), msg)
        return False
Beispiel #48
0
 def __init__(self, url, schema, version='2.0.2', lang='en-US'):
     """Store the schema, create the catalogue client and an empty metadata map.

     The client is created with a 10 second timeout and without fetching
     the capabilities document; every metadata field starts out as None.
     """
     self.schema = schema
     self.catalog = CatalogueServiceWeb(
         url, lang, version, timeout=10, skip_caps=True)

     field_names = (
         'id',
         'name',
         'title',
         'url',
         'author',
         'maintainer',
         'maintainer_email',
         'license_url',
         'version',
         'service_url',
         'service_type',
         'notes',
         'tags',
         'metadata_url',
         'metadata_raw',
     )
     self.metadata = {}
     for field_name in field_names:
         self.metadata[field_name] = None
Beispiel #49
0
 def setRepoParams(self, repoParams):
     """Apply the repository parameters and try to connect to the CSW service.

     Sets ``metadataprefix`` to ``"csw"``, delegates to the parent class and
     then attempts to create a ``CatalogueServiceWeb`` for ``self.url``.
     When that fails ``self.cswrepo`` is set to ``None``.
     ``domain_metadata`` is reset to an empty list.
     """
     self.metadataprefix = "csw"
     super(CSWRepository, self).setRepoParams(repoParams)
     try:
         self.cswrepo = CatalogueServiceWeb(self.url)
     except Exception:
         # Best effort: an unreachable service leaves cswrepo unset, but
         # KeyboardInterrupt / SystemExit must still propagate.
         self.cswrepo = None
     self.domain_metadata = []
Beispiel #50
0
    def show_metadata(self):
        """Fetch the selected record by id and show it in a metadata dialog.

        Returns without doing anything when no tree item is selected.
        Otherwise the record is requested from ``self.catalog_url`` with
        GetRecordById, rendered with the ``record_metadata_dc.html``
        template and displayed in a modal ``RecordDialog``.  A warning box
        is shown (and nothing else happens) when the service answers with an
        ``ExceptionReport`` or the record identifier cannot be resolved.
        """

        if not self.treeRecords.selectedItems():
            return

        item = self.treeRecords.currentItem()
        if not item:
            return

        identifier = get_item_data(item, 'identifier')

        failure = None  # (title, text) of the warning to show, if any
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        try:
            cat = CatalogueServiceWeb(self.catalog_url, timeout=self.timeout,
                                      username=self.catalog_username,
                                      password=self.catalog_password)
            cat.getrecordbyid(
                [self.catalog.records[identifier].identifier])
            # Resolve the record here so a response without the requested
            # identifier is reported through the KeyError handler.
            record = cat.records[identifier]
        except ExceptionReport as err:
            failure = (self.tr('GetRecords error'),
                       self.tr('Error getting response: {0}').format(err))
        except KeyError:
            failure = (self.tr('Record parsing error'),
                       self.tr('Unable to locate record identifier'))
        finally:
            # Always undo the override cursor, including when an unexpected
            # exception propagates; otherwise the wait cursor stays set.
            QApplication.restoreOverrideCursor()

        if failure is not None:
            QMessageBox.warning(self, failure[0], failure[1])
            return

        # Keep the request URL on the record (set as ``xml_url``).
        record.xml_url = cat.request

        crd = RecordDialog()
        metadata = render_template('en', self.context,
                                   record, 'record_metadata_dc.html')

        style = QgsApplication.reportStyleSheet()
        crd.textMetadata.document().setDefaultStyleSheet(style)
        crd.textMetadata.setHtml(metadata)
        crd.exec_()
Beispiel #51
0
def getDataSetURI(anyText, CSWURL, BBox):
    """
    Searches a given CSW server and returns metadata content for the datasets found.

    Arguments
    ---------

    - anyText - A string that will be submitted to the CSW search as a keyword.
    - CSWURL - A base URL for the CSW server to be searched.
    - BBox - A lat/lon bounding box in [minx,miny,maxx,maxy]. Accepted for
      interface compatibility; the current query does not use it.

    Returns
    -------

    A list whose first entry is the header ``['title', 'abstract', ['urls']]``
    followed by one ``[title, abstract, urls]`` entry per record (at most 100
    records are requested).  ``urls`` collects the online distribution URLs
    and the first connect point of every service operation; in URLs
    containing ``/dodsC/`` every ``http`` is replaced by ``dods``.
    """
    csw = CatalogueServiceWeb(CSWURL, skip_caps=True)
    # Works with owslib version 0.8.6.
    csw.getrecords(keywords=[anyText], outputschema='http://www.isotc211.org/2005/gmd', esn='full', maxrecords=100)

    dataSetURIs = [['title', 'abstract', ['urls']]]
    for record in csw.records.values():
        title = record.identification.title
        abstract = record.identification.abstract
        urls = []

        # Records without distribution information raise AttributeError;
        # they simply contribute no distribution URLs.
        try:
            for online in record.distribution.online:
                urls.append(online.url)
        except AttributeError:
            pass

        for info in record.identificationinfo:
            # Not every identification section describes service operations.
            try:
                # Use each operation in turn (indexing the first one on every
                # pass would append the same URL repeatedly).
                for operation in info.operations:
                    urls.append(operation['connectpoint'][0].url)
            except AttributeError:
                pass

        urls = [uri.replace("http", "dods") if "/dodsC/" in uri else uri for uri in urls]
        dataSetURIs.append([title, abstract, urls])
    return dataSetURIs
Beispiel #52
0
    def __init__(self, *args, **kwargs):
        """Configure the catalogue from keyword settings.

        Required keys are ``URL`` and ``ENGINE`` (the part after the last
        dot becomes ``self.type``); ``USER``, ``PASSWORD`` and ``skip_caps``
        (default True) are optional.
        """
        self.url = kwargs['URL']
        self.user = None
        self.password = None
        engine_path = kwargs['ENGINE']
        self.type = engine_path.split('.')[-1]
        self.local = False
        self._group_ids = {}
        self._operation_ids = {}
        self.connected = False
        CatalogueServiceWeb.__init__(
            self, url=self.url, skip_caps=kwargs.get('skip_caps', True))

        parsed_url = urlparse(self.url)
        self.base = '%s://%s/' % (parsed_url.scheme, parsed_url.netloc)

        # User and Password are optional; they are assigned only after the
        # parent initialiser has run.
        for attribute, option in (('user', 'USER'), ('password', 'PASSWORD')):
            if option in kwargs:
                setattr(self, attribute, kwargs[option])
Beispiel #53
0
    def __init__(self, *args, **kwargs):
        """Configure the catalogue from keyword settings.

        Required keys are ``URL`` and ``ENGINE`` (the part after the last
        dot becomes ``self.type``); ``USER``, ``PASSWORD`` and ``skip_caps``
        (default True) are optional.
        """
        self.url = kwargs["URL"]
        self.user = None
        self.password = None
        engine_path = kwargs["ENGINE"]
        self.type = engine_path.split(".")[-1]
        self.local = False
        self._group_ids = {}
        self._operation_ids = {}
        self.connected = False
        CatalogueServiceWeb.__init__(
            self, url=self.url, skip_caps=kwargs.get("skip_caps", True))

        parsed_url = urlparse(self.url)
        self.base = "%s://%s/" % (parsed_url.scheme, parsed_url.netloc)

        # User and Password are optional; they are assigned only after the
        # parent initialiser has run.
        for attribute, option in (("user", "USER"), ("password", "PASSWORD")):
            if option in kwargs:
                setattr(self, attribute, kwargs[option])
        def get_uuid_from_title(csw_url, title):
            '''
            Function to return the UUID of the CSW record whose title matches, using title as the search criterion
            Sample UUID: 221dcfd8-03d7-5083-e053-10a3070a64e3

            The search title (underscores replaced by '%') is shortened one
            character at a time, up to ten times, until the service returns
            records. The returned titles are then compared with all
            non-word characters stripped, shortening the wanted title until
            at least one record matches.

            @param csw_url: URL of the CSW service to query
            @param title: dataset title to look for
            @return: identifier of the uniquely matching record, or None if
                     no record or more than one record matches
            '''
            MAXRECORDS = 200

            uuid = None
            csw = CatalogueServiceWeb(csw_url)
            assert csw.identification.type == 'CSW', '%s is not a valid CSW service' % csw_url

            non_word = re.compile(r'\W')

            search_title = title.replace('_', '%')
            while search_title and len(title) - len(search_title) < 10:
                title_query = PropertyIsEqualTo(
                    'csw:Title', '%' + search_title + '%')
                csw.getrecords2(
                    constraints=[title_query], esn='summary', maxrecords=MAXRECORDS)

                if not csw.records:  # No records found
                    # Broaden search by shortening title
                    search_title = search_title[0:-1]
                    continue

                # Strip all non-alphanumeric characters from the titles once
                stripped_titles = [
                    (identifier, non_word.sub('', record.title or ''))
                    for identifier, record in csw.records.items()]
                alphanumeric_title = non_word.sub('', title)

                # Terminates: the empty string is contained in every title
                while True:
                    uuid_list = [identifier
                                 for identifier, stripped in stripped_titles
                                 if alphanumeric_title in stripped]
                    if uuid_list:
                        break
                    # Broaden search by shortening munged_title
                    alphanumeric_title = alphanumeric_title[0:-1]

                if len(uuid_list) == 1:  # Unique match found
                    uuid = uuid_list[0]
                    logger.info(
                        'UUID %s found from title characters', uuid)

                # Records were returned, so repeating the identical query
                # cannot change the outcome; stop here instead of looping
                # forever on an ambiguous match.
                break

            return uuid
Beispiel #55
0
def getResource(endpoint='http://www.nodc.noaa.gov/geoportal/csw', bbox=None, keywords=None, maxrecords=1, service_type='opendap', verbose=None):
    """Query a CSW endpoint and collect the service URLs of the records found.

    :param endpoint: URL of the CSW endpoint.
    :param bbox: bounding box passed to ``getrecords``.
    :param keywords: keywords passed to ``getrecords``; omitted when None.
    :param maxrecords: maximum number of records to request.
    :param service_type: ``'opendap'`` or ``'wms'``; selects the reference
        scheme whose URL is extracted.
    :param verbose: when not None, the call parameters are printed.
    :returns: dict mapping every record id to a dict ``{title: url}`` that
        covers all returned records having a reference of the requested
        scheme.
    :raises ValueError: if ``service_type`` is not a supported value.
    """
    service_strings = {
        'opendap': 'urn:x-esri:specification:ServiceType:OPeNDAP',
        'wms': 'urn:x-esri:specification:ServiceType:WMS',
    }
    # Fail early and clearly instead of hitting an unbound name later on.
    if service_type not in service_strings:
        raise ValueError('unsupported service_type: %r' % (service_type,))
    service_string = service_strings[service_type]

    csw = CatalogueServiceWeb(endpoint, timeout=30)
    if keywords is not None:
        csw.getrecords(keywords=keywords, bbox=bbox, maxrecords=maxrecords)
    else:
        csw.getrecords(bbox=bbox, maxrecords=maxrecords)

    # The title -> URL mapping does not depend on the record id, so build it
    # once rather than once per record.
    resource = {}
    for rec in csw.records.values():
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            resource[rec.title] = url

    # Every record id gets its own copy of the mapping.
    result = dict((key, dict(resource)) for key in csw.records)

    if verbose is not None:
        print('endpoint:  %s \nbbox:  %s \nkeywords:  %s \nmaxrecords:  %s \nservice_type:  %s'
              % (endpoint, bbox, keywords, maxrecords, service_type))
    return result
Beispiel #56
0
    def __init__(self, 
                 csw_url=None, 
                 timeout=None,
                 debug=False
                 ):
        '''
        Set up a CSWUtils instance connected to a CSW service
        @param csw_url: URL for CSW service. Falls back to CSWUtils.DEFAULT_CSW_URL when empty or None
        @param timeout: Timeout in seconds. Falls back to CSWUtils.DEFAULT_TIMEOUT when zero or None
        @param debug: Flag stored as self.debug
        '''
        if not csw_url:
            csw_url = CSWUtils.DEFAULT_CSW_URL
        if not timeout:
            timeout = CSWUtils.DEFAULT_TIMEOUT
        self.debug = debug

        self.csw = CatalogueServiceWeb(csw_url, timeout=timeout)
Beispiel #57
0
# # Query `apiso:ServiceType` on pycsw endpoint
#
# Notebook export: each "In[n]" marker below starts a new notebook cell.

# In[1]:

from owslib.csw import CatalogueServiceWeb
from owslib import fes
import numpy as np


# In[2]:

# Pick one endpoint; the commented-out lines are alternatives.
#endpoint = 'http://geoport.whoi.edu/csw'
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
endpoint = 'http://catalog.data.gov/csw-all'
#endpoint = 'http://data.doi.gov/csw'
# Connect with a 60 second timeout and print the version the client reports.
csw = CatalogueServiceWeb(endpoint,timeout=60)
print csw.version


# In[3]:

# Show the constraints advertised for the GetRecords operation.
csw.get_operation_by_name('GetRecords').constraints


# In[4]:

# Look up the GetDomain operation, then request the values of the
# `apiso:ServiceType` property and print them.
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except:
    # NOTE(review): the handler body is missing in this excerpt -- confirm
    # against the original notebook.
# <markdowncell>

# #### Query each CSW catalog for every model_name_filter constructed above

# <codecell>

from owslib.csw import CatalogueServiceWeb

# One dict per (model, server, reference) found; filled by the loop below.
model_results = []

for x, single_model_filter in enumerate(model_name_filters):
    # known_model_strings is indexed in parallel with model_name_filters.
    model_name = known_model_strings[x]
    for url in known_csw_servers:
        try:
            csw = CatalogueServiceWeb(url, timeout=20)
            csw.getrecords2(constraints=[single_model_filter], maxrecords=1000, esn='full')
            for item in csw.records.values():
                for d in item.references:
                    result = dict(model=model_name,
                                  scheme=d['scheme'],
                                  url=d['url'],
                                  server=url)
                    model_results.append(result)
        except Exception as e:
            # Report the failing server and carry on with the next one.
            # The exception itself is formatted because most exceptions have
            # no ``msg`` attribute; KeyboardInterrupt is no longer swallowed.
            print("- FAILED: %s - %s" % (url, e))

# <markdowncell>

# <div class="error"><strong>Paginating CSW Records</strong> - Some servers have a maximum amount of records you can retrieve at once. See: https://github.com/ioos/system-test/issues/126</div>