def get_records(self, maxrecords=50, **kwargs):
        """
        Send one or more GetRecords requests and store the merged result in
        the ``self.records`` property.

        Pages are fetched with OWSLib's ``getrecords2`` until at least
        ``maxrecords`` records have been collected, the server reports that
        there is no next record, or a page brings no new record.

        :param maxrecords: number of records wanted. The request is repeated
            with an updated start position when a single response does not
            return that many (presumably because the server caps the page
            size -- confirm against the target service).
        :param kwargs: see OWSLib's getrecords2
            (https://github.com/geopython/OWSLib/blob/master/owslib/csw.py).
            A hint: if the "xml" argument is passed (raw XML request), the
            other keyword arguments are ignored and only the payload is sent.
            In that mode the page size comes from the payload itself, so the
            final result may hold more than ``maxrecords`` records.
        """
        # has a raw XML payload been passed? (an explicit xml=None means "no")
        raw_xml = kwargs.get('xml')
        payload = raw_xml.strip() if raw_xml is not None else None

        # all 'csw:Record' entries gathered from the 'GetRecords' pages
        all_records = OrderedDict()

        while True:
            # get next page by using OWSLib's getrecords2
            if payload is None:
                # only ask for what is still missing; it does not matter if
                # this exceeds what the server is willing to return per page
                kwargs['maxrecords'] = maxrecords - len(all_records)
                self.getrecords2(**kwargs)
            else:
                self.getrecords2(xml=payload)

            # store found records in all_records
            count_before = len(all_records)
            all_records.update(self.records)

            # stop if records reached limit
            if len(all_records) >= maxrecords:
                break

            # stop if the page brought nothing new: without this guard a
            # server that keeps answering with the same (or an empty) page
            # and a non-zero next record would make this loop run forever
            if len(all_records) == count_before:
                break

            next_record = self.results['nextrecord']
            # a next record of 0 means we got all records
            if next_record == 0:
                break

            # else, update start position
            if payload is None:
                kwargs['startposition'] = next_record
            else:
                # raw request: patch the startPosition attribute of the root
                payload_xml = etree.fromstring(payload)
                payload_xml.set('startPosition', str(next_record))
                payload = etree.tostring(payload_xml,
                                         pretty_print=True,
                                         encoding='unicode')

        self.records = all_records
Example #2
0
    def mundigetrecords2(self, xml):
        """
        Run a raw-XML GetRecords request and collect every result page.

        The first page is requested with the payload as given. The number of
        pages is then derived from the ``numberOfRecordsMatched`` and
        ``numberOfRecordsReturned`` attributes of the ``csw:SearchResults``
        element of the response, and the remaining pages are requested by
        rewriting the ``startPosition`` attribute of the payload's root
        element with the ``nextRecord`` value of the previous response.

        The merged records replace ``self.records``.

        :param xml: raw GetRecords request; surrounding whitespace is stripped.
        """
        # removing irrelevant surrounding whitespace from the given payload
        payload = xml.strip()

        # all 'csw:Record' entries gathered from the 'GetRecords' pages
        all_records = OrderedDict()

        # getting first page (i.e. 'page0')
        self.getrecords2(xml=payload)
        all_records.update(self.records)
        page0 = lxml.etree.fromstring(self.response)

        sr_node = page0.find('csw:SearchResults', namespaces=mundi_nsmap)
        nb_total = int(sr_node.get("numberOfRecordsMatched"))
        nb_set = int(sr_node.get("numberOfRecordsReturned"))
        next_record = int(sr_node.get("nextRecord"))

        # calculation of page number; an empty first page (nb_set == 0) gives
        # no usable page size, so treat it like "nothing more to fetch"
        # instead of dividing by zero
        if nb_total == 0 or nb_set == 0:
            nb_pages = 1
        else:
            # integer ceiling division
            nb_pages = (nb_total + nb_set - 1) // nb_set

        # getting other/next pages (i.e. 'pageN')
        for _ in range(1, nb_pages):
            # a next record of 0 means the server has nothing left to return;
            # asking for startPosition="0" would not be a valid request
            if next_record == 0:
                break

            # modifying payload with new start position
            node_p = lxml.etree.fromstring(payload)
            node_p.set('startPosition', str(next_record))
            payload = lxml.etree.tostring(node_p, pretty_print=True)

            self.getrecords2(xml=payload)
            page_n = lxml.etree.fromstring(self.response)
            sr_node = page_n.find('csw:SearchResults', namespaces=mundi_nsmap)
            next_record = int(sr_node.get("nextRecord"))
            all_records.update(self.records)

        self.records = all_records