def get_records(self, maxrecords=50, **kwargs):
    """
    Send a GetRecords request, paging until enough records are collected.

    The merged results of all pages are stored in the self.records property.

    :param maxrecords: number of records to collect. getrecords2 is called
        repeatedly until this many records have been gathered, the server
        reports that no further records exist, or a page comes back empty.
    :param kwargs: see OWSLib's getrecords2
        (https://github.com/geopython/OWSLib/blob/master/owslib/csw.py).
        A hint: if the "xml" argument is passed (raw XML request), it is the
        only argument forwarded to getrecords2; the other arguments are
        ignored. In that mode the page size requested in the payload is not
        adjusted by this method, only its "startPosition" attribute is.
    """
    # A missing "xml" key and an explicit xml=None both mean "no raw payload".
    raw_xml = kwargs.get('xml')
    payload = raw_xml.strip() if raw_xml is not None else None

    # all 'csw:Record' entries gathered from the 'GetRecords' response pages
    all_records = OrderedDict()

    while True:
        # get next page by using OWSLib's getrecords2
        if payload is None:
            # only ask for the records that are still missing
            kwargs['maxrecords'] = maxrecords - len(all_records)
            self.getrecords2(**kwargs)
        else:
            self.getrecords2(xml=payload)

        # store found records in all_records
        all_records.update(self.records)

        # stop if the requested number of records has been reached
        if len(all_records) >= maxrecords:
            break

        # an empty page means no progress can be made; stop here rather than
        # loop forever if the server keeps reporting a non-zero next record
        if not self.records:
            break

        # if next_record is 0, we got all records
        next_record = self.results['nextrecord']
        if next_record == 0:
            break

        # else, update start position for the next page
        if payload is None:
            kwargs['startposition'] = next_record
        else:
            payload_xml = etree.fromstring(payload)
            payload_xml.set('startPosition', str(next_record))
            payload = etree.tostring(payload_xml, pretty_print=True, encoding='unicode')

    self.records = all_records
def mundigetrecords2(self, xml):
    """
    Send a raw XML GetRecords request and gather the records of every page.

    The first page is requested with the payload as given. The number of
    pages is then derived from the "numberOfRecordsMatched" and
    "numberOfRecordsReturned" attributes of the first response's
    csw:SearchResults element, and the following pages are requested by
    rewriting the "startPosition" attribute on the payload's root element.
    The merged results are stored in the self.records property.

    :param xml: raw XML GetRecords request; surrounding whitespace is
        stripped before it is sent.
    """
    # removing irrelevant surrounding whitespace from the given payload
    payload = xml.strip()

    # all 'csw:Record' entries gathered from the 'GetRecords' response pages
    all_records = OrderedDict()

    # getting the first page
    self.getrecords2(xml=payload)
    all_records.update(self.records)

    first_page = lxml.etree.fromstring(self.response)
    sr_node = first_page.find('csw:SearchResults', namespaces=mundi_nsmap)
    nb_total = int(sr_node.get("numberOfRecordsMatched"))
    nb_set = int(sr_node.get("numberOfRecordsReturned"))
    next_record = int(sr_node.get("nextRecord"))

    # calculation of the page count; a first page that returned nothing
    # gives no page size to divide by, so there is nothing more to fetch
    if nb_total == 0 or nb_set == 0:
        nb_pages = 1
    else:
        # integer ceiling division (exact, no float rounding involved)
        nb_pages = -(-nb_total // nb_set)

    # getting the following pages
    for _ in range(1, nb_pages):
        # the response reported next record 0: treat it as the end of the
        # result set instead of requesting startPosition="0"
        if next_record == 0:
            break

        # modifying payload with the new start position
        request_root = lxml.etree.fromstring(payload)
        request_root.set('startPosition', str(next_record))
        payload = lxml.etree.tostring(request_root, pretty_print=True)

        self.getrecords2(xml=payload)
        all_records.update(self.records)

        # read where the page after this one starts
        page = lxml.etree.fromstring(self.response)
        sr_node = page.find('csw:SearchResults', namespaces=mundi_nsmap)
        next_record = int(sr_node.get("nextRecord"))

    self.records = all_records