def get_object(self, object_number):
    """Fetch a single object from the API and serialise it as JSON.

    :param object_number: identifier that is appended to the
        ``collection/`` API path.
    :returns: a tuple consisting of the content-type
        (``application/json``) and the object as a JSON string.
    :raises NotFound: when the ``artObject`` entry of the response is
        falsy (for example ``None`` or empty).
    """
    log.info('Getting object: %s', object_number)

    endpoint = 'collection/%s' % object_number
    art_object = self.api_call(endpoint)['artObject']

    if art_object:
        return 'application/json', json.dumps(art_object)

    # The API answered, but without any usable object data
    raise NotFound
def get_all_results(self):
    """Retrieves all available items in a result set.

    An initial request with ``count`` set to 0 is made only to read the
    total number of results. The result set is then requested page by
    page, ``self.per_page_count`` items at a time, sleeping for the
    ``opensearch_delay`` value of the source definition (default 1)
    before each page request. A page that raises an HTTP error or
    cannot be parsed as XML is logged and skipped.

    :returns: a generator that yields a tuple for each record, a tuple
        consists of the content-type and the content as a string.
    """
    # Perform an initial call to get the total number of results in
    # the result set
    resp = self.opensearch_call({'q': self.query, 'count': 0})
    total_results = int(resp.find('.//channel/opensearch:totalResults',
                                  namespaces=resp.nsmap).text)

    page_delay = self.source_definition.get('opensearch_delay', 1)

    # The first result has start index 1; advance one page per pass,
    # whether or not the page could be retrieved
    for start_index in range(1, total_results + 1, self.per_page_count):
        sleep(page_delay)
        log.info('Getting results for %s from %s', self.query, start_index)

        try:
            resp = self.opensearch_call({
                'q': self.query,
                'count': self.per_page_count,
                'startIndex': start_index
            })
        except requests.exceptions.HTTPError:
            log.exception('Error getting results for %s from %s',
                          self.query, start_index)
            continue
        except etree.XMLSyntaxError:
            log.exception('Error parsing XML for %s from %s',
                          self.query, start_index)
            continue

        # Create a copy of the tree without any items
        itemless_tree = deepcopy(resp)
        for item in itemless_tree.xpath('.//channel/item'):
            item.getparent().remove(item)

        # Construct a tree that only includes the item we are iterating
        # over, so every yielded document keeps the channel metadata
        for item in resp.xpath('.//channel/item'):
            single_item_tree = deepcopy(itemless_tree)
            single_item_tree.find('./channel').append(item)

            yield 'application/xml', etree.tostring(single_item_tree)
def get_collection_objects(self):
    """Iterate over every object in the collection, one page at a time.

    A first request for a single item is made only to read the total
    item count from the ``count`` field of the response. That count and
    ``self.items_per_page`` determine how many pages are requested.

    :returns: a generator that yields each entry of the ``artObjects``
        list of every page response.
    """
    # Perform an initial call to get the total number of results
    resp = self.api_call('collection/', params={'p': 0, 'ps': 1})
    total_items = resp['count']

    # Calculate the total number of pages that are available; dividing
    # by a float and rounding up keeps a partially filled last page
    total_pages = int(ceil(total_items / float(self.items_per_page)))

    log.info('Total collection items to fetch %s (%s pages)', total_items,
             total_pages)

    # ``range`` instead of ``xrange``: the latter only exists on Python 2
    for p in range(total_pages):
        log.info('Getting collection items page %s of %s', p, total_pages)
        resp = self.api_call('collection/', params={
            'p': p,
            'ps': self.items_per_page
        })

        for item in resp['artObjects']:
            yield item
def get_object(self, item):
    """Serialise an already retrieved item as JSON.

    :param item: a mapping with an ``id`` entry; the part of the id
        after its last ``/`` is only used for the log message.
    :returns: a tuple consisting of the content-type
        (``application/json``) and the item as a JSON string.
    """
    object_id = item['id'].rsplit('/', 1)[-1]
    log.info('Getting object: %s', object_id)

    serialized = json.dumps(item)
    return 'application/json', serialized