def page(self, number):
    """
    Build and return the Page object for the requested page number.

    This deviates from the default paginator behaviour: "orphans" are
    ignored, and the total count is taken from the ES result and stored
    on the Paginator.
    """
    number = self.validate_number(number)
    first = (number - 1) * self.per_page
    last = first + self.per_page

    # Executing the sliced search yields the hits together with the total,
    # so no separate (and useless) count query is issued, even when there
    # are no results at all.
    response = self.object_list[first:last].execute()
    current = Page(response.hits, number, self)

    # Remember the total that came back with the hits as our `_count`.
    self._count = current.object_list.total

    # Carry the facets over to the page when the response has any.
    if hasattr(response, 'facets'):
        current.facets = response.facets

    # The count is only known at this point, so this is where a page number
    # beyond the last page gets rejected. An empty first page is tolerated
    # when the paginator allows it.
    if number > self.num_pages and not (
            number == 1 and self.allow_empty_first_page):
        raise EmptyPage('That page contains no results')

    return current
def page(self, number):
    """
    Return the Page of search hits for the requested page number.

    Solr has already paginated the results, so instead of slicing an
    object list this asks Solr for one page worth of rows and wraps the
    matching model pages, together with the facet counts, in a Page.
    Results whose id cannot be resolved by ``models.Page.lookup`` are
    left out.
    """
    number = self.validate_number(number)

    # Build the Solr request for this slice of the results and run it.
    connection = SolrConnection(
        settings.SOLR)  # TODO: maybe keep connection around?
    offset = self.per_page * (number - 1)
    extra = {
        'hl.snippets': 100,  # TODO: make this unlimited
        'hl.requireFieldMatch': 'true',  # limits highlighting slop
        'hl.maxAnalyzedChars': '102400',  # increased from default 51200
    }
    extra.update(self.facet_params)
    sort_field, sort_order = _get_sort(self.query.get('sort'),
                                       in_pages=True)
    wanted_fields = [
        'id', 'title', 'date', 'month', 'day', 'sequence',
        'edition_label', 'section_label',
    ]
    response = connection.query(
        self._q,
        fields=wanted_fields,
        highlight=self._ocr_list,
        rows=self.per_page,
        sort=sort_field,
        sort_order=sort_order,
        start=offset,
        **extra)
    facet_counts = response.facet_counts

    # Facet counts per field, in whatever order _sort_facets_asc produces.
    # NOTE(review): the helper name says ascending; its direction is not
    # visible from here -- confirm against the helper.
    facets = {}
    for field in ('city', 'county', 'frequency', 'language', 'state'):
        facets[field] = _sort_facets_asc(facet_counts, field)

    # Year buckets, ordered by bucket label from highest to lowest.
    year_buckets = list(
        facet_counts['facet_ranges']['year']['counts'].items())
    year_buckets.sort(key=lambda bucket: bucket[0], reverse=True)
    facets['year'] = year_buckets

    # When a bucket spans more than one year, label it "<first>-<last>".
    gap = self.facet_params['f_year_facet_range_gap']
    if gap > 1:
        labelled = []
        for bucket in facets['year']:
            label = '%s-%d' % (bucket[0], int(bucket[0]) + gap - 1)
            labelled.append((label, bucket[1]))
        facets['year'] = labelled

    found = []
    for doc in response.results:
        match = models.Page.lookup(doc['id'])
        if not match:
            continue
        # Gather the words found in the highlight snippets of every OCR
        # field for this result.
        snippets = response.highlighting[doc['id']]
        terms = set()
        for ocr_field in self._ocr_list:
            for snippet in snippets.get(ocr_field) or []:
                terms.update(find_words(snippet))
        match.words = sorted(terms, key=lambda term: term.lower())
        match.highlight_url = self.highlight_url(match.url, match.words)
        found.append(match)

    result_page = Page(found, number, self)
    result_page.facets = facets
    return result_page
def page(self, number):
    """
    Return the Page of Solr search results for the given page number.

    Overrides the page method in Paginator since Solr has already
    paginated stuff for us. The returned Page holds the model pages found
    for this slice of the results, each annotated with ``words`` and
    ``highlight_url``, and carries the facet counts as ``facets``.
    Results whose id cannot be resolved by ``models.Page.lookup`` are
    skipped.
    """
    number = self.validate_number(number)

    # figure out the solr query and execute it
    solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    start = self.per_page * (number - 1)
    params = {
        "hl.snippets": 100,  # TODO: make this unlimited
        "hl.requireFieldMatch": 'true',  # limits highlighting slop
        "hl.maxAnalyzedChars": '102400',  # increased from default 51200
    }
    params.update(self.facet_params)
    sort_field, sort_order = _get_sort(self.query.get('sort'), in_pages=True)
    solr_response = solr.query(
        self._q,
        fields="*",
        # Highlighting is requested for the 'ocr_vector' field only, not
        # for every field in self._ocr_list.
        highlight='ocr_vector',
        rows=self.per_page,
        sort=sort_field,
        sort_order=sort_order,
        start=start,
        **params)
    field_counts = solr_response.facet_counts.get('facet_fields')

    # Years are listed in ascending order of the facet value.
    facets = {'year': sorted(field_counts['year'].items())}

    # Every other facet is ordered by hit count, highest first. `key` and
    # `reverse` are passed by keyword: the positional cmp/key/reverse form
    # of sorted() only exists on Python 2 and raises TypeError on
    # Python 3. The resulting order is the same on both.
    for name in ('lccn', 'county', 'region', 'city', 'newspaper_type'):
        facets[name] = sorted(field_counts[name].items(),
                              key=lambda item: item[1],
                              reverse=True)

    pages = []
    for result in solr_response.results:
        page = models.Page.lookup(result['id'])
        if not page:
            continue
        # Collect the words found in the 'ocr_vector' highlight snippets.
        words = set()
        coords = solr_response.highlighting[result['id']]
        for s in coords.get('ocr_vector') or []:
            words.update(find_words(s))
        page.words = sorted(words, key=lambda v: v.lower())
        # len(pages) is the position of this page among the pages kept so
        # far (skipped results do not count).
        page.highlight_url = self.highlight_url(page.url, page.words,
                                                number, len(pages))
        pages.append(page)

    solr_page = Page(pages, number, self)
    solr_page.facets = facets
    return solr_page
def page(self, number):
    """
    Return the Page of Solr search results for the given page number.

    Overrides the page method in Paginator since Solr has already
    paginated stuff for us. The returned Page holds the model pages found
    for this slice of the results, each annotated with ``words`` and
    ``highlight_url``, and carries the facet counts as ``facets``.
    Results whose id cannot be resolved by ``models.Page.lookup`` are
    skipped.
    """
    number = self.validate_number(number)

    # figure out the solr query and execute it
    solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
    start = self.per_page * (number - 1)
    params = {
        "hl.snippets": 100,  # TODO: make this unlimited
        "hl.requireFieldMatch": 'true',  # limits highlighting slop
        "hl.maxAnalyzedChars": '102400',  # increased from default 51200
    }
    params.update(self.facet_params)
    sort_field, sort_order = _get_sort(self.query.get('sort'), in_pages=True)
    solr_response = solr.query(
        self._q,
        fields=['id', 'title', 'date', 'month', 'day', 'sequence',
                'edition_label', 'section_label'],
        highlight=self._ocr_list,
        rows=self.per_page,
        sort=sort_field,
        sort_order=sort_order,
        start=start,
        **params)
    solr_facets = solr_response.facet_counts
    field_counts = solr_facets.get('facet_fields')
    year_counts = solr_facets['facet_ranges']['year']['counts']

    # `key` and `reverse` are passed by keyword below: the positional
    # cmp/key/reverse form of sorted() only exists on Python 2 and raises
    # TypeError on Python 3. The resulting order is the same on both.
    facets = {
        # sort states by number of hits per state (desc)
        'state': sorted(field_counts['state'].items(),
                        key=lambda item: item[1], reverse=True),
        # sort by year (desc), comparing the labels numerically
        'year': sorted(year_counts.items(),
                       key=lambda item: int(item[0]), reverse=True),
        # sort counties by number of hits per county (desc)
        'county': sorted(field_counts['county'].items(),
                         key=lambda item: item[1], reverse=True),
    }

    # When a range bucket spans more than one year, label it
    # "<first>-<last>" instead of just the first year.
    facet_gap = self.facet_params['f_year_facet_range_gap']
    if facet_gap > 1:
        facets['year'] = [
            ('%s-%d' % (y[0], int(y[0]) + facet_gap - 1), y[1])
            for y in facets['year']
        ]

    pages = []
    for result in solr_response.results:
        page = models.Page.lookup(result['id'])
        if not page:
            continue
        # Collect the words found in the highlight snippets of every OCR
        # field for this result.
        words = set()
        coords = solr_response.highlighting[result['id']]
        for ocr in self._ocr_list:
            for s in coords.get(ocr) or []:
                words.update(find_words(s))
        page.words = sorted(words, key=lambda v: v.lower())
        # len(pages) is the position of this page among the pages kept so
        # far (skipped results do not count).
        page.highlight_url = self.highlight_url(page.url, page.words,
                                                number, len(pages))
        pages.append(page)

    solr_page = Page(pages, number, self)
    solr_page.facets = facets
    return solr_page
def page(self, number):
    """
    Return the Page of search hits for the requested page number.

    Solr has already paginated the results, so instead of slicing an
    object list this asks Solr for one page worth of rows and wraps the
    matching model pages, together with the facet counts, in a Page.
    Results whose id cannot be resolved by ``models.Page.lookup`` are
    left out.
    """
    number = self.validate_number(number)

    # Assemble the Solr request parameters for this slice of the results.
    offset = self.per_page * (number - 1)
    query_params = {
        'fl': 'id,title,date,month,day,sequence,edition_label,section_label',
        'hl': 'true',
        'hl.snippets': 100,  # TODO: make this unlimited
        'hl.requireFieldMatch': 'true',  # limits highlighting slop
        'hl.maxAnalyzedChars': '102400',  # increased from default 51200
        'hl.fl': ','.join(self._ocr_list),
        'rows': self.per_page,
        'start': offset,
    }
    query_params.update(self.facet_params)

    # A sort clause is only sent when both a field and an order are given.
    sort_field, sort_order = _get_sort(self.query.get('sort'),
                                       in_pages=True)
    if sort_field and sort_order:
        query_params['sort'] = '%s %s' % (sort_field, sort_order)

    response = conn().search(self._q, **query_params)

    # Pull the facet data out of the Solr response, one entry per field.
    all_facets = response.facets
    by_field = all_facets.get('facet_fields')
    facets = {}
    for field in ('city', 'county', 'frequency', 'language', 'state'):
        facets[field] = _sorted_facet_counts(by_field, field)

    # Year counts come from the range facet rather than the field facets.
    # NOTE(review): the ordering is whatever _sorted_facet_counts yields;
    # presumably descending -- confirm against the helper.
    facets['year'] = _sorted_facet_counts(
        all_facets['facet_ranges']['year'], 'counts')

    # When a bucket spans more than one year, label it "<first>-<last>".
    gap = self.facet_params['f.year.facet.range.gap']
    if gap > 1:
        labelled = []
        for bucket in facets['year']:
            label = '%s-%d' % (bucket[0], int(bucket[0]) + gap - 1)
            labelled.append((label, bucket[1]))
        facets['year'] = labelled

    found = []
    for doc in response.docs:
        match = models.Page.lookup(doc['id'])
        if not match:
            continue
        # Gather the words found in the highlight snippets of every OCR
        # field for this result.
        snippets = response.highlighting[doc['id']]
        terms = set()
        for ocr_field in self._ocr_list:
            for snippet in snippets.get(ocr_field) or []:
                terms.update(find_words(snippet))
        match.words = sorted(terms, key=lambda term: term.lower())
        match.highlight_url = self.highlight_url(match.url, match.words)
        found.append(match)

    result_page = Page(found, number, self)
    result_page.facets = facets
    return result_page