def test_missing_object(self):
    awol1 = SearchResult('core', 'mockmodel', '1000000', 2)
    self.assertEqual(awol1.app_label, 'core')
    self.assertEqual(awol1.model_name, 'mockmodel')
    self.assertEqual(awol1.pk, '1000000')
    self.assertEqual(awol1.score, 2)

    awol2 = SearchResult('core', 'yetanothermockmodel', '1000000', 2)
    self.assertEqual(awol2.app_label, 'core')
    self.assertEqual(awol2.model_name, 'yetanothermockmodel')
    self.assertEqual(awol2.pk, '1000000')
    self.assertEqual(awol2.score, 2)

    # Failed lookups should fail gracefully.
    CaptureHandler.logs_seen = []
    self.assertEqual(awol1.model, MockModel)
    self.assertEqual(awol1.object, None)
    self.assertEqual(awol1.verbose_name, u'Mock model')
    self.assertEqual(awol1.verbose_name_plural, u'Mock models')
    self.assertEqual(awol1.stored, None)
    self.assertEqual(len(CaptureHandler.logs_seen), 4)

    CaptureHandler.logs_seen = []
    self.assertEqual(awol2.model, None)
    self.assertEqual(awol2.object, None)
    self.assertEqual(awol2.verbose_name, u'')
    self.assertEqual(awol2.verbose_name_plural, u'')
    self.assertEqual(awol2.stored, None)
    self.assertEqual(len(CaptureHandler.logs_seen), 12)

def setUp(self):
    faq1 = {
        'id': 'help.faq.1',
        'app_label': 'help',
        'pk': '1',
        'model_name': 'faq',
        'faq_category_desc_lang_en': 'Bad Question',
        'faq_question_lang_en': 'How old are you?',
        'faq_answer_lang_en': 'None of your business!',
    }
    f1 = SearchResult('help', 'help.faq', 1, None, _stored_fields=faq1)

    faq2 = {
        'id': 'help.faq.2',
        'app_label': 'help',
        'pk': '2',
        'model_name': 'faq',
        'faq_category_desc_lang_en': 'Bad Question',
        'faq_question_lang_en': 'What is wrong with you?',
        'faq_answer_lang_en': '#$%@!',
    }
    f2 = SearchResult('help', 'help.faq', 2, None, _stored_fields=faq2)

    faq3 = {
        'id': 'help.faq.3',
        'app_label': 'help',
        'pk': '3',
        'model_name': 'faq',
        'faq_category_desc_lang_en': 'Good Question',
        'faq_question_lang_en': 'What is for lunch?',
        'faq_answer_lang_en': 'Pizza!',
    }
    f3 = SearchResult('help', 'help.faq', 3, None, _stored_fields=faq3)

    # A real query would return results sorted by category, like these.
    self.qresult = [f1, f2, f3]

def test_missing_object(self):
    awol1 = SearchResult("core", "mockmodel", "1000000", 2)
    self.assertEqual(awol1.app_label, "core")
    self.assertEqual(awol1.model_name, "mockmodel")
    self.assertEqual(awol1.pk, "1000000")
    self.assertEqual(awol1.score, 2)

    awol2 = SearchResult("core", "yetanothermockmodel", "1000000", 2)
    self.assertEqual(awol2.app_label, "core")
    self.assertEqual(awol2.model_name, "yetanothermockmodel")
    self.assertEqual(awol2.pk, "1000000")
    self.assertEqual(awol2.score, 2)

    # Failed lookups should fail gracefully.
    CaptureHandler.logs_seen = []
    self.assertEqual(awol1.model, MockModel)
    self.assertEqual(awol1.object, None)
    self.assertEqual(awol1.verbose_name, "Mock model")
    self.assertEqual(awol1.verbose_name_plural, "Mock models")
    self.assertEqual(awol1.stored, None)
    self.assertEqual(len(CaptureHandler.logs_seen), 4)

    CaptureHandler.logs_seen = []
    self.assertEqual(awol2.model, None)
    self.assertEqual(awol2.object, None)
    self.assertEqual(awol2.verbose_name, "")
    self.assertEqual(awol2.verbose_name_plural, "")
    self.assertEqual(awol2.stored, None)
    self.assertEqual(len(CaptureHandler.logs_seen), 12)

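# --- Illustration (not from the original test suite) ---
# A minimal sketch of the constructor contract the two tests above exercise:
# SearchResult takes positional (app_label, model_name, pk, score), and any
# extra kwargs are attached as attributes. Assumes a configured
# Django/Haystack environment; the 'notes'/'note' labels are hypothetical.
from haystack.models import SearchResult

sketch = SearchResult('notes', 'note', '42', 1.5, title='First note')
assert sketch.app_label == 'notes'
assert sketch.model_name == 'note'
assert sketch.pk == '42'
assert sketch.score == 1.5
assert sketch.title == 'First note'  # extra kwargs become attributes

# .model and .object resolve lazily; when the lookup fails (as in
# test_missing_object), they come back as None and the failure is logged
# rather than raised.
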
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None,
           query_facets=None, narrow_queries=None, spelling_query=None,
           limit_to_registered_models=None, **kwargs):
    hits = 0
    results = []

    if query_string:
        for model in self.site.get_indexed_models():
            if query_string == '*':
                qs = model.objects.all()
            else:
                for term in query_string.split():
                    queries = []

                    for field in model._meta._fields():
                        if hasattr(field, 'related'):
                            continue

                        if not field.get_internal_type() in ('TextField', 'CharField', 'SlugField'):
                            continue

                        queries.append(Q(**{'%s__icontains' % field.name: term}))

                    qs = model.objects.filter(reduce(lambda x, y: x | y, queries))

            hits += len(qs)

            for match in qs:
                # Drop attributes that would clash with SearchResult's
                # positional arguments when **match.__dict__ is expanded.
                del match.app_label
                del match.model_name
                result = SearchResult(match._meta.app_label, match._meta.module_name,
                                      match.pk, 0, **match.__dict__)

                # For efficiency.
                result._model = match.__class__
                result._object = match

                results.append(result)

    return {
        'results': results,
        'hits': hits,
    }

def iterator(self):
    for match in QuerySet.iterator(self):
        obj = match.content_object

        if obj is None:
            continue

        kwargs = dict()

        for key, value in match.document.iteritems():
            kwargs[str(key)] = value

        result = SearchResult(obj._meta.app_label, obj._meta.module_name,
                              obj.pk, 0, **kwargs)

        # For efficiency.
        result._model = obj.__class__
        result._object = obj

        yield result

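# --- Illustration (not from the original queryset code) ---
# Because iterator() pre-populates _model and _object, reading
# result.object on the yielded SearchResults is a cache hit rather than a
# fresh database lookup per result. A hedged usage sketch; SearchDocument
# is a hypothetical model whose default queryset uses the iterator() above.
for result in SearchDocument.objects.all():
    print(result.app_label, result.model_name, result.pk)
    print(result.object)  # already attached; no extra DB round-trip
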
def process_documents(self, doclist):
    # TODO: tame import spaghetti
    from haystack import connections

    engine = connections["default"]
    unified_index = engine.get_unified_index()
    indexed_models = unified_index.get_indexed_models()

    for raw_result in doclist:
        raw_result = raw_result["_source"]
        app_label, model_name = raw_result[DJANGO_CT].split('.')
        additional_fields = {}
        model = haystack_get_model(app_label, model_name)

        if model and model in indexed_models:
            for key, value in raw_result.items():
                index = unified_index.get_index(model)
                string_key = str(key)

                if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                    additional_fields[string_key] = index.fields[string_key].convert(value)

            result = SearchResult(app_label, model_name, raw_result[DJANGO_ID],
                                  1, **additional_fields)
            yield result

def process_documents(self, doclist, raw_results):
    # TODO: tame import spaghetti
    from haystack import connections

    engine = connections["default"]
    conn = engine.get_backend().conn
    unified_index = engine.get_unified_index()
    indexed_models = unified_index.get_indexed_models()

    for raw_result in doclist:
        app_label, model_name = raw_result[DJANGO_CT].split('.')
        additional_fields = {}
        model = get_model(app_label, model_name)

        if model and model in indexed_models:
            for key, value in raw_result.items():
                index = unified_index.get_index(model)
                string_key = str(key)

                if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                    additional_fields[string_key] = index.fields[string_key].convert(value)
                else:
                    additional_fields[string_key] = conn._to_python(value)

            del additional_fields[DJANGO_CT]
            del additional_fields[DJANGO_ID]
            del additional_fields['score']

            if raw_result[ID] in getattr(raw_results, 'highlighting', {}):
                additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]]

            result = SearchResult(app_label, model_name, raw_result[DJANGO_ID],
                                  raw_result['score'], **additional_fields)
            yield result

def get_data(self, result):
    if result:
        result = self.fix_data(result)
        result['model_name'] = result.pop('module_name')
        result['score'] = 0
        return SearchResult(**result)
    else:
        raise StopIteration

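# --- Illustration (not from the original code) ---
# Raising StopIteration to signal "no more data" breaks under PEP 479
# (Python 3.7+): a StopIteration that escapes into a generator frame is
# turned into a RuntimeError. If get_data() is ever driven from a
# generator, a sentinel return is safer. A sketch under that assumption:
def get_data(self, result):
    if not result:
        return None  # caller checks for None instead of catching StopIteration

    result = self.fix_data(result)
    result['model_name'] = result.pop('module_name')
    result['score'] = 0
    return SearchResult(**result)
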
def setUp(self):
    glossary1 = {
        'id': 'help.glossary.1',
        'app_label': 'help',
        'pk': '1',
        'model_name': 'glossary',
        'glossary_term_lang_en': 'Cats',
        'glossary_description_lang_en': 'All about Cats',
    }
    g1 = SearchResult('help', 'help.glossary', 1, None, _stored_fields=glossary1)

    glossary2 = {
        'id': 'help.glossary.2',
        'app_label': 'help',
        'pk': '2',
        'model_name': 'glossary',
        'glossary_term_lang_en': 'Dogs',
        'glossary_description_lang_en': 'All about Dogs',
    }
    g2 = SearchResult('help', 'help.glossary', 2, None, _stored_fields=glossary2)

    glossary3 = {
        'id': 'help.glossary.3',
        'app_label': 'help',
        'pk': '3',
        'model_name': 'glossary',
        'glossary_term_lang_en': 'Anteaters',
        'glossary_description_lang_en': 'All about Anteaters',
    }
    g3 = SearchResult('help', 'help.glossary', 3, None, _stored_fields=glossary3)

    self.qresult = [g1, g2, g3]

def test_pickling(self):
    pickle_me_1 = SearchResult("core", "mockmodel", "1000000", 2)
    picklicious = pickle.dumps(pickle_me_1)
    pickle_me_2 = pickle.loads(picklicious)
    self.assertEqual(pickle_me_1.app_label, pickle_me_2.app_label)
    self.assertEqual(pickle_me_1.model_name, pickle_me_2.model_name)
    self.assertEqual(pickle_me_1.pk, pickle_me_2.pk)
    self.assertEqual(pickle_me_1.score, pickle_me_2.score)

def test_read_queryset(self):
    # The model is flagged deleted so not returned by the default manager.
    deleted1 = SearchResult("core", "afifthmockmodel", 2, 2)
    self.assertEqual(deleted1.object, None)

    # Stow.
    old_unified_index = connections["default"]._index
    ui = UnifiedIndex()
    ui.document_field = "author"
    ui.build(indexes=[ReadQuerySetTestSearchIndex()])
    connections["default"]._index = ui

    # The soft delete manager returns the object.
    deleted2 = SearchResult("core", "afifthmockmodel", 2, 2)
    self.assertNotEqual(deleted2.object, None)
    self.assertEqual(deleted2.object.author, "sam2")

    # Restore.
    connections["default"]._index = old_unified_index

def test_read_queryset(self):
    # The model is flagged deleted so not returned by the default manager.
    deleted1 = SearchResult('core', 'afifthmockmodel', 2, 2)
    self.assertEqual(deleted1.object, None)

    import haystack
    from haystack.sites import SearchSite

    # Stow.
    old_site = haystack.site
    test_site = SearchSite()
    haystack.site = test_site
    haystack.site.register(AFifthMockModel, ReadQuerySetTestSearchIndex)

    # The soft delete manager returns the object.
    deleted2 = SearchResult('core', 'afifthmockmodel', 2, 2)
    self.assertNotEqual(deleted2.object, None)
    self.assertEqual(deleted2.object.author, 'sam2')

    # Restore.
    haystack.site = old_site

def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None):
    from haystack import site

    results = []

    # It's important to grab the hits first before slicing. Otherwise, this
    # can cause pagination failures.
    hits = len(raw_page)
    facets = {}
    spelling_suggestion = None
    indexed_models = site.get_indexed_models()

    for doc_offset, raw_result in enumerate(raw_page):
        score = raw_page.score(doc_offset) or 0
        app_label, model_name = raw_result['django_ct'].split('.')
        additional_fields = {}
        model = get_model(app_label, model_name)

        if model and model in indexed_models:
            for key, value in raw_result.items():
                index = site.get_index(model)
                string_key = str(key)

                if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                    # Special-cased due to the nature of KEYWORD fields.
                    if isinstance(index.fields[string_key], MultiValueField):
                        if value is None or len(value) == 0:
                            additional_fields[string_key] = []
                        else:
                            additional_fields[string_key] = value.split(',')
                    else:
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                else:
                    additional_fields[string_key] = self._to_python(value)

            del additional_fields['django_ct']
            del additional_fields['django_id']

            if highlight:
                from whoosh import analysis
                from whoosh.highlight import highlight, ContextFragmenter, UppercaseFormatter

                sa = analysis.StemmingAnalyzer()
                terms = [term.replace('*', '') for term in query_string.split()]

                additional_fields['highlighted'] = {
                    self.content_field_name: [highlight(additional_fields.get(self.content_field_name),
                                                        terms, sa, ContextFragmenter(terms),
                                                        UppercaseFormatter())],
                }

            result = SearchResult(app_label, model_name, raw_result['django_id'],
                                  score, **additional_fields)
            results.append(result)
        else:
            hits -= 1

    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False):
        if spelling_query:
            spelling_suggestion = self.create_spelling_suggestion(spelling_query)
        else:
            spelling_suggestion = self.create_spelling_suggestion(query_string)

    return {
        'results': results,
        'hits': hits,
        'facets': facets,
        'spelling_suggestion': spelling_suggestion,
    }

def more_like_this(self, model_instance, additional_query=None,
                   start_offset=0, end_offset=None,
                   limit_to_registered_models=True, **kwargs):
    """
    Given a model instance, returns a result set of similar documents.

    Required arguments:
        `model_instance` -- The model instance to use as a basis for
            retrieving similar documents.

    Optional arguments:
        `additional_query` -- An additional query to narrow results
        `start_offset` -- The starting offset (default = 0)
        `end_offset` -- The ending offset (default = None), if None, then all documents
        `limit_to_registered_models` -- Limit returned results to models
            registered in the current `SearchSite` (default = True)

    Returns:
        A dictionary with the following keys:
            `results` -- A list of `SearchResult`
            `hits` -- The total available results

    Opens a database connection, then builds a simple query using the
    `model_instance` to build the unique identifier.

    For each document retrieved (should always be one), adds an entry into
    an RSet (relevance set) with the document id, then uses the RSet to
    query for an ESet (a set of terms that can be used to suggest
    expansions to the original query), omitting any document that was in
    the original query.

    Finally, processes the resulting matches and returns.
    """
    database = self._database()

    query = xapian.Query(DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance))

    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    rset = xapian.RSet()

    if not end_offset:
        end_offset = database.get_doccount()

    for match in self._get_enquire_mset(database, enquire, 0, end_offset):
        rset.add_document(match.docid)

    query = xapian.Query(
        xapian.Query.OP_ELITE_SET,
        [expand.term for expand in enquire.get_eset(match.document.termlist_count(), rset, XHExpandDecider())],
        match.document.termlist_count()
    )
    query = xapian.Query(
        xapian.Query.OP_AND_NOT,
        [query, DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)]
    )

    if limit_to_registered_models:
        registered_models = self.build_registered_models_list()

        if len(registered_models) > 0:
            query = xapian.Query(
                xapian.Query.OP_AND, query,
                xapian.Query(
                    xapian.Query.OP_OR,
                    [xapian.Query('%s%s' % (DOCUMENT_CT_TERM_PREFIX, model)) for model in registered_models]
                )
            )

    if additional_query:
        query = xapian.Query(xapian.Query.OP_AND, query, additional_query)

    enquire.set_query(query)

    results = []
    matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)

    for match in matches:
        app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
        results.append(SearchResult(app_label, module_name, pk, match.percent, **model_data))

    return {
        'results': results,
        'hits': self._get_hit_count(database, enquire),
        'facets': {
            'fields': {},
            'dates': {},
            'queries': {},
        },
        'spelling_suggestion': None,
    }

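# --- Illustration (not from the original backend) ---
# A hedged sketch of calling more_like_this() and reading the dictionary it
# returns; `backend` and `note` are assumed to be a configured Xapian
# backend and an already-indexed model instance.
response = backend.more_like_this(note, start_offset=0, end_offset=10)

for similar in response['results']:
    print(similar.app_label, similar.model_name, similar.pk, similar.score)

print('total similar documents:', response['hits'])
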
def _process_results(self, raw_results, highlight=False):
    from haystack import site

    results = []
    hits = raw_results.hits
    facets = {}
    spelling_suggestion = None

    if hasattr(raw_results, 'facets'):
        facets = {
            'fields': raw_results.facets.get('facet_fields', {}),
            'dates': raw_results.facets.get('facet_dates', {}),
            'queries': raw_results.facets.get('facet_queries', {}),
        }

        for key in ['fields']:
            for facet_field in facets[key]:
                # Convert to a two-tuple, as Solr's json format returns a
                # list of pairs.
                facets[key][facet_field] = zip(facets[key][facet_field][::2],
                                               facets[key][facet_field][1::2])

    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
        if hasattr(raw_results, 'spellcheck'):
            if len(raw_results.spellcheck.get('suggestions', [])):
                # For some reason, it's an array of pairs. Pull off the
                # collated result from the end.
                spelling_suggestion = raw_results.spellcheck.get('suggestions')[-1]

    indexed_models = site.get_indexed_models()

    for raw_result in raw_results.docs:
        app_label, model_name = raw_result[DJANGO_CT].split('.')
        additional_fields = {}
        model = get_model(app_label, model_name)

        if model and model in indexed_models:
            for key, value in raw_result.items():
                index = site.get_index(model)
                string_key = str(key)

                if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                    additional_fields[string_key] = index.fields[string_key].convert(value)
                else:
                    additional_fields[string_key] = self.conn._to_python(value)

            del additional_fields[DJANGO_CT]
            del additional_fields[DJANGO_ID]
            del additional_fields['score']

            if raw_result[ID] in getattr(raw_results, 'highlighting', {}):
                additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]]

            result = SearchResult(app_label, model_name, raw_result[DJANGO_ID],
                                  raw_result['score'], **additional_fields)
            results.append(result)
        else:
            hits -= 1

    return {
        'results': results,
        'hits': hits,
        'facets': facets,
        'spelling_suggestion': spelling_suggestion,
    }

def search(self, query, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None,
           query_facets=None, narrow_queries=None, spelling_query=None,
           limit_to_registered_models=True, **kwargs):
    """
    Executes the Xapian::query as defined in `query`.

    Required arguments:
        `query` -- Search query to execute

    Optional arguments:
        `sort_by` -- Sort results by specified field (default = None)
        `start_offset` -- Slice results from `start_offset` (default = 0)
        `end_offset` -- Slice results at `end_offset` (default = None), if None, then all documents
        `fields` -- Filter results on `fields` (default = '')
        `highlight` -- Highlight terms in results (default = False)
        `facets` -- Facet results on fields (default = None)
        `date_facets` -- Facet results on date ranges (default = None)
        `query_facets` -- Facet results on queries (default = None)
        `narrow_queries` -- Narrow queries (default = None)
        `spelling_query` -- An optional query to execute spelling suggestion on
        `limit_to_registered_models` -- Limit returned results to models
            registered in the current `SearchSite` (default = True)

    Returns:
        A dictionary with the following keys:
            `results` -- A list of `SearchResult`
            `hits` -- The total available results
            `facets` -- A dictionary of facets with the following keys:
                `fields` -- A list of field facets
                `dates` -- A list of date facets
                `queries` -- A list of query facets

        If faceting was not used, the `facets` key will not be present.

    If `query` is None, returns no results.

    If `HAYSTACK_INCLUDE_SPELLING` was enabled in `settings.py`, the
    extra flag `FLAG_SPELLING_CORRECTION` will be passed to the query parser
    and any suggestions for spell correction will be returned as well as
    the results.
    """
    if xapian.Query.empty(query):
        return {
            'results': [],
            'hits': 0,
        }

    database = self._database()

    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
        spelling_suggestion = self._do_spelling_suggestion(database, query, spelling_query)
    else:
        spelling_suggestion = ''

    if narrow_queries is not None:
        query = xapian.Query(
            xapian.Query.OP_AND, query,
            xapian.Query(
                xapian.Query.OP_OR,
                [self.parse_query(narrow_query) for narrow_query in narrow_queries]
            )
        )

    if limit_to_registered_models:
        registered_models = self.build_registered_models_list()

        if len(registered_models) > 0:
            query = xapian.Query(
                xapian.Query.OP_AND, query,
                xapian.Query(
                    xapian.Query.OP_OR,
                    [xapian.Query('%s%s' % (DOCUMENT_CT_TERM_PREFIX, model)) for model in registered_models]
                )
            )

    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    if sort_by:
        sorter = xapian.MultiValueSorter()

        for sort_field in sort_by:
            if sort_field.startswith('-'):
                reverse = True
                sort_field = sort_field[1:]  # Strip the '-'
            else:
                reverse = False  # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311

            sorter.add(self._value_column(sort_field), reverse)

        enquire.set_sort_by_key_then_relevance(sorter, True)

    results = []
    facets_dict = {
        'fields': {},
        'dates': {},
        'queries': {},
    }

    if not end_offset:
        end_offset = database.get_doccount() - start_offset

    matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)

    for match in matches:
        app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))

        if highlight:
            model_data['highlighted'] = {
                self.content_field_name: self._do_highlight(model_data.get(self.content_field_name), query)
            }

        results.append(SearchResult(app_label, module_name, pk, match.percent, **model_data))

    if facets:
        facets_dict['fields'] = self._do_field_facets(results, facets)

    if date_facets:
        facets_dict['dates'] = self._do_date_facets(results, date_facets)

    if query_facets:
        facets_dict['queries'] = self._do_query_facets(results, query_facets)

    return {
        'results': results,
        'hits': self._get_hit_count(database, enquire),
        'facets': facets_dict,
        'spelling_suggestion': spelling_suggestion,
    }

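# --- Illustration (not from the original backend) ---
# A sketch of consuming the dictionary search() returns; `backend` is an
# assumed, already-configured Xapian backend, and parse_query() is used
# here only to build a xapian.Query for illustration.
query = backend.parse_query('pony')
response = backend.search(query, facets=['author'], end_offset=20)

print('hits:', response['hits'])

for result in response['results']:
    print(result.app_label, result.model_name, result.pk, result.score)

print(response['facets']['fields'])  # field facets, per the docstring
print(response['spelling_suggestion'])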