Ejemplo n.º 1
0
    def more_like_this(self,
                       model_instance,
                       additional_query_string=None,
                       start_offset=0,
                       end_offset=None,
                       models=None,
                       limit_to_registered_models=None,
                       result_class=None,
                       **kwargs):
        """
        Find indexed documents similar to ``model_instance``.

        The instance is looked up in the index by its identifier and the
        first hit's ``more_like_this`` on the content field supplies the raw
        results. Those are restricted by the narrowing queries (model content
        types and/or ``additional_query_string``) and then paginated.

        :param model_instance: Object whose identifier is searched for in the
            index.
        :param additional_query_string: Optional extra query used to narrow
            the results. ``None``, an empty string and ``"*"`` are ignored.
        :param start_offset: Passed with ``end_offset`` to
            ``calculate_page`` to derive the page number and page length.
        :param end_offset: See ``start_offset``; also passed as ``top`` to
            the hit's ``more_like_this``.
        :param models: Optional collection of models to restrict results to.
            When non-empty it takes precedence over
            ``limit_to_registered_models``.
        :param limit_to_registered_models: When true (and ``models`` is
            empty), restrict results to ``build_models_list()``. ``None``
            means "read ``settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS``",
            defaulting to ``True``.
        :param result_class: Forwarded to ``_process_results``.
        :param kwargs: Accepted and ignored.
        :returns: The value of ``_process_results`` for the requested page,
            or a dict with empty ``results`` and ``hits`` of 0 when nothing
            can match or the page is out of range.
        :raises ValueError: Re-raised from ``ResultsPage`` when
            ``self.silently_fail`` is false.
        """
        if not self.setup_complete:
            self.setup()

        field_name = self.content_field_name
        narrow_queries = set()
        narrowed_results = None
        self.index = self.index.refresh()

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models:
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            narrow_queries.add(" OR ".join(
                ["%s:%s" % (DJANGO_CT, rm) for rm in model_choices]))

        if additional_query_string and additional_query_string != "*":
            narrow_queries.add(additional_query_string)

        narrow_searcher = None
        searcher = None

        try:
            # Only pay for a second searcher when there is something to
            # narrow by.
            if narrow_queries:
                # Potentially expensive? I don't see another way to do it in
                # Whoosh...
                narrow_searcher = self.index.searcher()

                for nq in narrow_queries:
                    recent_narrowed_results = narrow_searcher.search(
                        self.parser.parse(force_text(nq)), limit=None)

                    # A narrowing query without hits means nothing can match.
                    if len(recent_narrowed_results) <= 0:
                        return {"results": [], "hits": 0}

                    if narrowed_results is None:
                        narrowed_results = recent_narrowed_results
                    else:
                        narrowed_results.filter(recent_narrowed_results)

                        # The narrowing queries have no documents in common,
                        # so the final intersection is necessarily empty.
                        if len(narrowed_results) <= 0:
                            return {"results": [], "hits": 0}

            page_num, page_length = self.calculate_page(start_offset,
                                                        end_offset)

            self.index = self.index.refresh()
            raw_results = EmptyResults()

            if self.index.doc_count():
                query = "%s:%s" % (ID, get_identifier(model_instance))
                searcher = self.index.searcher()
                instance_hits = searcher.search(self.parser.parse(query))

                if len(instance_hits):
                    raw_results = instance_hits[0].more_like_this(
                        field_name, top=end_offset)

                # Handle the case where the results have been narrowed.
                if narrowed_results is not None and hasattr(raw_results,
                                                            "filter"):
                    raw_results.filter(narrowed_results)

            try:
                raw_page = ResultsPage(raw_results, page_num, page_length)
            except ValueError:
                if not self.silently_fail:
                    raise

                return {"results": [], "hits": 0, "spelling_suggestion": None}

            # Because as of Whoosh 2.5.1, it will return the wrong page of
            # results if you request something too high. :(
            if raw_page.pagenum < page_num:
                return {"results": [], "hits": 0, "spelling_suggestion": None}

            return self._process_results(raw_page, result_class=result_class)
        finally:
            # Release the searchers on every exit path (early returns and
            # exceptions included), not only after a successful search.
            if searcher is not None:
                searcher.close()

            if narrow_searcher is not None:
                narrow_searcher.close()
Ejemplo n.º 2
0
 def more_like_this(self, model_instance, additional_query_string=None,
                    start_offset=0, end_offset=None,
                    limit_to_registered_models=None, result_class=None, **kwargs):
     """
     Find indexed documents similar to ``model_instance``.

     The instance is looked up in the index by its identifier and the first
     hit's ``more_like_this`` on the content field supplies the raw results,
     which are then restricted by the narrowing queries and paginated.

     :param model_instance: Object whose identifier is searched for in the
         index.
     :param additional_query_string: Optional extra query used to narrow the
         results. ``None``, an empty string and ``'*'`` are ignored.
     :param start_offset: Index of the first wanted result; ``None`` is
         treated as 0.
     :param end_offset: Index just past the last wanted result. ``None`` is
         treated as 1000000 and values <= 0 are raised to 1.
     :param limit_to_registered_models: When true, restrict results to
         ``build_models_list()``. ``None`` means "read
         ``settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS``", defaulting to
         ``True``.
     :param result_class: Forwarded to ``_process_results``.
     :param kwargs: Accepted and ignored.
     :returns: The value of ``_process_results`` for the requested page, or
         a dict with empty ``results`` and ``hits`` of 0 when nothing can
         match or the page cannot be built.
     :raises ValueError: Re-raised from ``ResultsPage`` when
         ``self.silently_fail`` is false.
     """
     if not self.setup_complete:
         self.setup()

     field_name = self.content_field_name
     narrow_queries = set()
     narrowed_results = None
     self.index = self.index.refresh()

     if limit_to_registered_models is None:
         limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

     if limit_to_registered_models:
         # Using narrow queries, limit the results to only models registered
         # with the current site.
         registered_models = self.build_models_list()

         if len(registered_models) > 0:
             narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in registered_models]))

     if additional_query_string and additional_query_string != '*':
         narrow_queries.add(additional_query_string)

     # Prevent against Whoosh throwing an error. Requires an end_offset
     # greater than 0.
     if end_offset is not None and end_offset <= 0:
         end_offset = 1

     if end_offset is None:
         end_offset = 1000000

     if start_offset is None:
         start_offset = 0

     # Determine the page.
     page_length = end_offset - start_offset
     page_num = 0

     if page_length and page_length > 0:
         # Floor division keeps the page number an integer even when "/"
         # means true division.
         page_num = start_offset // page_length

     # Increment because Whoosh uses 1-based page numbers.
     page_num += 1

     narrow_searcher = None
     searcher = None

     try:
         # Only pay for a second searcher when there is something to narrow
         # by.
         if narrow_queries:
             # Potentially expensive? I don't see another way to do it in Whoosh...
             narrow_searcher = self.index.searcher()

             for nq in narrow_queries:
                 recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_unicode(nq)))

                 # A narrowing query without hits means nothing can match;
                 # without this check the empty (falsy) result would be
                 # skipped below and unfiltered results returned.
                 if len(recent_narrowed_results) <= 0:
                     return {
                         'results': [],
                         'hits': 0,
                     }

                 if narrowed_results is None:
                     narrowed_results = recent_narrowed_results
                 else:
                     narrowed_results.filter(recent_narrowed_results)

                     # The narrowing queries have no documents in common.
                     if len(narrowed_results) <= 0:
                         return {
                             'results': [],
                             'hits': 0,
                         }

         self.index = self.index.refresh()
         raw_results = EmptyResults()

         if self.index.doc_count():
             query = "%s:%s" % (ID, get_identifier(model_instance))
             searcher = self.index.searcher()
             instance_hits = searcher.search(self.parser.parse(query))

             if len(instance_hits):
                 raw_results = instance_hits[0].more_like_this(field_name, top=end_offset)

             # Handle the case where the results have been narrowed.
             if narrowed_results is not None and hasattr(raw_results, 'filter'):
                 raw_results.filter(narrowed_results)

         try:
             raw_page = ResultsPage(raw_results, page_num, page_length)
         except ValueError:
             if not self.silently_fail:
                 raise

             return {
                 'results': [],
                 'hits': 0,
                 'spelling_suggestion': None,
             }

         return self._process_results(raw_page, result_class=result_class)
     finally:
         # Release the searchers on every exit path. ``searcher`` stays
         # ``None`` when the index holds no documents, so it must be guarded.
         if searcher is not None:
             searcher.close()

         if narrow_searcher is not None:
             narrow_searcher.close()
Ejemplo n.º 3
0
 def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
            fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
            narrow_queries=None, spelling_query=None,
            limit_to_registered_models=None, result_class=None, **kwargs):
     """
     Run ``query_string`` against the Whoosh index and return one page of
     results.

     :param query_string: The query text. Empty queries, and one-character
         queries other than ``'*'``, return no results.
     :param sort_by: Optional sequence of field names, each optionally
         prefixed with ``'-'`` for descending order. Only the first entry
         is passed to Whoosh; its prefix decides ``reverse``.
     :param start_offset: Index of the first wanted result; ``None`` is
         treated as 0.
     :param end_offset: Index just past the last wanted result; also used
         as the search ``limit``. Values <= 0 are raised to 1; ``None``
         means no limit and a page end of 1000000.
     :param fields: Accepted and ignored.
     :param highlight: Forwarded to ``_process_results``.
     :param facets: Unsupported; a warning is issued when not ``None``.
     :param date_facets: Unsupported; a warning is issued when not ``None``.
     :param query_facets: Unsupported; a warning is issued when not ``None``.
     :param narrow_queries: Optional collection of query strings whose
         matches restrict the results. The caller's collection is copied,
         never modified.
     :param spelling_query: Text used for the spelling suggestion instead
         of ``query_string`` when given.
     :param limit_to_registered_models: When true, additionally narrow to
         ``build_registered_models_list()``. ``None`` means "read
         ``settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS``", defaulting to
         ``True``.
     :param result_class: Forwarded to ``_process_results``.
     :param kwargs: Accepted and ignored.
     :returns: The value of ``_process_results`` for the requested page, or
         a dict with empty ``results`` and ``hits`` of 0 (plus
         ``spelling_suggestion`` on some paths) when nothing can be
         returned.
     :raises SearchBackendError: When more than one sort field is given and
         more than one of them is reversed.
     """
     if not self.setup_complete:
         self.setup()

     # A zero length query should return no results.
     if len(query_string) == 0:
         return {
             'results': [],
             'hits': 0,
         }

     query_string = force_unicode(query_string)

     # A one-character query (non-wildcard) gets nabbed by a stopwords
     # filter and should yield zero results.
     if len(query_string) <= 1 and query_string != u'*':
         return {
             'results': [],
             'hits': 0,
         }

     reverse = False

     if sort_by is not None:
         # Determine if we need to reverse the results and if Whoosh can
         # handle what it's being asked to sort by. Reversing is an
         # all-or-nothing action, unfortunately.
         sort_fields = list(sort_by)
         reverse_counter = sum(1 for order_by in sort_fields if order_by.startswith('-'))

         if len(sort_fields) > 1 and reverse_counter > 1:
             raise SearchBackendError("Whoosh does not handle more than one field and any field being ordered in reverse.")

         if sort_fields:
             # Only the first field is used; its prefix sets the direction.
             primary = sort_fields[0]
             reverse = primary.startswith('-')
             sort_by = primary[1:] if reverse else primary
         else:
             # An empty sort list means "no explicit ordering" rather than
             # an IndexError.
             sort_by = None

     if facets is not None:
         warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)

     if date_facets is not None:
         warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)

     if query_facets is not None:
         warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)

     narrowed_results = None
     self.index = self.index.refresh()

     if limit_to_registered_models is None:
         limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

     # Work on a private copy so the caller's collection is never mutated.
     if narrow_queries is None:
         narrow_queries = set()
     else:
         narrow_queries = set(narrow_queries)

     if limit_to_registered_models:
         # Using narrow queries, limit the results to only models registered
         # with the current site.
         registered_models = self.build_registered_models_list()

         if len(registered_models) > 0:
             narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in registered_models]))

     narrow_searcher = None
     searcher = None

     try:
         # Only pay for a second searcher when there is something to narrow
         # by.
         if narrow_queries:
             # Potentially expensive? I don't see another way to do it in Whoosh...
             narrow_searcher = self.index.searcher()

             for nq in narrow_queries:
                 recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_unicode(nq)))

                 # A narrowing query without hits means nothing can match;
                 # without this check the empty (falsy) result would be
                 # skipped below and unfiltered results returned.
                 if len(recent_narrowed_results) <= 0:
                     return {
                         'results': [],
                         'hits': 0,
                     }

                 if narrowed_results is None:
                     narrowed_results = recent_narrowed_results
                 else:
                     narrowed_results.filter(recent_narrowed_results)

                     # The narrowing queries have no documents in common.
                     if len(narrowed_results) <= 0:
                         return {
                             'results': [],
                             'hits': 0,
                         }

         self.index = self.index.refresh()

         if not self.index.doc_count():
             # Nothing is indexed: the only thing to offer is a spelling
             # suggestion, when enabled.
             if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False):
                 spelling_suggestion = self.create_spelling_suggestion(spelling_query or query_string)
             else:
                 spelling_suggestion = None

             return {
                 'results': [],
                 'hits': 0,
                 'spelling_suggestion': spelling_suggestion,
             }

         searcher = self.index.searcher()
         parsed_query = self.parser.parse(query_string)

         # In the event of an invalid/stopworded query, recover gracefully.
         if parsed_query is None:
             return {
                 'results': [],
                 'hits': 0,
             }

         # Prevent against Whoosh throwing an error. Requires an end_offset
         # greater than 0.
         if end_offset is not None and end_offset <= 0:
             end_offset = 1

         raw_results = searcher.search(parsed_query, limit=end_offset, sortedby=sort_by, reverse=reverse)

         # Handle the case where the results have been narrowed.
         if narrowed_results is not None:
             raw_results.filter(narrowed_results)

         # Determine the page.
         if end_offset is None:
             end_offset = 1000000

         if start_offset is None:
             start_offset = 0

         page_length = end_offset - start_offset
         page_num = 0

         if page_length and page_length > 0:
             # Floor division keeps the page number an integer even when
             # "/" means true division.
             page_num = start_offset // page_length

         # Increment because Whoosh uses 1-based page numbers.
         page_num += 1

         try:
             raw_page = ResultsPage(raw_results, page_num, page_length)
         except ValueError:
             return {
                 'results': [],
                 'hits': 0,
                 'spelling_suggestion': None,
             }

         return self._process_results(raw_page, highlight=highlight, query_string=query_string, spelling_query=spelling_query, result_class=result_class)
     finally:
         # Release the searchers on every exit path (early returns, the
         # empty-index branch and exceptions included).
         if searcher is not None:
             searcher.close()

         if narrow_searcher is not None:
             narrow_searcher.close()
Ejemplo n.º 4
0
    def more_like_this(self, model_instance, additional_query_string=None,
                       start_offset=0, end_offset=None, models=None,
                       limit_to_registered_models=None, result_class=None, **kwargs):
        """
        Find indexed documents similar to ``model_instance``.

        The instance is looked up in the index by its identifier and the
        first hit's ``more_like_this`` on the content field supplies the raw
        results. Those are restricted by the narrowing queries (model content
        types and/or ``additional_query_string``) and then paginated. Both
        searchers are opened with ``weighting=self.weight_score``.

        :param model_instance: Object whose identifier is searched for in the
            index.
        :param additional_query_string: Optional extra query used to narrow
            the results. ``None``, an empty string and ``'*'`` are ignored.
        :param start_offset: Index of the first wanted result; ``None`` is
            treated as 0.
        :param end_offset: Index just past the last wanted result. ``None``
            is treated as 1000000 and values <= 0 are raised to 1.
        :param models: Optional collection of model classes to restrict
            results to. When non-empty it takes precedence over
            ``limit_to_registered_models``.
        :param limit_to_registered_models: When true (and ``models`` is
            empty), restrict results to ``build_models_list()``. ``None``
            means "read ``settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS``",
            defaulting to ``True``.
        :param result_class: Forwarded to ``_process_results``.
        :param kwargs: Accepted and ignored.
        :returns: The value of ``_process_results`` for the requested page,
            or a dict with empty ``results`` and ``hits`` of 0 when nothing
            can match or the page cannot be built.
        :raises ValueError: Re-raised from ``ResultsPage`` when
            ``self.silently_fail`` is false.
        """
        if not self.setup_complete:
            self.setup()

        field_name = self.content_field_name
        narrow_queries = set()
        narrowed_results = None
        self.index = self.index.refresh()

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models:
            model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

        if additional_query_string and additional_query_string != '*':
            narrow_queries.add(additional_query_string)

        # NOTE(review): an unconditional ``narrow_queries = None`` used to
        # sit here and silently discarded every narrowing option built
        # above. Narrowing is applied again; confirm no caller relied on the
        # unfiltered results.

        # Prevent against Whoosh throwing an error. Requires an end_offset
        # greater than 0.
        if end_offset is not None and end_offset <= 0:
            end_offset = 1

        if end_offset is None:
            end_offset = 1000000

        if start_offset is None:
            start_offset = 0

        # Determine the page.
        page_length = end_offset - start_offset
        page_num = 0

        if page_length and page_length > 0:
            # Floor division keeps the page number an integer even when "/"
            # means true division.
            page_num = start_offset // page_length

        # Increment because Whoosh uses 1-based page numbers.
        page_num += 1

        narrow_searcher = None
        searcher = None

        try:
            # Only pay for a second searcher when there is something to
            # narrow by.
            if narrow_queries:
                # Potentially expensive? I don't see another way to do it in Whoosh...
                narrow_searcher = self.index.searcher(weighting=self.weight_score)

                for nq in narrow_queries:
                    recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_unicode(nq)))

                    # A narrowing query without hits means nothing can match.
                    if len(recent_narrowed_results) <= 0:
                        return {
                            'results': [],
                            'hits': 0,
                        }

                    if narrowed_results is None:
                        narrowed_results = recent_narrowed_results
                    else:
                        narrowed_results.filter(recent_narrowed_results)

                        # The narrowing queries have no documents in common.
                        if len(narrowed_results) <= 0:
                            return {
                                'results': [],
                                'hits': 0,
                            }

            self.index = self.index.refresh()
            raw_results = EmptyResults()

            if self.index.doc_count():
                query = "%s:%s" % (ID, get_identifier(model_instance))
                searcher = self.index.searcher(weighting=self.weight_score)
                instance_hits = searcher.search(self.parser.parse(query))

                if len(instance_hits):
                    raw_results = instance_hits[0].more_like_this(field_name, top=end_offset)

                # Handle the case where the results have been narrowed.
                if narrowed_results is not None and hasattr(raw_results, 'filter'):
                    raw_results.filter(narrowed_results)

            try:
                raw_page = ResultsPage(raw_results, page_num, page_length)
            except ValueError:
                if not self.silently_fail:
                    raise

                return {
                    'results': [],
                    'hits': 0,
                    'spelling_suggestion': None,
                }

            # ``searcher`` is ``None`` here when the index holds no
            # documents; previously that case failed on an unbound name.
            return self._process_results(raw_page, result_class=result_class, s=searcher)
        finally:
            # Release the searchers on every exit path (early returns and
            # exceptions included).
            if searcher is not None:
                searcher.close()

            if narrow_searcher is not None:
                narrow_searcher.close()