def get_enquire(self):
    """Create and configure a ``xapian.Enquire`` for ``self.query``.

    The enquire object is bound to ``self.database.database`` and set up
    from the instance attributes:

    * ``self.facets``   -- one ``ValueCountMatchSpy`` is registered per
      facet name that resolves to a slot.
    * ``self.sort_by``  -- field names, optionally prefixed with ``-`` to
      request reverse order, fed into a ``MultiValueKeyMaker``.
    * ``self.distinct`` -- collapse key; ``True`` collapses on ``'ID'``,
      any other truthy value is used as the field name.

    Side effects: ``self.spies`` is set to a ``{name: spy}`` dict and
    ``self.warnings`` to a list of messages for names that did not
    resolve to a (truthy) slot.

    Returns the configured enquire object.
    """
    enq = xapian.Enquire(self.database.database)
    enq.set_query(self.query)

    found_spies = {}
    notes = []

    # Facets: attach a value-count spy for every resolvable name.
    for facet in self.facets or ():
        self.dead or 'alive'  # Raises DeadException when needed
        facet = facet.strip().lower()
        facet_slot = get_slot(facet)
        if not facet_slot:
            notes.append("Ignored document value name (%r)" % facet)
            continue
        facet_spy = xapian.ValueCountMatchSpy(facet_slot)
        enq.add_matchspy(facet_spy)
        found_spies[facet] = facet_spy

    # Sorting: first split off the '-' markers, then build the key maker.
    if self.sort_by:
        parsed = []
        for raw in self.sort_by:
            self.dead or 'alive'  # Raises DeadException when needed
            descending = raw.startswith('-')
            parsed.append((raw[1:] if descending else raw, descending))

        key_maker = xapian.MultiValueKeyMaker()
        for raw_name, descending in parsed:
            self.dead or 'alive'  # Raises DeadException when needed
            value_name = raw_name.strip().lower()
            value_slot = get_slot(value_name)
            if not value_slot:
                notes.append("Ignored document value name (%r)" % value_name)
                continue
            key_maker.add_value(value_slot, descending)
        enq.set_sort_by_key_then_relevance(key_maker, self.sort_by_reversed)

    # Collapsing: ``True`` is shorthand for collapsing on the 'ID' field.
    if self.distinct:
        collapse_field = 'ID' if self.distinct is True else self.distinct
        enq.set_collapse_key(get_slot(collapse_field))

    self.spies = found_spies
    self.warnings = notes
    return enq
def _set_sort_by(self, enquire, sort_by):
    """Configure multi-field sorting on ``enquire``.

    Parameters:
        enquire: the ``xapian.Enquire`` object to configure (mutated in
            place).
        sort_by: iterable of field names; a leading ``-`` marks the field
            as reversed and is stripped before the column lookup.

    Each field is mapped to a value slot with ``self._value_column`` and
    added to a ``xapian.MultiValueKeyMaker`` with its per-field reverse
    flag. Returns ``None``.
    """
    sorter = xapian.MultiValueKeyMaker()
    for sort_field in sort_by:
        reverse = sort_field.startswith('-')
        if reverse:
            sort_field = sort_field[1:]  # Strip the '-'
        # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
        sorter.add_value(self._value_column(sort_field), reverse)
    # Pass ``reverse`` explicitly: the one-argument form relied on a
    # deprecated implicit default of True, and newer Xapian releases
    # require the argument. True keeps the ordering that the implicit
    # default produced.
    enquire.set_sort_by_key(sorter, True)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Run ``querystring`` against the Xapian database at ``dbpath``.

    Parameters:
        dbpath: path of the database to open.
        querystring: user query; ``title:`` and ``description:`` prefixes
            are mapped to the ``S`` and ``XD`` term prefixes.
        offset: starting point within the result set.
        pagesize: number of records to retrieve.

    Results are ordered by value slot 1 (forward), then slot 3 (reversed),
    then relevance. One summary is printed per match and the displayed
    docids are passed to ``support.log_matches``. Returns ``None``.
    """
    database = xapian.Database(dbpath)

    # Query parser with English stemming and the field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    for field_name, term_prefix in (("title", "S"), ("description", "XD")):
        parser.add_prefix(field_name, term_prefix)
    parsed = parser.parse_query(querystring)

    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Multi-value sort key: (slot, reverse) pairs in priority order.
    sort_key = xapian.MultiValueKeyMaker()
    for slot, reverse in ((1, False), (3, True)):
        sort_key.add_value(slot, reverse)
    enquire.set_sort_by_key_then_relevance(sort_key, False)

    template = u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data().decode('utf8'))
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'name': record.get('name', u''),
            'date': support.format_date(record.get('admitted', u'')),
            'pop': support.format_numeral(int(record.get('population', 0))),
            'lat': record.get('latitude', u''),
            'lon': record.get('longitude', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Finally, log the query together with the docids that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def _enquire(self, request, query, order_by, group_by):
    """Build a ``xapian.Enquire`` from a free-text query and property filters.

    Parameters:
        request: mapping of property name to a value or a tuple/list of
            values; only names present in ``self._props`` with a prefix
            are used as filters.
        query: free-text query string (may be empty/None); it is first
            passed through ``self._extract_exact_search_terms``.
        order_by: property name to sort by, optionally prefixed with ``+``
            (not reversed) or ``-`` (reversed); may be empty.
        group_by: property name whose slot is used as collapse key; may
            be empty.

    Returns the configured enquire object. ``enforce`` is called when the
    ``order_by``/``group_by`` property is unknown or has no slot.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; in particular the placement of
    ``parser.add_prefix('', prop.prefix)`` inside the ``else`` branch is
    an assumption -- confirm against the upstream file.
    """
    enquire = xapian.Enquire(self._db)
    queries = []
    # Terms requested with a leading '-' are subtracted from the final query.
    and_not_queries = []
    # Sub-queries for boolean properties; applied with OP_FILTER later on.
    boolean_queries = []

    if query:
        query = self._extract_exact_search_terms(query, request)

    # The helper above may have consumed the whole string, hence the re-check.
    if query:
        parser = xapian.QueryParser()
        parser.set_database(self._db)
        for name, prop in self._props.items():
            if not prop.prefix:
                continue
            if prop.boolean:
                parser.add_boolean_prefix(name, prop.prefix)
            else:
                parser.add_prefix(name, prop.prefix)
                parser.add_prefix('', prop.prefix)
            # Numeric properties stored in a slot also get "name:" ranges.
            if prop.slot is not None and \
                    prop.typecast in [int, float, bool]:
                value_range = xapian.NumberValueRangeProcessor(
                        prop.slot, name + ':')
                parser.add_valuerangeprocessor(value_range)
        parser.add_prefix('', '')
        query = parser.parse_query(
                query,
                xapian.QueryParser.FLAG_PHRASE |
                xapian.QueryParser.FLAG_BOOLEAN |
                xapian.QueryParser.FLAG_LOVEHATE |
                xapian.QueryParser.FLAG_PARTIAL |
                xapian.QueryParser.FLAG_WILDCARD |
                xapian.QueryParser.FLAG_PURE_NOT,
                '')
        queries.append(query)

    # Turn every recognised request item into term queries.
    for name, value in request.items():
        prop = self._props.get(name)
        if prop is None or not prop.prefix:
            continue

        sub_queries = []
        not_queries = []
        for needle in value if type(value) in (tuple, list) else [value]:
            if needle is None:
                continue
            needle = prop.to_string(needle)[0]
            if needle.startswith('!'):
                # '!' negates within this property (see not_query below).
                term = _term(prop.prefix, needle[1:])
                not_queries.append(xapian.Query(term))
            elif needle.startswith('-'):
                # '-' excludes the term from the overall result.
                term = _term(prop.prefix, needle[1:])
                and_not_queries.append(xapian.Query(term))
            else:
                term = _term(prop.prefix, needle)
                sub_queries.append(xapian.Query(term))

        if not_queries:
            # xapian.Query('') is presumably Xapian's match-all query, so
            # this reads "everything except any of the '!' terms".
            not_query = xapian.Query(xapian.Query.OP_AND_NOT, [
                xapian.Query(''),
                xapian.Query(xapian.Query.OP_OR, not_queries)
            ])
            sub_queries.append(not_query)

        if sub_queries:
            # Values of one property are alternatives (OR).
            if len(sub_queries) == 1:
                query = sub_queries[0]
            else:
                query = xapian.Query(xapian.Query.OP_OR, sub_queries)
            if prop.boolean:
                boolean_queries.append(query)
            else:
                queries.append(query)

    # Combine: AND of regular queries, filtered by AND of boolean queries.
    final = None
    if queries:
        final = xapian.Query(xapian.Query.OP_AND, queries)
    if boolean_queries:
        query = xapian.Query(xapian.Query.OP_AND, boolean_queries)
        if final is None:
            final = query
        else:
            final = xapian.Query(xapian.Query.OP_FILTER, [final, query])
    if final is None:
        final = xapian.Query('')
    for i in and_not_queries:
        final = xapian.Query(xapian.Query.OP_AND_NOT, [final, i])
    enquire.set_query(final)

    if hasattr(xapian, 'MultiValueKeyMaker'):
        sorter = xapian.MultiValueKeyMaker()
        if order_by:
            if order_by.startswith('+'):
                reverse = False
                order_by = order_by[1:]
            elif order_by.startswith('-'):
                reverse = True
                order_by = order_by[1:]
            else:
                reverse = False
            prop = self._props.get(order_by)
            enforce(prop is not None and prop.slot is not None,
                    'Cannot sort using %r property of %r',
                    order_by, self.metadata.name)
            sorter.add_value(prop.slot, reverse)
        # Sort by ascending GUID to make order predictable all time
        sorter.add_value(0, False)
        enquire.set_sort_by_key(sorter, reverse=False)
    else:
        # Without MultiValueKeyMaker no sort order is configured at all.
        _logger.warning('In order to support sorting, '
                        'Xapian should be at least 1.2.0')

    if group_by:
        prop = self._props.get(group_by)
        enforce(prop is not None and prop.slot is not None,
                'Cannot group by %r property of %r',
                group_by, self.metadata.name)
        enquire.set_collapse_key(prop.slot)

    return enquire
def _blocking_perform_search(self):
    """Run every query in ``self.search_query`` and collect the matches.

    For each query the app/package estimates are accumulated into
    ``self.nr_apps`` / ``self.nr_pkgs`` (both reset to 0 on entry), the
    sort order is chosen from ``self.sortmode``, and matches whose docid
    is not yet in ``self.match_docids`` are appended to ``self._matches``.

    If nothing matched and ``self.nonapps_visible`` is neither
    ``ALWAYS_VISIBLE`` nor ``NEVER_VISIBLE``, visibility is switched to
    ``ALWAYS_VISIBLE`` and the search is run once more; that second run
    cannot recurse again because the visibility check then fails.

    Sets ``self._perform_search_complete = True`` when done. Returns
    ``None``.
    """
    # WARNING this call may run in a thread, so it's *not*
    #         allowed to touch gtk, otherwise hell breaks loose

    # performance only: this is only needed to avoid the
    # python __call__ overhead for each item if we can avoid it

    # use a unique instance of both enquire and xapian database
    # so concurrent queries don't result in an inconsistent database

    # an alternative would be to serialise queries
    enquire = xapian.Enquire(self.db.xapiandb)

    # The filter is only handed to Xapian when it is marked as required.
    if self.filter and self.filter.required:
        xfilter = self.filter
    else:
        xfilter = None

    # go over the queries
    self.nr_apps, self.nr_pkgs = 0, 0
    _matches = self._matches
    match_docids = self.match_docids

    for q in self.search_query:
        LOG.debug("initial query: '%s'" % q)

        # for searches we may want to disable show/hide
        terms = [term for term in q]
        # A single term starting with "XP" is treated as an exact
        # package-name lookup.
        exact_pkgname_query = (len(terms) == 1 and
                               terms[0].startswith("XP"))

        # see if we should do a app query and skip the pkg query
        # see bug #891613 and #1043159
        if exact_pkgname_query:
            with ExecutionTime("de-duplication"):
                q_app = xapian.Query(terms[0].replace("XP", "AP"))
                nr_apps, nr_pkgs = self._get_estimate_nr_apps_and_nr_pkgs(
                    enquire, q_app, xfilter)
                if nr_apps == 1:
                    q = q_app
                    # this is a app query now
                    exact_pkgname_query = False

        with ExecutionTime("calculate nr_apps and nr_pkgs: "):
            nr_apps, nr_pkgs = self._get_estimate_nr_apps_and_nr_pkgs(
                enquire, q, xfilter)
            self.nr_apps += nr_apps
            self.nr_pkgs += nr_pkgs

        # only show apps by default (unless in always visible mode)
        if self.nonapps_visible != NonAppVisibility.ALWAYS_VISIBLE:
            if not exact_pkgname_query:
                q = xapian.Query(xapian.Query.OP_AND,
                                 xapian.Query("ATapplication"),
                                 q)

        LOG.debug("nearly completely filtered query: '%s'" % q)

        # filter out docs of pkgs of which there exists a doc of the app
        # FIXME: make this configurable again?
        enquire.set_query(xapian.Query(xapian.Query.OP_AND_NOT,
                                       q, xapian.Query("XD")))

        # sort results

        # cataloged time - what's new category
        if self.sortmode == SortMethods.BY_CATALOGED_TIME:
            sorter = xapian.MultiValueKeyMaker()
            # The axi "catalogedtime" slot, when known, is added first
            # and therefore takes precedence in the sort key.
            if (self.db._axi_values and
                    "catalogedtime" in self.db._axi_values):
                sorter.add_value(
                    self.db._axi_values["catalogedtime"])
            sorter.add_value(XapianValues.DB_CATALOGED_TIME)
            enquire.set_sort_by_key(sorter, reverse=True)
        elif self.sortmode == SortMethods.BY_TOP_RATED:
            # Imported here rather than at module level (reason not
            # visible from this block).
            from softwarecenter.backend.reviews import get_review_loader
            review_loader = get_review_loader(self.cache, self.db)
            sorter = TopRatedSorter(self.db, review_loader)
            enquire.set_sort_by_key(sorter, reverse=True)
        # search ranking - when searching
        elif self.sortmode == SortMethods.BY_SEARCH_RANKING:
            #enquire.set_sort_by_value(XapianValues.POPCON)
            # use the default enquire.set_sort_by_relevance()
            pass
        # display name - all categories / channels
        elif (self.db._axi_values and
                "display_name" in self.db._axi_values):
            enquire.set_sort_by_key(LocaleSorter(self.db), reverse=False)
        # fallback to pkgname - if needed?
        else:
            enquire.set_sort_by_value_then_relevance(
                XapianValues.PKGNAME, False)

        #~ try:
        # A limit of 0 means "no limit": request up to len(self.db) matches.
        if self.limit == 0:
            matches = enquire.get_mset(0, len(self.db), None, xfilter)
        else:
            matches = enquire.get_mset(0, self.limit, None, xfilter)
        LOG.debug("found ~%i matches" % matches.get_matches_estimated())
        #~ except:
            #~ logging.exception("get_mset")
            #~ matches = []

        # promote exact matches to a "app", this will make the
        # show/hide technical items work correctly
        if exact_pkgname_query and len(matches) == 1:
            self.nr_apps += 1
            self.nr_pkgs -= 2

        # add matches, but don't duplicate docids
        with ExecutionTime("append new matches to existing ones:"):
            for match in matches:
                if not match.docid in match_docids:
                    _matches.append(match)
                    match_docids.add(match.docid)

    # if we have no results, try forcing pkgs to be displayed
    # if not NonAppVisibility.NEVER_VISIBLE is set
    if (not _matches and
            self.nonapps_visible not in (NonAppVisibility.ALWAYS_VISIBLE,
                                         NonAppVisibility.NEVER_VISIBLE)):
        self.nonapps_visible = NonAppVisibility.ALWAYS_VISIBLE
        self._blocking_perform_search()

    # wake up the UI if run in a search thread
    self._perform_search_complete = True