def _query_from_search_node(self, search_node, is_not=False):
    """
    Recursively convert a search node tree into a single ``xapian.Query``.

    Children that are ``SearchNode`` instances are converted by recursion,
    using the child's own ``negated`` flag.  Every other child is unpacked
    as an ``(expression, term)`` pair and turned into one query.  The
    collected queries are joined with OR when ``search_node.connector`` is
    ``'OR'`` and with AND otherwise.

    Required arguments:
        ``search_node`` -- The node whose children are converted

    Optional arguments:
        ``is_not`` -- Forwarded unchanged to the leaf query builders;
                      presumably requests a negated query -- TODO confirm
                      (default = False)

    Returns:
        A ``xapian.Query`` combining the queries of all children
    """
    # Filter types that are served by a ``_filter_<type>`` method.  A leaf
    # with any other filter type contributes no query at all.
    known_filters = ('contains', 'exact', 'gt', 'gte', 'lt', 'lte',
                     'startswith', 'in')

    sub_queries = []
    for child in search_node.children:
        if isinstance(child, SearchNode):
            sub_queries.append(
                self._query_from_search_node(child, child.negated))
            continue

        expression, term = child
        field, filter_type = search_node.split_expression(expression)

        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(term, 'values_list'):
            term = list(term)

        if isinstance(term, (list, tuple)):
            term = [_marshal_term(item) for item in term]
        else:
            term = _marshal_term(term)

        if field == 'content':
            sub_queries.append(self._content_field(term, is_not))
        elif filter_type in known_filters:
            build_filter = getattr(self, '_filter_%s' % filter_type)
            sub_queries.append(build_filter(term, field, is_not))

    if search_node.connector == 'OR':
        operator = xapian.Query.OP_OR
    else:
        operator = xapian.Query.OP_AND
    return xapian.Query(operator, sub_queries)
def test_highlight(self):
    """Searching with ``highlight=True`` wraps the matched term in ``<em>``."""
    # An empty query yields the bare "no results" structure.
    no_results = {'hits': 0, 'results': []}
    self.assertEqual(
        self.backend.search(xapian.Query(), highlight=True), no_results)

    hit_count = self.backend.search(
        xapian.Query('indexed'), highlight=True)['hits']
    self.assertEqual(hit_count, 3)

    results = self.backend.search(
        xapian.Query('indexed'), highlight=True)['results']
    highlighted_texts = [result.highlighted['text'] for result in results]
    expected_texts = [
        '<em>indexed</em>!\n1',
        '<em>indexed</em>!\n2',
        '<em>indexed</em>!\n3',
    ]
    self.assertEqual(highlighted_texts, expected_texts)
def test_date_facets_seconds(self):
    """Date facets with ``gap_by='second'`` yield one bucket per second."""
    window_start = datetime.datetime(2009, 2, 25, 1, 0, 57)
    window_end = datetime.datetime(2009, 2, 25, 1, 1, 1)
    facets = {
        'datetime': {
            'start_date': window_start,
            'end_date': window_end,
            'gap_by': 'second',
        },
    }

    # An empty query yields the bare "no results" structure, facets or not.
    empty_result = self.backend.search(xapian.Query(), date_facets=facets)
    self.assertEqual(empty_result, {'hits': 0, 'results': []})

    results = self.backend.search(xapian.Query('indexed'),
                                  date_facets=facets)
    self.assertEqual(results['hits'], 3)

    # Buckets are expected newest first, as (bucket start, count) pairs.
    expected_buckets = [
        (datetime.datetime(2009, 2, 25, 1, 1, 0), 0),
        (datetime.datetime(2009, 2, 25, 1, 0, 59), 1),
        (datetime.datetime(2009, 2, 25, 1, 0, 58), 1),
        (datetime.datetime(2009, 2, 25, 1, 0, 57), 1),
    ]
    self.assertEqual(results['facets']['dates']['datetime'],
                     expected_buckets)
def __init__(self, subcategory=None):
    """Create the "Recommended For You" category and request recommendations.

    :param subcategory: optional subcategory to scope the recommendations
        to; when falsy the category covers the full set of recommendations.

    The category is initialised with an empty xapian query, the
    'available-only' and 'not-installed-only' flags and an item limit of
    60.  A ``RecommenderAgent`` is then created, its "recommend-me" and
    "error" signals are connected to this object's handlers, and a
    recommendation query is started.
    """
    self.subcategory = subcategory
    if not subcategory:
        # this is the full set of recommendations for e.g. the lobby view
        cat_title = u"Recommended For You"
        tr_title = _("Recommended For You")
    else:
        # this is the set of recommendations for a given subcategory
        cat_title = u"Recommended For You in %s" % (
            subcategory.untranslated_name)
        tr_title = utf8(_("Recommended For You in %s")) % utf8(
            subcategory.name)

    super(RecommendedForYouCategory, self).__init__(
        cat_title,
        tr_title,
        None,
        xapian.Query(),
        flags=['available-only', 'not-installed-only'],
        item_limit=60)

    agent = RecommenderAgent()
    self.recommender_agent = agent
    agent.connect("recommend-me", self._recommend_me_result)
    agent.connect("error", self._recommender_agent_error)
    agent.query_recommend_me()
def _add_category_to_query(query):
    """Helper that restricts ``query`` to the current category.

    ``category_query`` comes from the enclosing scope.  When it is falsy
    the query is returned untouched; otherwise both are combined with AND.
    """
    if category_query:
        return xapian.Query(xapian.Query.OP_AND, category_query, query)
    return query
def find(self, wordlist):
    '''Look up all the words in the wordlist.

    Words shorter than ``self.minlength`` or longer than ``self.maxlength``
    are ignored, as are stopwords.  The remaining words are lower-cased,
    stemmed with the English stemmer and combined with AND, so a document
    has to contain every remaining word to match.

    Returns an empty dictionary when ``wordlist`` is empty.  Otherwise
    returns a list with one tuple per matching document, obtained by
    splitting the document data on ':'.
    '''
    if not wordlist:
        return {}

    database = self._get_database()
    enquire = xapian.Enquire(database)
    stemmer = xapian.Stem("english")

    terms = []
    for word in wordlist:
        if not (self.minlength <= len(word) <= self.maxlength):
            continue
        # Stopwords are checked against the upper-cased form.
        candidate = word.upper()
        if self.is_stopword(candidate):
            continue
        terms.append(stemmer(s2b(candidate.lower())))

    enquire.set_query(xapian.Query(xapian.Query.OP_AND, terms))
    # Ask for as many hits as there are documents, i.e. for all of them.
    matches = enquire.get_mset(0, database.get_doccount())
    return [
        tuple(b2s(match.document.get_data()).split(':'))
        for match in matches
    ]
def axi_search_pkgs(axi, pkgs_list):
    """Search the ``axi`` database for any of the given packages.

    Each entry of ``pkgs_list`` is prefixed with "XP" (presumably the
    package-name term prefix -- TODO confirm) and the resulting terms are
    combined with OR.

    Returns the match set, sized to hold every document in ``axi``.
    """
    any_package = xapian.Query(
        xapian.Query.OP_OR, ["XP" + name for name in pkgs_list])
    enquire = xapian.Enquire(axi)
    enquire.set_query(any_package)
    return enquire.get_mset(0, axi.get_doccount())
def __init__(self, untranslated_name, name, iconname, query,
             only_unallocated=True, dont_display=False, flags=None,
             subcategories=None, sortmode=SortMethods.BY_ALPHABET,
             item_limit=0):
    """Initialise a category.

    :param untranslated_name: stored as ``self.untranslated_name``
    :param name: display name; stored UTF-8 encoded as ``self.name``
    :param iconname: stored as ``self.iconname``
    :param query: xapian query of the category; it is OR-ed with the
        query of every subcategory before being stored
    :param only_unallocated: stored as ``self.only_unallocated``
    :param dont_display: stored as ``self.dont_display``
    :param flags: list of flags; defaults to a new empty list
    :param subcategories: list of subcategories; defaults to a new empty
        list
    :param sortmode: stored as ``self.sortmode``
    :param item_limit: stored as ``self.item_limit``
    """
    GObject.GObject.__init__(self)

    # The lists are kept on the instance, so a shared mutable default
    # would leak changes from one category into every other one.
    if flags is None:
        flags = []
    if subcategories is None:
        subcategories = []

    if isinstance(name, str):
        # Byte string: round-trip through unicode so that invalid UTF-8
        # is rejected here.
        self.name = unicode(name, 'utf8').encode('utf8')
    else:
        self.name = name.encode('utf8')
    self.untranslated_name = untranslated_name
    self.iconname = iconname

    # A category matches everything that any of its subcategories match.
    for subcategory in subcategories:
        query = xapian.Query(xapian.Query.OP_OR, query, subcategory.query)
    self.query = query

    self.only_unallocated = only_unallocated
    self.subcategories = subcategories
    self.dont_display = dont_display
    self.flags = flags
    self.sortmode = sortmode
    self.item_limit = item_limit
def test_app_store(self):
    """Exercise AppListStore: filling, lazy loading, icon buffering, clear."""
    # get a enquire object
    enquirer = AppEnquire(self.cache, self.db)
    enquirer.set_query(xapian.Query(""))

    # get a AppListStore and run functions on it
    store = AppListStore(self.db, self.cache, self.icons)

    # test if set from matches works
    self.assertEqual(len(store), 0)
    store.set_from_matches(enquirer.matches)
    self.assertTrue(len(store) > 0)

    # ensure the first row has a xapian doc type
    first_doc = store[0][0]
    self.assertEqual(type(first_doc), xapian.Document)

    # lazy loading of the docs
    self.assertEqual(store[100][0], None)

    # test the load range stuff
    store.load_range(indices=[100], step=15)
    self.assertEqual(type(store[100][0]), xapian.Document)

    # ensure buffer_icons works and loads stuff into the cache
    store.buffer_icons()
    self.assertEqual(len(store.icon_cache), 0)
    # the cache is still empty at this point; drain the pending Gtk events
    # before checking it again
    while Gtk.events_pending():
        Gtk.main_iteration()
    self.assertTrue(len(store.icon_cache) > 0)

    # ensure clear works
    store.clear()
    self.assertEqual(store.current_matches, None)
def get_most_popular_applications_for_mimetype(self, mimetype,
                                               only_uninstalled=True, num=3):
    """ return a list of the most popular applications for the given
        mimetype

        :param mimetype: looked up as the term "AM<mimetype>"
        :param only_uninstalled: when true, only applications whose
            package state is ``PkgStates.UNINSTALLED`` are collected
        :param num: collecting stops once this many applications are found
    """
    # sort by popularity by default
    enquire = xapian.Enquire(self.xapiandb)
    enquire.set_sort_by_value_then_relevance(XapianValues.POPCON)

    # query mimetype
    enquire.set_query(xapian.Query("AM%s" % mimetype))

    # mset just needs to be "big enough"
    candidates = enquire.get_mset(0, 100)

    apps = []
    for candidate in candidates:
        doc = candidate.document
        app = Application(self.get_appname(doc), self.get_pkgname(doc),
                          popcon=self.get_popcon(doc))
        # The details are only looked up when the filter is requested.
        wanted = (not only_uninstalled or
                  app.get_details(self).pkg_state == PkgStates.UNINSTALLED)
        if wanted:
            apps.append(app)
        if len(apps) == num:
            break
    return apps
def _create_query_for_field(self, field, value, analyzer=None):
    """generate a field query

    this function creates a field->value query

    @param field: the fieldname to be used
    @type field: str
    @param value: the wanted value of the field
    @type value: str
    @param analyzer: Define query options (partial matching, exact
        matching, tokenizing, ...) as bitwise combinations of
        CommonIndexer.ANALYZER_???.
        If analyzer is None (default), then C{self.analyzer} is used.
    @type analyzer: int
    @return: the resulting query object
    @rtype: xapian.Query
    """
    if analyzer is None:
        analyzer = self.analyzer

    prefix = field.upper()
    if analyzer == self.ANALYZER_EXACT:
        # exact matching -> keep special characters
        return xapian.Query("%s%s" % (prefix, value))

    # other queries need a parser object
    parser = xapian.QueryParser()
    parser.set_database(self.reader)

    # partial matching gets its flag; everything else (not partial and not
    # exact) is parsed without any flags
    if analyzer & self.ANALYZER_PARTIAL > 0:
        match_flags = xapian.QueryParser.FLAG_PARTIAL
    else:
        match_flags = 0
    return parser.parse_query(value, match_flags, prefix)
def get_query_from_search_entry(search_term):
    """Turn the text of a search entry into a xapian query.

    A falsy ``search_term`` yields ``xapian.Query("")``; anything else is
    parsed with a default ``xapian.QueryParser``.
    """
    import xapian

    if search_term:
        return xapian.QueryParser().parse_query(search_term)
    return xapian.Query("")
def __call__(self, begin, end):
    """Validate a range's bounds and delegate to ``self.nrp``.

    :param begin: lower bound as a string; an empty string is not checked
    :param end: upper bound as a string; an empty string is not checked
    :return: ``xapian.Query(xapian.Query.OP_INVALID)`` when a non-empty
        bound is not an integer or lies outside ``self.low``..``self.high``
        (inclusive); otherwise the result of ``self.nrp(begin, end)``
    """
    for bound in (begin, end):
        if len(bound) > 0:
            try:
                number = int(bound)
            except (TypeError, ValueError):
                # Only a failed conversion means "invalid range"; any other
                # error is a real bug and must not be swallowed.
                return xapian.Query(xapian.Query.OP_INVALID)
            if number < self.low or number > self.high:
                return xapian.Query(xapian.Query.OP_INVALID)
    return self.nrp(begin, end)
def search(): database = xapian.Database('indexes/') enquire = xapian.Enquire(database) running = 1 while int(running): str = raw_input("input the key words:") terms = [] a = jieba.cut_for_search(str) for b in a: terms.append(b.encode("utf-8")) qp = xapian.QueryParser() #建立查询分析 qp.set_database(database) qp.set_default_op(xapian.Query.OP_AND) #设置查询策略 #query = qp.parse_query(terms) query = xapian.Query(xapian.Query.OP_OR, terms) #查询函数,搞不懂 enquire.set_query(query) matches = enquire.get_mset(0, 10) print "%i results found" % matches.get_matches_estimated() for match in matches: a = match.document.get_data() d = eval(a) print "贴吧:", d["title"] print "作者:", d["reply"]["name"] print "回复:", d["reply"]["content"] print "时间:", d["reply"]["time"] running = raw_input("again?(1(yse)/0(no) :") print "thank you for using!"
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    The iterator must give the same terms whether it is handed a document
    id or the match set item itself.
    """
    db = setup_database()
    enquire = xapian.Enquire(db)
    enquire.set_query(
        xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two")))
    mset = enquire.get_mset(0, 10)

    for item in mset:
        # Make a list of the term names, once per way of addressing the hit
        terms_by_docid = list(enquire.matching_terms(item.docid))
        terms_by_item = list(enquire.matching_terms(item))
        expect(terms_by_docid, terms_by_item)

    first_hit_terms = list(enquire.matching_terms(mset.get_hit(0)))
    expect(first_hit_terms, ['it', 'two', 'warm', 'was'])
def get_query_for_cat(self, cat):
    """Return the query for ``cat``, restricted to the current channel.

    When ``self.state.channel`` is set and has a query, the category query
    and the channel query are combined with AND; otherwise the category
    query is returned as is.
    """
    LOG.debug("self.state.channel: %s" % self.state.channel)
    channel = self.state.channel
    if not (channel and channel.query):
        return cat.query
    return xapian.Query(xapian.Query.OP_AND, cat.query, channel.query)
def test_director_exception():
    """Test handling of an exception raised in a director.

    An exception raised inside an ExpandDecider or MatchDecider subclass
    must reach the caller, both when the decider is called directly and
    when it is invoked through Enquire.
    """
    class TestException(Exception):
        def __init__(self, a, b):
            super(TestException, self).__init__(a + b)

    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query('it'))

    rset = xapian.RSet()
    rset.add_document(1)

    edecider = EDecider()
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar",
                     enq.get_eset, 10, rset, edecider)

    mdecider = MDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar",
                     enq.get_mset, 0, 10, None, mdecider)
def test_eset_iter():
    """Test iterators over ESets.

    Compares the expand set obtained with and without a query set on the
    Enquire object.
    """
    db = setup_database()
    rset = xapian.RSet()
    rset.add_document(3)

    context("getting eset items without a query")
    plain_enquire = xapian.Enquire(db)
    plain_eset = plain_enquire.get_eset(10, rset)
    items = list(plain_eset)
    expect(len(items), 3)
    expect(len(items), len(plain_eset))

    context("getting eset items with a query")
    query_enquire = xapian.Enquire(db)
    query_enquire.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))
    query_eset = query_enquire.get_eset(10, rset)
    items2 = list(query_eset)
    expect(len(items2), 2)
    expect(len(items2), len(query_eset))

    # The two items obtained with a query are expected to equal the first
    # and the third item obtained without one.
    context("comparing eset items with a query to those without")
    expect(items2[0].term, items[0].term)
    expect(items2[1].term, items[2].term)

    context("comparing eset weights with a query to those without")
    expect(items2[0].weight, items[0].weight)
    expect(items2[1].weight, items[2].weight)
def test_order_by_django_id(self):
    """
    We need this test because ordering on more than 10 entries was not
    correct at some point.
    """
    def build_mock(number):
        # One mock model whose field values are all derived from ``number``.
        mock = XapianMockModel()
        mock.id = number
        mock.author = 'david%s' % number
        mock.pub_date = (datetime.date(2009, 2, 25) -
                         datetime.timedelta(days=number))
        mock.exp_date = (datetime.date(2009, 2, 23) +
                         datetime.timedelta(days=number))
        mock.value = number * 5
        mock.flag = bool(number % 2)
        mock.slug = 'http://example.com/%d/' % number
        mock.url = 'http://example.com/%d/' % number
        mock.popularity = number * 2
        return mock

    number_list = list(range(1, 101))
    self.sample_objs = [build_mock(number) for number in number_list]

    self.backend.clear()
    self.backend.update(self.index, self.sample_objs)

    # Descending sort on django_id must give the ids from 100 down to 1.
    results = self.backend.search(xapian.Query(''), sort_by=['-django_id'])
    self.assertEqual(pks(results['results']), list(reversed(number_list)))
def test_more_like_this(self):
    """``more_like_this`` finds the other sample documents."""
    self.backend.update(self.index, self.sample_objs)
    self.assertEqual(self.backend.document_count(), 3)

    reference = self.sample_objs[0]

    def result_pks(found):
        # Primary keys of the returned results, in result order.
        return [result.pk for result in found['results']]

    results = self.backend.more_like_this(reference)
    self.assertEqual(results['hits'], 2)
    self.assertEqual(result_pks(results), [3, 2])

    # An additional query narrows the result down to a single document.
    results = self.backend.more_like_this(
        reference, additional_query=xapian.Query('david3'))
    self.assertEqual(results['hits'], 1)
    self.assertEqual(result_pks(results), [3])

    results = self.backend.more_like_this(
        reference, limit_to_registered_models=True)
    self.assertEqual(results['hits'], 2)
    self.assertEqual(result_pks(results), [3, 2])

    # Ensure that swapping the ``result_class`` works.
    swapped = self.backend.more_like_this(
        reference, result_class=MockSearchResult)
    self.assertTrue(isinstance(swapped['results'][0], MockSearchResult))
def test_app_enquire(self):
    """A blocking AppEnquire query for "a" returns at least one docid."""
    store_db = StoreDatabase(cache=self.cache)
    store_db.open()

    # test the AppEnquire engine
    enquirer = AppEnquire(self.cache, store_db)
    enquirer.set_query(xapian.Query("a"), nonblocking_load=False)
    docids = enquirer.get_docids()
    self.assertTrue(len(docids) > 0)
def test_date_facets_month(self):
    """Date facets with ``gap_by='month'`` yield one bucket per month."""
    facets = {
        'datetime': {
            'start_date': datetime.datetime(2008, 10, 26),
            'end_date': datetime.datetime(2009, 3, 26),
            'gap_by': 'month',
        },
    }

    # An empty query yields the bare "no results" structure, facets or not.
    empty_result = self.backend.search(xapian.Query(), date_facets=facets)
    self.assertEqual(empty_result, {'hits': 0, 'results': []})

    results = self.backend.search(xapian.Query('indexed'),
                                  date_facets=facets)
    self.assertEqual(results['hits'], 3)

    # Buckets are expected newest first, as (bucket start, count) pairs.
    expected_buckets = [
        (datetime.datetime(2009, 2, 26, 0, 0), 0),
        (datetime.datetime(2009, 1, 26, 0, 0), 3),
        (datetime.datetime(2008, 12, 26, 0, 0), 0),
        (datetime.datetime(2008, 11, 26, 0, 0), 0),
        (datetime.datetime(2008, 10, 26, 0, 0), 0),
    ]
    self.assertEqual(results['facets']['dates']['datetime'],
                     expected_buckets)
def _get_estimate_nr_apps_and_nr_pkgs(self, enquire, q, xfilter):
    """Estimate how many applications and how many other packages match.

    :param enquire: the xapian Enquire object to run the queries on; its
        query is replaced twice by this method
    :param q: the base query
    :param xfilter: match decider handed to both ``get_mset`` calls
    :return: tuple ``(nr_apps, nr_pkgs)``; ``(0, 0)`` when either lookup
        fails (the failure is logged)
    """
    db_size = len(self.db)
    try:
        # applications: the base query AND the "ATapplication" term
        enquire.set_query(xapian.Query(xapian.Query.OP_AND,
                                       q, xapian.Query("ATapplication")))
        app_matches = enquire.get_mset(0, db_size, None, xfilter)
        nr_apps = app_matches.get_matches_estimated()

        # filter out docs of pkgs of which there exists a doc of the app
        # NOTE(review): presumably that is what the "XD" term marks --
        # TODO confirm against the indexer
        enquire.set_query(xapian.Query(xapian.Query.OP_AND_NOT,
                                       q, xapian.Query("XD")))
        all_matches = enquire.get_mset(0, db_size, None, xfilter)
        nr_total = all_matches.get_matches_estimated()
    except Exception:
        # Both lookups hit the same database, so both get the same
        # best-effort treatment instead of only the first one.
        LOG.exception("_get_estimate_nr_apps_and_nr_pkgs failed")
        return (0, 0)
    return (nr_apps, nr_total - nr_apps)
def test_search(self):
    """Basic searches: match nothing, match everything, custom result class."""
    # no match query
    nothing = self.backend.search(xapian.Query())
    self.assertEqual(nothing, {'hits': 0, 'results': []})

    # all match query
    everything = self.backend.search(xapian.Query(''))
    self.assertEqual(pks(everything['results']), [1, 2, 3])

    # Other `result_class`
    custom = self.backend.search(xapian.Query('indexed'),
                                 result_class=XapianMockSearchResult)
    first_result = custom['results'][0]
    self.assertTrue(isinstance(first_result, XapianMockSearchResult))
def _combine(op, query1, query2):
    """Combine two optional queries with the xapian operator ``op``.

    A falsy query is treated as absent: if only one query is given, it is
    returned unchanged; if ``query1`` is falsy, ``query2`` is returned
    whatever it is.  Only when both are truthy is a new
    ``xapian.Query(op, query1, query2)`` built.
    """
    if not query1:
        return query2
    if not query2:
        return query1
    return xapian.Query(op, query1, query2)
def _all_query(self):
    """
    Private method that builds the query which returns all documents.

    Returns:
        A xapian.Query created from the empty string
    """
    match_all = xapian.Query('')
    return match_all
def test_raise_index_error_on_wrong_field(self):
    """
    Regression test for #109.

    Asking for facets on the field 'dsdas' must raise InvalidIndexError.
    """
    match_all = xapian.Query('')
    with self.assertRaises(InvalidIndexError):
        self.backend.search(match_all, facets=['dsdas'])
def _parse_include_tag(self, element):
    """Build a xapian query from the children of an include element.

    The first child with a handled tag decides the result:

    * "Or" / "And" -- delegated to ``_parse_and_or_not_tag`` with a
      start query (the empty ``xapian.Query()`` for OR,
      ``xapian.Query("")`` for AND) and the matching operator
    * "Category" -- a query for the term "AC" + lower-cased text

    Children with any other tag are logged and skipped.

    :param element: the element whose children are inspected
    :return: the resulting ``xapian.Query``; ``xapian.Query("")`` when no
        child was handled
    """
    # Iterate the element itself: ``getchildren()`` is deprecated and no
    # longer exists on xml.etree elements since Python 3.9.
    for include in element:
        tag = include.tag
        if tag == "Or":
            return self._parse_and_or_not_tag(
                include, xapian.Query(), xapian.Query.OP_OR)
        if tag == "And":
            return self._parse_and_or_not_tag(
                include, xapian.Query(""), xapian.Query.OP_AND)
        if tag == "Category":
            # without "and" tag we take the first entry
            return xapian.Query("AC" + include.text.lower())
        LOG.warning("UNHANDLED: _parse_include_tag: %s" % tag)
    # empty query matches all
    return xapian.Query("")
def test_update(self):
    """Updating the backend indexes all three sample objects."""
    self.backend.update(self.index, self.sample_objs)
    self.assertEqual(self.backend.document_count(), 3)

    everything = self.backend.search(xapian.Query(''))
    found_pks = [result.pk for result in everything['results']]
    self.assertEqual(found_pks, [1, 2, 3])
def make_query(keywords):
    """Build a xapian query from a space separated keyword string.

    A unicode argument is first encoded as UTF-8 (ignoring characters that
    cannot be encoded).  Every space separated keyword becomes one
    sub-query:

    * a keyword enclosed in double quotes is used literally, without the
      quotes
    * any other keyword is segmented with ``seg_txt_2_dict``; the result
      matches the whole keyword OR all of its segments (the segments being
      combined with AND when there is more than one)

    The sub-queries of several keywords are combined with AND; a single
    keyword's sub-query is returned as is.
    """
    if type(keywords) is unicode:
        keywords = keywords.encode('utf-8', 'ignore')

    def keyword_query(keyword):
        # Quoted keyword: take the text between the quotes as one term.
        quoted = (len(keyword) > 2 and keyword.startswith('"')
                  and keyword.endswith('"'))
        if quoted:
            return xapian.Query(keyword[1:-1], 1)

        # Segments identical to the keyword itself add nothing.
        segments = [
            xapian.Query(word, 1)
            for word, _value in seg_txt_2_dict(keyword).iteritems()
            if word != keyword
        ]
        whole = xapian.Query(keyword, 1)
        if not segments:
            return whole
        if len(segments) > 1:
            alternative = xapian.Query(xapian.Query.OP_AND, segments)
        else:
            alternative = segments[0]
        return xapian.Query(xapian.Query.OP_OR, [whole, alternative])

    sub_queries = [keyword_query(keyword) for keyword in keywords.split(' ')]
    if len(sub_queries) > 1:
        return xapian.Query(xapian.Query.OP_AND, sub_queries)
    return sub_queries[0]