def is_xapiancachedb_need_update(self):
    """Return True if the cached xapian db must be refreshed from source.

    Reads the version value stored in the special
    "the_#ukxapiandb#_version" document of the source database
    (XAPIAN_DB_SOURCE_PATH) and of the cached copy
    (UKSC_CACHE_DIR/xapiandb).  Any failure reading either database
    (e.g. the cache does not exist yet) is treated as "needs update".
    """
    xapian_src_file = XAPIAN_DB_SOURCE_PATH
    xapian_dest_file = os.path.join(UKSC_CACHE_DIR, "xapiandb")
    new_version = None
    old_version = None
    try:
        # Fetch the version marker document from the source database.
        src_xapiandb = xapian.Database(xapian_src_file)
        new_enquire = xapian.Enquire(src_xapiandb)
        new_enquire.set_query(xapian.Query("the_#ukxapiandb#_version"))
        for new_item in new_enquire.get_mset(0, 1):
            new_doc = new_item.document
            if new_doc.get_data() == "XAPIANDB_VERSION":
                new_version = new_doc.get_value(1)  # value slot 1: db version
        # Fetch the version marker document from the cached database.
        des_xapiandb = xapian.Database(xapian_dest_file)
        old_enquire = xapian.Enquire(des_xapiandb)
        old_enquire.set_query(xapian.Query("the_#ukxapiandb#_version"))
        for old_item in old_enquire.get_mset(0, 1):
            old_doc = old_item.document
            old_version = old_doc.get_value(1)  # value slot 1: db version
        #if (Globals.DEBUG_SWITCH):
        print(("old xapiandb version:", old_version,
               " new xapiandb version:", new_version))
    except Exception:
        # BUGFIX: was a bare "except:" which also swallowed
        # KeyboardInterrupt/SystemExit.  Cache missing or unreadable:
        # force an update.
        return True
    if new_version is None or old_version is None:
        # Version marker missing on either side (previously this raised
        # NameError): safest to refresh the cache.
        return True
    # NOTE(review): versions are compared as raw strings — only valid for
    # fixed-width, monotonically increasing version values.
    return new_version > old_version
def reload_database(self):  # {{{
    '''
    reload the database.

    (Re)creates every xapian handle this object uses: a writable
    database handle, a query parser configured for incremental search,
    a term generator for indexing, and two Enquire objects (one plain,
    one that sorts by the lowercase title stored in value slot 2).
    '''
    # create the xapian handlers
    self.database_handle = xapian.WritableDatabase(
        self.database, xapian.DB_CREATE_OR_OPEN)
    self.query_parser = xapian.QueryParser()
    # needed for incremental search
    self.query_parser.set_database(self.database_handle)
    self.query_parser.set_stemmer(xapian.Stem(self.language))
    self.query_parser.set_stemming_strategy(self.query_parser.STEM_SOME)
    # "S" is the xapian term prefix used for titles in this index
    self.query_parser.add_prefix("title", "S")
    self.term_generator = xapian.TermGenerator()
    self.term_generator.set_stemmer(xapian.Stem(self.language))
    try:
        # TermGenerator.STEM_SOME is not available on older xapian
        # bindings; fall back silently to the default strategy.
        self.term_generator.set_stemming_strategy(
            self.term_generator.STEM_SOME)
    except AttributeError:
        pass
    self.enquire = xapian.Enquire(self.database_handle)
    self.sorted_e = xapian.Enquire(self.database_handle)
    # Value 2 is the lowercase form of the title
    self.sorted_e.set_sort_by_value(2, False)
def test_eset_iter():
    """Test iterators over ESets. """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    rset = xapian.RSet()
    rset.add_document(3)

    context("getting eset items without a query")
    plain_enquire = xapian.Enquire(db)
    plain_eset = plain_enquire.get_eset(10, rset)
    plain_items = list(plain_eset)
    expect(len(plain_items), 3)
    expect(len(plain_items), len(plain_eset))

    context("getting eset items with a query")
    query_enquire = xapian.Enquire(db)
    query_enquire.set_query(query)
    query_eset = query_enquire.get_eset(10, rset)
    query_items = list(query_eset)
    expect(len(query_items), 2)
    expect(len(query_items), len(query_eset))

    context("comparing eset items with a query to those without")
    expect(query_items[0].term, plain_items[0].term)
    expect(query_items[1].term, plain_items[2].term)

    context("comparing eset weights with a query to those without")
    expect(query_items[0].weight, plain_items[0].weight)
    expect(query_items[1].weight, plain_items[2].weight)
def test_weight_normalise():
    """Test normalising of query weights using the OP_SCALE_WEIGHT feature.

    This test first runs a search (asking for no results) to get the maximum
    possible weight for a query, and then checks that the results of
    MSet.get_max_possible() match this.

    This tests that the get_max_possible() value is correct (though it isn't
    guaranteed to be at a tight bound), and that the SCALE_WEIGHT query can
    compensate correctly.
    """
    db = setup_database()
    for query in (
            "it",
            "was",
            "it was",
            "it was four",
            "it was four five",
            "\"was it warm\" four notpresent",
            "notpresent",
    ):
        context(
            "checking query %r using OP_SCALE_WEIGHT to normalise the weights"
            % query)
        qp = xapian.QueryParser()
        query1 = qp.parse_query(query)
        enquire = xapian.Enquire(db)
        enquire.set_query(query1)
        # Ask for zero results: we only want the weight bound.
        mset1 = enquire.get_mset(0, 0)
        # Check the max_attained value is 0 - this gives us some reassurance
        # that the match didn't actually do the work of calculating any
        # results.
        expect(mset1.get_max_attained(), 0)
        max_possible = mset1.get_max_possible()
        if query == "notpresent":
            # A term absent from the index can't contribute any weight.
            expect(max_possible, 0)
            continue
        # Scale the query so its maximum possible weight becomes 1.0.
        mult = 1.0 / max_possible
        query2 = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, query1, mult)
        enquire = xapian.Enquire(db)
        enquire.set_query(query2)
        mset2 = enquire.get_mset(0, 10)
        # max_possible should be 1 (excluding rounding errors) for mset2;
        # compare in fixed point to sidestep float representation noise.
        expect(int(mset2.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in mset2:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
def xapian_get_bitset(index, query):
    """ Queries a Xapian index.
        Returns: an intbitset containing all record ids
    """
    # Lazily open the configured databases on first use.
    if not DATABASES:
        xapian_init_databases()
    hits = intbitset()
    database = DATABASES[index]
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(database)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parsed_query = parser.parse_query(query, xapian.QueryParser.FLAG_PHRASE)
    searcher = xapian.Enquire(database)
    searcher.set_query(parsed_query)
    # Fetch every possible match (up to the highest docid ever assigned).
    for hit in searcher.get_mset(0, database.get_lastdocid()):
        hits.add(hit.docid)
    return hits
def __init__(self, dbpath='simplehaha'):
    """Open the xapian index at *dbpath* and set up two redis handles."""
    db = xapian.Database(dbpath)
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(db)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.qp = parser
    self.enquire = xapian.Enquire(db)
    # value-slot numbers used when reading document values
    self.emotionvi = 0
    self.keywordsvi = 1
    self.timestampvi = 2
    self.loctvi = 3
    self.reploctvi = 4
    self.emotiononlyvi = 5
    #usernamevi = 6
    self.hashtagsvi = 7
    #uidvi = 8
    #repnameslistvi = 9
    #widvi = 10
    self.maxitems = 1000000000
    # redis db 1: low-keyword set (flushed on startup)
    pool = redis.ConnectionPool(host='localhost', port=6379, db=1)
    self.r = redis.Redis(connection_pool=pool)
    self.r.flushdb()
    self.lowkeywords_set_rds = 'lowkeywords'
    # redis db 2: keyword hash (flushed on startup)
    pool1 = redis.ConnectionPool(host='localhost', port=6379, db=2)
    self.r1 = redis.Redis(connection_pool=pool1)
    self.r1.flushdb()
    self.keywords_hash_rds = 'keywords_hash'
def test_reinstall_purchased_xapian(self):
    """Check the index entry created for a purchased-but-uninstalled app.

    After injecting the "available for reinstall" data the database must
    grow by exactly one document, and that document must carry the
    expected package name, archive signing key and deb line.
    """
    db = StoreDatabase("/var/cache/software-center/xapian", self.cache)
    db.open(use_axi=False)
    # now create purchased debs xapian index (in memory because
    # we store the repository passwords in here)
    old_db_len = len(db)
    query = add_from_purchased_but_needs_reinstall_data(
        self.available_to_me, db, self.cache)
    # ensure we have a new item (the available for reinstall one)
    self.assertEqual(len(db), old_db_len + 1)
    # query
    enquire = xapian.Enquire(db.xapiandb)
    enquire.set_query(query)
    matches = enquire.get_mset(0, len(db))
    self.assertEqual(len(matches), 1)
    for m in matches:
        doc = db.xapiandb.get_document(m.docid)
        self.assertEqual(doc.get_value(XapianValues.PKGNAME), "photobomb")
        self.assertEqual(
            doc.get_value(XapianValues.ARCHIVE_SIGNING_KEY_ID),
            "1024R/75254D99")
        self.assertEqual(
            doc.get_value(XapianValues.ARCHIVE_DEB_LINE),
            "deb https://username:random3atoken@"
            "private-ppa.launchpad.net/commercial-ppa-uploaders"
            "/photobomb/ubuntu precise main")
def _build_index(self, filepath, recreate=False):
    """
    save txt to a xapian index stored next to the source file

    Input:
        - filepath: txt file path, support .gzip, .bzip2, and .txt file
        - recreate: bool, True will force recreate db, default is False
    """
    cached_index = filepath + ".index"
    if os.path.exists(cached_index):
        if recreate:
            shutil.rmtree(cached_index)
    else:
        # no index on disk yet: we must build one
        recreate = True
    stemmer = xapian.Stem("english")
    if not recreate:
        # reuse the existing index read-only
        database = xapian.Database(cached_index)
    else:
        database = xapian.WritableDatabase(cached_index,
                                           xapian.DB_CREATE_OR_OPEN)
        indexer = xapian.TermGenerator()
        indexer.set_stemmer(stemmer)
        # pick the right opener from the file extension
        ext = os.path.splitext(filepath)[-1]
        if ext == ".bz2":
            import bz2
            open_func = bz2.open
        elif ext == ".gz":
            import gzip
            open_func = gzip.open
        else:
            open_func = open
        with open_func(filepath, mode="rt", encoding="utf-8") as f:
            # totN: sentences indexed, totP: paragraphs seen,
            # totS: sentences in the current paragraph
            totN, totP, totS = 0, 0, 0
            for l in tqdm(f, desc="Building index", unit=" lines"):
                l = l.strip()
                if len(l) < 1:
                    # a blank line closes the current paragraph
                    if totS > 0:
                        totP += 1
                    totS = 0
                    continue
                for sent in nltk.sent_tokenize(l):
                    # BUGFIX: str.strip() returns a new string; the old
                    # code discarded the result, indexing unstripped text.
                    sent = sent.strip()
                    doc = xapian.Document()
                    doc.set_data(sent)
                    indexer.set_document(doc)
                    indexer.index_text(sent)
                    database.add_document(doc)
                    totN += 1
                    totS += 1
    # query-side objects are needed whether or not we rebuilt the index
    self.parser = xapian.QueryParser()
    self.parser.set_stemmer(stemmer)
    self.parser.set_database(database)
    self.parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.enquire = xapian.Enquire(database)
def search(): database = xapian.Database('indexes/') enquire = xapian.Enquire(database) running = 1 while int(running): str = raw_input("input the key words:") terms = [] a = jieba.cut_for_search(str) for b in a: terms.append(b.encode("utf-8")) qp = xapian.QueryParser() #建立查询分析 qp.set_database(database) qp.set_default_op(xapian.Query.OP_AND) #设置查询策略 #query = qp.parse_query(terms) query = xapian.Query(xapian.Query.OP_OR, terms) #查询函数,搞不懂 enquire.set_query(query) matches = enquire.get_mset(0, 10) print "%i results found" % matches.get_matches_estimated() for match in matches: a = match.document.get_data() d = eval(a) print "贴吧:", d["title"] print "作者:", d["reply"]["name"] print "回复:", d["reply"]["content"] print "时间:", d["reply"]["time"] running = raw_input("again?(1(yse)/0(no) :") print "thank you for using!"
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator. """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)
    for hit in mset:
        # matching_terms must accept a docid and an MSet item alike,
        # yielding the same term names in both cases.
        by_docid = [term for term in enquire.matching_terms(hit.docid)]
        by_item = [term for term in enquire.matching_terms(hit)]
        expect(by_docid, by_item)
    # ... and an explicit hit object, yielding sorted term names.
    from_hit = [term for term in enquire.matching_terms(mset.get_hit(0))]
    expect(from_hit, ['it', 'two', 'warm', 'was'])
def get_most_popular_applications_for_mimetype(self, mimetype,
                                               only_uninstalled=True,
                                               num=3):
    """ return a list of the most popular applications for the given
        mimetype
    """
    # sort by popularity by default
    enquire = xapian.Enquire(self.xapiandb)
    enquire.set_sort_by_value_then_relevance(XapianValues.POPCON)
    # query mimetype ("AM" is the mimetype term prefix)
    enquire.set_query(xapian.Query("AM%s" % mimetype))
    # mset just needs to be "big enough"
    matches = enquire.get_mset(0, 100)
    apps = []
    for match in matches:
        doc = match.document
        app = Application(self.get_appname(doc),
                          self.get_pkgname(doc),
                          popcon=self.get_popcon(doc))
        # either take everything, or only packages not yet installed
        if (not only_uninstalled or
                app.get_details(self).pkg_state == PkgStates.UNINSTALLED):
            apps.append(app)
        if len(apps) == num:
            break
    return apps
def search_query(claim):
    """Search the anonymised-titles xapian index for *claim*.

    The claim is stop-word filtered and reduced to noun phrases before
    being parsed into a xapian query; the top 5 matching documents are
    returned as parsed JSON dicts.
    """
    stop_words = set(stopwords.words('english'))
    claim = word_tokenize(claim)
    claim = " ".join([w for w in claim if w not in stop_words])
    #print(claim)
    claim = noun_phrases(claim)
    #print(claim)
    db = xapian.Database('/home/xusheng/Downloads/ano-titles')
    query_parser = xapian.QueryParser()
    query_parser.set_stemmer(xapian.Stem('en'))
    query_parser.set_stemming_strategy(query_parser.STEM_SOME)
    #query = query_parser.parse_query("title:"+claim)
    query = query_parser.parse_query(claim)
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = []
    for match in enquire.get_mset(0, 5):
        # the match data is stored as JSON; parse it into a python dict
        match_doc = json.loads(match.document.get_data().decode('utf8'))
        # (removed unused local that extracted 'title' and discarded it)
        matches.append(match_doc)
    return matches
def search(self, searchterm, extractlength=32):
    """Run *searchterm* against the index.

    Returns (estimated_matches, returned_count, result_dicts) where each
    result dict carries 'rank', 'docid' and a highlighted 'text' extract.
    """
    # Parse query string
    query = self.__queryparser.parse_query(searchterm)
    # No pagination: fetch every document in one go
    offset, limit = 0, self.__db.get_doccount()
    # Start query session
    enquire = xapian.Enquire(self.__db)
    enquire.set_query(query)
    # Collect matches and build extracts around the search term
    matches = enquire.get_mset(offset, limit)
    results = []
    for match in matches:
        content = match.document.get_data()
        extract = TextMachine(extractlength, '*%s*').process(searchterm,
                                                             content)
        results.append({
            "rank": match.rank,
            "docid": match.docid,
            "text": extract,
        })
    return (matches.get_matches_estimated(), matches.size(), results)
def __init__(self, path=None, name='master_timeline_weibo', stub=None,
             include_remote=False, schema=Schema,
             schema_version=SCHEMA_VERSION):
    """Open one or more xapian databases and prepare a shared Enquire.

    - stub: a stub file, a list of stub files, or a directory of stub
      files; when given it takes precedence over *path*/*name*.
    - otherwise every entry of *path* starting with '_<name>' is opened
      and merged into a single database.
    """
    def create(dbpath):
        return _database(dbpath)

    def merge(db1, db2):
        # fold db2 into db1 so queries span all shards
        db1.add_database(db2)
        return db1

    if stub:
        # If it is a list, assume every entry is a stub file.
        if isinstance(stub, list):
            self.database = reduce(merge, map(_stub_database, stub))
        elif os.path.isfile(stub):
            self.database = _stub_database(stub)
        elif os.path.isdir(stub):
            self.database = reduce(merge,
                                   map(_stub_database,
                                       [os.path.join(stub, p)
                                        for p in os.listdir(stub)]))
    else:
        self.database = reduce(merge,
                               map(create,
                                   [os.path.join(path, p)
                                    for p in os.listdir(path)
                                    if p.startswith('_%s' % name)]))

    self.schema = getattr(schema, 'v%s' % schema_version)

    enquire = xapian.Enquire(self.database)
    # Use the simplest weighting scheme to improve efficiency.
    enquire.set_weighting_scheme(xapian.BoolWeight())
    # We do not care about the ordering of the mset.
    enquire.set_docid_order(xapian.Enquire.DONT_CARE)
    if 'collapse_valueno' in self.schema:
        enquire.set_collapse_key(self.schema['collapse_valueno'])
    self.enquire = enquire
    self.include_remote = include_remote
def _update_channel_list_installed_view(self):
    """Rebuild the 'installed' subtree: add channels that have at least
    one installed package, then drop the previous child rows."""
    # see comments for _update_channel_list_available_view() method above
    child = self.iter_children(self.installed_iter)
    iters_to_kill = set()
    while child:
        iters_to_kill.add(child)
        child = self.iter_next(child)
    # iterate the channels and add as subnodes of the installed node
    for channel in self.channel_manager.channels_installed_only:
        # check for no installed items for each channel and do not
        # append the channel item in this case
        enquire = xapian.Enquire(self.db.xapiandb)
        query = channel.query
        enquire.set_query(query)
        matches = enquire.get_mset(0, len(self.db))
        # only check channels that have a small number of items
        add_channel_item = True
        if len(matches) < 200:
            add_channel_item = False
            for m in matches:
                doc = m.document
                pkgname = self.db.get_pkgname(doc)
                # one installed package is enough to show the channel
                if (pkgname in self.cache
                        and self.cache[pkgname].is_installed):
                    add_channel_item = True
                    break
        if add_channel_item:
            self.append(self.installed_iter, [
                channel.icon, channel.display_name, ViewPages.CHANNEL,
                channel, None
            ])
    # delete the old ones
    for child in iters_to_kill:
        self.remove(child)
def xmlrpc_search(self, text, page):
    """XML-RPC entry point: search the index for *text*.

    Returns a dict with the estimated match count and one result dict
    per hit on the requested *page* (PAGE_SIZE hits per page), or None
    for a negative page or an unparsable query.
    """
    # TODO: Run queries in threads because it's blocking operation.
    if page < 0:
        return
    try:
        query = self.query_parser.parse_query(text)
    except xapian.QueryParserError:
        return
    enquire = xapian.Enquire(self.db)
    enquire.set_query(query)
    # pick up index updates made since the db was opened
    self.db.reopen()

    def process_match(match):
        doc = match.document
        return dict(id=doc.get_value(Indexer.ID),
                    user=doc.get_value(Indexer.USER),
                    date=float(doc.get_value(Indexer.DATE_ORIG)),
                    type=doc.get_value(Indexer.TYPE),
                    tags_info=doc.get_value(Indexer.TAGS_INFO),
                    text=doc.get_data().decode('utf-8'),
                    percent=match.percent)

    matches = enquire.get_mset(page * self.PAGE_SIZE, self.PAGE_SIZE)
    estimated = matches.get_matches_estimated()
    # BUGFIX: materialize map() -- under Python 3 a bare map object
    # cannot be marshalled by xmlrpc.
    results = list(map(process_match, matches))
    return dict(estimated=estimated, results=results)
def search(self, query, offset, count, check_at_least, site_ids=(),
           filetype=SEARCH_ALL):
    """Query the index.

    The `query` argument is the user supplied query string.  The
    `sites` and `filetype` arguments can be used to restrict the
    domain of the search.

    Returns (estimated_total, results) where each result is a dict
    with 'url' and 'is_dir' keys.
    """
    # Python 2: normalize byte strings to unicode before parsing.
    if type(query) is not unicode:
        query = query.decode('utf-8')
    enquire = xapian.Enquire(self._db)
    xapian_query = self._parse_query(query, site_ids, filetype)
    enquire.set_query(xapian_query)
    mset = enquire.get_mset(offset, count, check_at_least)
    results = []
    for match in mset:
        result = {}
        doc = match.get_document()
        # the document data holds the UTF-8 encoded URL
        result['url'] = doc.get_data().decode('utf-8')
        # IS_DIR_SLOT stores a TRUE_VALUE/FALSE_VALUE string flag
        value = doc.get_value(IndexProcessor.IS_DIR_SLOT).decode('utf-8')
        result['is_dir'] = (value == IndexProcessor.TRUE_VALUE)
        results.append(result)
    estimated_total = mset.get_matches_estimated()
    return (estimated_total, results)
def test_director_exception():
    """Test handling of an exception raised in a director. """
    db = setup_database()
    query = xapian.Query('it')
    enq = xapian.Enquire(db)
    enq.set_query(query)

    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    rset = xapian.RSet()
    rset.add_document(1)

    # An ExpandDecider that always raises: the exception must propagate
    # through the C++ layer back to the Python caller.
    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    edecider = EDecider()
    # raised both when called directly and from inside get_eset()
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar", enq.get_eset, 10, rset,
                     edecider)

    # Same check for a MatchDecider used during get_mset().
    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    mdecider = MDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar", enq.get_mset, 0, 10, None,
                     mdecider)
def _rmtree(self, site_id, dirpath):
    """Remove documents for entries in the given directory tree.

    The document of the root of the directory tree is also removed.
    """
    enquire = xapian.Enquire(self._db)
    enquire.set_docid_order(xapian.Enquire.DONT_CARE)
    site_id_query = xapian.Query(self.SITE_ID_PREFIX + site_id)

    def delete_matches(value_query):
        # Restrict the value-range query to this site and delete every
        # matching document (shared by both passes below).
        query = xapian.Query(xapian.Query.OP_FILTER, site_id_query,
                             value_query)
        enquire.set_query(query)
        for match in enquire.get_mset(0, self._db.get_doccount()):
            doc = match.get_document()
            self._db.delete_document(doc.get_docid())

    # Remove document of the directory itself (exact PATH value match).
    delete_matches(xapian.Query(xapian.Query.OP_VALUE_RANGE,
                                self.PATH_SLOT, dirpath, dirpath))

    # Remove documents of the descendants: every DIRNAME value sorting
    # between "<dirpath>/" and "<dirpath>/<U+10FFFF>".
    dirname_start = dirpath.rstrip(u'/') + u'/'
    dirname_end = dirname_start + u'\U0010ffff'
    delete_matches(xapian.Query(xapian.Query.OP_VALUE_RANGE,
                                self.DIRNAME_SLOT, dirname_start,
                                dirname_end))
def searcher(self):
    """Open the '<indexname>_xappy' index read-only and prepare the
    enquire/query-parser pair used for searching."""
    index_dir = os.path.join(self.options.dir,
                             "%s_xappy" % self.options.indexname)
    self.db = xapian.Database(index_dir)
    self.enq = xapian.Enquire(self.db)
    self.qp = xapian.QueryParser()
    self.qp.set_database(self.db)
def test_scale_weight():
    """Test query OP_SCALE_WEIGHT feature. """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context(
            "checking queries with OP_SCALE_WEIGHT with a multiplier of %r"
            % mult)
        query1 = xapian.Query("it")
        query2 = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, query1, mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(query1)
        mset1 = enquire.get_mset(0, 10)
        enquire.set_query(query2)
        mset2 = enquire.get_mset(0, 10)

        if mult <= 0:
            # A multiplier of 0 zeroes every weight, so results come
            # back ordered by docid.
            expected = [(0, item.docid) for item in mset1]
            expected.sort()
        else:
            # Compare weights in fixed point to avoid float rounding
            # differences between the scaled and unscaled runs.
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in mset1]
        expect([(int(item.weight * 1000000), item.docid)
                for item in mset2], expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    query1 = xapian.Query("it")
    # Negative multipliers must be rejected at query construction time.
    expect_exception(
        xapian.InvalidArgumentError,
        "Xapian::Query: SCALE_WEIGHT requires a non-negative parameter.",
        xapian.Query, xapian.Query.OP_SCALE_WEIGHT, query1, -1)
def search(dbpath, querystring, offset=0, pagesize=100, ident=0):
    """Search *dbpath* for *querystring* and print TREC-style result lines.

    offset - defines starting point within result set
    pagesize - defines number of records to retrieve
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # QueryParser configured with a Portuguese stemmer.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("pt"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Start of prefix configuration.
    #queryparser.add_prefix("text", "XD")
    # End of prefix configuration.
    query = queryparser.parse_query(querystring)

    # Run the query against the database.
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # Emit one line per match; the docid is the part before ':' in the data.
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        field = match.document.get_data()
        print(u"%(ident)s Q0 %(id)s %(rank)i %(weight)s danielatkinson_filipemoreira" % {
            'ident': ident,
            'rank': match.rank,
            'weight': match.weight,
            'id': field.split(":")[0]
        })
        matches.append(match.docid)
def query(keywords):
    """ Get changelog entries matching the given keywords """
    xdb = xapian.Database(MINECHANGELOGS_INDEXDIR)
    combined = None
    for keyword in keywords:
        keyword = keyword.strip()
        if not keyword:
            continue
        # Multi-word keywords become phrase queries.
        if ' ' in keyword:
            part = xapian.Query(xapian.Query.OP_PHRASE, keyword.split())
        else:
            part = xapian.Query(keyword)
        # OR everything together.
        combined = part if combined is None else xapian.Query(
            xapian.Query.OP_OR, combined, part)
    if combined is None:
        return
    enquire = xapian.Enquire(xdb)
    enquire.set_query(combined)
    # Sort by value slot 0, descending.
    enquire.set_sort_by_value(0, True)
    # Page through all matches, 100 at a time, yielding the raw data.
    first = 0
    while True:
        matches = enquire.get_mset(first, 100)
        if matches.size() == 0:
            break
        for m in matches:
            yield m.document.get_data()
        first += 100
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search *dbpath* for *querystring*, print and log the matches.

    offset - defines starting point within result set
    pagesize - defines number of records to retrieve
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        # the document data is a JSON blob holding the stored fields
        fields = json.loads(match.document.get_data())
        print u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        }
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search *dbpath* for *querystring* and return a printable summary.

    offset   - starting point within the result set
    pagesize - number of records to retrieve
    """
    db = xapian.Database(dbpath)

    parser = xapian.QueryParser()
    # choose a language
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")
    query = parser.parse_query(querystring)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    matches = []
    lines = []
    for match in enquire.get_mset(offset, pagesize):
        # document data is a JSON blob with the stored fields
        fields = json.loads(match.document.get_data())
        lines.append(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)
    support.log_matches(querystring, offset, pagesize, matches)
    # one line per match, each terminated by a newline
    return "".join(line + '\n' for line in lines)
    ### END of function
def keyPressed(self, event):
    """Tk key handler: re-run the search for the current entry text.

    Fills self.list with up to 100 'weight:file:line:source-line'
    entries and shows the count and elapsed time in self.label.
    """
    self.list.delete(0, END)
    start = time()
    if self.entry.get():
        query_parser = xapian.QueryParser()
        # NOTE(review): 'ti' looks like a module-level index handle
        # defined elsewhere in this file -- confirm.
        enq = xapian.Enquire(ti)
        query = query_parser.parse_query(
            self.entry.get(), query_parser.FLAG_WILDCARD)
        print query.get_description()
        enq.set_query(query)
        elapsed = time() - start
        result = enq.get_mset(0, 100)
        count = 0
        for doc in result:
            count += 1
            # presumably legacy MSet item tuple indexing:
            # doc[4] = document, doc[1] = weight -- TODO confirm against
            # the xapian bindings version in use.
            ln = doc[4].get_data()
            r = doc[1]
            # stored data looks like "<path>:<line-number>"
            i = ln.rindex(':')
            d = ln[:i].strip()
            l = int(ln[i + 1:])
            self.list.insert(
                END,
                '%.2f:%s:%i:%s' % (r, d, l,
                                   linecache.getline(d, l + 1).strip()))
        self.label.config(text='%i lines in %.2fs' % (count, elapsed))
def find(self, wordlist):
    '''look up all the words in the wordlist.
    If none are found return an empty list
    * more rules here

    Words outside [minlength, maxlength] and stopwords are skipped;
    the remainder are stemmed and ANDed into one query.  Each hit's
    document data is split on ':' and returned as a tuple.
    '''
    if not wordlist:
        # BUGFIX: the non-empty path returns a list, so the empty path
        # must as well (it used to return {} while the docstring
        # promised a dict).
        return []
    database = self._get_database()
    enquire = xapian.Enquire(database)
    stemmer = xapian.Stem("english")
    terms = []
    for term in [word.upper() for word in wordlist
                 if self.minlength <= len(word) <= self.maxlength]:
        if not self.is_stopword(term):
            terms.append(stemmer(s2b(term.lower())))
    # every surviving term must match
    query = xapian.Query(xapian.Query.OP_AND, terms)
    enquire.set_query(query)
    matches = enquire.get_mset(0, database.get_doccount())
    return [tuple(b2s(m.document.get_data()).split(':'))
            for m in matches]
def handle_query(self, q):
    """Run query string *q* against the on-disk index.

    Returns up to 100 (path, title, context) tuples.
    """
    database = xapian.Database(self.db_path)
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(database)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = parser.parse_query(q)

    # Find the top 100 results for the query.
    enquire = xapian.Enquire(database)
    enquire.set_query(query)
    matches = enquire.get_mset(0, 100)

    results = []
    for m in matches:
        data = m.document.get_data()
        # normalize bytes to text before extracting context
        if not isinstance(data, string_types):
            data = data.decode("utf-8")
        context = self.extract_context(data)
        results.append((m.document.get_value(self.DOC_PATH),
                        m.document.get_value(self.DOC_TITLE),
                        ''.join(context)))
    return results
def search(self, server_guid, store_guid, folder_ids, fields_terms, query,
           log):
    """ handle query; see links in the top for a description of the
    Xapian API

    Returns (matching sourcekeys, suggestion-or-None); ([], '') when the
    store's database cannot be opened.  (folder_ids is accepted but not
    used in this implementation.)
    """
    db = self.open_db(server_guid, store_guid, log=log)
    if not db:
        return [], ''
    qp = xapian.QueryParser()
    qp.add_prefix("sourcekey", "XK:")
    qp.add_prefix("folderid", "XF:")
    suggest = []
    for fields, terms in fields_terms:
        for field in fields:
            # one prefix per MAPI property, e.g. mapi55 -> "XM55:"
            qp.add_prefix('mapi%d' % field, "XM%d:" % field)
        for term in terms:
            # collect spelling suggestions (currently unused, see XXX below)
            suggest.append(db.get_spelling_suggestion(term) or term)
    log.info('performing query: %s' % query)
    qp.set_database(db)
    query = qp.parse_query(
        query, xapian.QueryParser.FLAG_BOOLEAN |
        xapian.QueryParser.FLAG_PHRASE | xapian.QueryParser.FLAG_WILDCARD)
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = []
    # value slot 0 holds the sourcekey of each hit
    for match in enquire.get_mset(0, db.get_doccount(
    )):  # XXX catch exception if database is being updated?
        matches.append(match.document.get_value(0))
    db.close()
    return matches, None  # XXX get_spelling_suggestion, decode utf-*? ' '.join(suggest)
def parse_query(parser, search_strings, verbose=True):
    """Run one xapian query per "prefix:term" entry in *search_strings*.

    Supported prefixes: section (AE), type (AT), category (AC).  For
    "section" the query is widened to also match the component-qualified
    forms (universe/multiverse/restricted) and the "XS" section terms.
    Prints match counts (and app/pkg names when *verbose*); uses the
    module-level `db` handle.
    """
    str_to_prefix = {'section': 'AE', 'type': 'AT', 'category': 'AC'}
    for st in search_strings:
        (search_prefix, search_term) = st.split(":")
        if search_prefix == "section":
            t = str_to_prefix[search_prefix]
            s = search_term.lower()
            query = xapian.Query(t + s)
            # widen with "AE<component>/<section>" ...
            for pre in ["universe", "multiverse", "restricted"]:
                query = xapian.Query(xapian.Query.OP_OR, query,
                                     xapian.Query("%s%s/%s" % (t, pre, s)))
                # ... and the "XS<component>/<section>" variant
                query = xapian.Query(xapian.Query.OP_OR, query,
                                     xapian.Query("XS%s/%s" % (pre, s)))
        else:
            query = xapian.Query(str_to_prefix[search_prefix] +
                                 search_term.lower())
        enquire = xapian.Enquire(db)
        enquire.set_query(query)
        with ExecutionTime("Search took"):
            mset = enquire.get_mset(0, db.get_doccount())
        print "Found %i documents for search '%s'" % (len(mset), st)
        if verbose:
            for m in mset:
                doc = m.document
                appname = doc.get_data()
                pkgname = doc.get_value(XAPIAN_VALUE_PKGNAME)
                print "%s ; %s" % (appname, pkgname)
        print