def search(dbpath, querystring, offset=0, pagesize=10):
    """Run a range-aware query against a Xapian database and print the hits.

    The query parser is configured with an English stemmer, the ``title``
    and ``description`` prefixes, and three range processors (a bounded
    population processor on slot 3, a date processor on slot 2 and a plain
    number processor on slot 1).  Each match on the requested page is
    printed, and the query plus the matching docids are handed to
    ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to open.
        querystring: Query text handed to the query parser.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.

    Returns:
        None.  Results are printed and logged as side effects.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")
    # and add in range processors
    # Start of custom RP code
    class PopulationRangeProcessor(xapian.RangeProcessor):
        """Number range processor that only accepts endpoints in [low, high]."""

        def __init__(self, slot, low, high):
            super(PopulationRangeProcessor, self).__init__()
            # The actual range query is built by a stock NumberRangeProcessor;
            # this class only vets the endpoints first.
            self.nrp = xapian.NumberRangeProcessor(slot)
            self.low = low
            self.high = high

        def _rejects(self, endpoint):
            """Return True if a non-empty endpoint is not an int in range."""
            if not endpoint:
                # Empty endpoints are not validated (presumably they denote
                # an open-ended range).
                return False
            try:
                value = int(endpoint)
            except (TypeError, ValueError):
                # Only conversion failures mean "not a number"; anything else
                # (e.g. KeyboardInterrupt) must propagate.
                return True
            return not self.low <= value <= self.high

        def __call__(self, begin, end):
            """Return a range query, or an OP_INVALID query if out of bounds.

            Per the Xapian RangeProcessor documentation, returning
            ``Query(OP_INVALID)`` tells the parser this processor did not
            handle the range.
            """
            if self._rejects(begin) or self._rejects(end):
                return xapian.Query(xapian.Query.OP_INVALID)
            return self.nrp(begin, end)
    queryparser.add_rangeprocessor(
        PopulationRangeProcessor(3, 500000, 50000000)
    )
    # End of custom RP code
    # Start of date example code
    queryparser.add_rangeprocessor(
        xapian.DateRangeProcessor(2, xapian.RP_DATE_PREFER_MDY, 1860)
    )
    queryparser.add_rangeprocessor(
        xapian.NumberRangeProcessor(1)
    )
    # End of date example code

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        # Document data is stored as a JSON object of display fields.
        fields = json.loads(match.document.get_data())
        population = support.format_numeral(int(fields.get('population', 0)))
        date = support.format_date(fields.get('admitted'))

        print(u"""\
%(rank)i: #%(docid)3.3i %(name)s %(date)s
        Population %(pop)s""" % {
            'rank': match.rank + 1,  # ranks are 0-based; display from 1
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': date,
            'pop': population,
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def test_all():
    """Smoke-test the wrapped Xapian API end to end.

    The checks run strictly in sequence and share state: an in-memory
    WritableDatabase (``db``), Enquire objects (``enq``/``enquire``) and
    QueryParser objects (``qp``/``oqparser``) are built up and mutated as the
    test proceeds, so statement order matters.

    Covered, in order: version reporting, absence of ``cvar``, Stem, Document
    data/terms/postings, Query construction, database factory error wrapping,
    the iterator wrappers (Query, MSet, ESet, Database, postlist, termlist,
    positionlist, values), MatchDecider/ExpandDecider subclasses, QueryParser
    options, unicode handling, stoppers, TermGenerator, DateRangeProcessor,
    FieldProcessor, assorted regression tests, metadata and OP_SCALE_WEIGHT.

    Relies on ``expect``, ``expect_query``, ``expect_exception``, ``context``
    and ``TestFail``, which are defined outside this block.

    NOTE(review): this function is Python 2 code (print statement, iterator
    ``.next()``, byte-string expectations for unicode input).
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Expected to raise AttributeError on the first line; the print only
        # runs if the attribute unexpectedly exists.
        res = xapian.cvar
        print "Unhandled constants: ", res
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Document data must round-trip an embedded zero byte without truncation.
    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")

    # Build the first document: one plain term plus five postings, with
    # "there" posted at two positions (2 and 5).  Later counts depend on this.
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Query construction from a list, a tuple, and a mix of Query objects and
    # strings.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:
    expect_exception(xapian.DatabaseNotFoundError, None,
                     xapian.Database, "nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB)
    expect_exception(xapian.DatabaseNotFoundError, None,
                     xapian.WritableDatabase, "nosuchdir/nosuchdb",
                     xapian.DB_OPEN|xapian.DB_BACKEND_STUB)

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "/bin/false", "")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "/bin/false", "")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)

    # Check Xapian::BAD_VALUENO is wrapped suitably.
    enq.set_collapse_key(xapian.BAD_VALUENO)

    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    # Same expectation via len(); the message text is shared with the check
    # above.
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    # ("there" was posted at positions 2 and 5 above, hence 2 entries.)
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    # After skip_to('n'), every remaining term must compare >= 'n'.
    term = doc.termlist()
    term.skip_to('n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    # (No values have been added to this document, so 0 entries are expected.)
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # Add a second document; only it carries value slot 0 == "yes", so the
    # decider below should accept it and reject the first document.
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept a document only if its value in slot 0 is "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject any expand term starting with 'a'.
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith('a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    # Without min_wt the last weight is expected below 1.9; with min_wt=1.9
    # it must be at least 1.9.
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(0 * <alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This qp instance is reused by the unicode and stopper checks below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(qp.parse_query("foo ox", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM ox OR Zox@2))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM outside OR Zoutsid@2))")

    # Test supplying unicode strings
    # Expected descriptions are byte strings holding the UTF-8 encoding.
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(0 * <alldocuments> AND_NOT Zt\xc3\xa9st@1)")

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    # NOTE(review): .next() is the Python 2 iterator method; the skip_to test
    # above uses the next() builtin instead.
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    # Once 'a' is registered as a stopword it is dropped from the parsed query.
    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat exactly the term "b" as a stopword.
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test SimpleStopper initialised from a file.
    # The list is looked up relative to $srcdir, falling back to the current
    # directory when that variable is unset.
    try:
        srcdir = os.environ['srcdir']
    except KeyError:
        srcdir = '.'
    stop = xapian.SimpleStopper(srcdir + '/../shortstop.list')
    expect(stop('a'), True)
    expect(stop('am'), False)
    expect(stop('an'), True)
    expect(stop('the'), True)

    expect_exception(xapian.InvalidArgumentError, None, xapian.SimpleStopper, 'nosuchfile')

    # Test TermGenerator
    # Each expected tuple is (term, wdf, [positions]).
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])

    # Check DateRangeProcessor works
    context("checking that DateRangeProcessor works")
    qp = xapian.QueryParser()
    rpdate = xapian.DateRangeProcessor(1, xapian.RP_DATE_PREFER_MDY, 1960)
    qp.add_rangeprocessor(rpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        # Map any field value to the term "spam", except that the value
        # 'spam' itself raises, so exception propagation can be checked.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
    qp.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
    qp.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
    qp.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    # (The result is not inspected; the call just has to succeed.)
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug fixed in 1.4.4:
    # https://bugs.debian.org/849722
    oqparser.add_boolean_prefix('tag', 'K', '')
    # Make sure other cases also work:
    oqparser.add_boolean_prefix('zag', 'XR', False) # Old-style
    oqparser.add_boolean_prefix('rag', 'XR', None)
    oqparser.add_boolean_prefix('nag', 'XB', '')
    oqparser.add_boolean_prefix('bag', 'XB', 'blergh')
    oqparser.add_boolean_prefix('gag', 'XB', u'blergh')
    oqparser.add_boolean_prefix('jag', 'XB', b'blergh')

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, '', 'Foo')
    # NOTE(review): this repeats the get_metadata('') check from two lines up.
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5),
                 "5 * foo")