def _decode_simple_value(field_cls, data): """Used to decode values in stored fields. """ # Overload the Integer type, cf _encode_simple_value if issubclass(field_cls, Integer): return int(sortable_unserialise(data)) elif issubclass(field_cls, Decimal): return decimal(sortable_unserialise(data)) # A common field or a new field return field_cls.decode(data)
def two_range(self, field, purpose, q):
    """Check the result of a range search which should return 2 items.

    The two hits are expected to carry values in [3, 4] and [4, 5]
    respectively (ascending order).
    """
    r = [x for x in q.search(0, 10)]
    self.assertEqual(len(r), 2)
    # Use the supplied field/purpose, consistent with single_range();
    # previously "foo"/"collsort" were hard-coded here despite the
    # parameters being passed in.
    val = xapian.sortable_unserialise(r[0].get_value(field, purpose))
    self.assertTrue(3 <= val)
    self.assertTrue(val <= 4)
    val = xapian.sortable_unserialise(r[1].get_value(field, purpose))
    self.assertTrue(4 <= val)
    self.assertTrue(val <= 5)
def group_poi(request):
    """Group POI search results by an area code and return a JSON string.

    The GET parameter 'gt' selects the grouping slot:
    'admin_code' -> slot 1, 'prov_code' -> slot 4, anything else
    (default 'city_code') -> slot 5.  On success the response maps each
    code to its term frequency; on failure it reports ERROR_PARAMETERS.
    """
    response = {}
    try:
        response['status'] = 'OK'
        group_type = request.GET.get('gt', 'city_code')
        if group_type == 'admin_code':
            admin_code_spy = xapian.ValueCountMatchSpy(1)
        elif group_type == 'prov_code':
            admin_code_spy = xapian.ValueCountMatchSpy(4)
        else:
            admin_code_spy = xapian.ValueCountMatchSpy(5)
        with contextlib.closing(get_xapian_conn()) as xapian_database:
            poi_query_parser = get_poi_query_parser()
            poi_query_parser.set_database(xapian_database)
            make_group_matches(
                request, poi_query_parser, admin_code_spy, xapian_database)
            group_result = {}
            for value in admin_code_spy.values():
                # Slot values are sortable-serialised numbers.
                code = int(xapian.sortable_unserialise(value.term))
                group_result[code] = value.termfreq
            response['results'] = group_result
    except Exception as e:
        # Was `except BaseException`, which also swallows KeyboardInterrupt
        # and SystemExit; Exception is the right boundary for a request
        # handler.
        logger.exception(e)
        response['results'] = []
        response['size'] = 0
        response['status'] = 'ERROR_PARAMETERS'
    return json.dumps(response, ensure_ascii=False, encoding='utf-8')
def _remove_cached_items(self, docid=None, xapid=None):
    """Remove from the cache any items for the specified document.

    The document may be specified by xappy docid, or by xapian document
    id.
    """
    if self.cache_manager is None:
        raise errors.IndexerError("CacheManager has been applied to this "
                                  "index, but is not currently set.")
    doc, xapid = self._get_xapdoc(docid, xapid)
    if doc is None:
        return
    #print "Removing docid=%d" % xapid
    # FIXME: this will only remove the hits from the set cache
    # manager, if we have multiple applied caches, the others won't be
    # updated.  This means that currently, if multiple caches are applied
    # and document removals happen, some of the caches will get out of
    # date; multiple caches are therefore not really suitable for use in
    # production systems - they are however useful for experimenting with
    # different caching algorithms.
    for value in doc.values():
        # Only values whose slot falls in the cache-manager range encode
        # cached query hits; everything else is an ordinary field value.
        base_slot = self._cache_manager_slot_start
        upper_slot = self._cache_manager_slot_start + self.cache_manager.num_cached_queries()
        if not (base_slot <= value.num < upper_slot):
            continue
        # The stored value is (max_hits - rank), sortable-serialised;
        # invert to recover the original rank.
        rank = int(self._cache_manager_max_hits -
                   xapian.sortable_unserialise(value.value))
        self.cache_manager.remove_hits(
            value.num - self._cache_manager_slot_start,
            ((rank, xapid),))
def get_popcon(self, doc):
    """Return the popcon value stored on the given xapian document.

    Falls back to 0 when the document carries no popcon value.
    """
    raw = doc.get_value(XapianValues.POPCON)
    return xapian.sortable_unserialise(raw) if raw else 0
def single_range(self, field, purpose, q):
    """Check the result of a range search which should return 1 item."""
    hits = list(q.search(0, 10))
    self.assertEqual(len(hits), 1)
    value = xapian.sortable_unserialise(hits[0].get_value(field, purpose))
    self.assertTrue(3 <= value)
    self.assertTrue(value <= 4.01)
def deconvert(self, data):
    """Convert a raw stored value back to a Python value by field type.

    None passes through unchanged; long fields coerce falsy raw values
    to 0, float fields are sortable-unserialised, and anything else is
    decoded as UTF-8 text.
    """
    if data is None:
        return data
    ftype = self.ftype
    if ftype == PyFieldMeta.TYPE_LONG:
        # Falsy raw values (0, '', ...) are normalised to long(0) first,
        # exactly as the original `data or long(0)` did.
        return long(data or long(0))
    if ftype == PyFieldMeta.TYPE_FLOAT:
        return xapian.sortable_unserialise(data)
    return data.decode('utf-8')
def search_database(keywords,result_limit,limit):
    # Search the index for `keywords`, paging through the mset `limit`
    # results at a time until `result_limit` hits (or 15 pages) are
    # collected.  Returns (result_list, result_got) and caches the pair
    # for an hour.
    c_key=('query_'+keywords+str(result_limit)).encode('utf-8')
    CT=cache.get(c_key)
    if CT!=None:
        print "using cache",c_key
        return CT
    database = xapian.Database(databasePath)
    enquire = xapian.Enquire(database)
    queryParser = xapian.QueryParser()
    queryParser.set_stemmer(xapian.Stem('english'))
    queryParser.set_database(database)
    queryParser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = queryParser.parse_query(keywords)
    # Re-tokenise the raw keywords: runs of digits, runs of ascii letters,
    # or 3-byte multibyte sequences (CJK in UTF-8).  This replaces the
    # parsed query built just above.
    rex=re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')
    all_terms=rex.findall(keywords.encode('utf-8'))
    query_list = []
    for word in all_terms:
        query = xapian.Query(word)
        query_list.append(query)
    if len(query_list) != 1:
        query = xapian.Query(xapian.Query.OP_AND, query_list)
    else:
        query = query_list[0]
    offset= 0
    # Sort by value slot 1, then slot 2 (presumably freq then click --
    # TODO confirm against the indexer).
    sorter = MultiValueSorter()
    sorter.add(1)
    sorter.add(2)
    enquire.set_query(query)
    enquire.set_sort_by_key(sorter)
    result_list=[]
    result_got=0
    max_try=0
    while True:
        print "loop",result_got
        print "limit",result_limit
        matches = enquire.get_mset(offset, limit)
        for match in matches:
            str_content=match.document.get_data()
            # Require a literal substring match of the query in the stored
            # document data; skip non-matching hits.
            if str_content.find(keywords)==-1:
                continue
            query_content=cut_str(str_content,50)
            result_got+=1
            result_list.append({"index":result_got,"query":query_content,"freq":xapian.sortable_unserialise(match.document.get_value(FREQ)),"click":xapian.sortable_unserialise(match.document.get_value(CLICK)),"date":match.document.get_value(DATE)})
        offset+=limit
        max_try+=1
        # Stop when enough results are gathered, or after 15 pages to
        # bound the work on sparse matches.
        if result_got>result_limit:
            break
        if max_try>15:
            break
    print cache.set(c_key,[result_list,result_got],3600)
    print "cached",c_key
    return result_list,result_got
def extract(self, document):
    # Extract this field's value from `document`, unserialising numeric
    # content; returns None when no slot number is configured.
    # NOTE(review): `if self.number:` also skips slot 0, which is a valid
    # xapian value slot -- confirm whether `is not None` was intended.
    if self.number:
        value = document.get_value(self.number)
        content_type = self._get_content_type(value)
        # (sic) helper name contains a typo: "interger"; defined elsewhere.
        if self._is_float_or_interger(content_type):
            value = xapian.sortable_unserialise(value)
        return value
    return None
def remove_cached_items(self, iconn, doc, xapid):
    """Remove any cached hits stored on xapian document *doc* (*xapid*).

    Only values in this cache manager's slot range are examined; each
    such value encodes the hit's rank as
    (CACHE_MANAGER_MAX_HITS - rank), sortable-serialised.
    """
    #print "Removing docid=%d" % xapid
    # The slot range does not depend on the value being inspected, so
    # compute it once instead of recomputing on every loop iteration.
    base_slot = cache_manager_slot_start(iconn, self.id)
    upper_slot = base_slot + self.num_cached_queries()
    for value in doc.values():
        if not (base_slot <= value.num < upper_slot):
            continue
        # Invert the serialised (max_hits - rank) to recover the rank.
        rank = int(CACHE_MANAGER_MAX_HITS -
                   xapian.sortable_unserialise(value.value))
        self.remove_hits(
            value.num - base_slot,
            ((rank, xapid),))
def display_differences(self, ids1, ids2, name1, name2):
    # Print a report of the ids present in only one of the two result
    # sets, then dump each differing document's 'price' value and term
    # list for debugging.
    ids1_unique = ids1 - ids2
    ids2_unique = ids2 - ids1
    if ids1_unique or ids2_unique:
        print "results for %s and %s differ" % (name1, name2)
    if ids1_unique:
        print "ids only in %s: " % name1, ids1_unique
    if ids2_unique:
        print "ids only in %s: " % name2, ids2_unique
    # Symmetric difference: every id that is not present in both sets.
    for i in ids1 ^ ids2:
        d = self.sconn.get_document(i)
        print "value: ", xapian.sortable_unserialise(d.get_value('price', 'collsort'))
        print "termlist: ", map (lambda t: t.term, d._doc.termlist())
def doc2dict(doc):
    """Build an OrderedDict of the fields stored on a xapian document.

    'title' and 'archived' are included only when present; 'tags' is
    split on the unit-separator character; timestamps are returned as
    arrow objects.
    """
    result = OrderedDict()
    result['url'] = doc.get_value(VALUE_URL)
    title = doc.get_value(VALUE_TITLE)
    if title:
        result['title'] = title.decode('UTF-8')
    tags = doc.get_value(VALUE_TAGS)
    result['tags'] = tags.decode('UTF-8').split(u'\x1f') if tags else []
    result['created'] = arrow.get(
        xapian.sortable_unserialise(doc.get_value(VALUE_CREATED)))
    archived_raw = doc.get_value(VALUE_ARCHIVED)
    if archived_raw:
        result['archived'] = arrow.get(
            xapian.sortable_unserialise(archived_raw))
    result['notes'] = doc.get_data().decode('UTF-8')
    return result
def _generate_records(self, mset, select=set(["*"])):
    """Yield one result dict per match in *mset*.

    Only item_id / item_type are always filled in; detailed data is
    fetched from memcached / the db by the caller.  Keys from the
    pickled document data are copied over when named in *select* (or
    when *select* contains "*").
    """
    for match in mset:
        record = {
            "_did": match.docid,
            "_score": match.percent,
            "_rank": match.rank,
            "_collapse_count": match.collapse_count,
            "_weight": match.weight,
        }
        record['item_id'] = int(
            xapian.sortable_unserialise(
                match.document.get_value(DOC_ITEM_ID)))  # int
        record['item_type'] = match.document.get_value(DOC_ITEM_TYPE)  # string
        if select:
            raw = match.document.get_data()
            if len(raw):
                stored = cPickle.loads(raw)
                for key, value in stored.items():
                    if key in select or "*" in select:
                        record[key] = value
        yield record
def size(self):
    """Return the size of the application without dependencies

    Note that this will return the download size if the app is not
    installed and the installed size if it is installed.
    Returns None implicitly when no size information is available.
    """
    if self._pkg:
        if not self._pkg.installed:
            if self._app.archive_suite:
                ver = self._get_version_for_archive_suite(
                    self._pkg, self._app.archive_suite)
                if ver:
                    return ver.size
            return self._pkg.candidate.size
        else:
            return self._pkg.installed.size
    elif self._doc:
        size = self._doc.get_value(XapianValues.DOWNLOAD_SIZE)
        if size:
            # Reuse the value already fetched instead of reading the
            # xapian slot a second time.
            return xapian.sortable_unserialise(size)
def size(self):
    """Return the size of the application without dependencies

    Note that this will return the download size if the app is not
    installed and the installed size if it is installed.
    Returns None implicitly when no size information is available.
    """
    if self._pkg:
        if not self._pkg.installed:
            if self._app.archive_suite:
                ver = self._get_version_for_archive_suite(
                    self._pkg, self._app.archive_suite)
                if ver:
                    return ver.size
            return self._pkg.candidate.size
        else:
            return self._pkg.installed.size
    elif self._doc:
        size = self._doc.get_value(XapianValues.DOWNLOAD_SIZE)
        if size:
            # Reuse the value already fetched instead of reading the
            # xapian slot a second time.
            return xapian.sortable_unserialise(size)
def remove_cached_items(self, iconn, doc, xapid):
    # Remove cached hits for `doc` across several applied cache managers.
    # slots_info is an ordered list of (base_slot, upper_slot, manager)
    # tuples; doc.values() iterates in ascending slot order, so we advance
    # through slots_info in lockstep rather than rescanning it per value.
    slots_info = self._get_slots_info(iconn)
    if not slots_info:
        return
    index = 0
    base_slot, upper_slot, cm = slots_info[index]
    for value in doc.values():
        slot_number = value.num
        if slot_number >= upper_slot:
            # Step to the next managed slot range; stop once the value
            # lies past the last one.
            # NOTE(review): this advances only one range per value --
            # confirm a value can never jump past an entire range.
            index += 1
            if index == len(slots_info):
                return
            base_slot, upper_slot, cm = slots_info[index]
        if not (base_slot <= slot_number < upper_slot):
            continue
        # Stored value is (CACHE_MANAGER_MAX_HITS - rank), serialised.
        rank = int(CACHE_MANAGER_MAX_HITS -
                   xapian.sortable_unserialise(value.value))
        cm.remove_hits(
            slot_number - base_slot,
            ((rank, xapid),))
def _remove_cached_items(self, docid=None, xapid=None):
    """Remove from the cache any items for the specified document.

    The document may be specified by xappy docid, or by xapian document
    id.
    """
    if self.cache_manager is None:
        raise errors.IndexerError("CacheManager has been applied to this "
                                  "index, but is not currently set.")
    doc, xapid = self._get_xapdoc(docid, xapid)
    if doc is None:
        return
    #print "Removing docid=%d" % xapid
    for value in doc.values():
        # Values below the cache-manager slot range are ordinary field
        # values; skip them.  NOTE(review): unlike the variant that also
        # checks an upper slot bound, every slot >= slot_start is treated
        # as a cache slot here -- confirm no other data uses higher slots.
        if value.num < self._cache_manager_slot_start:
            continue
        # The stored value encodes (max_hits - rank), sortable-serialised.
        rank = int(self._cache_manager_max_hits -
                   xapian.sortable_unserialise(value.value))
        self.cache_manager.remove_hits(
            value.num - self._cache_manager_slot_start,
            ((rank, xapid),))
#parser.set_stemming_strategy(xapian.QueryParser.STEM_ALL) parser.set_database(db) #parser.add_prefix("pkg", "AP") query = parser.parse_query(search_term, xapian.QueryParser.FLAG_PARTIAL| xapian.QueryParser.FLAG_WILDCARD) enquire = xapian.Enquire(db) enquire.set_sort_by_value_then_relevance(XAPIAN_VALUE_POPCON) enquire.set_query(query) matches = enquire.get_mset(0, db.get_doccount()) print "Matches:" for m in matches: doc = m.document popcon = doc.get_value(XAPIAN_VALUE_POPCON) print doc.get_data(), "popcon:", xapian.sortable_unserialise(popcon) #for t in doc.termlist(): # print "'%s': %s (%s); " % (t.term, t.wdf, t.termfreq), #print "\n" appname = doc.get_data() # calculate a eset print "ESet:" rset = xapian.RSet() for m in matches: rset.add_document(m.docid) for m in enquire.get_eset(10, rset): print m.term # calulate the expansions
def test_all(): # Test the version number reporting functions give plausible results. v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(), xapian.revision()) v2 = xapian.version_string() expect(v2, v, "Unexpected version output") # A regexp check would be better, but seems to create a bogus "leak" of -1 # objects in Python 3. expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z') expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"') def access_cvar(): res = xapian.cvar print("Unhandled constants: ", res) return res # Check that SWIG isn't generating cvar (regression test for ticket#297). # # Python 3.5 generates a different exception message here to earlier # versions, so we need a check which matches both. expect_exception(AttributeError, lambda msg: msg.find("has no attribute 'cvar'") != -1, access_cvar) stem = xapian.Stem(b"english") expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)") doc = xapian.Document() doc.set_data(b"a\0b") if doc.get_data() == b"a": raise TestFail("get_data+set_data truncates at a zero byte") expect(doc.get_data(), b"a\0b", "get_data+set_data doesn't transparently handle a zero byte") doc.set_data(b"is there anybody out there?") doc.add_term(b"XYzzy") doc.add_posting(stem(b"is"), 1) doc.add_posting(stem(b"there"), 2) doc.add_posting(stem(b"anybody"), 3) doc.add_posting(stem(b"out"), 4) doc.add_posting(stem(b"there"), 5) db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY) db.add_document(doc) expect(db.get_doccount(), 1, "Unexpected db.get_doccount()") terms = ["smoke", "test", "terms"] expect_query( xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]), "(smoke OR test OR terms)") query1 = xapian.Query(xapian.Query.OP_PHRASE, (b"smoke", b"test", b"tuple")) query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query(b"smoke"), query1, b"string")) expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)") expect_query( query2, "(smoke XOR 
(smoke PHRASE 3 test PHRASE 3 tuple) XOR string)") subqs = ["a", "b"] expect_query( xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]), "(a OR b)") expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'), "VALUE_RANGE 0 1 4") # Check database factory functions are wrapped as expected (or not wrapped # in the first cases): expect_exception( AttributeError, lambda msg: msg.find("has no attribute 'open_stub'") != -1, lambda: xapian.open_stub(b"nosuchdir/nosuchdb")) expect_exception( AttributeError, lambda msg: msg.find("has no attribute 'open_stub'") != -1, lambda: xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN)) expect_exception( xapian.DatabaseOpeningError, None, lambda: xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB)) expect_exception( xapian.DatabaseOpeningError, None, lambda: xapian.WritableDatabase( b"nosuchdir/nosuchdb", xapian.DB_OPEN | xapian.DB_BACKEND_STUB)) expect_exception( xapian.DatabaseOpeningError, None, lambda: xapian.Database( b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS)) expect_exception( xapian.DatabaseCreateError, None, lambda: xapian.WritableDatabase( b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_GLASS)) expect_exception( xapian.FeatureUnavailableError, None, lambda: xapian.Database( b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT)) expect_exception( xapian.FeatureUnavailableError, None, lambda: xapian.WritableDatabase( b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_CHERT)) expect_exception(xapian.NetworkError, None, xapian.remote_open, b"/bin/false", b"") expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, b"/bin/false", b"") expect_exception(xapian.NetworkError, None, xapian.remote_open, b"127.0.0.1", 0, 1) expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, b"127.0.0.1", 0, 1) # Check wrapping of MatchAll and MatchNothing: expect_query(xapian.Query.MatchAll, "<alldocuments>") expect_query(xapian.Query.MatchNothing, "") 
# Feature test for Query.__iter__ term_count = 0 for term in query2: term_count += 1 expect(term_count, 4, "Unexpected number of terms in query2") enq = xapian.Enquire(db) enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is")) mset = enq.get_mset(0, 10) expect(mset.size(), 1, "Unexpected mset.size()") expect(len(mset), 1, "Unexpected mset.size()") # Feature test for Enquire.matching_terms(docid) term_count = 0 for term in enq.matching_terms(mset.get_hit(0)): term_count += 1 expect(term_count, 2, "Unexpected number of matching terms") # Feature test for MSet.__iter__ msize = 0 for match in mset: msize += 1 expect(msize, mset.size(), "Unexpected number of entries in mset") terms = b" ".join(enq.matching_terms(mset.get_hit(0))) expect(terms, b"is there", "Unexpected terms") # Feature test for ESet.__iter__ rset = xapian.RSet() rset.add_document(1) eset = enq.get_eset(10, rset) term_count = 0 for term in eset: term_count += 1 expect(term_count, 3, "Unexpected number of expand terms") # Feature test for Database.__iter__ term_count = 0 for term in db: term_count += 1 expect(term_count, 5, "Unexpected number of terms in db") # Feature test for Database.allterms term_count = 0 for term in db.allterms(): term_count += 1 expect(term_count, 5, "Unexpected number of terms in db.allterms") # Feature test for Database.postlist count = 0 for posting in db.postlist(b"there"): count += 1 expect(count, 1, "Unexpected number of entries in db.postlist('there')") # Feature test for Database.postlist with empty term (alldocspostlist) count = 0 for posting in db.postlist(b""): count += 1 expect(count, 1, "Unexpected number of entries in db.postlist('')") # Feature test for Database.termlist count = 0 for term in db.termlist(1): count += 1 expect(count, 5, "Unexpected number of entries in db.termlist(1)") # Feature test for Database.positionlist count = 0 for term in db.positionlist(1, b"there"): count += 1 expect(count, 2, "Unexpected number of entries in db.positionlist(1, 
'there')") # Feature test for Document.termlist count = 0 for term in doc.termlist(): count += 1 expect(count, 5, "Unexpected number of entries in doc.termlist()") # Feature test for TermIter.skip_to term = doc.termlist() term.skip_to(b'n') while True: try: x = next(term) except StopIteration: break if x.term < b'n': raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8')) # Feature test for Document.values count = 0 for term in list(doc.values()): count += 1 expect(count, 0, "Unexpected number of entries in doc.values") # Check exception handling for Xapian::DocNotFoundError expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3) # Check value of OP_ELITE_SET expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET") # Feature test for MatchDecider doc = xapian.Document() doc.set_data(b"Two") doc.add_posting(stem(b"out"), 1) doc.add_posting(stem(b"outside"), 1) doc.add_posting(stem(b"source"), 2) doc.add_value(0, b"yes") db.add_document(doc) class testmatchdecider(xapian.MatchDecider): def __call__(self, doc): return doc.get_value(0) == b"yes" query = xapian.Query(stem(b"out")) enquire = xapian.Enquire(db) enquire.set_query(query) mset = enquire.get_mset(0, 10, None, testmatchdecider()) expect(mset.size(), 1, "Unexpected number of documents returned by match decider") expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in") # Feature test for ExpandDecider class testexpanddecider(xapian.ExpandDecider): def __call__(self, term): return (not term.startswith(b'a')) enquire = xapian.Enquire(db) rset = xapian.RSet() rset.add_document(1) eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider()) eset_terms = [item.term for item in eset] expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand") if [t for t in eset_terms if t.startswith(b'a')]: raise TestFail("ExpandDecider was not used") # Check min_wt argument to get_eset() 
works (new in 1.2.5). eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ) expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt") eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9) expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt") # Check QueryParser parsing error. qp = xapian.QueryParser() expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND") # Check QueryParser pure NOT option qp = xapian.QueryParser() expect_query( qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT test@1)") # Check QueryParser partial option qp = xapian.QueryParser() qp.set_database(db) qp.set_default_op(xapian.Query.OP_AND) qp.set_stemming_strategy(qp.STEM_SOME) qp.set_stemmer(xapian.Stem(b'en')) expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL), "(Zfoo@1 AND ((SYNONYM WILDCARD OR o) OR Zo@2))") expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL), "(Zfoo@1 AND ((SYNONYM WILDCARD OR outside) OR Zoutsid@2))") # Test supplying unicode strings expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')), '(foo OR bar)') expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')), '(foo OR bar\\xa3)') expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')), '(foo OR bar\u00a3)') expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'), '(foo OR bar)') expect_query( qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT Zt\u00e9st@1)") doc = xapian.Document() doc.set_data(b"Unicode with an acc\xe9nt") doc.add_posting(stem(b"out\xe9r"), 1) expect(doc.get_data(), b"Unicode with an acc\xe9nt") term = next(doc.termlist()).term expect(term, b"out\xe9r") # Check simple stopper stop = xapian.SimpleStopper() qp.set_stopper(stop) expect(stop(b'a'), False) expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND 
Zbar@2 AND Za@3)") stop.add(b'a') expect(stop(b'a'), True) expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)") # Feature test for custom Stopper class my_b_stopper(xapian.Stopper): def __call__(self, term): return term == b"b" def get_description(self): return "my_b_stopper" stop = my_b_stopper() expect(stop.get_description(), "my_b_stopper") qp.set_stopper(stop) expect(stop(b'a'), False) expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2 AND Za@3)") expect(stop(b'b'), True) expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)") # Test TermGenerator termgen = xapian.TermGenerator() doc = xapian.Document() termgen.set_document(doc) termgen.index_text(b'foo bar baz foo') expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])]) # Check DateValueRangeProcessor works context("checking that DateValueRangeProcessor works") qp = xapian.QueryParser() vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960) qp.add_valuerangeprocessor(vrpdate) query = qp.parse_query(b'12/03/99..12/04/01') expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)') # Regression test for bug#193, fixed in 1.0.3. 
context("running regression test for bug#193") vrp = xapian.NumberValueRangeProcessor(0, b'$', True) a = '$10' b = '20' slot, a, b = vrp(a, b.encode('utf-8')) expect(slot, 0) expect(xapian.sortable_unserialise(a), 10) expect(xapian.sortable_unserialise(b), 20) # Feature test for xapian.FieldProcessor context("running feature test for xapian.FieldProcessor") class testfieldprocessor(xapian.FieldProcessor): def __call__(self, s): if s == 'spam': raise Exception('already spam') return xapian.Query("spam") qp.add_prefix('spam', testfieldprocessor()) qp.add_boolean_prefix('boolspam', testfieldprocessor()) query = qp.parse_query('spam:ignored') expect(str(query), 'Query(spam)') # FIXME: This doesn't currently work: # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam') # Regression tests copied from PHP (probably always worked in python, but # let's check...) context("running regression tests for issues which were found in PHP") # PHP overload resolution involving boolean types failed. enq.set_sort_by_value(1, True) # Regression test - fixed in 0.9.10.1. oqparser = xapian.QueryParser() oquery = oqparser.parse_query(b"I like tea") # Regression test for bug#192 - fixed in 1.0.3. enq.set_cutoff(100) # Test setting and getting metadata expect(db.get_metadata(b'Foo'), b'') db.set_metadata(b'Foo', b'Foo') expect(db.get_metadata(b'Foo'), b'Foo') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'') # Test OP_SCALE_WEIGHT and corresponding constructor expect_query( xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5), "5 * foo")
def test_all(): # Test the version number reporting functions give plausible results. v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(), xapian.revision()) v2 = xapian.version_string() expect(v2, v, "Unexpected version output") def access_cvar(): return xapian.cvar # Check that SWIG isn't generating cvar (regression test for ticket#297). expect_exception(AttributeError, "'module' object has no attribute 'cvar'", access_cvar) stem = xapian.Stem("english") expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)") doc = xapian.Document() doc.set_data("a\0b") if doc.get_data() == "a": raise TestFail("get_data+set_data truncates at a zero byte") expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte") doc.set_data("is there anybody out there?") doc.add_term("XYzzy") doc.add_posting(stem("is"), 1) doc.add_posting(stem("there"), 2) doc.add_posting(stem("anybody"), 3) doc.add_posting(stem("out"), 4) doc.add_posting(stem("there"), 5) db = xapian.inmemory_open() db.add_document(doc) expect(db.get_doccount(), 1, "Unexpected db.get_doccount()") terms = ["smoke", "test", "terms"] expect_query(xapian.Query(xapian.Query.OP_OR, terms), "(smoke OR test OR terms)") query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple")) query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string")) expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)") expect_query( query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)") subqs = ["a", "b"] expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)") expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'), "VALUE_RANGE 0 1 4") expect_query(xapian.Query.MatchAll, "<alldocuments>") expect_query(xapian.Query.MatchNothing, "") # Feature test for Query.__iter__ term_count = 0 for term in query2: term_count += 1 expect(term_count, 4, "Unexpected number of terms in query2") enq = xapian.Enquire(db) 
enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is")) mset = enq.get_mset(0, 10) expect(mset.size(), 1, "Unexpected mset.size()") expect(len(mset), 1, "Unexpected mset.size()") # Feature test for Enquire.matching_terms(docid) term_count = 0 for term in enq.matching_terms(mset.get_hit(0)): term_count += 1 expect(term_count, 2, "Unexpected number of matching terms") # Feature test for MSet.__iter__ msize = 0 for match in mset: msize += 1 expect(msize, mset.size(), "Unexpected number of entries in mset") terms = " ".join(enq.matching_terms(mset.get_hit(0))) expect(terms, "is there", "Unexpected terms") # Feature test for ESet.__iter__ rset = xapian.RSet() rset.add_document(1) eset = enq.get_eset(10, rset) term_count = 0 for term in eset: term_count += 1 expect(term_count, 3, "Unexpected number of expand terms") # Feature test for Database.__iter__ term_count = 0 for term in db: term_count += 1 expect(term_count, 5, "Unexpected number of terms in db") # Feature test for Database.allterms term_count = 0 for term in db.allterms(): term_count += 1 expect(term_count, 5, "Unexpected number of terms in db.allterms") # Feature test for Database.postlist count = 0 for posting in db.postlist("there"): count += 1 expect(count, 1, "Unexpected number of entries in db.postlist('there')") # Feature test for Database.postlist with empty term (alldocspostlist) count = 0 for posting in db.postlist(""): count += 1 expect(count, 1, "Unexpected number of entries in db.postlist('')") # Feature test for Database.termlist count = 0 for term in db.termlist(1): count += 1 expect(count, 5, "Unexpected number of entries in db.termlist(1)") # Feature test for Database.positionlist count = 0 for term in db.positionlist(1, "there"): count += 1 expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')") # Feature test for Document.termlist count = 0 for term in doc.termlist(): count += 1 expect(count, 5, "Unexpected number of entries in doc.termlist()") # Feature test 
for TermIter.skip_to term = doc.termlist() term.skip_to('n') while True: try: x = next(term) except StopIteration: break if x.term < 'n': raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term) # Feature test for Document.values count = 0 for term in doc.values(): count += 1 expect(count, 0, "Unexpected number of entries in doc.values") # Check exception handling for Xapian::DocNotFoundError expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3) # Check value of OP_ELITE_SET expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET") # Feature test for MatchDecider doc = xapian.Document() doc.set_data("Two") doc.add_posting(stem("out"), 1) doc.add_posting(stem("outside"), 1) doc.add_posting(stem("source"), 2) doc.add_value(0, "yes") db.add_document(doc) class testmatchdecider(xapian.MatchDecider): def __call__(self, doc): return doc.get_value(0) == "yes" query = xapian.Query(stem("out")) enquire = xapian.Enquire(db) enquire.set_query(query) mset = enquire.get_mset(0, 10, None, testmatchdecider()) expect(mset.size(), 1, "Unexpected number of documents returned by match decider") expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in") # Feature test for ExpandDecider class testexpanddecider(xapian.ExpandDecider): def __call__(self, term): return (not term.startswith('a')) enquire = xapian.Enquire(db) rset = xapian.RSet() rset.add_document(1) eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider()) eset_terms = [term[xapian.ESET_TNAME] for term in eset.items] expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand") if [t for t in eset_terms if t.startswith('a')]: raise TestFail("ExpandDecider was not used") # Check min_wt argument to get_eset() works (new in 1.2.5). 
eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ) expect(eset.items[-1][xapian.ESET_WT] < 1.9, True, "test get_eset() without min_wt") eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9) expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True, "test get_eset() min_wt") # Check QueryParser parsing error. qp = xapian.QueryParser() expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND") # Check QueryParser pure NOT option qp = xapian.QueryParser() expect_query( qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT test:(pos=1))") # Check QueryParser partial option qp = xapian.QueryParser() qp.set_database(db) qp.set_default_op(xapian.Query.OP_AND) qp.set_stemming_strategy(qp.STEM_SOME) qp.set_stemmer(xapian.Stem('en')) expect_query( qp.parse_query("foo o", qp.FLAG_PARTIAL), "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))" ) expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), "(Zfoo:(pos=1) AND Zoutsid:(pos=2))") # Test supplying unicode strings expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')), '(foo OR bar)') expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')), '(foo OR bar\xc2\xa3)') expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')), '(foo OR bar\xc2\xa3)') expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'), '(foo OR bar)') expect_query( qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))") doc = xapian.Document() doc.set_data(u"Unicode with an acc\xe9nt") doc.add_posting(stem(u"out\xe9r"), 1) expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8')) term = doc.termlist().next().term expect(term, u"out\xe9r".encode('utf-8')) # Check simple stopper stop = xapian.SimpleStopper() qp.set_stopper(stop) expect(stop('a'), False) expect_query(qp.parse_query(u"foo bar a", 
qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))") stop.add('a') expect(stop('a'), True) expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2))") # Feature test for custom Stopper class my_b_stopper(xapian.Stopper): def __call__(self, term): return term == "b" def get_description(self): return u"my_b_stopper" stop = my_b_stopper() expect(stop.get_description(), u"my_b_stopper") qp.set_stopper(stop) expect(stop('a'), False) expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))") expect(stop('b'), True) expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2))") # Test TermGenerator termgen = xapian.TermGenerator() doc = xapian.Document() termgen.set_document(doc) termgen.index_text('foo bar baz foo') expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])]) # Check DateValueRangeProcessor works context("checking that DateValueRangeProcessor works") qp = xapian.QueryParser() vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960) qp.add_valuerangeprocessor(vrpdate) query = qp.parse_query('12/03/99..12/04/01') expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)') # Regression test for bug#193, fixed in 1.0.3. context("running regression test for bug#193") vrp = xapian.NumberValueRangeProcessor(0, '$', True) a = '$10' b = '20' slot, a, b = vrp(a, b) expect(slot, 0) expect(xapian.sortable_unserialise(a), 10) expect(xapian.sortable_unserialise(b), 20) # Regression tests copied from PHP (probably always worked in python, but # let's check...) context("running regression tests for issues which were found in PHP") # PHP overload resolution involving boolean types failed. enq.set_sort_by_value(1, True) # Regression test - fixed in 0.9.10.1. 
oqparser = xapian.QueryParser() oquery = oqparser.parse_query("I like tea") # Regression test for bug#192 - fixed in 1.0.3. enq.set_cutoff(100) # Test setting and getting metadata expect(db.get_metadata('Foo'), '') db.set_metadata('Foo', 'Foo') expect(db.get_metadata('Foo'), 'Foo') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, '', 'Foo') expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '') # Test OP_SCALE_WEIGHT and corresponding constructor expect_query( xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5), "5 * foo")
def test_all():
    """Smoke-test the xapian Python (2.x) bindings end-to-end.

    Exercises version reporting, Document/Database basics, Query
    construction, iterator protocols, MatchDecider/ExpandDecider
    subclassing, QueryParser options, unicode handling, stoppers,
    TermGenerator, value-range processors and metadata.  Relies on the
    test harness helpers expect/expect_query/expect_exception/context
    and TestFail defined elsewhere in this file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    def access_cvar():
        return xapian.cvar

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    # "\0" embedded in the data must survive a set/get round trip.
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Queries can be built from plain sequences of terms...
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    # ...and from mixed sequences of sub-queries and terms.
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, "1", "4"),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:
    expect_exception(xapian.DatabaseOpeningError, None,
                     xapian.open_stub, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseOpeningError, None,
                     xapian.open_stub, "nosuchdir/nosuchdb", xapian.DB_OPEN)
    expect_exception(xapian.DatabaseOpeningError, None,
                     xapian.brass_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None,
                     xapian.brass_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)
    expect_exception(xapian.DatabaseOpeningError, None,
                     xapian.chert_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None,
                     xapian.chert_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "/bin/false", "")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "/bin/false", "")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, "127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, "127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to("n")
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < "n":
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 is "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject expand terms starting with "a".
        def __call__(self, term):
            return not term.startswith("a")

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith("a")]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem("en"))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")
    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u"foo", u"bar")),
                 "(foo OR bar)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", u"bar\xa3")),
                 "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", "bar\xc2\xa3")),
                 "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, u"foo", u"bar"),
                 "(foo OR bar)")
    expect_query(
        qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\xc3\xa9st@1)"
    )

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    # Unicode input is stored UTF-8 encoded.
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode("utf-8"))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode("utf-8"))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")
    stop.add("a")
    expect(stop("a"), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Stops only the term "b".
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")
    expect(stop("b"), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text("foo bar baz foo")
    expect(
        [(item.term, item.wdf, [pos for pos in item.positer])
         for item in doc.termlist()],
        [("bar", 1, [2]), ("baz", 1, [3]), ("foo", 2, [1, 4])],
    )

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query("12/03/99..12/04/01")
    expect(str(query), "Query(0 * VALUE_RANGE 1 19991203 20011204)")

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, "$", True)
    a = "$10"
    b = "20"
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata("Foo"), "")
    db.set_metadata("Foo", "Foo")
    expect(db.get_metadata("Foo"), "Foo")
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, "")
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, "", "Foo")
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, "")

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT,
                              xapian.Query("foo"), 5),
                 "5 * foo")
def query(self, querystring=None, qtype=None, begin=None, end=None, keywords=[], hashtags=[], synonymslist=[], emotiononly=False): if qtype == 'hy': self.qp.add_valuerangeprocessor( xapian.NumberValueRangeProcessor(self.timestampvi, '')) querystring = begin + '..' + end if emotiononly: self.qp.add_valuerangeprocessor( xapian.NumberValueRangeProcessor(self.emotiononlyvi, 'f', False)) querystring += ' 1.0..1.0f' query = self.qp.parse_query(querystring) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) #matches = self.enquire.get_mset(0, self.maxitems) matches = self.enquire.get_mset(0, 10000) # Display the results. print "%i results found." % matches.size() if not self.lowkeywords_proc(matches): return emotions_list, keywords_list = self.keywords_and_emotions_list_proc( matches) return emotions_list, keywords_list if qtype == 'yq': self.qp.add_valuerangeprocessor( xapian.NumberValueRangeProcessor(self.timestampvi, '')) querystring = begin + '..' + end query = self.qp.parse_query(querystring) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) #matches = self.enquire.get_mset(0,10) matches = self.enquire.get_mset(0, self.maxitems) # Display the results. print "%i results found." 
% matches.size() keywords_arr = [] for m in matches: #hashtag hashtags = json.loads(m.document.get_value(self.hashtagsvi)) #keywords keywords_hash = json.loads( m.document.get_value(self.keywordsvi)) keywords_arr.append(keywords_hash) #keywords_counter += Counter(json.loads(m.document.get_value(self.keywordsvi))) print 'mapreduce begin: ', str( time.strftime("%H:%M:%S", time.gmtime())) mapper = SimpleMapReduce(hasharr_to_list, count_words) word_counts = mapper(keywords_arr) keywords_hash = {} for word, count in word_counts: keywords_hash[word] = count for synonyms in synonymslist: if len(synonyms) >= 2 and synonyms[0] in keywords_hash: for word in synonyms[1:]: if word in keywords_hash: keywords_hash[synonyms[0]] += keywords_hash[word] del keywords_hash[word] print 'mapreduce end: ', str( time.strftime("%H:%M:%S", time.gmtime())) #print keywords_counter return hashtags, keywords_hash if qtype == 'lh': self.qp.add_valuerangeprocessor( xapian.NumberValueRangeProcessor(self.timestampvi, '')) timequerystr = begin + '..' + end timequery = self.qp.parse_query(timequerystr) hashtags = ['H' + hashtag.lower() for hashtag in hashtags] keywords = [keyword.lower() for keyword in keywords] keywords.extend(hashtags) if len(keywords) > 0: wordsquery = xapian.Query(xapian.Query.OP_OR, keywords) else: return None query = xapian.Query(xapian.Query.OP_AND, [timequery, wordsquery]) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) self.enquire.set_sort_by_value(self.timestampvi, False) #matches = self.enquire.get_mset(0,10) matches = self.enquire.get_mset(0, self.maxitems) # Display the results. print "%i results found." % matches.size() results = [] for m in matches: result = {} result['location'] = m.document.get_value(self.loctvi) result['repost_location'] = m.document.get_value( self.reploctvi) result['timestamp'] = xapian.sortable_unserialise( m.document.get_value(self.timestampvi)) results.append(result) return results
# List the applications with the highest popcon values stored in the
# software-center xapian index, most popular first.
import heapq
import os
import sys
import xapian
# Make the softwarecenter package importable when run from this directory.
sys.path.insert(0, "../")
from softwarecenter.enums import *
from softwarecenter.utils import *

if __name__ == "__main__":
    # Number of entries to print; an optional first CLI argument overrides it.
    topn = 20
    if len(sys.argv) > 1:
        topn = int(sys.argv[1])
    pathname = os.path.join(XAPIAN_BASE_PATH, "xapian")
    db = xapian.Database(pathname)
    heap = []
    # The empty-term postlist iterates every document in the database.
    for m in db.postlist(""):
        doc = db.get_document(m.docid)
        pkgname = doc.get_value(XAPIAN_VALUE_PKGNAME)
        appname = doc.get_value(XAPIAN_VALUE_APPNAME)
        summary = doc.get_value(XAPIAN_VALUE_SUMMARY)
        # Slot holds a number serialised for sorting; decode it back.
        # (Presumably written with xapian.sortable_serialise at index time.)
        popcon = xapian.sortable_unserialise(doc.get_value(XAPIAN_VALUE_POPCON))
        # Tuples compare element-wise, so the heap orders by popcon first.
        heapq.heappush(heap, (popcon, appname, pkgname, summary))
    for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap):
        print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
def test_all():
    """Smoke-test the xapian Python 3 (bytes-API) bindings end-to-end.

    Mirrors the Python 2 smoketest but passes bytes where the bindings
    expect raw data: terms, document data, metadata keys/values and
    serialised numbers.  Exercises version reporting, Document/Database
    basics, Query construction, iterator protocols, deciders,
    QueryParser options, stoppers, TermGenerator, value-range and field
    processors, and metadata.  Relies on the harness helpers
    expect/expect_query/expect_exception/context and TestFail defined
    elsewhere in this file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")
    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    # b"\0" embedded in the data must survive a set/get round trip.
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Term lists must be passed as bytes.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2,
                 "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [s.encode('utf-8') for s in subqs]),
                 "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda: xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda: xapian.open_stub(b"nosuchdir/nosuchdb",
                                              xapian.DB_OPEN))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda: xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda: xapian.chert_open(b"nosuchdir/nosuchdb",
                                               xapian.DB_CREATE))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda: xapian.Database(b"nosuchdir/nosuchdb",
                                             xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda: xapian.WritableDatabase(
                         b"nosuchdir/nosuchdb",
                         xapian.DB_OPEN | xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda: xapian.Database(b"nosuchdir/nosuchdb",
                                             xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda: xapian.WritableDatabase(
                         b"nosuchdir/nosuchdb",
                         xapian.DB_CREATE | xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda: xapian.Database(b"nosuchdir/nosuchdb",
                                             xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda: xapian.WritableDatabase(
                         b"nosuchdir/nosuchdb",
                         xapian.DB_CREATE | xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" %
                           x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 is b"yes".
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject expand terms starting with b'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test",
                                qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")
    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')
    expect_query(qp.parse_query(b"NOT t\xe9st",
                                qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")
    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Stops only the term b"b".
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")
    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()],
           [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")

    class testfieldprocessor(xapian.FieldProcessor):
        # Maps any field value to the fixed term "spam"; raises on "spam"
        # itself to exercise exception propagation from the callback.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT,
                              xapian.Query(b'foo'), 5),
                 "5 * foo")
def test_all():
    """Smoke-test the Xapian Python bindings end-to-end.

    Builds an in-memory database, then exercises Document/Query/Enquire/
    QueryParser/TermGenerator features plus several historical regression
    cases.  Raises TestFail (via expect/expect_query/expect_exception) on
    the first mismatch.  Python 2 code: str/unicode handling is deliberate.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(), xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")
    stem = xapian.Stem("english")
    expect(stem.get_description(), "Xapian::Stem(english)", "Unexpected stem.get_description()")
    doc = xapian.Document()
    # Embedded NUL byte must round-trip through set_data/get_data.
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms), "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'), "VALUE_RANGE 0 1 4")
    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")
    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")
    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")
    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")
    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")
    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")
    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")
    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")
    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")
    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")
    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")
    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    while True:
        try:
            x = term.next()
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)
    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")
    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)
    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")
    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 is "yes" (i.e. doc 2).
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject expand terms starting with 'a'.
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if filter(lambda t: t.startswith('a'), eset_terms):
        raise TestFail("ExpandDecider was not used")
    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")
    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT test:(pos=1))")
    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")
    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')), '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')), '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')), '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'), '(foo OR bar)')
    expect_query(qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))")
    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))
    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")
    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")
    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN), "(Zfoo:(pos=1) AND Zbar:(pos=2))")
    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])
    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)')
    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)
    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")
    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)
    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")
    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)
    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, '', 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')
    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5), "5 * foo")
def get_weight(self, doc):
    """Return the document's stored numeric value as a weight.

    Reads the value slot identified by (self.field, self.purpose),
    decodes it with xapian.sortable_unserialise, and caps the result
    at self.maxval so no single document can dominate the ranking.
    """
    raw = doc.get_value(self.field, self.purpose)
    weight = xapian.sortable_unserialise(raw)
    # Clamp to the configured ceiling; min() returns weight unchanged
    # when it is already <= maxval.
    return min(weight, self.maxval)
import heapq import os import sys import xapian sys.path.insert(0, "../") from softwarecenter.enums import XapianValues from softwarecenter.paths import XAPIAN_BASE_PATH if __name__ == "__main__": topn = 20 if len(sys.argv) > 1: topn = int(sys.argv[1]) pathname = os.path.join(XAPIAN_BASE_PATH, "xapian") db = xapian.Database(pathname) heap = [] for m in db.postlist(""): doc = db.get_document(m.docid) pkgname = doc.get_value(XapianValues.PKGNAME) appname = doc.get_value(XapianValues.APPNAME) summary = doc.get_value(XapianValues.SUMMARY) popcon = xapian.sortable_unserialise(doc.get_value( XapianValues.POPCON)) heapq.heappush(heap, (popcon, appname, pkgname, summary)) for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap): print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
def decode_sortable_date(r):
    """Decode a xapian sortable value into an ISO-8601 local-time string.

    Timestamps are stored negated (so newer dates sort first); a decoded
    value of exactly 0 is treated as "no date" and yields None.
    """
    timestamp = -xapian.sortable_unserialise(r)
    return None if timestamp == 0 else time.strftime(
        ISO_8601, time.localtime(timestamp))
def popcon_max(self):
    """Return the largest popcon (popularity) value recorded in the index.

    The value is read from the "popcon_max_desktop" database metadata
    entry, which was stored with xapian.sortable_serialise.
    """
    serialised = self.xapiandb.get_metadata("popcon_max_desktop")
    ceiling = xapian.sortable_unserialise(serialised)
    # A missing/zero metadata entry would make later normalisation
    # meaningless, so insist on a positive ceiling.
    assert ceiling > 0
    return ceiling
import heapq import os import sys import xapian sys.path.insert(0, "../") from softwarecenter.enums import XapianValues from softwarecenter.paths import XAPIAN_BASE_PATH if __name__ == "__main__": topn = 20 if len(sys.argv) > 1: topn = int(sys.argv[1]) pathname = os.path.join(XAPIAN_BASE_PATH, "xapian") db = xapian.Database(pathname) heap = [] for m in db.postlist(""): doc = db.get_document(m.docid) pkgname = doc.get_value(XapianValues.PKGNAME) appname = doc.get_value(XapianValues.APPNAME) summary = doc.get_value(XapianValues.SUMMARY) popcon = xapian.sortable_unserialise(doc.get_value(XapianValues.POPCON)) heapq.heappush(heap, (popcon, appname, pkgname, summary)) for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap): print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
#parser.set_stemming_strategy(xapian.QueryParser.STEM_ALL) parser.set_database(db) #parser.add_prefix("pkg", "AP") query = parser.parse_query( search_term, xapian.QueryParser.FLAG_PARTIAL | xapian.QueryParser.FLAG_WILDCARD) enquire = xapian.Enquire(db) enquire.set_sort_by_value_then_relevance(XapianValues.POPCON) enquire.set_query(query) matches = enquire.get_mset(0, db.get_doccount()) print "Matches:" for m in matches: doc = m.document popcon = doc.get_value(XapianValues.POPCON) print doc.get_data(), "popcon:", xapian.sortable_unserialise(popcon) #for t in doc.termlist(): # print "'%s': %s (%s); " % (t.term, t.wdf, t.termfreq), #print "\n" appname = doc.get_data() # calculate a eset print "ESet:" rset = xapian.RSet() for m in matches: rset.add_document(m.docid) for m in enquire.get_eset(10, rset): print m.term # calulate the expansions completions = []
def popcon_max(self):
    # Largest popcon (popularity contest) value recorded in the index
    # metadata, stored serialised with xapian.sortable_serialise; used
    # to normalise per-application popcon scores.
    popcon_max = xapian.sortable_unserialise(
        self.xapiandb.get_metadata("popcon_max_desktop"))
    # NOTE(review): assert is stripped under `python -O`; presumably the
    # metadata key is always written at index build time — confirm.
    assert popcon_max > 0
    return popcon_max
def query(self, querystring=None, qtype=None, begin=None, end=None, keywords=[], hashtags=[], synonymslist=[], emotiononly=False): if qtype == 'hy': self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, '')) querystring = begin + '..' + end if emotiononly: self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.emotiononlyvi, 'f', False)) querystring += ' 1.0..1.0f' query = self.qp.parse_query(querystring) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) #matches = self.enquire.get_mset(0, self.maxitems) matches = self.enquire.get_mset(0, 10000) # Display the results. print "%i results found." % matches.size() if not self.lowkeywords_proc(matches): return emotions_list, keywords_list = self.keywords_and_emotions_list_proc(matches) return emotions_list, keywords_list if qtype == 'yq': self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, '')) querystring = begin + '..' + end query = self.qp.parse_query(querystring) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) #matches = self.enquire.get_mset(0,10) matches = self.enquire.get_mset(0, self.maxitems) # Display the results. print "%i results found." 
% matches.size() keywords_arr = [] for m in matches: #hashtag hashtags = json.loads(m.document.get_value(self.hashtagsvi)) #keywords keywords_hash = json.loads(m.document.get_value(self.keywordsvi)) keywords_arr.append(keywords_hash) #keywords_counter += Counter(json.loads(m.document.get_value(self.keywordsvi))) print 'mapreduce begin: ', str(time.strftime("%H:%M:%S", time.gmtime())) mapper = SimpleMapReduce(hasharr_to_list, count_words) word_counts = mapper(keywords_arr) keywords_hash = {} for word, count in word_counts: keywords_hash[word] = count for synonyms in synonymslist: if len(synonyms) >= 2 and synonyms[0] in keywords_hash: for word in synonyms[1:]: if word in keywords_hash: keywords_hash[synonyms[0]] += keywords_hash[word] del keywords_hash[word] print 'mapreduce end: ', str(time.strftime("%H:%M:%S", time.gmtime())) #print keywords_counter return hashtags, keywords_hash if qtype == 'lh': self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, '')) timequerystr = begin + '..' + end timequery = self.qp.parse_query(timequerystr) hashtags = ['H' + hashtag.lower() for hashtag in hashtags] keywords = [keyword.lower() for keyword in keywords] keywords.extend(hashtags) if len(keywords) > 0: wordsquery = xapian.Query(xapian.Query.OP_OR, keywords) else: return None query = xapian.Query(xapian.Query.OP_AND, [timequery, wordsquery]) print "Parsed query is: %s" % [str(query)] self.enquire.set_query(query) self.enquire.set_sort_by_value(self.timestampvi, False) #matches = self.enquire.get_mset(0,10) matches = self.enquire.get_mset(0, self.maxitems) # Display the results. print "%i results found." % matches.size() results = [] for m in matches: result = {} result['location'] = m.document.get_value(self.loctvi) result['repost_location'] = m.document.get_value(self.reploctvi) result['timestamp'] = xapian.sortable_unserialise(m.document.get_value(self.timestampvi)) results.append(result) return results
def xapian_search(request): if not xapian_avail: return HttpResponse('{}') search_string = request.GET.get('keywords', None) try: search_limit = int(request.GET.get('slimit', str(settings.MAX_MATCHES_SEARCH))) except: search_limit = settings.MAX_MATCHES_SEARCH try: view_limit = int(request.GET.get('vlimit', str(settings.MAX_MATCHES_SHOW))) except: view_limit = settings.MAX_MATCHES_SHOW offset = int(request.GET.get('offset', 0)) search_max_result_length = int(request.GET.get('maxchars', '0')) zoomlevel = int(request.GET.get('zoomlevel', 12)) cluster = float(request.GET.get('cluster', 0)) details = str(request.GET.get('details', "no")) from_date = str(request.GET.get('datefrom', None)) to_date = str(request.GET.get('dateto', None)) seqnum= str(request.GET.get('seqnum', 0)) coords = { 'x1': float(request.GET.get('x1', 0)), 'y1': float(request.GET.get('y1', 0)), 'x2': float(request.GET.get('x2', 0)), 'y2': float(request.GET.get('y2', 0)), } statuses = None if 'status' in request.GET: statuses = [int(status) for status in request.GET.getlist('status')] tags = None if 'tag' in request.GET: tags = [ "_key_"+str(tag) for tag in request.GET.getlist('tag')] elif 'no_tags' in request.GET: tags = [ "_no_key" ] request.session['mapzoomlevel'] = zoomlevel request.session['mapcenterlat'] = ( coords['y1'] + coords['y2'] ) / 2 request.session['mapcenterlng'] = ( coords['x1'] + coords['x2'] ) / 2 if int(request.GET.get('nosearch', 0)) > 0: return HttpResponse('{}') if from_date is not "None": date_array = from_date.split('/') if len(date_array) > 2: if int(date_array[2]+date_array[1]+date_array[0]) > 20090101: request.session['filter_from_date'] = str( date_array[1]+"/"+date_array[0]+"/"+date_array[2] ) else: request.session['filter_from_date'] = "01/01/2009" else: from_date="01/01/2009" if to_date is not "None": date_array = to_date.split('/') if len(date_array) > 2: if int(date_array[2]+date_array[1]+date_array[0]) < int( datetime.now().strftime('%y%m%d')): 
request.session['filter_to_date'] = str( date_array[1]+"/"+date_array[0]+"/"+date_array[2] ) else: request.session['filter_to_date'] = str( datetime.now().strftime('%m/%d/%Y')) else: to_date=str( datetime.now().strftime('%d/%m/%Y')) if search_string: request.session['filter_words'] = search_string else: request.session['filter_words'] = '' returnarray = [] # database = xapian.Database(settings.XAPIAN_MSG_DATABASE_HOME) database = xapian.Database( os.path.join(settings.TRACKER_HOME, 'db/' 'xapian-msg-index/')) enquire = xapian.Enquire(database) # First we'll restrict the search space by geographical data: lng_keywords = [] if coords['x1'] != 0 and coords['x2'] != 0: for keyword in get_latlng_keywords(coords['x1'],coords['x2']): lng_keywords.append("_glngrange_"+keyword) lng_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_LONGITUDE_VALUE, xapian.sortable_serialise(coords['x1']), xapian.sortable_serialise(coords['x2'])) else: lng_query = None lat_keywords = [] if coords['y1']!= 0 and coords['y2'] != 0: for keyword in get_latlng_keywords(coords['y1'],coords['y2']): lat_keywords.append("_glatrange_"+keyword) lat_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_LATITUDE_VALUE, xapian.sortable_serialise(coords['y1']), xapian.sortable_serialise(coords['y2'])) else: lat_query = None if lat_query and lng_query: lat_wordquery = xapian.Query(xapian.Query.OP_OR, lat_keywords) lng_wordquery = xapian.Query(xapian.Query.OP_OR, lng_keywords) place_wordquery = xapian.Query(xapian.Query.OP_AND, lat_wordquery, lng_wordquery) place_query = xapian.Query(xapian.Query.OP_AND, lat_query, lng_query) type_query = xapian.Query(xapian.Query.OP_AND, ["_place"]) place_query = xapian.Query(xapian.Query.OP_AND, type_query, place_query) query = xapian.Query(xapian.Query.OP_AND, place_wordquery, place_query) enquire.set_query(query) matches = enquire.get_mset(offset, search_limit+offset) total_matches=matches.get_matches_estimated() screen_id_counter = 0 # Filter 
places with keywords and date: word_issues = None terms = None if search_string or from_date or to_date or tags: word_issues = {} issue_query = xapian.Query(xapian.Query.OP_AND, ["_issue"]) if search_string: stemmer = xapian.Stem("finnish") tmp_terms = re.split (r'[\n-/:-?]', to_lower_case(search_string)) terms = [] for term in tmp_terms: if len(term) > 0 and not is_stopword(term): stemmed = stemmer(term) # if stemmed not in variables.stoplist: terms.append(stemmer(to_lower_case(term))) keyword_query = xapian.Query(xapian.Query.OP_OR, terms) issue_query = xapian.Query(xapian.Query.OP_AND, issue_query, keyword_query) if from_date or to_date: if from_date == "None": from_date = str("19790626095523.000"); else: from_array = from_date.split('/') from_date = str(from_array[2]+from_array[1]+from_array[0]+"000000.000"); if to_date == "None" : to_date = str( datetime.now().strftime('%Y%m%d%H%M%S.000') ); else: to_array = to_date.split('/') to_date = str(to_array[2]+to_array[1]+to_array[0]+"235959.999"); # to_date_date = datetime(to_date) # to_date= to_date_date.strftime('%Y%m%d%H%M%S.000') modified_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_MODIFIED_FIELD, xapian.sortable_serialise(float(from_date)), xapian.sortable_serialise(float(to_date)) ) created_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_CREATED_FIELD, xapian.sortable_serialise(float(from_date)), xapian.sortable_serialise(float(to_date)) ) date_query = xapian.Query(xapian.Query.OP_OR, created_query, modified_query) issue_query = xapian.Query(xapian.Query.OP_AND, issue_query, date_query) if tags: tag_query = xapian.Query(xapian.Query.OP_AND, tags) issue_query = xapian.Query(xapian.Query.OP_AND, issue_query, tag_query) enquire.set_query(issue_query) word_matches = enquire.get_mset(offset, view_limit) total_matches=word_matches.get_matches_estimated() if total_matches == 0: word_issues = { "-1" : 1} else: for m in word_matches: 
word_issues[str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD))]=1 if lat_query and lng_query: places = {} if word_issues: for m in matches: issue = m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD) if word_issues.has_key(issue): y = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LATITUDE_VALUE))) x = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LONGITUDE_VALUE))) # if y >= coords['y1'] and y <= coords['y2'] and x >= coords['x1'] and x <= coords['x2']: places[ (m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD)) +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_MESSAGE_FIELD)) +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD)) +":"+str(m[xapian.MSET_DOCUMENT].get_data()) ] = [y, x] else: for m in matches: y = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LATITUDE_VALUE))) x = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LONGITUDE_VALUE))) # if y >= coords['y1'] and y <= coords['y2'] and x >= coords['x1'] and x <= coords['x2']: places[ (m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD)) +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_MESSAGE_FIELD)) +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD)) +":"+str(m[xapian.MSET_DOCUMENT].get_data()) ] = [y, x] results = "" # bad kludge to calculate amount of issues and messages in a cluster! 
pmessages = {} pissues = {} issue_data = [] issue_ids = {} shown_issues = {} if lat_query and lng_query: if zoomlevel < 16 and len(places)> 1 and cluster > 0: total_matches=0 clusters = map_utils.cluster_map_markers(places, zoomlevel, cluster_threshold_pixels=cluster*cluster_thr_for_zoomlevel[zoomlevel]/50) issue_ids = [] message_ids = [] clusters.reverse() for cluster in clusters: # Reverse the array so clusters are processed and sent first, followed by the single cases. # print cluster if len(cluster[0]) == 1: m = cluster[0][0] if 1 == 1: id = m.split(":")[0] # print "it's an issue! " + str(id) if shown_issues.has_key(id): continue shown_issues[id] = 1 total_matches += 1 # print "Issue id: " + str(issue_id) if 1 == 1: enquire.set_query(xapian.Query(xapian.Query.OP_AND, ["_issue_"+m.split(":")[0]])) match = enquire.get_mset(0, 1) if len(match) > 0: issue_dict=json.loads(match[0][xapian.MSET_DOCUMENT].get_data()) print m.split(":")[0] print match[0][xapian.MSET_DOCUMENT].get_data() print len(issue_dict) if details == "no": if issue_dict["options"].has_key("comments"): del issue_dict["options"]['comments'] issue_dict.update({'screen_id' :screen_id_counter}) screen_id_counter += 1 issue_data.append(issue_dict) else: print "Api.xapian_search a: looking for _issue_" + m.split(":")[0] + " but could not find it!" 
else: issuecount = 0 messagecount = 0 for iss in cluster[0]: if pissues.has_key(str(iss)): issuecount += 1 issue_ids.append(iss) elif pmessages.has_key(str(iss)): messagecount += 1 message_ids.append(iss) issue_places = [ places[(place_id)] for place_id in cluster[0] ] cluster_places = [ place_id.split(":")[2] for place_id in cluster[0] ] cluster_issues = [ place_id.split(":")[0] for place_id in cluster[0] ] cluster_addresses = [ place_id.split(":")[3] for place_id in cluster[0] ] placelinks ={} for i in range(1,len(cluster_places)): if not cluster_issues[i] in placelinks: placelinks[cluster_issues[i]]=( {"place": cluster_addresses[i], "link": "/r/"+cluster_issues[i]+"/"} ) title = str(len(placelinks)) + _(" messages in this area") sw = { 'lat': str(min([place[0] for place in issue_places])), 'lng': str(min([place[1] for place in issue_places]))} ne = { 'lat': str(max([place[0] for place in issue_places])), 'lng': str(max([place[1] for place in issue_places]))} issue_data.append({ 'title': title, 'options': { 'author' : "TODO: not here yet!", 'date' : "TODO: not here yet!", 'points' : [ {'lng':cluster[1][0], 'lat':cluster[1][1]} ], 'score': 'NONE', 'id': 'NONE', 'status': 'NONE', 'link': 'NONE', 'icon': {'name':'/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'.png', 'activeiconname': '/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'_fully_red.png', 'partlyactiveiconname': '/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'_partly_red.png', 'w': 41, 'h': 46, 'ax': 10, 'ay': 44, }, 'places':cluster_places, 'issues' : len(placelinks), # 'messages' : message_ids, 'type':'cluster', 'sw': sw, 'ne': ne, 'placelinks' : [{"place": value["place"],"link": value["link"] } for value in placelinks.values()], }, 'point': {'lon': cluster[1][0], 'lat':cluster[1][1]}, 'screen_id' : screen_id_counter}) screen_id_counter+= 1 else: total_matches=len(places) for m in places.keys(): id = m.split(":")[0] 
enquire.set_query(xapian.Query(xapian.Query.OP_AND, ["_issue_"+m.split(":")[0]])) match = enquire.get_mset(0, 1) if len(match) > 0: issue_dict=json.loads(match[0][xapian.MSET_DOCUMENT].get_data()) if details == "no": if issue_dict["options"].has_key("comments"): del issue_dict["options"]['comments'] issue_dict.update({'screen_id' :screen_id_counter}) screen_id_counter += 1 issue_data.append(issue_dict) else: print "Api.xapian_search b: looking for _issue_" + m.split(":")[0] + " but could not find it!" else: issue_data=[] for m in word_matches: itemtype=str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_DATATYPE_FIELD)) itemid=str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD)) issue_dict = json.loads(m[xapian.MSET_DOCUMENT].get_data()) if details == "no": if issue_dict["options"].has_key("comments"): del issue_dict["options"]['comments'] issue_dict.update({'screen_id' :screen_id_counter}) screen_id_counter += 1 issue_data.append(issue_dict) # this rather complicated procedure goes through the text fields # and finds the relevant words there: if terms: for issue in issue_data: newtitle = "" for word in re.split(r'([\n-/:-?])', issue["title"]): stemmed = stemmer(to_lower_case(word)) match1 = 0 for term in terms: if term == stemmed: newtitle += ' <span class="keywordhighlight">' + word + '</span>' match1 = 1 break if match1 == 0: newtitle += word issue["title"] = newtitle relevant_string= "" if issue["options"].has_key("comments"): for msg in issue["options"]["comments"]: content = "" selected_words = {} if msg.has_key("text"): word_array = re.split(r'([ \n-/:-?])', msg["text"]) index = -1 for word in word_array: index += 1 if len(word) > 1: stemmed = stemmer(to_lower_case(word)) for term in terms: if term == stemmed: selected_words[index] = 1 word_array[index] = ' <span class="keywordhighlight">' + word + '</span>' for i in range (1, 12): if index + i < len(word_array): selected_words[index + i] = 1 for i in range (1, 12): if index - i >= 0: 
selected_words[index - i] = 1 break oldindex = 0 # if len(selected_words) == 0: # for i in range (0, 25): # if i < len(word_array): # selected_words[i] = 1 # if not selected_words.has_key(0): #content += " ... " # continue for index in sorted(selected_words.keys()): if index > oldindex + 1: content += " ... " content += word_array[index] oldindex = index if oldindex < len(word_array): content += " ... " if len(content) > 7: relevant_string += content if len(relevant_string) > 0: issue["search_hit_string"] = relevant_string metadata = {'seqnum':seqnum, 'total_matches': total_matches, 'first_shown_match' : str(min(offset+1, len(issue_data))), 'last_shown_match': str(offset+len(issue_data)), 'shown_matches': str(offset)+"-"+str(offset+len(issue_data) )} return HttpResponse(dumps([metadata,issue_data]))