def test_build_from_software_center_agent(self, mock_find_oauth):
    """Build an index from the software-center-agent and check each document.

    This talks to the real https://software-center.ubuntu.com, so it
    needs network access.
    """
    # behave as if no oauth token is stored
    mock_find_oauth.return_value = None
    xapian_db = xapian.inmemory_open()
    apt_cache = apt.Cache()
    # force the codename on the distro object so that we get data back
    current_distro = softwarecenter.distro.get_distro()
    current_distro.get_codename = lambda: "natty"
    updated = update_from_software_center_agent(
        xapian_db, apt_cache, ignore_cache=True)
    # the update must succeed and yield more than one document
    self.assertTrue(updated)
    self.assertTrue(xapian_db.get_doccount() > 1)
    for posting in xapian_db.postlist(""):
        document = xapian_db.get_document(posting.docid)
        ppa = document.get_value(XapianValues.ARCHIVE_PPA)
        ppa_looks_valid = (
            ppa.startswith("commercial-ppa") and ppa.count("/") == 1)
        self.assertTrue(
            ppa_looks_valid,
            "ARCHIVE_PPA value incorrect, got '%s'" % ppa)
        icon = document.get_value(XapianValues.ICON)
        self.assertTrue("-icon-" in icon)
        # a support url is optional, but if present it must be usable
        support_url = document.get_value(XapianValues.SUPPORT_SITE_URL)
        if support_url:
            self.assertTrue(support_url.startswith(("http", "mailto:")))
def add_from_purchased_but_needs_reinstall_data(
        purchased_but_may_need_reinstall_list, db, cache):
    """Index purchased applications that may have to be reinstalled.

    Every item is indexed into a fresh in-memory xapian database which is
    then attached to ``db`` via ``db.add_database()``. An item that fails
    to index is logged and skipped.

    :return: a xapian query for the "AH"-prefixed
             PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME term
    """
    purchased_db = xapian.inmemory_open()
    for purchased_item in purchased_but_may_need_reinstall_list:
        # FIXME: what to do with duplicated entries? we will end
        #        up with two xapian.Document, one for the for-pay
        #        and one for the available one from s-c-agent
        #        (idea: look the item up in db by name/package_name and
        #        skip it when it is already there)
        try:
            index_app_info_from_parser(
                SCAPurchasedApplicationParser(purchased_item),
                purchased_db, cache)
        except Exception as err:
            LOG.exception("error processing: %s " % err)
    # make the in-memory documents visible through the main db
    db.add_database(purchased_db)
    return xapian.Query("AH" + PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME)
def test_update_from_json_string(self):
    """Index the bundled apps.json test file and expect exactly one document."""
    db = xapian.inmemory_open()
    cache = apt.Cache()
    p = os.path.join(DATA_DIR, "app-info-json", "apps.json")
    # read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector
    with open(p) as json_file:
        json_string = json_file.read()
    res = update_from_json_string(db, cache, json_string, origin=p)
    self.assertTrue(res)
    self.assertEqual(db.get_doccount(), 1)
def test_update_from_var_lib_apt_lists(self):
    """Index the app-info test data with LANGUAGE=de and check the German terms."""
    # index with a german locale to exercise i18n
    os.environ["LANGUAGE"] = "de"
    xapian_db = xapian.inmemory_open()
    lists_dir = os.path.join(DATA_DIR, "app-info")
    updated = update_from_var_lib_apt_lists(
        xapian_db, self.cache, listsdir=lists_dir)
    self.assertTrue(updated)
    self.assertEqual(xapian_db.get_doccount(), 1)
    # Name-de must have been picked up exactly once
    german_name_term = "AAFestplattenbelegung analysieren"
    hits = sum(1 for _ in xapian_db.postlist(german_name_term))
    self.assertEqual(hits, 1)
    # a german term from the app-info file shows that the text itself
    # got indexed in german
    found_gettext_translation = False
    for posting in xapian_db.postlist(german_name_term):
        document = xapian_db.get_document(posting.docid)
        if any(item.term == "festplattenbelegung"
               for item in document.termlist()):
            found_gettext_translation = True
    self.assertTrue(found_gettext_translation)
def test_reinstall_purchased_xapian(self, mock_helper, mock_agent):
    """Check that a previously purchased app is found by the reinstall query."""
    first_available_only = [self.available[0]]
    mock_agent.return_value = self._make_fake_scagent(
        first_available_only, self.available_for_me)
    # in memory, because the repository passwords end up in this index
    xapian_db = xapian.inmemory_open()
    pkg_cache = get_test_pkg_info()
    doccount_before = xapian_db.get_doccount()
    update_from_software_center_agent(xapian_db, pkg_cache)
    # the update must have added two documents
    self.assertEqual(xapian_db.get_doccount(), doccount_before + 2)
    # run the "previous purchases" query against the fresh index
    enquire = xapian.Enquire(xapian_db)
    enquire.set_query(get_reinstall_previous_purchases_query())
    matches = enquire.get_mset(0, xapian_db.get_doccount())
    self.assertEqual(len(matches), 1)
    distroseries = platform.dist()[2]
    expected_deb_line = (
        "deb https://username:random3atoken@"
        "private-ppa.launchpad.net/commercial-ppa-uploaders"
        "/photobomb/ubuntu %s main" % distroseries)
    for match in matches:
        document = xapian_db.get_document(match.docid)
        self.assertEqual(
            document.get_value(XapianValues.PKGNAME), "photobomb")
        self.assertEqual(
            document.get_value(XapianValues.ARCHIVE_SIGNING_KEY_ID),
            "1024R/75254D99")
        self.assertEqual(
            document.get_value(XapianValues.ARCHIVE_DEB_LINE),
            expected_deb_line)
def test_build_from_software_center_agent(self, mock_find_oauth):
    """Index the software-center-agent data and validate every document.

    Runs against the real https://software-center.ubuntu.com and
    therefore requires network access.
    """
    # no token is available
    mock_find_oauth.return_value = None
    index = xapian.inmemory_open()
    pkg_cache = apt.Cache()
    # patch the codename on the distro object to make sure data comes back
    distro_obj = softwarecenter.distro.get_distro()
    distro_obj.get_codename = lambda: "natty"
    result = update_from_software_center_agent(
        index, pkg_cache, ignore_cache=True)
    self.assertTrue(result)
    self.assertTrue(index.get_doccount() > 1)
    documents = (index.get_document(entry.docid)
                 for entry in index.postlist(""))
    for document in documents:
        archive_ppa = document.get_value(XapianValues.ARCHIVE_PPA)
        failure_msg = "ARCHIVE_PPA value incorrect, got '%s'" % archive_ppa
        self.assertTrue(
            archive_ppa.startswith("commercial-ppa") and
            archive_ppa.count("/") == 1,
            failure_msg)
        self.assertTrue("-icon-" in document.get_value(XapianValues.ICON))
        # the support url stored in the DB is either empty or a http/mailto
        # style address
        site_url = document.get_value(XapianValues.SUPPORT_SITE_URL)
        if not site_url:
            continue
        self.assertTrue(
            site_url.startswith("http") or site_url.startswith("mailto:"))
def get_test_db_from_app_install_data(datadir):
    """Return an in-memory xapian db built from the app-install data in datadir.

    :raise AssertionError: if update_from_app_install_data() returns False
    """
    test_db = xapian.inmemory_open()
    pkg_cache = get_pkg_info()
    pkg_cache.open()
    if update_from_app_install_data(test_db, pkg_cache, datadir) is False:
        raise AssertionError("Failed to build db from '%s'" % datadir)
    return test_db
def _get_db_from_test_app_install_data(self):
    """Return an in-memory db built from the "desktop" test data (5 documents)."""
    desktop_dir = os.path.join(DATA_DIR, "desktop")
    test_db = xapian.inmemory_open()
    updated = update_from_app_install_data(
        test_db, self.cache, datadir=desktop_dir)
    self.assertTrue(updated)
    self.assertEqual(test_db.get_doccount(), 5)
    return test_db
def test_update_from_appstream_xml(self):
    """Index the app-info appstream data and type-check terms and values."""
    xapian_db = xapian.inmemory_open()
    appstream_dir = os.path.join(DATA_DIR, "app-info")
    updated = update_from_appstream_xml(xapian_db, self.cache, appstream_dir)
    self.assertTrue(updated)
    self.assertEqual(xapian_db.get_doccount(), 1)
    # FIXME: improve tests
    for posting in xapian_db.postlist(""):
        document = xapian_db.get_document(posting.docid)
        for term_item in document.termlist():
            self.assertIsInstance(term_item, xapian.TermListItem)
            self.assertIsInstance(term_item.term, basestring)
        for value_item in document.values():
            self.assertIsInstance(value_item, xapian.ValueItem)
            self.assertIsInstance(value_item.num, long)
            self.assertIsInstance(value_item.value, basestring)
def test_for_purchase_apps_date_published(self, mock_find_oauth):
    """Every document indexed from the agent must carry a DATE_PUBLISHED value."""
    # behave as if no token is stored
    mock_find_oauth.return_value = None
    #os.environ["SOFTWARE_CENTER_DEBUG_HTTP"] = "1"
    #os.environ["SOFTWARE_CENTER_AGENT_HOST"] = "http://sc.staging.ubuntu.com/"
    # staging does not have a valid cert
    os.environ["PISTON_MINI_CLIENT_DISABLE_SSL_VALIDATION"] = "1"
    pkg_cache = get_test_pkg_info()
    xapian_db = xapian.inmemory_open()
    updated = update_from_software_center_agent(
        xapian_db, pkg_cache, ignore_cache=True)
    self.assertTrue(updated)
    for posting in xapian_db.postlist(""):
        published = xapian_db.get_document(posting.docid).get_value(
            XapianValues.DATE_PUBLISHED)
        # neither empty nor missing
        self.assertNotEqual(published, "")
        self.assertNotEqual(published, None)
def test_for_purchase_apps_date_published(self, mock_find_oauth):
    """Check that the agent update stores a publication date on each document."""
    # no oauth token available
    mock_find_oauth.return_value = None
    # os.environ["SOFTWARE_CENTER_DEBUG_HTTP"] = "1"
    # os.environ["SOFTWARE_CENTER_AGENT_HOST"] = "http://sc.staging.ubuntu.com/"
    # staging does not have a valid cert
    os.environ["PISTON_MINI_CLIENT_DISABLE_SSL_VALIDATION"] = "1"
    cache = get_test_pkg_info()
    index = xapian.inmemory_open()
    self.assertTrue(
        update_from_software_center_agent(index, cache, ignore_cache=True))
    documents = (index.get_document(entry.docid)
                 for entry in index.postlist(""))
    for document in documents:
        date_published = document.get_value(XapianValues.DATE_PUBLISHED)
        # a date_published value has to be provided
        for missing_marker in ("", None):
            self.assertNotEqual(date_published, missing_marker)
def add_from_purchased_but_needs_reinstall_data(
        purchased_but_may_need_reinstall_list, db, cache):
    """Index purchased applications that may have to be reinstalled.

    Each item gets the PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME
    channel, an empty category string and a "(already purchased)" name
    (the item object itself is modified), and is then indexed into a
    fresh in-memory xapian database that is attached to ``db``. An item
    that fails is logged and skipped.

    :return: a xapian query to get all the apps that need reinstall
    """
    purchased_db = xapian.inmemory_open()
    for purchased in purchased_but_may_need_reinstall_list:
        # FIXME: what to do with duplicated entries? we will end
        #        up with two xapian.Document, one for the for-pay
        #        and one for the available one from s-c-agent
        #        (idea: look the item up in db by name/package_name and
        #        skip it when it is already there)
        try:
            # fake channel so that the query below can find the item
            purchased.channel = PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME
            # the parser wants a category, give it an empty one
            purchased.categories = ""
            # WARNING: the name has to differ from the name in the DB,
            #          otherwise the DB gets confused about
            #          (appname, pkgname) duplication
            name_template = utf8(_("%s (already purchased)"))
            purchased.name = name_template % utf8(purchased.name)
            index_app_info_from_parser(
                SoftwareCenterAgentParser(purchased), purchased_db, cache)
        except Exception as err:
            LOG.exception("error processing: %s " % err)
    # attach the in-memory db to the main db
    db.add_database(purchased_db)
    return xapian.Query("AH" + PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME)
def make_catalog(uri, fields):
    """Create a new, empty catalog at the given uri.

    With ``uri=None`` the catalog lives in memory and is created with
    ``asynchronous_mode=False``; otherwise a writable database is created
    (DB_CREATE) at the absolute local path of ``uri``.

    ``fields`` must be a dict describing the fields of the database,
    for example:

      fields = {'id': Integer(is_key_field=True, is_stored=True,
                              is_indexed=True), ...}
    """
    if uri is not None:
        # local filesystem
        absolute_path = lfs.get_absolute_path(uri)
        return Catalog(WritableDatabase(absolute_path, DB_CREATE), fields)
    # in memory
    return Catalog(inmemory_open(), fields, asynchronous_mode=False)
def test_preserve_enquire_sorter():
    """Test preservation of sorter set on enquire.

    For each sort-by-key setter the sorter is created inside a helper and
    the local reference is dropped before the enquire is used.

    """
    database = xapian.inmemory_open()
    document = xapian.Document()
    document.add_term("foo")
    document.add_value(1, "1")
    for _ in range(2):
        database.add_document(document)

    def build_enquire(db, setter_name):
        enquire = xapian.Enquire(db)
        key_sorter = xapian.MultiValueSorter()
        getattr(enquire, setter_name)(key_sorter, True)
        # drop our reference; the enquire has to keep the sorter usable
        del key_sorter
        return enquire

    setter_names = (
        "set_sort_by_key",
        "set_sort_by_key_then_relevance",
        "set_sort_by_relevance_then_key",
    )
    for setter_name in setter_names:
        enquire = build_enquire(database, setter_name)
        enquire.set_query(xapian.Query("foo"))
        enquire.get_mset(0, 10)
def test_preserve_enquire_sorter():
    """Test preservation of sorter set on enquire.

    The sorter is handed to the enquire and then deleted locally, after
    which a query is run through that enquire.

    """
    db = xapian.inmemory_open()
    foo_doc = xapian.Document()
    foo_doc.add_term('foo')
    foo_doc.add_value(1, '1')
    db.add_document(foo_doc)
    db.add_document(foo_doc)

    def make_enquire(database, install_sorter):
        enquire = xapian.Enquire(database)
        sorter = xapian.MultiValueSorter()
        install_sorter(enquire, sorter)
        del sorter
        return enquire

    def run_foo_query(enquire):
        enquire.set_query(xapian.Query('foo'))
        enquire.get_mset(0, 10)

    run_foo_query(make_enquire(
        db, lambda enq, srt: enq.set_sort_by_key(srt, True)))
    run_foo_query(make_enquire(
        db, lambda enq, srt: enq.set_sort_by_key_then_relevance(srt, True)))
    run_foo_query(make_enquire(
        db, lambda enq, srt: enq.set_sort_by_relevance_then_key(srt, True)))
def add_from_purchased_but_needs_reinstall_data(purchased_but_may_need_reinstall_list, db, cache):
    """Add applications that were purchased but may need a reinstall.

    The items are indexed into a new in-memory database, using the
    special PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME channel, and
    that database is then added to ``db``. Note that channel, categories
    and name are overwritten on the passed-in items.

    :return: a xapian query to get all the apps that need reinstall
    """
    magic_channel = PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME
    in_memory_db = xapian.inmemory_open()
    for entry in purchased_but_may_need_reinstall_list:
        # FIXME: what to do with duplicated entries? we will end
        #        up with two xapian.Document, one for the for-pay
        #        and one for the available one from s-c-agent
        #        (idea: look the item up in db by name/package_name and
        #        skip it when it is already there)
        try:
            # the channel is faked, the category is empty to keep the
            # parser happy
            entry.channel = magic_channel
            entry.categories = ""
            # WARNING: entry.name must not be identical to the name in
            #          the DB, otherwise the DB gets confused about
            #          (appname, pkgname) duplication
            entry.name = utf8(_("%s (already purchased)")) % utf8(entry.name)
            agent_parser = SoftwareCenterAgentParser(entry)
            index_app_info_from_parser(agent_parser, in_memory_db, cache)
        except Exception as exc:
            LOG.exception("error processing: %s " % exc)
    db.add_database(in_memory_db)
    reinstall_query = xapian.Query("AH" + magic_channel)
    return reinstall_query
def _database(self, writable=False):
    """
    Private method that hands out the xapian database to work on.

    When settings.HAYSTACK_XAPIAN_PATH equals MEMORY_DB_NAME, a single
    in-memory database stored on SearchBackend is created on first use
    and returned, regardless of ``writable``.

    Optional arguments:
        ``writable`` -- Open the database in read/write mode (default=False)

    Returns an instance of a xapian.Database or xapian.WritableDatabase.
    Raises InvalidIndexError if the index cannot be opened for reading.
    """
    index_path = settings.HAYSTACK_XAPIAN_PATH
    if index_path == MEMORY_DB_NAME:
        if not SearchBackend.inmemory_db:
            SearchBackend.inmemory_db = xapian.inmemory_open()
        return SearchBackend.inmemory_db
    if writable:
        return xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN)
    try:
        return xapian.Database(index_path)
    except xapian.DatabaseOpeningError:
        raise InvalidIndexError(u"Unable to open index at %s" % index_path)
def _database(self, writable=False):
    """
    Private method that hands out the xapian database to work on.

    When self.path equals MEMORY_DB_NAME, an in-memory database kept in
    self.inmemory_db is created on first use and returned, regardless of
    ``writable``.

    Optional arguments:
        ``writable`` -- Open the database in read/write mode (default=False)

    Returns an instance of a xapian.Database or xapian.WritableDatabase.
    Raises InvalidIndexError if the index cannot be opened for reading.
    """
    index_path = self.path
    if index_path == MEMORY_DB_NAME:
        if not self.inmemory_db:
            self.inmemory_db = xapian.inmemory_open()
        return self.inmemory_db
    if writable:
        return xapian.WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN)
    try:
        return xapian.Database(index_path)
    except xapian.DatabaseOpeningError:
        raise InvalidIndexError(u'Unable to open index at %s' % index_path)
def _database(self, writable=False):
    """
    Private method returning the xapian database for this backend.

    The in-memory case (settings.HAYSTACK_XAPIAN_PATH == MEMORY_DB_NAME)
    shares one database through SearchBackend.inmemory_db; it is opened
    lazily and returned whatever ``writable`` says.

    Optional arguments:
        ``writable`` -- Open the database in read/write mode (default=False)

    Returns an instance of a xapian.Database or xapian.WritableDatabase.
    Raises InvalidIndexError if the read-only open fails.
    """
    in_memory = settings.HAYSTACK_XAPIAN_PATH == MEMORY_DB_NAME
    if in_memory and not SearchBackend.inmemory_db:
        SearchBackend.inmemory_db = xapian.inmemory_open()
    if in_memory:
        return SearchBackend.inmemory_db

    if not writable:
        try:
            database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH)
        except xapian.DatabaseOpeningError:
            raise InvalidIndexError(
                u'Unable to open index at %s' % settings.HAYSTACK_XAPIAN_PATH)
    else:
        database = xapian.WritableDatabase(
            settings.HAYSTACK_XAPIAN_PATH, xapian.DB_CREATE_OR_OPEN)
    return database
def _database(self, writable=False):
    """
    Private method that returns a xapian.Database for use.

    If settings.HAYSTACK_XAPIAN_PATH is a string, a local database at
    self.path is used (or the shared in-memory database when self.path
    equals MEMORY_DB_NAME). Otherwise the setting is unpacked as a
    ``(host, port)`` pair and a remote database is opened.

    Optional arguments:
        ``writable`` -- Open the database in read/write mode (default=False)

    Returns an instance of a xapian.Database or xapian.WritableDatabase.
    Raises InvalidIndexError if the setting is not a usable
    ``(host, port)`` pair or the index cannot be opened for reading.
    """
    if isinstance(settings.HAYSTACK_XAPIAN_PATH, basestring):
        # NOTE(review): the type check looks at the setting while the
        # database is opened from self.path; presumably both hold the same
        # value -- confirm against the constructor.
        if self.path == MEMORY_DB_NAME:
            if not self.inmemory_db:
                self.inmemory_db = xapian.inmemory_open()
            return self.inmemory_db
        if writable:
            database = xapian.WritableDatabase(self.path, xapian.DB_CREATE_OR_OPEN)
        else:
            try:
                database = xapian.Database(self.path)
            except xapian.DatabaseOpeningError:
                raise InvalidIndexError(u'Unable to open index at %s' % (self.path,))
    else:
        remote_path = settings.HAYSTACK_XAPIAN_PATH
        try:
            host, port = remote_path
        except (TypeError, ValueError):
            # TypeError: the setting is not iterable at all (e.g. None);
            # ValueError: it does not have exactly two elements.
            # The value is wrapped in a 1-tuple because "%s" % some_tuple
            # would itself fail with a TypeError.
            raise InvalidIndexError(u'Unable to open index at %s' % (remote_path,))
        if writable:
            database = xapian.remote_open_writable(host, port)
            database = xapian.WritableDatabase(database)
        else:
            try:
                database = xapian.remote_open(host, port)
                database = xapian.Database(database)
            except xapian.DatabaseOpeningError:
                # remote_path is a 2-tuple here, so it must be wrapped to
                # be formatted as a single value
                raise InvalidIndexError(u'Unable to open index at %s' % (remote_path,))
    return database
def setup_database():
    """Build and return an inmemory database holding 5 documents.

    The last three documents are produced by extending the second
    Document object and adding it again, so each one contains everything
    the previous one had.

    """
    database = xapian.inmemory_open()

    cold_doc = xapian.Document()
    cold_doc.set_data("is it cold?")
    cold_doc.add_term("is")
    for position, word in enumerate(("it", "cold"), 1):
        cold_doc.add_posting(word, position)
    database.add_document(cold_doc)

    warm_doc = xapian.Document()
    warm_doc.set_data("was it warm?")
    for position, word in enumerate(("was", "it", "warm"), 1):
        warm_doc.add_posting(word, position)
    database.add_document(warm_doc)

    warm_doc.set_data("was it warm? two")
    warm_doc.add_term("two", 2)
    warm_doc.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm_doc)

    warm_doc.set_data("was it warm? three")
    warm_doc.add_term("three", 3)
    warm_doc.add_value(0, xapian.sortable_serialise(1.5))
    database.add_document(warm_doc)

    warm_doc.set_data("was it warm? four it")
    warm_doc.add_term("four", 4)
    warm_doc.add_term("it", 6)
    warm_doc.add_posting("it", 7)
    for slot, text in ((5, 'five'), (9, 'nine')):
        warm_doc.add_value(slot, text)
    warm_doc.add_value(0, xapian.sortable_serialise(2))
    database.add_document(warm_doc)

    expect(database.get_doccount(), 5)
    return database
def setup_database():
    """Create an inmemory database, fill it with 5 documents and return it.

    Documents three to five reuse the Document object of document two,
    which is modified further before every add.

    """
    memdb = xapian.inmemory_open()
    store = memdb.add_document

    # document 1
    first = xapian.Document()
    first.set_data("is it cold?")
    first.add_term("is")
    first.add_posting("it", 1)
    first.add_posting("cold", 2)
    store(first)

    # document 2
    growing = xapian.Document()
    growing.set_data("was it warm?")
    growing.add_posting("was", 1)
    growing.add_posting("it", 2)
    growing.add_posting("warm", 3)
    store(growing)

    # document 3: document 2 plus a term and a value
    growing.set_data("was it warm? two")
    growing.add_term("two", 2)
    growing.add_value(0, xapian.sortable_serialise(2))
    store(growing)

    # document 4: one more term, value slot 0 replaced
    growing.set_data("was it warm? three")
    growing.add_term("three", 3)
    growing.add_value(0, xapian.sortable_serialise(1.5))
    store(growing)

    # document 5: more terms/postings and values in slots 5, 9 and 0
    growing.set_data("was it warm? four it")
    growing.add_term("four", 4)
    growing.add_term("it", 6)
    growing.add_posting("it", 7)
    growing.add_value(5, "five")
    growing.add_value(9, "nine")
    growing.add_value(0, xapian.sortable_serialise(2))
    store(growing)

    expect(memdb.get_doccount(), 5)
    return memdb
def test_update_from_var_lib_apt_lists(self):
    """Build the index from the app-info lists dir under a German locale."""
    # german locale, so that the i18n code paths get tested
    os.environ["LANGUAGE"] = "de"
    index = xapian.inmemory_open()
    result = update_from_var_lib_apt_lists(
        index, self.cache, listsdir=os.path.join(DATA_DIR, "app-info"))
    self.assertTrue(result)
    self.assertEqual(index.get_doccount(), 1)

    name_term = "AAFestplattenbelegung analysieren"
    # exactly one posting means that Name-de was picked up
    posting_count = 0
    for _posting in index.postlist(name_term):
        posting_count += 1
    self.assertEqual(posting_count, 1)

    # "festplattenbelegung" is a german term from the app-info file; its
    # presence shows that the document got indexed in german
    found_gettext_translation = any(
        term_item.term == "festplattenbelegung"
        for posting in index.postlist(name_term)
        for term_item in index.get_document(posting.docid).termlist())
    self.assertTrue(found_gettext_translation)
def add_from_purchased_but_needs_reinstall_data(
        purchased_but_may_need_reinstall_list, db, cache):
    """Add applications that were purchased but may need a reinstall.

    The items are parsed with SCAPurchasedApplicationParser and indexed
    into a new in-memory database which is then added to ``db``. Errors
    for a single item are logged and do not stop the loop.

    :return: a xapian query to get all the apps that need reinstall
    """
    in_memory_db = xapian.inmemory_open()
    for entry in purchased_but_may_need_reinstall_list:
        # FIXME: what to do with duplicated entries? we will end
        #        up with two xapian.Document, one for the for-pay
        #        and one for the available one from s-c-agent
        #        (idea: look the item up in db by name/package_name and
        #        skip it when it is already there)
        try:
            purchased_parser = SCAPurchasedApplicationParser(entry)
            index_app_info_from_parser(purchased_parser, in_memory_db, cache)
        except Exception as exc:
            LOG.exception("error processing: %s " % exc)
    db.add_database(in_memory_db)
    reinstall_term = "AH" + PURCHASED_NEEDS_REINSTALL_MAGIC_CHANNEL_NAME
    return xapian.Query(reinstall_term)
def _get_db_from_test_app_install_data(self):
    """Build an in-memory db from DATA_DIR/desktop and check it has 5 documents."""
    index = xapian.inmemory_open()
    self.assertTrue(
        update_from_app_install_data(
            index, self.cache, datadir=os.path.join(DATA_DIR, "desktop")))
    self.assertEqual(index.get_doccount(), 5)
    return index
def test_replication_path_only(self):
    """replicated=True with an already opened database must raise TypeError."""
    opened_db = xapian.inmemory_open()
    assert_raises(TypeError, xodb.Database, opened_db, replicated=True)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Index the same stemmed sentence twice in an in-memory database and print
# every term of the database together with its term frequency.
import xapian

stem = xapian.Stem('english')
db = xapian.inmemory_open()


def _make_document():
    """Return a Document with the stemmed postings of "is there anybody out there"."""
    document = xapian.Document()
    words = ("is", "there", "anybody", "out", "there")
    for position, word in enumerate(words, 1):
        document.add_posting(stem(word), position)
    return document


doc = _make_document()
db.add_document(doc)

doc1 = _make_document()
db.add_document(doc1)

db.commit()

for term in db.allterms():
    # one pre-formatted argument prints "<term> <termfreq>" exactly like the
    # Python 2 statement "print a, b" did
    print("%s %s" % (term.term, term.termfreq))

# Start of the expected output:
#   anybodi 2
#   is 2
def test_all():
    """Smoke test for the xapian Python bindings.

    Walks through version reporting, Document data/terms, an in-memory
    database, Query construction, Enquire/MSet/ESet, the various
    iterators, match/expand deciders, QueryParser options, unicode
    input, stoppers, TermGenerator, value range processors, metadata and
    OP_SCALE_WEIGHT. Results are checked with the expect*() helpers;
    TestFail is raised directly in a few places.

    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    def access_cvar():
        return xapian.cvar

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(
        query2,
        "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ,
                            1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith('a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ,
                            1.0, None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(
        qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT test:(pos=1))")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(
        qp.parse_query("foo o", qp.FLAG_PARTIAL),
        "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))"
    )

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(
        qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))")

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    # use the next() builtin, like the skip_to test above, instead of
    # calling the iterator's .next() method directly
    term = next(doc.termlist()).term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()],
           [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    # (the results are unused on purpose: only the calls themselves matter)
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, '', 'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(
        xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5),
        "5 * foo")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Index one stemmed sentence in an in-memory database and print how the
# QueryParser expands the wildcard query 'h*' against it.
import xapian

stem = xapian.Stem('english')
db = xapian.inmemory_open()

doc = xapian.Document()
doc.add_posting(stem("is"), 1)
doc.add_posting(stem("there"), 2)
doc.add_posting(stem("anybody"), 3)
doc.add_posting(stem("out"), 4)
doc.add_posting(stem("there"), 5)
db.add_document(doc)

qp = xapian.QueryParser()
# the parser needs the database to expand the wildcard
qp.set_database(db)
new_query = qp.parse_query('h*', qp.FLAG_WILDCARD)
# parenthesised single argument: prints the same on Python 2 as the old
# print statement and is also valid Python 3 syntax
print([str(new_query)])
def test_all():
    """Run the xapian binding smoke test against an in-memory database.

    The test builds a small in-memory database and then checks, in order:
    version reporting, stemming, document data handling, query
    construction, the iterator wrappers, match/expand deciders, the query
    parser (errors, pure NOT, partial matching, unicode input, stoppers),
    the term generator, value range processors, a few regression cases,
    and database metadata.

    Later steps reuse objects created by earlier ones (``db``, ``enq``,
    ``qp``, ``stem``), so the statement order matters.

    Mismatches are reported through ``expect``, ``expect_query`` and
    ``expect_exception`` or by raising ``TestFail``; those helpers are
    defined elsewhere in the file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    stem = xapian.Stem("english")
    expect(stem.get_description(), "Xapian::Stem(english)",
           "Unexpected stem.get_description()")

    # Document data must round-trip even when it contains a zero byte.
    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    # One plain term plus five postings; "there" is posted at two positions.
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Query construction from lists, tuples and mixed Query/string operands.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2,
                 "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    # `enq` is reused further down by the regression checks.
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    # Every term yielded after the skip must sort at or after 'n'.
    while True:
        try:
            x = term.next()
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # A second document is added; it is the only one with value slot 0 set.
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 equals "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject every candidate expansion term that starts with 'a'.
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    # Any surviving 'a...' term means the decider was ignored.
    if filter(lambda t: t.startswith('a'), eset_terms):
        raise TestFail("ExpandDecider was not used")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test:(pos=1))")

    # Check QueryParser partial option
    # This parser instance is reused by the unicode and stopper checks below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(u"NOT t\xe9st",
                                qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))")

    # Unicode input is expected to come back as UTF-8 encoded byte strings.
    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat exactly the term "b" as a stop word.
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    # Each entry is (term, wdf, positions).
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()],
           [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    # The call yields the slot and both range ends, which are then
    # decoded with sortable_unserialise.
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, '', 'Foo')
    # The get_metadata check is repeated after the rejected set_metadata call.
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT,
                              xapian.Query('foo'), 5),
                 "5 * foo")
def test_all():
    """Run the bytes-based xapian binding smoke test on an in-memory database.

    The test passes ``bytes`` for terms, data and query strings and checks,
    in order: version reporting, absence of the SWIG ``cvar`` attribute,
    stemming, document data handling, query construction, database factory
    wrappers, the iterator wrappers, match/expand deciders, the query
    parser (errors, pure NOT, partial matching, non-ASCII input, stoppers),
    the term generator, value range processors, a field processor, a few
    regression cases, and database metadata.

    Later steps reuse objects created by earlier ones (``db``, ``enq``,
    ``qp``, ``stem``), so the statement order matters.

    Mismatches are reported through ``expect``, ``expect_query`` and
    ``expect_exception`` or by raising ``TestFail``; those helpers are
    defined elsewhere in the file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Expected to raise AttributeError before the print is reached.
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    # NOTE(review): this compares the exact AttributeError text; newer
    # Python 3 releases reportedly word the message differently -- confirm
    # how expect_exception matches messages.
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Document data must round-trip even when it contains a zero byte.
    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    # One plain term plus five postings; "there" is posted at two positions.
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Query construction; str terms are encoded to UTF-8 bytes first.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2,
                 "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR,
                              [s.encode('utf-8') for s in subqs]),
                 "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):
    # NOTE(review): the four AttributeError checks below also compare exact
    # message text -- same caveat as the cvar check above.
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'open_stub'",
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     "'module' object has no attribute 'chert_open'",
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    # A message of None is passed where only the exception type is pinned.
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.DatabaseOpeningError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    # `enq` is reused further down by the regression checks.
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    # Every term yielded after the skip must sort at or after b'n'.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # A second document is added; it is the only one with value slot 0 set.
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 equals b"yes".
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject every candidate expansion term that starts with b'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    # Any surviving b'a...' term means the decider was ignored.
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    # The last item's weight is compared against the 1.9 threshold with
    # and without the min_wt argument.
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ,
                            1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test",
                                qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This parser instance is reused by the non-ASCII and stopper checks below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st",
                                qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    # Bytes that are not valid UTF-8 are expected to round-trip unchanged.
    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat exactly the term b"b" as a stop word.
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    # Each entry is (term, wdf, positions).
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()],
           [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    # NOTE(review): `a` is passed as str while `b` is encoded to bytes;
    # presumably the binding accepts both -- confirm this is intended.
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")

    class testfieldprocessor(xapian.FieldProcessor):
        # Map any field value to the query "spam"; the value 'spam' raises.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, b'', b'Foo')
    # The get_metadata check is repeated after the rejected set_metadata call.
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT,
                              xapian.Query(b'foo'), 5),
                 "5 * foo")
if __name__ == "__main__":
    # Script entry point: set up a French stemmer and open (or create) the
    # on-disk database "test.ti". The indexing loop that used these objects
    # is currently disabled and kept below as comments.
    import sys
    from time import time
    import linecache
    import glob
    import traceback
    import xapian
    import re

    stem = xapian.Stem("french")

    # Only the on-disk database is opened. An in-memory database used to be
    # created first and was overwritten on the very next line (a dead
    # store); it is kept here as a commented alternative.
    # ti = xapian.inmemory_open()
    ti = xapian.WritableDatabase("test.ti", xapian.DB_CREATE_OR_OPEN)
    # ti = xapian.quartz_open('test.idx')

    # Disabled indexing loop (Python 2 syntax), one document per text line:
    # start = time()
    # lines = 0
    # for f in glob.glob('*.txt'):
    #     print f,
    #     for linenumber, line in enumerate(file(f,'rb')):
    #         lines += 1
    #         line = line.strip()
    #         doc = xapian.Document()
    #         doc.set_data('%12s:%04i'%(f,linenumber))
    #         for word_number, word in enumerate(re.findall(r'\w+',line.lower())):
    #             doc.add_posting(word,word_number)
    #         ti.add_document(doc)
    #         if linenumber % 100 == 0:
    #             sys.stdout.write('.')
    #     print 'OK'
    # print 'Indexing time : %.2fs for %i lines'%(time()-start,lines)
def test_all():
    """Run the xapian binding smoke test against an in-memory database.

    The test builds a small in-memory database and then checks, in order:
    version reporting, absence of the SWIG ``cvar`` attribute, stemming,
    document data handling, query construction, database factory wrappers,
    the iterator wrappers, match/expand deciders, the query parser (errors,
    pure NOT, partial matching, unicode input, stoppers), the term
    generator, value range processors, a few regression cases, and
    database metadata.

    Later steps reuse objects created by earlier ones (``db``, ``enq``,
    ``qp``, ``stem``), so the statement order matters.

    Mismatches are reported through ``expect``, ``expect_query`` and
    ``expect_exception`` or by raising ``TestFail``; those helpers are
    defined elsewhere in the file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(), xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    def access_cvar():
        return xapian.cvar

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'", access_cvar)

    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Document data must round-trip even when it contains a zero byte.
    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    # One plain term plus five postings; "there" is posted at two positions.
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Query construction from lists, tuples and mixed Query/string operands.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms), "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, "1", "4"), "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:
    # (a message of None is passed where only the exception type is pinned)
    expect_exception(xapian.DatabaseOpeningError, None, xapian.open_stub, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseOpeningError, None, xapian.open_stub, "nosuchdir/nosuchdb", xapian.DB_OPEN)

    expect_exception(xapian.DatabaseOpeningError, None, xapian.brass_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None, xapian.brass_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)

    expect_exception(xapian.DatabaseOpeningError, None, xapian.chert_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None, xapian.chert_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)

    expect_exception(xapian.NetworkError, None, xapian.remote_open, "/bin/false", "")
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, "/bin/false", "")

    expect_exception(xapian.NetworkError, None, xapian.remote_open, "127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, "127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    # `enq` is reused further down by the regression checks.
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to("n")
    # Every term yielded after the skip must sort at or after "n".
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < "n":
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # A second document is added; it is the only one with value slot 0 set.
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 equals "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject every candidate expansion term that starts with "a".
        def __call__(self, term):
            return not term.startswith("a")

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    # Any surviving "a..." term means the decider was ignored.
    if [t for t in eset_terms if t.startswith("a")]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    # The last item's weight is compared against the 1.9 threshold with
    # and without the min_wt argument.
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This parser instance is reused by the unicode and stopper checks below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem("en"))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u"foo", u"bar")), "(foo OR bar)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", u"bar\xa3")), "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", "bar\xc2\xa3")), "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, u"foo", u"bar"), "(foo OR bar)")

    expect_query(
        qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT Zt\xc3\xa9st@1)"
    )

    # Unicode input is expected to come back as UTF-8 encoded byte strings.
    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode("utf-8"))
    # Use the next() builtin, as the skip_to check above already does,
    # instead of the Python 2-only iterator method .next().
    term = next(doc.termlist()).term
    expect(term, u"out\xe9r".encode("utf-8"))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add("a")
    expect(stop("a"), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat exactly the term "b" as a stop word.
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop("b"), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text("foo bar baz foo")
    # Each entry is (term, wdf, positions).
    expect(
        [(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()],
        [("bar", 1, [2]), ("baz", 1, [3]), ("foo", 2, [1, 4])],
    )

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query("12/03/99..12/04/01")
    expect(str(query), "Query(0 * VALUE_RANGE 1 19991203 20011204)")

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, "$", True)
    a = "$10"
    b = "20"
    # The call yields the slot and both range ends, which are then
    # decoded with sortable_unserialise.
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata("Foo"), "")
    db.set_metadata("Foo", "Foo")
    expect(db.get_metadata("Foo"), "Foo")
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, "")
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, "", "Foo")
    # The get_metadata check is repeated after the rejected set_metadata call.
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, "")

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query("foo"), 5), "5 * foo")
def inmemory():
    """Create an xodb database on top of a fresh in-memory xapian backend.

    Spelling correction is switched off for databases opened this way.
    """
    # NOTE(review): `open` is called with keyword arguments the builtin does
    # not accept, so it presumably resolves to this module's own `open` --
    # confirm.
    backend = xapian.inmemory_open()
    return open(backend, spelling=False, inmem=True)
def test_all():
    """Smoke-test the xapian Python bindings against an in-memory database.

    The checks run strictly in sequence and share state: the stemmer
    (`stem`), the in-memory database (`db`), its `Enquire` object (`enq`)
    and the query parser (`qp`) are created once and reused by later
    sections, so expected counts and docids depend on statement order.

    Checks are made through the `expect`, `expect_query` and
    `expect_exception` helpers (defined elsewhere); a few checks raise
    `TestFail` directly.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        # Expected to raise AttributeError on the attribute access; the
        # print only runs if `xapian.cvar` unexpectedly exists.
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Document data must round-trip unchanged even with an embedded zero byte.
    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")

    # Build the first document: one plain term plus five postings, with
    # "there" posted twice (positions 2 and 5).
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    # `db` is reused by most of the sections below.
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")

    # Query construction from lists/tuples of terms and sub-queries.
    terms = ["smoke", "test", "terms"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
        "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(
        query2,
        "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]),
        "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'chert_open'") != -1,
        lambda: xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'chert_open'") != -1,
        lambda: xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))
    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_OPEN | xapian.DB_BACKEND_STUB))
    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(
        xapian.DatabaseCreateError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_GLASS))
    expect_exception(
        xapian.DatabaseOpeningError,
        None,
        lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(
        xapian.DatabaseCreateError,
        None,
        lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:
    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    # `enq` is reused by the PHP regression checks near the end.
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    # NOTE(review): this checks len(mset) but reuses the mset.size() message.
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    # ("there" was posted at positions 2 and 5 above, hence two entries.)
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    # Every term yielded after the skip must sort at or after b'n'.
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" %
                           x.term.decode('utf-8'))

    # Feature test for Document.values
    # (no values have been added to this document, so none are expected)
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    # A second document is added here; it is the only one with value slot 0
    # set to b"yes", and the check below expects it to have docid 2.
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value in slot 0 is b"yes".
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject any expand term starting with b'a'.
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    # The last eset entry's weight is compared against 1.9 with and without
    # the min_wt argument.
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ,
                            1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>",
                     qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(
        qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    # This parser (with database, AND default op and stemmer) is reused by
    # the unicode and stopper sections below.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR o) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR outside) OR Zoutsid@2))")

    # Test supplying unicode strings
    # NOTE(review): the arguments here are bytes literals, not str -- the
    # heading may be a leftover; confirm.
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(
        qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    # Non-ASCII bytes in document data and terms must be returned unchanged.
    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    # Once b'a' is registered as a stop word it drops out of the parsed query.
    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Treat exactly b"b" as a stop word.
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    # Each entry is (term, wdf, positions).
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()],
           [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    # NOTE(review): `a` is passed as str while `b` is encoded to bytes --
    # confirm the mixed argument types are intentional.
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        # Maps any field value to the query "spam", except the value 'spam'
        # itself, which raises.
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    # Only the call itself is exercised; `oquery` is not used again in the
    # code shown here.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.set_metadata, b'', b'Foo')
    # The empty-key get_metadata check is repeated after the rejected
    # set_metadata call.
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid",
                     db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(
        xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
        "5 * foo")