class pylucene_test(unittest.TestCase):
    """Search test for the PyLucene wrapper using a large batch of documents."""

    def setUp(self):
        """Create the PyLucene instance used by each test.

        ``startJVM=True`` is passed only while the module-level
        ``FLAG_TO_CONTROL_JVM`` is still 0; ``set_FLAG_TO_CONTROL_JVM()`` is
        then called so that later fixtures take the ``startJVM=False`` branch
        (presumably the helper flips the flag -- confirm against its
        definition).
        """
        if FLAG_TO_CONTROL_JVM == 0:
            self.pylucene = PyLucene(startJVM=True)
            set_FLAG_TO_CONTROL_JVM()
        else:
            self.pylucene = PyLucene(startJVM=False)

    def tearDown(self):
        """Remove the index directory so every test starts from a clean store."""
        shutil.rmtree(self.pylucene.STORE_DIR)

    # testing functions

    def test_pylucene_01(self):
        """
        test in search in general_info field with a big set of docs
        """
        for i in range(0, 200):
            final_date = "%d/%d/1900" % (i, i)
            doc_dict = {
                "doc_id": i,
                "general_info": "title subtitle",
                "subject": "subject",
                "source": "source",
                "initial_date": "1800",
                "final_date": final_date,
                "content": "content1",
            }
            self.pylucene.index_doc(doc_dict)

        docs = self.pylucene.search_docs(value="title", field="general_info")

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(200, len(docs))

        for doc in docs:
            self.assertEqual(doc.get("general_info"), "title subtitle")
            self.assertEqual(doc.get("subject"), "subject")
            self.assertEqual(doc.get("source"), "source")
            self.assertEqual(doc.get("initial_date"), "1800")
            # final_date is not checked: the result order is not guaranteed,
            # so a hit cannot be paired with the index it was created from.
            # "content" is indexed above but is expected to be absent from
            # the returned documents.
            self.assertIsNone(doc.get("content"))
class pylucene_test(unittest.TestCase):
    """Search tests for the PyLucene wrapper focused on the final_date field."""

    def setUp(self):
        """Create the PyLucene instance used by each test.

        ``startJVM=True`` is passed only while the module-level
        ``FLAG_TO_CONTROL_JVM`` is still 0; ``set_FLAG_TO_CONTROL_JVM()`` is
        then called so that later fixtures take the ``startJVM=False`` branch
        (presumably the helper flips the flag -- confirm against its
        definition).
        """
        if FLAG_TO_CONTROL_JVM == 0:
            self.pylucene = PyLucene(startJVM=True)
            set_FLAG_TO_CONTROL_JVM()
        else:
            self.pylucene = PyLucene(startJVM=False)

    def tearDown(self):
        """Remove the index directory so every test starts from a clean store."""
        shutil.rmtree(self.pylucene.STORE_DIR)

    def _index_sample_docs(self, count=3):
        """Index ``count`` documents whose subject and final_date embed the index."""
        for i in range(0, count):
            doc_dict = {
                "doc_id": str(i),
                "general_info": "title subtitle",
                "subject": "subject%d" % (i),
                "source": "source",
                "initial_date": "1800",
                "final_date": "%d/%d/1900" % (i, i),
                "content": "content1",
            }
            self.pylucene.index_doc(doc_dict)

    # testing functions

    # NOTE: tests 01-05 below are disabled and kept for reference only.
    #
    # def test_pylucene_01(self):
    #     """
    #     test in search in general_info field
    #     """
    #
    #     for i in range(0, 3):
    #         doc_dict = {
    #             "doc_id" : str(i),
    #             "general_info" : "title subtitle",
    #             "subject" : "subject",
    #             "source" : "source",
    #             "initial_date" : "1800",
    #             "final_date" : "%d/%d/1900" % (i, i),
    #             "content" : "content1"
    #         }
    #
    #         self.pylucene.index_doc(doc_dict)
    #
    #     docs = self.pylucene.search_docs(value="title", field="general_info")
    #
    #     self.assertEqual(3, len(docs))
    #
    #
    # def test_pylucene_02(self):
    #     """
    #     test match with field tokenized and in general search field
    #     """
    #
    #     for i in range(0, 3):
    #         doc_dict = {
    #             "doc_id" : str(i),
    #             "general_info" : "title subtitle",
    #             "subject" : "subject",
    #             "source" : "source",
    #             "initial_date" : "1800",
    #             "final_date" : "%d/%d/1900" % (i, i),
    #             "content" : "content1"
    #         }
    #
    #         self.pylucene.index_doc(doc_dict)
    #
    #     docs = self.pylucene.search_docs("title")
    #
    #     self.assertEqual(3, len(docs))
    #
    #
    # def test_pylucene_03(self):
    #     """
    #     test perfect match with a specific field
    #     """
    #
    #     for i in range(0, 3):
    #         doc_dict = {
    #             "doc_id" : str(i),
    #             "general_info" : "title subtitle",
    #             "subject" : "subject%d" % (i),
    #             "source" : "source",
    #             "initial_date" : "1800",
    #             "final_date" : "%d/%d/1900" % (i, i),
    #             "content" : "content1"
    #         }
    #
    #         self.pylucene.index_doc(doc_dict)
    #
    #     docs = self.pylucene.search_docs(value="subject1", field="subject")
    #
    #     self.assertEqual(1, len(docs))
    #
    #
    # def test_pylucene_04(self):
    #     """
    #     test search for date fields (year/month/day)
    #     """
    #
    #     for i in range(0, 3):
    #         doc_dict = {
    #             "doc_id" : str(i),
    #             "general_info" : "title subtitle",
    #             "subject" : "subject%d" % (i),
    #             "source" : "source",
    #             "initial_date" : "1800",
    #             "final_date" : "%d/%d/1900" % (i, i),
    #             "content" : "content1"
    #         }
    #
    #         self.pylucene.index_doc(doc_dict)
    #
    #     docs = self.pylucene.search_docs(value="1900", field="final_date")
    #
    #     self.assertEqual(3, len(docs))
    #
    #
    # def test_pylucene_05(self):
    #     """
    #     test search for number
    #     """
    #
    #     for i in range(0, 3):
    #         doc_dict = {
    #             "doc_id" : str(i),
    #             "general_info" : "title subtitle",
    #             "subject" : "subject%d" % (i),
    #             "source" : "source",
    #             "initial_date" : "1800",
    #             "final_date" : "%d/%d/1900" % (i, i),
    #             "content" : "content1"
    #         }
    #
    #         self.pylucene.index_doc(doc_dict)
    #
    #     docs = self.pylucene.search_docs(value="19", field="final_date")
    #
    #     self.assertEqual(0, len(docs))

    def test_pylucene_06(self):
        """
        test search for specific value in final_date field
        """
        self._index_sample_docs()

        docs = self.pylucene.search_docs(value="2/2/1900", field="final_date")

        # All three documents are expected back, with the exact-date document
        # (doc_id "2") first -- presumably because every final_date shares the
        # "1900" part of the query; confirm against the analyzer in use.
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(3, len(docs))
        self.assertEqual(str(2), docs[0].get("doc_id"))

    def test_pylucene_07(self):
        """
        test search for specific value in final_date field with different separators
        """
        self._index_sample_docs()

        # Same expectation as the "/"-separated query: "-" separators in the
        # search value must produce the same hits and the same first result.
        docs = self.pylucene.search_docs(value="2-2-1900", field="final_date")

        self.assertEqual(3, len(docs))
        self.assertEqual(str(2), docs[0].get("doc_id"))