Beispiel #1
0
 def test_medium_minhashexcludes(self):
     # Minhash must exclude at least some misses: the list of possible
     # matches for the first test book has to hold at least 1 entry and
     # fewer than 5 entries.
     manager = zTestDataManager.TestBookManager('samplebooks.zip', '../testbooks/')
     manager.unpack_archive(max=10)
     original_text = manager.get_testbooks()[0]
     error_maker = zTestDataManager.ErrorMaker()
     dupe_text = manager.make_error_dupes([original_text], errormaker=error_maker)[0]

     # The book list is fetched again after the dupe was made
     # (presumably it now includes the dupe -- confirm in TestBookManager).
     all_texts = manager.get_testbooks()
     library = Library.Library()
     for textfile in all_texts:
         entry = Book(textfile=textfile)
         entry.initialize_text_data()
         library.add_book(entry)

     # The dupe is looked up but not used further in this test; the call is
     # kept so the sequence of library lookups stays the same.
     _dupe_book = library.get_book_textfile(dupe_text)
     target_book = library.get_book_textfile(original_text)
     candidates = library.get_possible_matches(target_book)
     candidate_count = len(candidates)

     self.assertLess(candidate_count, 5, msg="minhash was not able to eliminate at least one possible book")
     self.assertGreaterEqual(candidate_count, 1, msg="minhash didn't get at least one hit")

     # Verify the minhash results for the whole library and print the report.
     report = manager.verify_minhash_results(library)
     manager.print_formatted_minhash_results(report)
Beispiel #2
0
 def test_medium_minhashfindsmatches(self):
     # Make certain minhash actually works: an error-laden dupe of the first
     # test book must show up among the possible matches returned for the
     # original book.
     testmgr = zTestDataManager.TestBookManager('samplebooks.zip', '../testbooks/')
     testmgr.unpack_archive(max=2)
     testbook = testmgr.get_testbooks()[0]
     errormaker = zTestDataManager.ErrorMaker()
     dupe_textfile = testmgr.make_error_dupes([testbook], errormaker=errormaker)[0]

     # The book list is fetched again after the dupe was made
     # (presumably it now includes the dupe -- confirm in TestBookManager).
     books = testmgr.get_testbooks()
     library = Library.Library()
     for textfile in books:
         mybook = Book(textfile=textfile)
         mybook.initialize_text_data()
         library.add_book(mybook)

     dupe_book = library.get_book_textfile(dupe_textfile)
     testablebook = library.get_book_textfile(testbook)
     possiblematches = library.get_possible_matches(testablebook)

     # Each entry of possiblematches is unpacked as an (id, score) pair;
     # only the id is compared. Avoid the name `id`, which shadows the builtin.
     dupe_id = dupe_book.id
     found = any(book_id == dupe_id for book_id, _score in possiblematches)
     self.assertTrue(found, msg="minhash did not return the error dupe as a possible match")