def test_bktree():
    """Regression test for BKTree against a pickled reference fixture.

    Loads 'testdatabase.p' (expects a dict with a 'bktree' entry containing
    'keys', 'query', and 'result'), rebuilds the tree from 'keys', runs the
    same query, and checks the result set matches the stored target exactly:
    every returned word must appear exactly once in the target with the same
    distance.

    Returns:
        bool: True if the BKTree output matches the reference, else False.
    """
    test_passed = True
    # Use a context manager so the fixture file handle is always closed
    # (the original left it open).  NOTE(review): pickle.load is only safe
    # because this fixture is trusted, locally-generated test data.
    with open('testdatabase.p', 'rb') as fixture_file:
        test_database = pickle.load(fixture_file)
    test_database = test_database['bktree']
    keys = test_database['keys']
    result_target = test_database['result']
    query = test_database['query']

    # Rebuild the tree from scratch with the same distance metric.
    BKT = BKTree(levenshtein_distance_DP)
    for key in keys:
        BKT.insert(key)
    result = BKT.get(query)

    # Every (word, distance) returned must match the target: the word must
    # occur exactly once in the target list, with an identical distance.
    for word, distance in result:
        encounter = 0
        for word_target, distance_target in result_target:
            if word == word_target:
                if distance != distance_target:
                    test_passed = False
                encounter += 1
        if encounter != 1:
            test_passed = False
    return test_passed
# Load the raw CSV dataset, then build the search structures:
# an RB-tree for exact token lookup and a BK-tree for fuzzy spelling matches.
dataset = Dataset(csv_file, query_processor)
dataset.load()
print("Dataset Loaded!")

# Process it into an efficient binary search tree.
print("Constructing RB-Tree...", end='', flush=True)
binary_search_tree = RedBlackTree()
# Each entry appears to be a (token, book_ids) pair — index access kept
# rather than unpacking in case entries carry extra fields (TODO confirm).
for entry in dataset.get_token_bookids():
    binary_search_tree.insert(entry[0], entry[1])
print("RB-Tree constructed!")

# Create a BK-tree keyed on Levenshtein edit distance over the dictionary.
print("Constructing BK-Tree...", end='', flush=True)
bk_tree = BKTree(levenshtein_distance_DP)
for word in dataset.get_dictionary():
    bk_tree.insert(word)
print("BK-Tree constructed!")

if CONSTRUCT_DEBUG_MODE:
    # Snapshot the constructed structures for faster debug restarts.
    # Context managers guarantee the dump files are flushed and closed
    # (the original `pickle.dump(obj, open(...))` leaked the handles).
    with open("dataset.p", 'wb') as out_file:
        pickle.dump(dataset, out_file)
    with open("binary_search_tree.p", 'wb') as out_file:
        pickle.dump(binary_search_tree, out_file)
    with open("bk_tree.p", 'wb') as out_file:
        pickle.dump(bk_tree, out_file)

# Create a Ranker over the built structures.
ranker = Ranker(dataset, query_processor, binary_search_tree, bk_tree)
# Get the book-titles for "The search engine."
ranker.evaluate("The search engine.")