Exemple #1
0
def test_bktree():
    """Check BKTree.get against the reference stored in 'testdatabase.p'.

    Loads the 'bktree' entry of the pickled fixture, inserts every stored key
    into a BKTree created with levenshtein_distance_DP, runs the stored query
    and compares the returned (word, distance) pairs with the stored
    reference pairs.

    Returns:
        True if every returned word occurs exactly once in the reference
        result with the same distance, otherwise False.
    """
    # The fixture is a local file; pickle must only be used on trusted data.
    # The context manager guarantees the handle is closed even if loading fails.
    with open('testdatabase.p', 'rb') as fixture_file:
        test_database = pickle.load(fixture_file)
    test_database = test_database['bktree']

    keys = test_database['keys']
    result_target = test_database['result']
    query = test_database['query']

    BKT = BKTree(levenshtein_distance_DP)
    for key in keys:
        BKT.insert(key)

    result = BKT.get(query)

    # NOTE(review): the check is one-directional -- reference words that are
    # missing from `result` are not detected (an empty result passes).
    # Confirm whether that is intended.
    test_passed = True
    for word, distance in result:
        # Distances of all reference entries carrying the same word.
        target_distances = [
            distance_target
            for word_target, distance_target in result_target
            if word_target == word
        ]
        # The word must appear exactly once in the reference, with an equal distance.
        if len(target_distances) != 1 or target_distances[0] != distance:
            test_passed = False

    return test_passed
Exemple #2
0
    dataset = Dataset(csv_file, query_processor)
    dataset.load()
    print("Dataset Loaded!")

    #process it into an efficient binary search tree
    print("Constructing RB-Tree...", end='', flush=True)
    binary_search_tree = RedBlackTree()
    temp_dataset = dataset.get_token_bookids()
    for i in range(len(temp_dataset)):
        binary_search_tree.insert(temp_dataset[i][0], temp_dataset[i][1])
    print("RB-Tree constructed!")

    #create a BK-tree
    print("Constructing BK-Tree...", end='', flush=True)
    bk_tree = BKTree(levenshtein_distance_DP)
    english_dictionary_list = dataset.get_dictionary()
    for i in range(len(english_dictionary_list)):
        bk_tree.insert(english_dictionary_list[i])
    print("BK-Tree constructed!")

    if CONSTRUCT_DEBUG_MODE:
        pickle.dump(dataset, open("dataset.p", 'wb'))
        pickle.dump(binary_search_tree, open("binary_search_tree.p", 'wb'))
        pickle.dump(bk_tree, open("bk_tree.p", 'wb'))

#create a Ranker
ranker = Ranker(dataset, query_processor, binary_search_tree, bk_tree)

#Get the book-titles for “The search engine.”
ranker.evaluate("The search engine.")