Exemple #1
0
def dotest(terms, expected, which):
    """Run one search implementation over the corpus and check its result.

    Args:
        terms: Raw query string; it is passed through ``words`` before
            being handed to the search function.
        expected: File names the search is expected to return, in any
            order. The caller's object is left untouched.
        which: Selects the implementation under test: ``0`` runs
            ``linear_search``, ``1`` runs ``create_index`` followed by
            ``index_search``, and any other value runs
            ``myhtable_create_index`` followed by ``myhtable_index_search``.

    Raises:
        AssertionError: If the sorted file names found differ from the
            sorted expected names.
    """
    files = filelist(rootdir)
    terms = words(terms)

    if which == 0:
        docs = linear_search(files, terms)
    elif which == 1:
        docs = index_search(files, create_index(files), terms)
    else:
        docs = myhtable_index_search(files, myhtable_create_index(files), terms)

    # Compare sorted copies: the order of results is irrelevant, and sorting
    # in place would reorder the caller's `expected` list as a side effect.
    names = sorted(filenames(docs))
    expected = sorted(expected)
    assert names == expected, "found "+str(names)+" != expected "+str(expected)
Exemple #2
0
def dotest(terms, expected, which):
    """Run one search implementation over the corpus and check its result.

    Args:
        terms: Raw query string; it is passed through ``words`` before
            being handed to the search function.
        expected: File names the search is expected to return, in any
            order. The caller's object is left untouched.
        which: Selects the implementation under test: ``0`` runs
            ``linear_search``, ``1`` runs ``create_index`` followed by
            ``index_search``, and any other value runs
            ``myhtable_create_index`` followed by ``myhtable_index_search``.

    Raises:
        AssertionError: If the sorted file names found differ from the
            sorted expected names.
    """
    files = filelist(rootdir)
    terms = words(terms)

    if which == 0:
        docs = linear_search(files, terms)
    elif which == 1:
        docs = index_search(files, create_index(files), terms)
    else:
        docs = myhtable_index_search(files, myhtable_create_index(files), terms)

    # Compare sorted copies: the order of results is irrelevant, and sorting
    # in place would reorder the caller's `expected` list as a side effect.
    names = sorted(filenames(docs))
    expected = sorted(expected)
    assert names == expected, "found "+str(names)+" != expected "+str(expected)
def time_check(choice: str, ite: int):
    """Time several search routines on a large integer list and plot the results.

    Args:
        choice: How the list is built. ``'sorted'`` fills it with ``lim``
            random integers in ``[0, lim]`` and sorts them; ``'normalized'``
            fills it with ``0 .. lim - 1``. Any other value leaves the list
            empty. ``interpolation_search`` is only timed for
            ``'normalized'``.
        ite: Number of random lookups to perform.

    For every lookup a random value in ``[0, lim]`` is searched with each
    routine; the elapsed time is recorded only when the routine's return
    value compares equal to ``True``. The recorded timings are then drawn
    as one line per routine with matplotlib.
    """
    lim = 10000000
    if choice == 'sorted':
        data = sorted(random.randint(0, lim) for _ in range(lim))
    elif choice == 'normalized':
        data = list(range(lim))
    else:
        data = []

    inter = []
    seq = []
    bnry = []
    indexed = []

    # (search routine, list receiving its timings), in the order they run.
    searches = [
        (linear_search, seq),
        (binary_search, bnry),
        (index_search, indexed),
    ]
    if choice == 'normalized':
        searches.insert(0, (interpolation_search, inter))

    for _ in range(ite):
        value = random.randint(0, lim)
        for search, timings in searches:
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring short durations.
            start = time.perf_counter()
            found = search(data, value)
            elapsed = time.perf_counter() - start
            # Kept as an explicit comparison: the return type of the search
            # routines is not visible here, so plain truthiness could differ.
            if found == True:
                timings.append(elapsed)

    plt.xlabel('Iterations')
    plt.ylabel('Time')
    if len(inter) != 0:
        plt.plot(inter, color='red', label='interpolation')
    plt.plot(seq, color='green', label='sequential')
    plt.plot(bnry, color='blue', label='binary')
    plt.plot(indexed, color='purple', label='indexed')
    plt.legend()
    plt.grid(True)
    plt.show()
def test_index_berlitz():
    """Index search for "hawaii travel" must yield exactly HistoryHawaii.txt."""
    corpus = filelist(rootdir)
    query = words("hawaii travel")

    # Build the index over the corpus, then run the query against it.
    matches = index_search(corpus, create_index(corpus), query)

    assert filenames(matches) == ['HistoryHawaii.txt']
def test_index_berlitz_none():
    """Index search for the term "missspellinnng" must yield no file names."""
    corpus = filelist(rootdir)
    query = words("missspellinnng")

    # Build the index over the corpus, then run the query against it.
    matches = index_search(corpus, create_index(corpus), query)

    assert filenames(matches) == []
Exemple #6
0
def test_index_berlitz_none():
    """Index search for the term "missspellinnng" must yield no file names."""
    corpus = filelist(rootdir)
    query = words("missspellinnng")

    # Build the index over the corpus, then run the query against it.
    matches = index_search(corpus, create_index(corpus), query)

    assert filenames(matches) == []
def test_index_berlitz():
    """Index search for "hawaii travel" must yield exactly HistoryHawaii.txt."""
    corpus = filelist(rootdir)
    query = words("hawaii travel")

    # Build the index over the corpus, then run the query against it.
    matches = index_search(corpus, create_index(corpus), query)

    assert filenames(matches) == ['HistoryHawaii.txt']
Exemple #8
0
# files = files[:100]
N = len(files)
print N, "files"

index = None

while True:
    terms = raw_input("Search terms: ")
    terms = words(terms)

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl == 'index':
        if index is None:
            index = create_index(files)
            print "Index complete"
        docs = index_search(files, index, terms)
    elif impl == 'myhtable':
        if index is None:
            index = myhtable_create_index(files)
            print "Index complete"
        docs = myhtable_index_search(files, index, terms)
    else:
        print "Invalid search type:", impl
        break
    page = results(docs, terms)
    f = open("/tmp/results.html", "w")
    f.write(page)
    f.close()
    webbrowser.open_new_tab("file:///tmp/results.html")
Exemple #9
0
# Interactive search loop: read query terms, run the search implementation
# selected by `impl`, and open the rendered results in a browser tab.

# Uncomment the next line to test just the first 100 files instead of all files
# files = files[:100]
N = len(files)
print(N, "files")

# Search index; built on the first query that needs it and reused afterwards.
index = None

while True:
    terms = words(input("Search terms: "))

    # Guard clause: an unknown implementation ends the session.
    if impl not in ('linear', 'index', 'myhtable'):
        print("Invalid search type:", impl)
        break

    if impl == 'linear':
        docs = linear_search(files, terms)
    elif impl == 'index':
        if index is None:
            index = create_index(files)
            print("Index complete")
        docs = index_search(files, index, terms)
    else:
        # Only 'myhtable' can reach this branch.
        if index is None:
            index = myhtable_create_index(files)
            print("Index complete")
        docs = myhtable_index_search(files, index, terms)

    page = results(docs, terms)
    with open("/tmp/results.html", "w", encoding='UTF-8') as f:
        f.write(page)
    webbrowser.open_new_tab("file:///tmp/results.html")
Exemple #10
0
def time_check(choice: str, ite: int):
    """Time several search routines, then plot their counters and timings.

    Args:
        choice: How the list is built. ``'sorted'`` fills it with ``lim``
            random integers in ``[0, lim]`` and sorts them; ``'normalized'``
            fills it with ``0 .. lim - 1``. Any other value leaves the list
            empty. ``interpolation_search`` is only run for ``'normalized'``.
        ite: Number of random lookups to perform.

    For every lookup a random value in ``[0, lim]`` is searched with each
    routine. Both the elapsed time and the routine's return value are
    recorded; the return value is plotted on an axis labelled 'Steps', so it
    is presumably a step count (confirm against the search implementations).

    Two figures are shown in turn: a 2x2 grid of histograms of the recorded
    return values, then a line plot of the elapsed time of each lookup.
    """
    lim = 1000000
    if choice == 'sorted':
        data = sorted(random.randint(0, lim) for _ in range(lim))
    elif choice == 'normalized':
        data = list(range(lim))
    else:
        data = []

    inter_time, inter_counter = [], []
    seq_time, seq_counter = [], []
    bnry_time, bnry_counter = [], []
    indexed_time, indexed_counter = [], []

    # (search routine, timings list, counters list), in the order they run.
    runs = [
        (linear_search, seq_time, seq_counter),
        (binary_search, bnry_time, bnry_counter),
        (index_search, indexed_time, indexed_counter),
    ]
    if choice == 'normalized':
        runs.insert(0, (interpolation_search, inter_time, inter_counter))

    for _ in range(ite):
        value = random.randint(0, lim)
        for search, timings, counters in runs:
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring short durations.
            start = time.perf_counter()
            counter = search(data, value)
            timings.append(time.perf_counter() - start)
            counters.append(counter)

    # Figure 1: one histogram per routine. The title is set at figure level:
    # calling plt.title() before the first plt.subplot() would attach it to a
    # stray full-size axes instead of the grid.
    plt.suptitle("Counter x Number of searchs")
    panels = [
        (2, seq_counter, 'green', 'sequential'),
        (3, bnry_counter, 'blue', 'binary'),
        (4, indexed_counter, 'purple', 'indexed'),
    ]
    if len(inter_time) != 0:
        panels.insert(0, (1, inter_counter, 'red', 'interpolation'))
    for position, counters, color, label in panels:
        plt.subplot(2, 2, position)
        plt.hist(counters, color=color, label=label)
        plt.xlabel('Steps')
        plt.ylabel('Number of searchs')
        plt.legend()
        plt.grid(True)
    plt.show()

    # Figure 2: elapsed time of every lookup, one line per routine.
    plt.title("Number of searchs x Time")
    plt.xlabel('Number of searchs')
    plt.ylabel('Time')
    if len(inter_time) != 0:
        plt.plot(inter_time, color='red', label='interpolation')
    plt.plot(seq_time, color='green', label='sequential')
    plt.plot(bnry_time, color='blue', label='binary')
    plt.plot(indexed_time, color='purple', label='indexed')
    plt.legend()
    plt.grid(True)
    plt.show()