Esempio n. 1
0
    def test_positive(self):
        """Check the success paths of add, remove and removeAll.

        The container is created with a capacity of 7. The test expects
        ``add``/``remove`` to report 1 on success and 0 on refusal, and
        ``removeAll`` to report how many entries it dropped.
        """
        container = []
        capacity = 7
        library.create(container, capacity)

        # remove: fill the container with distinct values ...
        self.assertEqual(library.size(container), 0)
        for expected_size, value in enumerate(range(capacity), start=1):
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.add(value, container), 1)
            self.assertEqual(library.inList(value, container), 1)
            self.assertEqual(library.size(container), expected_size)
        # ... then take them out again in insertion order.
        for value in range(capacity):
            remaining = capacity - (value + 1)
            self.assertEqual(library.inList(value, container), 1)
            self.assertEqual(library.remove(value, container), 1)
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.size(container), remaining)
        self.assertEqual(library.size(container), 0)

        # removeAll: fill the container with one repeated value.
        repeated = 9
        self.assertEqual(library.size(container), 0)
        self.assertEqual(library.inList(repeated, container), 0)
        for expected_size in range(1, capacity + 1):
            self.assertEqual(library.add(repeated, container), 1)
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.size(container), expected_size)
        # Further adds beyond the capacity must be refused and change nothing.
        for _ in range(capacity, 10):
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.add(repeated, container), 0)
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.size(container), capacity)
        # A single removeAll is expected to drop every copy.
        self.assertEqual(library.removeAll(repeated, container), capacity)
        self.assertEqual(library.size(container), 0)
Esempio n. 2
0
    def test_boundary(self):
        """Check remove and removeAll at the empty and full boundaries.

        Covers removing from an empty container, adding to a full one,
        removing values that were never stored, and calling removeAll on a
        container that holds no match.
        """
        container = []
        capacity = 7
        library.create(container, capacity)
        # Values that never fit: the container is already full when they
        # are offered.
        overflow_values = range(capacity, 10)

        # remove: nothing can be removed from an empty container.
        self.assertEqual(library.inList(0, container), 0)
        self.assertEqual(library.remove(0, container), 0)

        # Fill to capacity (results deliberately not asserted here).
        for value in range(capacity):
            library.add(value, container)

        # Adding to a full container is refused and leaves it untouched.
        for value in overflow_values:
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.add(value, container), 0)
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.size(container), capacity)

        # Removing a value that is not stored is refused as well.
        for value in overflow_values:
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.remove(value, container), 0)
            self.assertEqual(library.inList(value, container), 0)
            self.assertEqual(library.size(container), capacity)

        # Drain the container (results deliberately not asserted here).
        for value in range(capacity):
            library.remove(value, container)
        self.assertEqual(library.remove(0, container), 0)
        self.assertEqual(library.inList(9, container), 0)

        # removeAll: no match in an empty container.
        repeated = 9
        self.assertEqual(library.inList(repeated, container), 0)
        self.assertEqual(library.removeAll(repeated, container), 0)

        # Fill the container with copies of the same value.
        for expected_size in range(1, capacity + 1):
            self.assertEqual(library.add(repeated, container), 1)
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.size(container), expected_size)

        # Extra copies beyond the capacity are refused.
        for _ in overflow_values:
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.add(repeated, container), 0)
            self.assertEqual(library.inList(repeated, container), 1)
            self.assertEqual(library.size(container), capacity)

        # The first removeAll drops every copy; a second one finds nothing.
        self.assertEqual(library.removeAll(repeated, container), capacity)
        self.assertEqual(library.size(container), 0)
        self.assertEqual(library.removeAll(repeated, container), 0)
        self.assertEqual(library.inList(repeated, container), 0)
Esempio n. 3
0
    def test_random(self):
        """Add and then remove a random sequence of values, tracking the size.

        Exactly ``capacity`` values are drawn from 1..9, so the sequence may
        contain duplicates; every add and every remove is still expected to
        succeed and to change the size by exactly one.
        """
        container = []
        capacity = 7
        library.create(container, capacity)

        random_numbers = [random.randint(1, 9) for _ in range(capacity)]

        self.assertEqual(library.size(container), 0)

        # Insert phase: the size grows by one with every accepted value.
        for already_added, value in enumerate(random_numbers):
            self.assertEqual(library.size(container), already_added)
            self.assertEqual(library.add(value, container), 1)
            self.assertEqual(library.inList(value, container), 1)
            self.assertEqual(library.size(container), already_added + 1)

        # Removal phase: same order, the size shrinks by one each time.
        total = len(random_numbers)
        for already_removed, value in enumerate(random_numbers):
            still_stored = total - already_removed
            self.assertEqual(library.size(container), still_stored)
            self.assertEqual(library.inList(value, container), 1)
            self.assertEqual(library.remove(value, container), 1)
            self.assertEqual(library.size(container), still_stored - 1)

        self.assertEqual(library.size(container), 0)
from library import read_data, remove, tokenize, stem, stopwords, write_inverted_index, write_most_word

# Inverted index: stemmed term -> {document id: occurrences in that document}.
files = {}
# Per-document term frequencies: document id -> {stemmed term: occurrences}.
word_count = {}

# Fetch the stop words once instead of once per token.
# NOTE(review): assumes stopwords() returns a reusable collection (list, set,
# ...) rather than a one-shot iterator -- confirm against the library module.
stop_words = stopwords()

for data in read_data():
    data['body'] = remove(data['body'])
    data['body'] = tokenize(data['body'])
    doc_id = data['id']
    for token in data['body']:
        token = stem(token)
        # Skip stop words and single-character tokens.
        if token in stop_words or len(token) <= 1:
            continue

        # Count the token for this document. The per-document dict is created
        # on demand AND the triggering token is counted; previously the first
        # indexed token of every document was dropped from word_count, which
        # made the per-document maximum below too small (or even 0).
        doc_counts = word_count.setdefault(doc_id, {})
        doc_counts[token] = doc_counts.get(token, 0) + 1

        # Record the same occurrence in the inverted index.
        postings = files.setdefault(token, {})
        postings[doc_id] = postings.get(doc_id, 0) + 1

# Highest single-term frequency found in each document.
most_word = {}
for doc_id, counter in word_count.items():
    most_word[doc_id] = max(counter.values(), default=0)
Esempio n. 5
0
def run():
    """Interactively rank the documents against one of the stored queries.

    Lists every query returned by ``read_query()``, asks the user for a query
    number, scores each document that shares at least one indexed term with
    that query, and prints the best matches. A document's score is the sum,
    over the matching query terms, of

        (0.5 + 0.5 * tf / max_tf_of_document) * log2(1400 / document_frequency)

    Query terms missing from the inverted index are ignored, an unknown or
    non-numeric query number shows a message instead of crashing, and the
    number of printed results is capped at the number of scored documents.
    Control is handed back to ``start()`` at the end.

    Raises:
        ValueError: if the requested number of results is not an integer.
    """
    # Collection size used for the idf; hard-coded exactly as before.
    # NOTE(review): presumably the size of the Cranfield collection -- confirm.
    total_documents = 1400

    print("Proyek Mesin Pencarian Sederhana dengan Cranfield Collection")
    inverted_index = read_inverted_index()

    # Materialise once: the queries are listed and then searched.
    queries = list(read_query())
    for query in queries:
        print(str(query['query number']) + '. ', query['query'])

    choice = input("Masukan id dari query yang ingin dicari = ")
    # NOTE(review): `a` looks like an alias for a shell-command runner
    # (e.g. os.system); it is defined outside this block -- confirm.
    a("cls")
    print("Proyek Mesin Pencarian Sederhana dengan Cranfield Collection")

    try:
        query_number = int(choice)
    except ValueError:
        query_number = None

    # As before, the last query carrying the requested number wins.
    selected_query = None
    for query in queries:
        if query['query number'] == query_number:
            selected_query = query['query']

    if selected_query is None:
        # Previously this fell through to an UnboundLocalError.
        print("Query dengan id tersebut tidak ditemukan.")
        a("pause")
        a("cls")
        start()
        return

    selected_query = remove(selected_query)
    selected_query = tokenize(selected_query)

    # Posting lists of the distinct query terms, in order of first appearance.
    # How often a term occurs in the query never influenced the score, so only
    # distinct terms are tracked.
    stop_words = stopwords()
    term_postings = {}
    for token in selected_query:
        token = stem(token)
        if token in stop_words or len(token) <= 1:
            continue
        if token in term_postings:
            continue
        postings = inverted_index.get(token)
        # A term absent from the index matches no document; skipping it avoids
        # len(None) and a division by zero in the idf.
        if postings:
            term_postings[token] = postings

    # Read once instead of once per (term, document) pair.
    max_tf_by_doc = read_most_word()

    ranked_doc = {}
    for term, postings in term_postings.items():
        idf = math.log2(total_documents / len(postings))
        for doc_id, occurrences in postings.items():
            weight = 0.5 + (0.5 * (occurrences / max_tf_by_doc[doc_id]))
            ranked_doc[doc_id] = ranked_doc.get(doc_id, 0) + weight * idf

    sorted_doc = sorted(ranked_doc.items(), key=lambda x: x[1], reverse=True)
    doc = read_data()

    requested = int(
        input("Masukan banyak dokumen relevan yang ini di tampilkan = "))
    # Never index past the documents that actually received a score.
    shown = min(requested, len(sorted_doc))
    for position in range(shown):
        doc_id, score = sorted_doc[position]
        # Ids are treated as 1-based positions into read_data()'s result.
        print(
            str(position + 1) + ". " + str(doc[int(doc_id) - 1]['title']) +
            " | similarity coefficient = " + str(score))
    a("pause")
    a("cls")
    start()