def test_positive(self):
    """Exercise add/remove/removeAll on inputs the assertions expect to succeed.

    A container is created with room for ``capacity`` elements. The test
    then asserts the return values of ``library.add``, ``library.remove``,
    ``library.removeAll``, ``library.inList`` and ``library.size`` while the
    container is filled, emptied, refilled with a repeated value, and
    finally cleared in one call.
    """
    capacity = 7
    overflow_stop = 10
    repeated = 9
    items = []
    library.create(items, capacity)

    # remove(): fill with distinct values, then take each one out again.
    self.assertEqual(library.size(items), 0)
    for value in range(capacity):
        self.assertEqual(library.inList(value, items), 0)
        self.assertEqual(library.add(value, items), 1)
        self.assertEqual(library.inList(value, items), 1)
        self.assertEqual(library.size(items), value + 1)
    for value in range(capacity):
        remaining = capacity - (value + 1)
        self.assertEqual(library.inList(value, items), 1)
        self.assertEqual(library.remove(value, items), 1)
        self.assertEqual(library.inList(value, items), 0)
        self.assertEqual(library.size(items), remaining)
    self.assertEqual(library.size(items), 0)

    # removeAll(): fill with one repeated value, check that further adds
    # are rejected, then remove every copy at once.
    self.assertEqual(library.size(items), 0)
    self.assertEqual(library.inList(repeated, items), 0)
    for expected_size in range(1, capacity + 1):
        self.assertEqual(library.add(repeated, items), 1)
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.size(items), expected_size)
    for _ in range(capacity, overflow_stop):
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.add(repeated, items), 0)
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.size(items), capacity)
    self.assertEqual(library.removeAll(repeated, items), capacity)
    self.assertEqual(library.size(items), 0)
def test_boundary(self):
    """Exercise remove/removeAll at the empty and full limits of a container.

    The assertions expect ``remove`` and ``removeAll`` to return 0 when the
    value is absent, ``add`` to return 0 once ``capacity`` elements are
    stored, and the size to stay unchanged by every rejected operation.
    """
    capacity = 7
    overflow_stop = 10
    repeated = 9
    items = []
    library.create(items, capacity)

    # remove() on an empty container finds nothing.
    self.assertEqual(library.inList(0, items), 0)
    self.assertEqual(library.remove(0, items), 0)

    # Fill to capacity (return values deliberately not checked here).
    for value in range(capacity):
        library.add(value, items)

    # Adding to a full container is rejected and leaves it unchanged.
    for extra in range(capacity, overflow_stop):
        self.assertEqual(library.inList(extra, items), 0)
        self.assertEqual(library.add(extra, items), 0)
        self.assertEqual(library.inList(extra, items), 0)
        self.assertEqual(library.size(items), capacity)

    # Removing values that were never stored is rejected as well.
    for extra in range(capacity, overflow_stop):
        self.assertEqual(library.inList(extra, items), 0)
        self.assertEqual(library.remove(extra, items), 0)
        self.assertEqual(library.inList(extra, items), 0)
        self.assertEqual(library.size(items), capacity)

    # Empty the container again; a further remove() must find nothing.
    for value in range(capacity):
        library.remove(value, items)
    self.assertEqual(library.remove(0, items), 0)
    self.assertEqual(library.inList(repeated, items), 0)

    # removeAll() on a container that does not hold the value.
    self.assertEqual(library.inList(repeated, items), 0)
    self.assertEqual(library.removeAll(repeated, items), 0)

    # Fill with one repeated value, overflow, then clear in a single call.
    for expected_size in range(1, capacity + 1):
        self.assertEqual(library.add(repeated, items), 1)
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.size(items), expected_size)
    for _ in range(capacity, overflow_stop):
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.add(repeated, items), 0)
        self.assertEqual(library.inList(repeated, items), 1)
        self.assertEqual(library.size(items), capacity)
    self.assertEqual(library.removeAll(repeated, items), capacity)
    self.assertEqual(library.size(items), 0)

    # A second removeAll() has nothing left to remove.
    self.assertEqual(library.removeAll(repeated, items), 0)
    self.assertEqual(library.inList(repeated, items), 0)
def test_random(self):
    """Add and then remove randomly drawn values, checking the size each step.

    ``capacity`` values are drawn from 1..9 (duplicates are possible). Each
    one is added and later removed in the same order; the assertions expect
    every add and remove to return 1 and the size to move by exactly one.
    """
    capacity = 7
    items = []
    library.create(items, capacity)
    samples = [random.randint(1, 9) for _ in range(capacity)]

    # Grow the container one sample at a time.
    self.assertEqual(library.size(items), 0)
    for added_so_far, value in enumerate(samples):
        self.assertEqual(library.size(items), added_so_far)
        self.assertEqual(library.add(value, items), 1)
        self.assertEqual(library.inList(value, items), 1)
        self.assertEqual(library.size(items), added_so_far + 1)

    # Shrink it again in the same order.
    total = len(samples)
    for removed_so_far, value in enumerate(samples):
        remaining = total - removed_so_far
        self.assertEqual(library.size(items), remaining)
        self.assertEqual(library.inList(value, items), 1)
        self.assertEqual(library.remove(value, items), 1)
        self.assertEqual(library.size(items), remaining - 1)
    self.assertEqual(library.size(items), 0)
from library import read_data, remove, tokenize, stem, stopwords, write_inverted_index, write_most_word

# Inverted index: token -> {document id -> occurrences of the token in that document}.
files = {}
# Per-document term frequencies: document id -> {token -> occurrences}.
word_count = {}

# Fetch the stop-word collection once instead of once per token.
# NOTE(review): assumes stopwords() returns an iterable of words (not a single
# string); confirm against library.stopwords.
stop_words = set(stopwords())

for data in read_data():
    doc_id = data['id']
    data['body'] = remove(data['body'])
    data['body'] = tokenize(data['body'])
    for token in data['body']:
        token = stem(token)
        # Skip stop words and tokens of at most one character.
        if token in stop_words or len(token) <= 1:
            continue

        # Count the token for this document. The per-document dict is created
        # on demand *and* the token is counted in the same step; the earlier
        # if/elif chain created the dict and then skipped counting, so the
        # first kept token of every document was one occurrence short.
        doc_counts = word_count.setdefault(doc_id, {})
        doc_counts[token] = doc_counts.get(token, 0) + 1

        # NOTE(review): the index is updated only for tokens that pass the
        # filter above; confirm this matches the intended nesting.
        postings = files.setdefault(token, {})
        postings[doc_id] = postings.get(doc_id, 0) + 1

# Highest single-token frequency per document: document id -> max occurrences.
most_word = {}
for doc_id, counter in word_count.items():
    most_word[doc_id] = max(counter.values(), default=0)
def run():
    """Prompt for a query, rank the documents against it and print the top hits.

    Steps:
      1. List every query returned by ``read_query()`` and ask for a query id.
      2. Clean, tokenize and stem the chosen query, dropping stop words and
         tokens of at most one character.
      3. Score every document that contains at least one query term with
         ``(0.5 + 0.5 * tf / max_tf) * log2(1400 / df)``, summed over the
         distinct query terms, where ``tf`` comes from the inverted index,
         ``max_tf`` from ``read_most_word()`` and ``df`` is the number of
         documents listing the term.
      4. Ask how many results to show and print them, best score first.
      5. Pause, clear the screen and hand control back to ``start()``.

    Query terms that do not occur in the index are ignored, an unknown query
    id produces a message instead of a crash, and at most as many results are
    printed as were actually ranked.

    Raises:
        ValueError: if either prompt is answered with a non-integer.
    """
    # Collection size used as the numerator of the IDF formula.
    total_documents = 1400

    print("Proyek Mesin Pencarian Sederhana dengan Cranfield Collection")
    inverted_index = read_inverted_index()
    # Read the queries once; they are needed for the listing and the lookup.
    queries = list(read_query())
    for query in queries:
        print(str(query['query number']) + '. ', query['query'])
    choice = input("Masukan id dari query yang ingin dicari = ")
    a("cls")
    print("Proyek Mesin Pencarian Sederhana dengan Cranfield Collection")

    wanted = int(choice)
    selected_query = None
    for query in queries:
        if query['query number'] == wanted:
            selected_query = query['query']

    if selected_query is None:
        # Previously this path failed with a NameError on selected_query.
        print("Query dengan id tersebut tidak ditemukan.")
    else:
        stop_words = set(stopwords())

        # Distinct query terms, in first-occurrence order, mapped to their
        # postings ({document id -> term frequency}). Repeated query terms are
        # scored once, and terms without postings are skipped because they
        # would otherwise break the document-frequency computation below.
        postings_by_term = {}
        for token in tokenize(remove(selected_query)):
            token = stem(token)
            if token in stop_words or len(token) <= 1:
                continue
            if token in postings_by_term:
                continue
            postings = inverted_index.get(token)
            if postings:
                postings_by_term[token] = postings

        # Load the per-document maximum term frequency once rather than for
        # every (term, document) pair.
        most_word = read_most_word()

        ranked_doc = {}
        for postings in postings_by_term.values():
            idf = math.log2(total_documents / len(postings))
            for doc_id, count in postings.items():
                weight = 0.5 + (0.5 * (count / most_word[doc_id]))
                ranked_doc[doc_id] = ranked_doc.get(doc_id, 0) + weight * idf

        sorted_doc = sorted(ranked_doc.items(), key=lambda item: item[1], reverse=True)
        documents = read_data()
        requested = int(input("Masukan banyak dokumen relevan yang ini di tampilkan = "))
        # Slice instead of indexing so that asking for more results than were
        # ranked no longer raises IndexError; negative requests show nothing.
        shown = sorted_doc[:max(requested, 0)]
        for rank, (doc_id, score) in enumerate(shown, start=1):
            # Document ids are treated as 1-based positions in read_data().
            title = documents[int(doc_id) - 1]['title']
            print("{}. {} | similarity coefficient = {}".format(rank, title, score))

    a("pause")
    a("cls")
    start()