def get_scores(self, terms):
    """Compute a relevancy score for every file that matches the query terms.

    For each term found in ``self.term_freqs``, the weighted frequency
    (``self.get_wf``) of that term in each file is added to the file's
    running total. Each total is then divided by the value stored for the
    file in ``self.doc_length``.

    Args:
        terms (list) : a list of str, the terms of the query
    Returns:
        HashTableLinear : maps each matching filename to its relevancy score
    """
    scores = HashTableLinear()
    for term in terms:
        if not self.term_freqs.contains(term):
            continue
        postings = self.term_freqs[term]
        for entry in postings.slots:
            # The contains() check is kept from the original; it presumably
            # guards against stale slot entries -- TODO confirm it is needed.
            if entry is None or not postings.contains(entry[0]):
                continue
            filename = entry[0]
            weighted = self.get_wf(entry[1])
            if scores.contains(filename):
                # Accumulate via remove + put, as the original did.
                _, running = scores.remove(filename)
                scores.put(filename, running + weighted)
            else:
                scores.put(filename, weighted)

    # Snapshot the filenames before normalising: remove()/put() below mutate
    # scores.slots, and walking the live list while it changes could visit a
    # relocated entry twice (dividing it twice) or skip one, depending on how
    # HashTableLinear repositions entries.
    filenames = [entry[0] for entry in scores.slots if entry is not None]
    for filename in filenames:
        _, total = scores.remove(filename)
        scores.put(filename, total / self.doc_length.get(filename))
    return scores
def test_linear4(self):
    """Insert 22 single-character keys, spot-check lookups, then remove them all."""
    total = 22
    spot_checks = (0, 1, 19)

    table = HashTableLinear()
    for code in range(total):
        table.put(chr(code), code)

    self.assertEqual(table.size(), total)
    self.assertLessEqual(table.load_factor(), 0.75)
    for code in spot_checks:
        self.assertEqual(table[chr(code)], code)

    for code in range(total):
        table.remove(chr(code))
    for code in spot_checks:
        self.assertFalse(table.contains(chr(code)))
def test_HashTableLinear(self):
    """Walk a HashTableLinear through put, get, contains, size, collisions and remove."""
    table = HashTableLinear()

    # A fresh table is empty and rejects unknown keys.
    self.assertEqual(table.size(), 0)
    self.assertFalse(table.contains('us'))
    self.assertRaises(KeyError, table.get, 'us')

    # First insertion: reachable through get() and indexing, no collision yet.
    table.put('us', 'us')
    self.assertEqual(table.get('us'), 'us')
    self.assertEqual(table['us'], 'us')
    self.assertTrue(table.contains('us'))
    self.assertFalse(table.contains('say'))
    self.assertEqual(table.size(), 1)
    self.assertEqual(table.collisions(), 0)

    # Second insertion: the test expects exactly one collision to be recorded.
    table.put('say', 'say')
    self.assertEqual(table.get('say'), 'say')
    self.assertTrue(table.contains('say'))
    self.assertEqual(table.size(), 2)
    self.assertEqual(table.collisions(), 1)

    # Removing one key must leave the other in place.
    table.remove('say')
    self.assertFalse(table.contains('say'))
    self.assertTrue(table.contains('us'))
    table.remove('us')
    self.assertEqual(table.size(), 0)

    # Refill the emptied table and confirm lookups still succeed.
    for word in ('us', 'say', 'the'):
        table.put(word, word)
    self.assertTrue(table.contains('us'))
    self.assertTrue(table.contains('the'))
def test_whole_functionality(self):
    """Exercise HashTableLinear end to end: stop-word import, equality, repr, item access and removal."""
    # Table populated from the stop-word file.
    stop_words = import_stopwords('stop_words.txt', HashTableLinear())
    self.assertRaises(KeyError, stop_words.get, 'BubbleGum')
    self.assertIn('to', stop_words)

    # Two tables with the same single entry compare equal; anything else does not.
    left = HashTableLinear()
    left.put('three', 'three')
    right = HashTableLinear()
    right.put('three', 'three')
    self.assertEqual(left, right)
    self.assertNotEqual(stop_words, left)
    self.assertNotEqual(stop_words, 5)

    # repr() is expected to list every slot of the table, one per line.
    expected = (
        "Hash_val = 0: None\n"
        "Hash_val = 1: None\n"
        "Hash_val = 2: None\n"
        "Hash_val = 3: None\n"
        "Hash_val = 4: ('three', 'three')\n"
        "Hash_val = 5: None\n"
        "Hash_val = 6: None\n"
        "Hash_val = 7: None\n"
        "Hash_val = 8: None\n"
        "Hash_val = 9: None\n"
        "Hash_val = 10: None\n"
    )
    self.assertEqual(expected, repr(left))

    # Item assignment and lookup.
    left['four'] = 'four'
    self.assertEqual(left['four'], 'four')
    left['five'] = 'five'
    self.assertEqual(0, stop_words.get('from'))

    # Membership before and after removal; removing a missing key raises.
    self.assertFalse(left.contains('p'))
    self.assertTrue(left.contains('five'))
    left.remove('five')
    self.assertFalse(left.contains('five'))
    self.assertRaises(KeyError, left.remove, 'p')

    # The untouched twin still holds exactly one entry and saw no collisions.
    self.assertEqual(1, right.size())
    self.assertEqual(0, right.collisions())
def test_hash_linear(self):
    """Check item assignment, membership, overwrite, collision count and removal."""
    ht = HashTableLinear()
    self.assertEqual(ht.table_size, 11)

    # Seed the table with keys that map to themselves.
    for digit in ("3", "2", "4", "5"):
        ht[digit] = digit
    self.assertIn("5", ht)
    self.assertNotIn("6", ht)
    self.assertRaises(KeyError, ht.get, "6")

    # Assigning to an existing key replaces its value.
    ht["3"] = "6"
    self.assertEqual(ht["3"], "6")

    # Adding this key must not disturb "3"; the test expects it to
    # register exactly one collision.
    ht[chr(40)] = "20"
    self.assertEqual(ht["3"], "6")
    self.assertEqual(ht.num_collisions, 1)

    # Removed keys are gone for both get() and a second remove().
    ht.remove("3")
    ht.remove("4")
    self.assertRaises(KeyError, ht.get, "4")
    self.assertRaises(KeyError, ht.remove, "4")