예제 #1
0
 def get_scores(self, terms):
     """Accumulate a relevancy score per file for the given query terms.

     For every term present in ``self.term_freqs``, the weighted frequency
     (``self.get_wf`` applied to the stored frequency) of each file listed
     for that term is added to the file's running total.  Each total is then
     divided by the value stored for that file in ``self.doc_length``.

     Args:
         terms (list) : a list of str query terms
     Returns:
         HashTableLinear : table keyed by filename whose value is the
             file's normalised score (the table itself is returned, not a
             list of tuples)
     """
     scores = HashTableLinear()
     for term in terms:
         if not self.term_freqs.contains(term):
             continue
         postings = self.term_freqs[term]
         for entry in postings.slots:
             # Skip empty slots.  The contains() re-check mirrors the
             # original guard; presumably it filters stale slot entries --
             # TODO confirm against HashTableLinear.
             if entry is None or not postings.contains(entry[0]):
                 continue
             filename, freq = entry[0], entry[1]
             if scores.contains(filename):
                 # remove() hands back the (key, value) pair, giving us the
                 # running total to extend.
                 _, running = scores.remove(filename)
                 scores.put(filename, running + self.get_wf(freq))
             else:
                 scores.put(filename, self.get_wf(freq))

     # Snapshot the occupied keys before normalising: remove()/put() mutate
     # the table, so walking scores.slots while updating it could visit a
     # relocated entry twice (dividing it twice) or miss one entirely.
     filenames = [entry[0] for entry in scores.slots if entry is not None]
     for filename in filenames:
         _, total = scores.remove(filename)
         scores.put(filename, total / self.doc_length.get(filename))
     return scores
예제 #2
0
 def test_linear4(self):
     """Fill a HashTableLinear with 22 one-character keys, then empty it.

     Checks the reported size, that the load factor does not exceed 0.75
     after the inserts, that a few sample keys map to their code points,
     and that those keys are gone once every key has been removed.
     """
     table = HashTableLinear()
     key_count = 22
     sample_codes = (0, 1, 19)

     for code in range(key_count):
         table.put(chr(code), code)

     self.assertEqual(table.size(), key_count)
     self.assertTrue(table.load_factor() <= 0.75)
     for code in sample_codes:
         self.assertEqual(table[chr(code)], code)

     for code in range(key_count):
         table.remove(chr(code))

     for code in sample_codes:
         self.assertFalse(table.contains(chr(code)))
예제 #3
0
    def test_HashTableLinear(self):
        """Walk a HashTableLinear through insert, lookup, removal and re-insert."""
        table = HashTableLinear()

        # A fresh table is empty and raises KeyError for an unknown key.
        self.assertEqual(table.size(), 0)
        self.assertFalse(table.contains('us'))
        with self.assertRaises(KeyError):
            table.get('us')

        # First insert: retrievable via get() and indexing, no collision yet.
        table.put('us', 'us')
        self.assertEqual(table.get('us'), 'us')
        self.assertEqual(table['us'], 'us')
        self.assertTrue(table.contains('us'))
        self.assertFalse(table.contains('say'))
        self.assertEqual(table.size(), 1)
        self.assertEqual(table.collisions(), 0)

        # Second insert: the test expects exactly one collision to be counted.
        table.put('say', 'say')
        self.assertEqual(table.get('say'), 'say')
        self.assertTrue(table.contains('say'))
        self.assertEqual(table.size(), 2)
        self.assertEqual(table.collisions(), 1)

        # Removing one key must leave the other reachable.
        table.remove('say')
        self.assertFalse(table.contains('say'))
        self.assertTrue(table.contains('us'))
        table.remove('us')
        self.assertEqual(table.size(), 0)

        # Re-populate the emptied table and confirm the keys are found again.
        for word in ('us', 'say', 'the'):
            table.put(word, word)

        self.assertTrue(table.contains('us'))
        self.assertTrue(table.contains('the'))
    def test_whole_functionality(self):
        """Exercise HashTableLinear end to end.

        Covers loading stop words through ``import_stopwords``, equality
        between tables, the ``repr`` layout, item assignment/lookup, removal
        and the size/collision counters.
        """
        stop_words = import_stopwords('stop_words.txt', HashTableLinear())

        with self.assertRaises(KeyError):
            stop_words.get('BubbleGum')
        self.assertTrue('to' in stop_words)

        # Two tables built the same way compare equal; a table differs from
        # a differently-filled table and from a non-table value.
        left = HashTableLinear()
        left.put('three', 'three')
        right = HashTableLinear()
        right.put('three', 'three')
        self.assertEqual(left, right)
        self.assertNotEqual(stop_words, left)
        self.assertNotEqual(stop_words, 5)

        # repr() is expected to list all 11 slots, with 'three' in slot 4.
        expected_repr = (
            "Hash_val = 0: None\n"
            "Hash_val = 1: None\n"
            "Hash_val = 2: None\n"
            "Hash_val = 3: None\n"
            "Hash_val = 4: ('three', 'three')\n"
            "Hash_val = 5: None\n"
            "Hash_val = 6: None\n"
            "Hash_val = 7: None\n"
            "Hash_val = 8: None\n"
            "Hash_val = 9: None\n"
            "Hash_val = 10: None\n"
        )
        self.assertEqual(expected_repr, repr(left))

        left['four'] = 'four'
        self.assertEqual(left['four'], 'four')
        left['five'] = 'five'
        # The imported stop-word table is expected to hold 0 under 'from'.
        self.assertEqual(0, stop_words.get('from'))

        self.assertFalse(left.contains('p'))
        self.assertTrue(left.contains('five'))
        left.remove('five')
        self.assertFalse(left.contains('five'))
        with self.assertRaises(KeyError):
            left.remove('p')

        # The untouched twin still holds its single entry with no collisions.
        self.assertEqual(1, right.size())
        self.assertEqual(0, right.collisions())
예제 #5
0
 def test_hash_linear(self):
     """Check item access, overwrite, collision count and removal on HashTableLinear."""
     ht = HashTableLinear()
     self.assertEqual(ht.table_size, 11)

     # Seed four digit keys, each mapped to itself.
     for digit in ("3", "2", "4", "5"):
         ht[digit] = digit

     self.assertTrue("5" in ht)
     self.assertFalse("6" in ht)
     with self.assertRaises(KeyError):
         ht.get("6")

     # Assigning to an existing key replaces its value.
     ht["3"] = "6"
     self.assertEqual(ht["3"], "6")

     # chr(40) is "("; after inserting it the test expects one recorded
     # collision and the value under "3" to be unchanged.
     ht[chr(40)] = "20"
     self.assertEqual(ht["3"], "6")
     self.assertEqual(ht.num_collisions, 1)

     ht.remove("3")
     ht.remove("4")
     # A removed key can be neither fetched nor removed a second time.
     with self.assertRaises(KeyError):
         ht.get("4")
     with self.assertRaises(KeyError):
         ht.remove("4")