def test_get_top(self):
     '''Check the top words returned for the Yeats poem when no count is given.'''
     corpus = tf.TFIDFCorpus('text-files')
     expected_words = [
         'comes', 'glass', 'grow', 'lift', 'sigh',
         "that's", 'truth', 'we', 'wine', 'mouth',
     ]
     top_words = corpus.get_top_words('a-drinking-song-yeats.txt')
     self.assertEqual(top_words, expected_words)
 def test_computeidf(self):
     '''Check selected IDF values and that 'facebook' is not an IDF key.'''
     corpus = tf.TFIDFCorpus('text-files')
     idf_terms = corpus.idf.keys()
     expected_idf = (('light', 2.0), ('cold', 4.0), ('time', 2.0))
     for term, value in expected_idf:
         self.assertEqual(corpus.idf[term], value)
     self.assertNotIn('facebook', idf_terms)
Example #3
0
 def test_files(self):
     '''Check that the corpus reports the expected list of filenames.'''
     expected_names = [
         'a-drinking-song-yeats.txt',
         'christmas-carol.txt',
         'mercutio.txt',
         'the-start-teasdale.txt',
     ]
     corpus = tf.TFIDFCorpus('text-files')
     self.assertEqual(corpus.get_filenames(), expected_names)
 def test_gettfidf(self):
     '''Check TFIDF scores for known terms and the error for 'google'.'''
     corpus = tf.TFIDFCorpus('text-files')
     expected_scores = (
         ('mercutio.txt', 'her', 0.008630462173553425),
         ('mercutio.txt', 'spider', 0.006931471805599453),
         ('christmas-carol.txt', 'her', 0.00040748168902518533),
     )
     for filename, term, score in expected_scores:
         self.assertAlmostEqual(corpus.get_tfidf(filename, term), score)
     # Looking up 'google' in this document must raise ValueError.
     self.assertRaises(
         ValueError, corpus.get_tfidf, 'christmas-carol.txt', 'google'
     )
Example #5
0
    def test_create_docs(self):
        '''Check that the corpus built from 'text-files' holds four docs.'''
        corpus = tf.TFIDFCorpus('text-files')
        self.assertEqual(len(corpus.docs), 4)
Example #6
0
                     'christmas-carol.txt',
                     'mercutio.txt', 
                     'the-start-teasdale.txt']
        self.assertEqual(result, file_list)

    
    def test_create_docs(self):
        '''Check that the corpus built from 'text-files' holds four docs.'''
        corpus = tf.TFIDFCorpus('text-files')
        doc_count = len(corpus.docs)
        self.assertEqual(doc_count, 4)
    
    def test_computeidf(self):
        '''Test generation of IDF dictionary'''
        my_tfidf = tf.TFIDFCorpus('text-files')
        k = my_tfidf.idf.keys()
        self.assertEqual(my_tfidf.idf['light'], 2.0)
        self.assertEqual(my_tfidf.idf['cold'], 4.0)
        self.assertEqual(my_tfidf.idf['time'], 2.0)
        self.assertNotIn('facebook', k)
    
    def test_gettfidf(self):
        '''Test calculation of TFIDF'''
        my_tfidf = tf.TFIDFCorpus('text-files')
        result = my_tfidf.get_tfidf('mercutio.txt', 'her')
        self.assertAlmostEqual(result, 0.008630462173553425)
        result = my_tfidf.get_tfidf('mercutio.txt', 'spider')
        self.assertAlmostEqual(result, 0.006931471805599453)
        result = my_tfidf.get_tfidf('christmas-carol.txt', 'her')
        self.assertAlmostEqual(result, 0.00040748168902518533)
 def test_get_three(self):
     '''Check the top words for mercutio.txt when n=3 is requested.'''
     corpus = tf.TFIDFCorpus('text-files')
     expected_words = ['dream', "o'er", 'then']
     top_three = corpus.get_top_words('mercutio.txt', n=3)
     self.assertEqual(top_three, expected_words)