예제 #1
0
 def test_create_cache_files_after_first_set(self):
     """Check that the cache files are present again after being wiped.

     The test first requires ``vector.cache`` to exist, then removes every
     regular file in the cache folder, instantiates
     ``bayes.Classify(cache=False)`` and finally asserts that
     ``vector.cache``, ``vocab.cache`` and ``classify.cache`` all exist.
     """
     cache_dir = os.path.join(bayes.BASE_DIR, 'all_data/Chinese/cache')
     expected_caches = ('vector.cache', 'vocab.cache', 'classify.cache')

     # Precondition: a cache file must already be there before the wipe.
     self.assertTrue(
         os.path.isfile(os.path.join(cache_dir, 'vector.cache')))

     for entry in os.listdir(cache_dir):
         entry_path = os.path.join(cache_dir, entry)
         if not os.path.isfile(entry_path):
             continue
         try:
             os.unlink(entry_path)
         except OSError as err:
             # A leftover file would let the assertions below pass without
             # anything having been regenerated, so report the failure
             # instead of silently ignoring it.
             self.fail('could not remove %s: %s' % (entry_path, err))

     # Only the side effect of constructing the classifier matters here.
     bayes.Classify(cache=False)

     for cache_name in expected_caches:
         self.assertTrue(
             os.path.isfile(os.path.join(cache_dir, cache_name)),
             '%s is missing after Classify() was created' % cache_name)
예제 #2
0
 def test_cache_work(self):
     """Classify one fixed sentence and expect ``'normal.dat'`` on top."""
     classifier = bayes.Classify(cache=False)
     sentence = (
         '美联储当天结束货币政策例会后发表声明说,自2017年12月以来,'
         '美国就业市场和经济活动继续保持稳健增长,失业率继续维持在低水平。')
     scores, _words = classifier.bayes_classify(sentence)
     # Rank entries by their second field, highest first; field 0 is the
     # label that gets compared.
     ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
     best_label = ranked[0][0]
     self.assertEqual(best_label, 'normal.dat')
예제 #3
0
 def test_have_default_cache_files(self):
     """Assert the three cache files exist once ``Classify`` is created."""
     bayes.Classify(cache=False)
     # Checked in the same order as before: vector, classify, vocab.
     cache_names = ('vector.cache', 'classify.cache', 'vocab.cache')
     for cache_name in cache_names:
         cache_path = os.path.join(
             bayes.BASE_DIR, 'all_data/Chinese/cache/' + cache_name)
         self.assertTrue(os.path.isfile(cache_path))
예제 #4
0
    def test_error_rate(self):
        """
        Print the average misclassification rate over 20 new classifiers.

        Makes no assertions; it only prints progress and the final rate.
        """
        def error_rate(instance):
            # Fraction of instance.test_data whose top-ranked label differs
            # from the matching entry of instance.test_classify.
            predicted_labels = []
            for sample in instance.test_data:
                scores, _words = instance.bayes_classify(sample)
                ranked = sorted(scores,
                                key=lambda pair: pair[1],
                                reverse=True)
                predicted_labels.append(ranked[0][0])
            # To inspect misclassified sentences, print the entries of
            # zip(instance.test_data, instance.test_classify,
            # predicted_labels) whose expected and predicted labels differ.
            mismatches = sum(
                1
                for expected, predicted in zip(instance.test_classify,
                                               predicted_labels)
                if expected != predicted
            )
            return mismatches / len(instance.test_data)

        total_runs = 20
        rates = []
        print('\nThis may takes some time')
        for run in range(total_runs):
            # Progress report every fifth run.
            if run % 5 == 0:
                print('Completed %s tasks, %s tasks left.' %
                      (run, total_runs - run))
            rates.append(error_rate(bayes.Classify(cache=False)))
        mean_percent = sum(rates) / total_runs * 100
        print('The error rate is %s' %
              "{0:.2f}".format(mean_percent) + '%')
예제 #5
0
 def test_data_num_correct(self):
     """Check that ``Classify(test_num=80)`` exposes 80 ``test_data`` items."""
     test_bayes = bayes.Classify(test_num=80, cache=False)
     # assertTrue(x, 80) would treat 80 as the failure message and pass for
     # any non-empty test_data; assertEqual actually compares the count.
     self.assertEqual(len(test_bayes.test_data), 80)