def test_create_cache_files_after_first_set(self):
    '''Check that the cache files reappear after the cache folder is emptied.

    The test first requires an existing ``vector.cache``, then deletes
    every regular file in the cache folder, constructs
    ``bayes.Classify(cache=False)`` and expects ``vector.cache``,
    ``vocab.cache`` and ``classify.cache`` to be present afterwards.
    '''
    cache_dir = os.path.join(bayes.BASE_DIR, 'all_data/Chinese/cache')
    cache_files = (
        'all_data/Chinese/cache/vector.cache',
        'all_data/Chinese/cache/vocab.cache',
        'all_data/Chinese/cache/classify.cache',
    )

    # Precondition: a vector cache is already on disk before the cleanup.
    self.assertTrue(
        os.path.isfile(
            os.path.join(bayes.BASE_DIR,
                         'all_data/Chinese/cache/vector.cache')))

    # Delete every regular file in the cache folder. Errors are not
    # swallowed: if a file could not be removed, the final assertions
    # would pass without proving that anything was regenerated.
    for entry in os.listdir(cache_dir):
        entry_path = os.path.join(cache_dir, entry)
        if os.path.isfile(entry_path):
            os.unlink(entry_path)

    # Make the "cache is gone" state explicit before rebuilding.
    for relative_path in cache_files:
        self.assertFalse(
            os.path.isfile(os.path.join(bayes.BASE_DIR, relative_path)))

    # Only the construction matters here; the instance itself is unused.
    bayes.Classify(cache=False)

    for relative_path in cache_files:
        self.assertTrue(
            os.path.isfile(os.path.join(bayes.BASE_DIR, relative_path)))
def test_cache_work(self):
    '''Classify one Chinese sentence and expect ``normal.dat`` as top label.'''
    # NOTE(review): the test name mentions the cache, yet the classifier
    # is built with cache=False - confirm this is the intended setting.
    classifier = bayes.Classify(cache=False)
    sentence = (
        '美联储当天结束货币政策例会后发表声明说,自2017年12月以来,'
        + '美国就业市场和经济活动继续保持稳健增长,失业率继续维持在低水平。'
    )
    scores, _ = classifier.bayes_classify(sentence)

    # Highest second element first; the label is the first element.
    ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
    best_label = ranked[0][0]
    self.assertEqual(best_label, 'normal.dat')
def test_have_default_cache_files(self):
    '''Check that the three cache files exist after building a classifier.'''
    bayes.Classify(cache=False)

    expected_caches = (
        'all_data/Chinese/cache/vector.cache',
        'all_data/Chinese/cache/classify.cache',
        'all_data/Chinese/cache/vocab.cache',
    )
    for relative_path in expected_caches:
        cache_path = os.path.join(bayes.BASE_DIR, relative_path)
        self.assertTrue(os.path.isfile(cache_path))
def test_error_rate(self):
    '''Print the mean misclassification rate over 20 classifier builds.

    This test makes no assertions; it only reports the averaged rate.
    '''

    def error_rate(instance):
        # Fraction of instance.test_data whose top-ranked label differs
        # from the matching entry of instance.test_classify.
        predicted = []
        for sample in instance.test_data:
            scores, _ = instance.bayes_classify(sample)
            ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
            predicted.append(ranked[0][0])
            # For debugging, print `sample`, `ranked[0][0]` and `scores`
            # here whenever the label differs from the expected one.

        mismatches = sum(
            1
            for expected, got in zip(instance.test_classify, predicted)
            if expected != got
        )
        return mismatches / len(instance.test_data)

    test_times = 20
    rates = []
    print('\nThis may takes some time')
    for done in range(test_times):
        # Progress report every fifth run.
        if done % 5 == 0:
            print('Completed %s tasks, %s tasks left.' %
                  (done, test_times - done))
        rates.append(error_rate(bayes.Classify(cache=False)))

    average_percent = sum(rates) / test_times * 100
    print('The error rate is %s' % "{0:.2f}".format(average_percent) + '%')
def test_data_num_correct(self):
    '''Check that ``test_num=80`` yields exactly 80 entries in ``test_data``.'''
    test_bayes = bayes.Classify(test_num=80, cache=False)
    # assertEqual is required here: assertTrue(x, 80) would treat 80 as
    # the failure message and pass for any non-empty test_data.
    self.assertEqual(len(test_bayes.test_data), 80)