def bernoulli_bayes_test(train_files_number=10, lines_per_file=10):
    """Train a classifier and print its accuracy using the Bernoulli model.

    A fresh ``BayesTextClassifier`` is trained through ``train_classifier``
    on the ``"resources"`` folder, then every ``(expected_language, listing)``
    pair yielded by ``get_test_set`` is classified with ``model='bernoulli'``.
    A one-line summary with the hit count and the hit ratio is printed.

    Args:
        train_files_number: Passed to ``train_classifier``; reported in the
            summary as the number of training files per language.
        lines_per_file: Passed to ``get_test_set``; reported in the summary
            as the number of lines per test file.

    Returns:
        None. The result is only printed.
    """
    text_classifier = BayesTextClassifier()
    train_classifier(text_classifier, train_files_number, resource_folder="resources")

    case_count, correct_case_count = 0, 0
    # NOTE(review): the first argument to get_test_set is hard-coded to 10 and
    # does not follow train_files_number -- confirm this is intended.
    for expected_language, listing in get_test_set(10, lines_per_file):
        actual_language = text_classifier.test(listing, model='bernoulli')
        case_count += 1
        if expected_language == actual_language:
            correct_case_count += 1

    # Convert to float *before* dividing: int / int truncates on Python 2,
    # which made the reported ratio 0.0 unless every case was correct.
    # An empty test set is reported as 0.0 instead of raising ZeroDivisionError.
    productivity = float(correct_case_count) / case_count if case_count else 0.0

    message = ('Bernoulli Bayes Test. Model was trained on %d files per language. %d lines per test file. '
               '%d out of %d are correctly classifier. Productivity: %f'
               % (train_files_number, lines_per_file, correct_case_count, case_count, productivity))
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print(message)
def multinomial_bayes_test(train_files_number=10, lines_per_file=10,
                           feature_selection_method='mutual', feature_selection_count=-1):
    """Train a classifier and print its accuracy using the default model.

    A fresh ``BayesTextClassifier`` is trained through ``train_classifier``
    on the ``"resources"`` folder. If ``feature_selection_count`` is not
    ``-1``, feature selection is applied before testing. Every
    ``(expected_language, listing)`` pair yielded by ``get_test_set`` is then
    classified with ``text_classifier.test(listing)`` and a one-line summary
    with the hit count and the hit ratio is printed.

    Args:
        train_files_number: Passed to ``train_classifier``; reported in the
            summary as the number of training files per language.
        lines_per_file: Passed to ``get_test_set``; reported in the summary
            as the number of lines per test file.
        feature_selection_method: ``'mutual'`` calls
            ``make_feature_selection_mutual_information``; ``'square'`` calls
            ``make_feature_selection_chi_square``. Any other value results in
            no feature selection.
        feature_selection_count: Argument forwarded to the selected feature
            selection method; ``-1`` disables feature selection entirely.

    Returns:
        None. The result is only printed.
    """
    text_classifier = BayesTextClassifier()
    train_classifier(text_classifier, train_files_number, resource_folder="resources")

    if feature_selection_count != -1:
        if feature_selection_method == 'mutual':
            text_classifier.make_feature_selection_mutual_information(feature_selection_count)
        elif feature_selection_method == 'square':
            text_classifier.make_feature_selection_chi_square(feature_selection_count)
        # NOTE(review): an unrecognised method name is silently ignored here;
        # kept as-is to preserve existing caller behaviour.

    case_count, correct_case_count = 0, 0
    # NOTE(review): the first argument to get_test_set is hard-coded to 10 and
    # does not follow train_files_number -- confirm this is intended.
    for expected_language, listing in get_test_set(10, lines_per_file):
        actual_language = text_classifier.test(listing)
        case_count += 1
        if expected_language == actual_language:
            correct_case_count += 1

    # Convert to float *before* dividing: int / int truncates on Python 2,
    # which made the reported ratio 0.0 unless every case was correct.
    # An empty test set is reported as 0.0 instead of raising ZeroDivisionError.
    productivity = float(correct_case_count) / case_count if case_count else 0.0

    message = ('Multinomial Bayes Test. Model was trained on %d files per language. %d lines per test file. '
               '%d out of %d are correctly classifier. Productivity: %f'
               % (train_files_number, lines_per_file, correct_case_count, case_count, productivity))
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print(message)