def predict(self, test_file, output_file):
    """Score every sample in *test_file* and write the scores to *output_file*.

    Each input line is split on tabs: the first field is parsed as the
    integer label, and the remaining fields are passed to
    ``Sample.load_sample`` together with the label and this object's word
    dictionary.  The classifier's score for the sample is written to
    *output_file* as one line holding the label, a tab, and the score.

    The score is then thresholded at 0.5 (``1`` when strictly greater,
    ``-1`` otherwise) and recorded, with the label, in a
    ``model_evaluate.ModelEvaluate`` instance whose ``report()`` is called
    once after all lines have been processed.

    Args:
        test_file: Path of the tab-separated file to read samples from.
        output_file: Path of the file the per-sample scores are written to;
            it is opened in ``"w"`` mode, so existing content is replaced.

    Raises:
        ValueError: If the first field of a line is not a valid integer.
        IOError/OSError: If either file cannot be opened.
    """
    me = model_evaluate.ModelEvaluate()

    # The input is opened first so that an unreadable test file does not
    # truncate an existing output file.  The ``with`` block guarantees both
    # handles are closed even when a line fails to parse or the classifier
    # raises.
    with open(test_file) as fp, open(output_file, "w") as wfp:
        sys.stderr.write("start to predict ...\n")
        for line in fp:
            arr = line.strip('\r\n').split('\t')
            label = int(arr[0])

            test_sample = Sample()
            test_sample.load_sample(arr[1:], label, self.__word_dict)
            p = self.__classifier(test_sample)

            # The raw score is persisted; only the evaluation uses the
            # thresholded +1 / -1 decision.
            wfp.write("%d\t%f\n" % (label, p))
            me.add(label, 1 if p > 0.5 else -1)

    me.report()
# Remaining command-line options; `parser` and the options backing
# `args.train_file` / `args.dict_file` are defined earlier in the script.
parser.add_argument("-t", "--test_file", help="test file")
parser.add_argument("-o", "--output_file", help="output file")
args = parser.parse_args()

train_file = args.train_file
test_file = args.test_file
dict_file = args.dict_file
output_file = args.output_file

# All three input paths are mandatory and must exist; on the first one that
# is missing, show the usage text and exit with status 1.
for required_path in (train_file, test_file, dict_file):
    if not required_path or not os.path.exists(required_path):
        parser.print_help()
        sys.exit(1)

me = model_evaluate.ModelEvaluate()
naive_bayes = NaiveBayes()
naive_bayes.train_model(train_file, dict_file)

# Evaluate the trained model line by line.  The `with` block ensures the test
# file is closed even if parsing or prediction raises.
with open(test_file) as fp:
    for line in fp:
        # `get_articel_words` (spelling as defined on NaiveBayes) yields the
        # line's label and its words.
        label, words = naive_bayes.get_articel_words(line)
        # predict() returns a pair; only its first element (the predicted
        # label) is used for evaluation.
        predicted_label, _ = naive_bayes.predict(words)
        me.add(int(label), int(predicted_label))

me.report()