def main():
    """Train the baseline and HMM models, evaluate both, and print scores.

    Reads exactly two command-line arguments from ``sys.argv``: the training
    filename and the test filename.  Progress and usage messages are written
    to stderr; accuracies and the derived assignment scores are written to
    stdout, so stdout carries results only.

    Returns:
        1 if the wrong number of arguments was supplied (after printing a
        usage message).  On a normal run the function falls off the end and
        returns None.
    """
    if len(sys.argv) != 3:
        # Usage errors are diagnostics, so they go to stderr like the
        # progress messages below rather than polluting the results stream.
        sys.stderr.write(
            'Usage: %s training_filename test_filename\n' % sys.argv[0])
        return 1

    train_filename, test_filename = sys.argv[1:]
    training_data = hw5_common.read_part_of_speech_file(train_filename)
    # Words seen during training; passed to compute_score, presumably so it
    # can report accuracy on unseen words separately (verify against
    # compute_score).
    known_words = set(word for pos, word in training_data)

    sys.stderr.write('Training baseline model\n')
    baseline_model = hw5.BaselineModel(training_data)
    sys.stderr.write('Evaluating baseline model\n')
    baseline_unknown_accuracy, baseline_accuracy = compute_score(
        hw5_common.get_predictions(
            test_filename, baseline_model.predict_sentence),
        known_words)

    sys.stderr.write('Training hmm model\n')
    hmm_model = hw5.HiddenMarkovModel.train(training_data)
    sys.stderr.write('Evaluating hmm model\n')
    hmm_unknown_accuracy, hmm_accuracy = compute_score(
        hw5_common.get_predictions(
            test_filename, hmm_model.predict_sentence),
        known_words)

    # Single-argument parenthesised print behaves identically as a Python 2
    # print statement and as a Python 3 function call.
    print('%s Baseline accuracy' % baseline_accuracy)
    print('%s Baseline accuracy on unknown words' % baseline_unknown_accuracy)
    print('%s HMM accuracy' % hmm_accuracy)
    print('%s HMM accuracy on unknown words' % hmm_unknown_accuracy)

    # Part III is graded on whichever model did better, scaled to 50 points
    # and rounded up.
    print('Score for Part III: %d/50' % (
        math.ceil(max(baseline_accuracy.value(),
                      hmm_accuracy.value()) * 50)))
    # Part IV only rewards HMM unknown-word accuracy above 0.6; max(0, ...)
    # keeps the score from going negative below that threshold.
    print('Score for Part IV-unknown words: %d/20' % (
        max(0, math.ceil((hmm_unknown_accuracy.value() - 0.6) * 50))))
def main():
    """Train a tagging model and print its predictions for a test file.

    Command line: ``[options] training_filename test_filename``

    Options:
        -s / --smoothing: one of NO_SMOOTHING or ADD_ONE_SMOOTHING
            (default NO_SMOOTHING).
        -o / --order: integer model order (default 1).  An order of 0
            selects BaselineModel instead of the hidden Markov model.
        -u / --unknown: one of PREDICT_ZERO or
            PREDICT_MOST_COMMON_PART_OF_SPEECH (default PREDICT_ZERO).

    For every (word, prediction, true_pos) triple produced by
    ``hw5_common.get_predictions`` one space-separated line is printed to
    stdout.

    Exits with status 2 and a usage message (via ``parser.error``) unless
    exactly two positional arguments are given.
    """
    parser = optparse.OptionParser(
        usage='%prog [options] training_filename test_filename')
    parser.add_option('-s', '--smoothing',
                      choices=(NO_SMOOTHING, ADD_ONE_SMOOTHING),
                      default=NO_SMOOTHING)
    parser.add_option('-o', '--order', default=1, type=int)
    parser.add_option('-u', '--unknown',
                      choices=(PREDICT_ZERO,
                               PREDICT_MOST_COMMON_PART_OF_SPEECH,),
                      default=PREDICT_ZERO)
    options, args = parser.parse_args()

    # Report a wrong argument count as a usage error instead of letting the
    # tuple unpacking below die with an opaque ValueError traceback.
    if len(args) != 2:
        parser.error(
            'expected exactly 2 arguments (training_filename '
            'test_filename), got %d' % len(args))
    train_filename, test_filename = args

    training_data = hw5_common.read_part_of_speech_file(train_filename)
    if options.order == 0:
        # Order 0 bypasses the HMM; the smoothing and unknown-word options
        # are not passed to the baseline model.
        model = BaselineModel(training_data)
    else:
        model = HiddenMarkovModel.train(
            training_data, options.smoothing, options.unknown,
            options.order)

    predictions = hw5_common.get_predictions(
        test_filename, model.predict_sentence)
    for word, prediction, true_pos in predictions:
        # Single-argument parenthesised print yields the same
        # space-separated line on Python 2 and Python 3.
        print('%s %s %s' % (word, prediction, true_pos))