def main():
  parser = optparse.OptionParser()
  parser.add_option('-s', '--smoothing', choices=(NO_SMOOTHING,
    ADD_ONE_SMOOTHING), default=NO_SMOOTHING)
  parser.add_option('-o', '--order', default=1, type=int)
  parser.add_option('-u', '--unknown',
      choices=(PREDICT_ZERO, PREDICT_MOST_COMMON_PART_OF_SPEECH,),
      default=PREDICT_ZERO)
  options, args = parser.parse_args()
  train_filename, test_filename = args
  training_data = nlp_common.read_part_of_speech_file(train_filename)
  if options.order == 0:
    model = BaselineModel(training_data)
  else:
    model = HiddenMarkovModel.train(
        training_data, options.smoothing, options.unknown, options.order)
  predictions = nlp_common.get_predictions(
      test_filename, model.predict_sentence)
  for word, prediction, true_pos in predictions:
    print word, prediction, true_pos
def main():
  if len(sys.argv) != 3:
    print 'Usage: %s training_filename test_filename' % sys.argv[0]
    return 1
  train_filename, test_filename = sys.argv[1:]
  training_data = nlp_common.read_part_of_speech_file(train_filename)
  known_words = set(word for pos, word in training_data)
  main_start_time = time.time()
  print >> sys.stderr, 'Training baseline model'
  start_time = time.time()
  baseline_model = nlp.BaselineModel(training_data)
  print "Baseline trained in " + str(time.time() - start_time) + " seconds." 
  print >> sys.stderr, 'Evaluating baseline model'
  baseline_unknown_accuracy, baseline_accuracy = compute_score(
      nlp_common.get_predictions(
        test_filename, baseline_model.predict_sentence), known_words)
  start_time = time.time()
  print >> sys.stderr, 'Training hmm model'
  hmm_model = nlp.HiddenMarkovModel.train(training_data)
  print "HMM trained in " + str(time.time() - start_time) + " seconds." 
  print >> sys.stderr, 'Evaluating hmm model'
  hmm_unknown_accuracy, hmm_accuracy = compute_score(
      nlp_common.get_predictions(
        test_filename, hmm_model.predict_sentence), known_words)

  print '%s Baseline accuracy' % baseline_accuracy
  print '%s Baseline accuracy on unknown words' % baseline_unknown_accuracy
  print '%s HMM accuracy' % hmm_accuracy
  print '%s HMM accuracy on unknown words' % hmm_unknown_accuracy

  print 'Score for Part III: %d/50' % (
      math.ceil(max(baseline_accuracy.value(), hmm_accuracy.value()) * 50))

  print 'Score for Part IV-unknown words: %d/20' % (
      max(0, math.ceil((hmm_unknown_accuracy.value() - 0.6) * 50)))
  print 'Finished in ' + str(time.time() - main_start_time)