def main(logger):
    """Train and evaluate a LinearSequence-backed Viterbi object per language.

    Builds the data parser, a LinearSequence model (averaging and suffix
    options enabled), a Viterbi object wrapping that model, and an accuracy
    estimator.  For every ``(language, language_file)`` pair in DATA_FILES it
    trains, prints the prediction for one fixed sample sentence, prints the
    total and unseen accuracies, and resets the Viterbi object and the
    estimator before moving on.

    Args:
        logger: Passed unchanged to every component constructor.
    """
    print("Initializing Data Parser...")
    parser = OrwellDataParser(logger)
    print("Initializing Linear Sequence Model...")
    model = LinearSequence(logger, parser, use_avg=True, use_suffix=True)
    print("Initializing Viterbi...")
    viterbi = Viterbi(logger, model)
    print("Initializing Accuracy Estimator...")
    estimator = AccuracyEstimator(logger, parser)

    banner = "******************************************************************************************"

    for lang_name, data_path in DATA_FILES:
        print(banner)
        print(lang_name)
        print(banner)

        print("Training Linear Sequence Linear Sequence Model with %s data..." % lang_name)
        # NOTE(review): train receives START_LINE - 1 and the estimator
        # START_LINE; presumably the train/evaluation split -- confirm.
        viterbi.train(data_path, START_LINE - 1)

        # Print the prediction for a fixed sample sentence after training.
        print(viterbi.predict_sequence(
            ["his", "breast", "rose", "and", "fell", "a", "little", "faster", "."]
        ))

        print("Estimating accuracy of the model...")
        scores = estimator.compute_parameters(viterbi, data_path, START_LINE)
        overall, unseen = scores
        print("TOTAL ACCURACY : %.10f" % overall)
        print("UNSEEN_ACCURACY : %.10f" % unseen)

        print("Resetting model and estimator parameters...")
        viterbi.reset()
        estimator.reset()
def main(logger):
    """Train and evaluate an HMM-backed Viterbi object per language.

    Builds the data parser, an HMM (``skew_unseen=True``), a Viterbi object
    wrapping that HMM, and an accuracy estimator.  For every
    ``(language, language_file)`` pair in DATA_FILES it trains, prints the
    total and unseen accuracies, and resets the Viterbi object and the
    estimator before the next language.

    Args:
        logger: Passed unchanged to every component constructor.
    """
    print("Initializing Data Parser...")
    parser = OrwellDataParser(logger)
    print("Initializing HMM...")
    hmm = HMM(logger, parser, skew_unseen=True)
    print("Initializing Viterbi...")
    viterbi = Viterbi(logger, hmm)
    print("Initializing Accuracy Estimator...")
    estimator = AccuracyEstimator(logger, parser)

    banner = "******************************************************************************************"

    for lang_name, data_path in DATA_FILES:
        print(banner)
        print(lang_name)
        print(banner)

        print("Training HMM with %s data..." % lang_name)
        # NOTE(review): train receives START_LINE - 1 and the estimator
        # START_LINE; presumably the train/evaluation split -- confirm.
        viterbi.train(data_path, START_LINE - 1)

        print("Estimating accuracy of the model...")
        scores = estimator.compute_parameters(viterbi, data_path, START_LINE)
        overall, unseen = scores
        print("TOTAL ACCURACY : %.10f" % overall)
        print("UNSEEN_ACCURACY : %.10f" % unseen)

        print("Resetting model and estimator parameters...")
        viterbi.reset()
        estimator.reset()
def main(logger):
    """Train and evaluate a LinearSequence-backed Viterbi object, saving results.

    Builds the data parser, a LinearSequence model, a Viterbi object wrapping
    it, and an accuracy estimator.  For every ``(language, language_file)``
    pair in TRAINING_FILES it trains on ``Data/<language_file>``, computes the
    total and unseen accuracies, prints them, and appends a summary record to
    the file named by OUTPUT.  The Viterbi object and estimator are reset
    after each language.

    The report file is opened in a ``with`` block so it is flushed and closed
    even if training or estimation raises part-way through.

    Args:
        logger: Passed unchanged to every component constructor.
    """
    with open(OUTPUT, "w") as out_ptr:
        print("Initializing Data Parser...")
        data_parser = OrwellDataParser(logger)
        print("Initializing Linear Sequence Model...")
        ls_obj = LinearSequence(logger, data_parser)
        print("Initializing Viterbi...")
        viterbi_obj = Viterbi(logger, ls_obj)
        print("Initializing Accuracy Estimator...")
        accuracy_estimator = AccuracyEstimator(logger, data_parser)

        banner = "******************************************************************************************"

        for language, language_file in TRAINING_FILES:
            # File names in TRAINING_FILES are resolved under "Data/".
            data_path = "Data/%s" % language_file

            print(banner)
            print(language)
            print(banner)

            print("Training Linear Sequence Linear Sequence Model with %s data..." % language)
            # NOTE(review): train receives START_LINE - 1 and the estimator
            # START_LINE; presumably the train/evaluation split -- confirm.
            viterbi_obj.train(data_path, START_LINE - 1)

            print("Estimating accuracy of the model...")
            total_accuracy, unseen_accuracy = accuracy_estimator.compute_parameters(
                viterbi_obj,
                data_path,
                "inaccurate_words_%s.txt" % language,
                language,
                START_LINE,
            )
            print("TOTAL ACCURACY : %.10f" % total_accuracy)
            print("UNSEEN_ACCURACY : %.10f" % unseen_accuracy)

            o_line = "Language : %s\nTotal Accuracy : %.10f\nUnseen Accuracy : %.10f\n\n\n" % (
                language, total_accuracy, unseen_accuracy)
            out_ptr.write(o_line)

            print("Resetting model and estimator parameters...")
            viterbi_obj.reset()
            accuracy_estimator.reset()