Example #1
0
def main(logger):
    """Train and score a LinearSequence-backed Viterbi object per data set.

    Builds an OrwellDataParser, a LinearSequence model (with ``use_avg`` and
    ``use_suffix`` enabled), a Viterbi object wrapping that model and an
    AccuracyEstimator. For every ``(language, language_file)`` pair in
    DATA_FILES it trains, prints the prediction for one fixed sample
    sentence, prints the total and unseen accuracy, and finally resets both
    the Viterbi object and the estimator before the next data set.

    Args:
        logger: Handed unchanged to every component constructor.

    Returns:
        None. All results are written to standard output.
    """
    banner = "******************************************************************************************"

    print("Initializing Data Parser...")
    parser = OrwellDataParser(logger)

    print("Initializing Linear Sequence Model...")
    sequence_model = LinearSequence(logger, parser, use_avg=True, use_suffix=True)

    print("Initializing Viterbi...")
    decoder = Viterbi(logger, sequence_model)

    print("Initializing Accuracy Estimator...")
    estimator = AccuracyEstimator(logger, parser)

    for language, language_file in DATA_FILES:
        print(banner)
        print(language)
        print(banner)

        print("Training Linear Sequence Linear Sequence Model with %s data..." % language)
        # NOTE(review): training receives START_LINE - 1 while evaluation
        # receives START_LINE; presumably a train/test split point -- confirm
        # against Viterbi.train and AccuracyEstimator.compute_parameters.
        decoder.train(language_file, START_LINE - 1)

        # Quick smoke check on a fixed sentence; a fresh list is built on
        # every iteration, exactly as a literal argument would be.
        sample_sentence = ["his", "breast", "rose", "and", "fell", "a", "little", "faster", "."]
        print(decoder.predict_sequence(sample_sentence))

        print("Estimating accuracy of the model...")
        total_accuracy, unseen_accuracy = estimator.compute_parameters(
            decoder, language_file, START_LINE
        )

        print("TOTAL ACCURACY           : %.10f" % total_accuracy)
        print("UNSEEN_ACCURACY          : %.10f" % unseen_accuracy)
        print("Resetting model and estimator parameters...")

        # Clear the state of both objects before moving on to the next
        # data set.
        decoder.reset()
        estimator.reset()
Example #2
0
def main(logger):
    """Train and score an HMM-backed Viterbi object per data set.

    Builds an OrwellDataParser, an HMM (with ``skew_unseen`` enabled), a
    Viterbi object wrapping that HMM and an AccuracyEstimator. For every
    ``(language, language_file)`` pair in DATA_FILES it trains, prints the
    total and unseen accuracy, and resets both the Viterbi object and the
    estimator before the next data set.

    Args:
        logger: Handed unchanged to every component constructor.

    Returns:
        None. All results are written to standard output.
    """
    banner = "******************************************************************************************"

    print("Initializing Data Parser...")
    parser = OrwellDataParser(logger)

    print("Initializing HMM...")
    hmm_model = HMM(logger, parser, skew_unseen=True)

    print("Initializing Viterbi...")
    decoder = Viterbi(logger, hmm_model)

    print("Initializing Accuracy Estimator...")
    estimator = AccuracyEstimator(logger, parser)

    for language, language_file in DATA_FILES:
        print(banner)
        print(language)
        print(banner)

        print("Training HMM with %s data..." % language)
        # NOTE(review): training receives START_LINE - 1 while evaluation
        # receives START_LINE; presumably a train/test split point -- confirm
        # against Viterbi.train and AccuracyEstimator.compute_parameters.
        decoder.train(language_file, START_LINE - 1)

        print("Estimating accuracy of the model...")
        total_accuracy, unseen_accuracy = estimator.compute_parameters(
            decoder, language_file, START_LINE
        )

        print("TOTAL ACCURACY           : %.10f" % total_accuracy)
        print("UNSEEN_ACCURACY          : %.10f" % unseen_accuracy)
        print("Resetting model and estimator parameters...")

        # Clear the state of both objects before moving on to the next
        # data set.
        decoder.reset()
        estimator.reset()
def main(logger):
    """Train and score a LinearSequence-backed Viterbi object, saving results.

    Builds an OrwellDataParser, a LinearSequence model, a Viterbi object
    wrapping that model and an AccuracyEstimator. For every
    ``(language, language_file)`` pair in TRAINING_FILES it trains on
    ``Data/<language_file>``, computes the total and unseen accuracy, prints
    them, appends a summary record to the OUTPUT file, and resets both the
    Viterbi object and the estimator before the next data set.

    The OUTPUT file is opened (and truncated) before any other work and is
    managed by a ``with`` block, so it is closed -- and whatever was already
    written is flushed -- even when training or evaluation raises.

    Args:
        logger: Handed unchanged to every component constructor.

    Returns:
        None. Results go to standard output and to the OUTPUT file.
    """
    banner = "******************************************************************************************"

    with open(OUTPUT, "w") as out_ptr:
        print("Initializing Data Parser...")
        data_parser = OrwellDataParser(logger)

        print("Initializing Linear Sequence Model...")
        ls_obj = LinearSequence(logger, data_parser)

        print("Initializing Viterbi...")
        viterbi_obj = Viterbi(logger, ls_obj)

        print("Initializing Accuracy Estimator...")
        accuracy_estimator = AccuracyEstimator(logger, data_parser)

        for language, language_file in TRAINING_FILES:
            # TRAINING_FILES holds bare file names; the data lives under Data/.
            data_path = "Data/%s" % language_file

            print(banner)
            print(language)
            print(banner)

            print("Training Linear Sequence Linear Sequence Model with %s data..." % language)
            # NOTE(review): training receives START_LINE - 1 while evaluation
            # receives START_LINE; presumably a train/test split point --
            # confirm against Viterbi.train and
            # AccuracyEstimator.compute_parameters.
            viterbi_obj.train(data_path, START_LINE - 1)

            # Optional smoke check on a fixed sentence (disabled):
            # print(viterbi_obj.predict_sequence(["his", "breast", "rose", "and", "fell", "a", "little", "faster", "."]))

            print("Estimating accuracy of the model...")
            # The third argument is a per-language file name; presumably the
            # estimator records mispredicted words there -- confirm against
            # AccuracyEstimator.compute_parameters.
            total_accuracy, unseen_accuracy = accuracy_estimator.compute_parameters(
                viterbi_obj,
                data_path,
                "inaccurate_words_%s.txt" % language,
                language,
                START_LINE,
            )

            print("TOTAL ACCURACY           : %.10f" % total_accuracy)
            print("UNSEEN_ACCURACY          : %.10f" % unseen_accuracy)

            o_line = "Language : %s\nTotal Accuracy : %.10f\nUnseen Accuracy : %.10f\n\n\n" % (
                language,
                total_accuracy,
                unseen_accuracy,
            )
            out_ptr.write(o_line)
            print("Resetting model and estimator parameters...")

            # Clear the state of both objects before moving on to the next
            # data set.
            viterbi_obj.reset()
            accuracy_estimator.reset()