예제 #1
0
def learn_and_decode(trainfile, featlistfile, gazfile, brownfile, num_iter, testfile):
    """Train for ``num_iter`` iterations, then decode the test set with averaged weights.

    Training data and the feature inventory come from ``framework.get_all``;
    the test data is read with ``framework.read_data`` and
    ``framework.get_maps``. On each iteration ``run`` is called once, the
    second value it returns is written out with ``framework.write_weights``
    and the first value it returns is added into a running total. The total
    is then divided by ``num_iter * len(sentset)`` and passed to ``decode``
    together with the test data.

    Args:
        trainfile: training data path, forwarded to ``framework.get_all``.
        featlistfile: feature list path, forwarded to ``framework.get_all``.
        gazfile: forwarded to ``framework.get_all`` and ``framework.get_maps``
            (presumably a gazetteer file -- confirm against ``framework``).
        brownfile: forwarded to ``framework.get_all`` and
            ``framework.get_maps`` (presumably Brown clusters -- confirm).
        num_iter: number of training iterations to run.
        testfile: test data path, forwarded to ``framework.read_data``.

    Returns:
        None. Progress is written to stderr; whatever ``decode`` emits is
        its own output.
    """
    sentset, labelset, postagset, all_feats, info = framework.get_all(trainfile, gazfile, featlistfile, brownfile)
    sys.stderr.write("\n" + str(len(all_feats)) + " features in all\n")

    sys.stderr.write("\nreading test data \n")
    tsents, tgoldtagseqs, tpostagseqs = framework.read_data(testfile)
    tinfo = framework.get_maps(tsents, tpostagseqs, gazfile, brownfile)

    testdata = (tsents, tgoldtagseqs, tpostagseqs, tinfo)
    weights = init_weights(all_feats)
    tot_weights = init_weights(all_feats)

    # ADAGRAD: one extra per-feature table, handed to run() on every iteration
    # (presumably the accumulated gradient statistics -- confirm in run()).
    ad = init_weights(all_feats)

    for ite in range(num_iter):
        sys.stderr.write("Iteration " + str(ite) + "\n---------------------------\ntotal train sentences = "+ str(len(sentset)) + "\n")
        weights_a, weights = run(sentset, labelset, postagset, all_feats, info, weights, testdata, ad) #ADAGRAD
        framework.write_weights(weights, ite)
        add_weights(tot_weights, weights_a)

    # Average the accumulated weights. The denominator is loop-invariant, so
    # compute it once; use a float so Python 2 never floor-divides int weights.
    denominator = num_iter * len(sentset)
    if denominator:
        scale = float(denominator)
        # Plain dict iteration works on Python 2 and 3 (iterkeys() is 2-only);
        # only values are reassigned, so the key set is stable while looping.
        for key in tot_weights:
            tot_weights[key] /= scale
    else:
        # No iterations or no training sentences: nothing to average, and
        # dividing would raise ZeroDivisionError before the final decode.
        sys.stderr.write("\nno training updates; skipping weight averaging\n")

    sys.stderr.write("\n\nfinal performance on test\n")
    decode(tsents, tgoldtagseqs, tpostagseqs, tinfo, tot_weights)
예제 #2
0
            if goldtagseqs[i][j] in ('B', 'I'):
                tot_rec_bi += 1
            else:
                tot_rec_o += 1
            if tags[j] in ('B', 'I'):
                tot_prec_bi += 1
            else:
                tot_prec_o += 1
            print sent[j]+"\t"+postags[j]+"\t"+goldtagseqs[i][j]+"\t"+tags[j]
        print
        
        tot += len(tags)
    sys.stderr.write("accuracy     = "    + str(acc/tot) + "\n")
    sys.stderr.write("BI recall    = " + str(tp_bi/tot_rec_bi) + "\n")
    if tot_prec_bi > 0:
        sys.stderr.write("BI precision = " + str(tp_bi/tot_prec_bi) + "\n")
    sys.stderr.write("O recall     = "     + str(tp_o/tot_rec_o) + "\n")
    if tot_prec_o > 0:
        sys.stderr.write("O precision  = "  + str(tp_o/tot_prec_o) + "\n\n")

if __name__ == "__main__":
    # Command-line entry point: decode a test file with previously saved weights.
    # Positional arguments: testfile weightsfile gazfile brownfile.
    if len(sys.argv) < 5:
        # Report the expected arguments instead of failing with a bare
        # IndexError traceback; extra arguments are still ignored as before.
        sys.exit("usage: " + sys.argv[0] + " testfile weightsfile gazfile brownfile")
    testfile = sys.argv[1]
    weightsfile = sys.argv[2]
    gazfile = sys.argv[3]
    brownfile = sys.argv[4]
    # Load the test data and the lookup maps built from it, then the weights.
    sents, goldtagseqs, postagseqs = read_data(testfile)
    info = get_maps(sents, postagseqs, gazfile, brownfile)
    weights = read_weights(weightsfile)
    decode(sents, goldtagseqs, postagseqs, info, weights)