def learn_and_decode(trainfile, featlistfile, gazfile, brownfile, num_iter, testfile):
    """Train for ``num_iter`` iterations, average the weights, decode the test set.

    Steps, in order:
      1. Load the training data and feature inventory via ``framework.get_all``.
      2. Load the test data via ``framework.read_data`` / ``framework.get_maps``.
      3. Call ``run`` once per iteration, write that iteration's weights with
         ``framework.write_weights`` and fold the first value returned by
         ``run`` into ``tot_weights`` with ``add_weights``.
      4. Divide every entry of ``tot_weights`` by
         ``num_iter * len(sentset)`` and call ``decode`` on the test data
         with the result.

    Args:
        trainfile: passed to ``framework.get_all`` as the training data source.
        featlistfile: passed to ``framework.get_all``.
        gazfile: passed to ``framework.get_all`` and ``framework.get_maps``.
        brownfile: passed to ``framework.get_all`` and ``framework.get_maps``.
        num_iter: number of training iterations (used with ``range``).
        testfile: passed to ``framework.read_data`` as the test data source.

    Raises:
        ZeroDivisionError: if ``num_iter * len(sentset)`` is zero and
            ``tot_weights`` has at least one entry.

    NOTE(review): ``weights_a`` is presumably the sum of the weight vectors
    seen after each training sentence, which is what makes the divisor
    ``num_iter * len(sentset)`` an average -- confirm against ``run``.
    """
    sentset, labelset, postagset, all_feats, info = framework.get_all(
        trainfile, gazfile, featlistfile, brownfile)
    sys.stderr.write("\n" + str(len(all_feats)) + " features in all\n")

    sys.stderr.write("\nreading test data \n")
    tsents, tgoldtagseqs, tpostagseqs = framework.read_data(testfile)
    tinfo = framework.get_maps(tsents, tpostagseqs, gazfile, brownfile)
    testdata = (tsents, tgoldtagseqs, tpostagseqs, tinfo)

    weights = init_weights(all_feats)
    tot_weights = init_weights(all_feats)
    # ADAGRAD: additional per-feature state handed to run() on every iteration.
    ad = init_weights(all_feats)

    for ite in range(num_iter):
        sys.stderr.write(
            "Iteration " + str(ite)
            + "\n---------------------------\ntotal train sentences = "
            + str(len(sentset)) + "\n")
        weights_a, weights = run(
            sentset, labelset, postagset, all_feats, info, weights, testdata, ad)  # ADAGRAD
        framework.write_weights(weights, ite)
        add_weights(tot_weights, weights_a)

    # Use a float divisor so the average is a true division even when the
    # accumulated values are ints under Python 2; compute it once because it
    # does not change inside the loop.
    denominator = float(num_iter * len(sentset))
    # Iterating the dict directly yields its keys on both Python 2 and 3
    # (dict.iterkeys() exists only on Python 2). Only values are reassigned,
    # so the dict does not change size during iteration.
    for key in tot_weights:
        tot_weights[key] /= denominator

    sys.stderr.write("\n\nfinal performance on test\n")
    decode(tsents, tgoldtagseqs, tpostagseqs, tinfo, tot_weights)
if goldtagseqs[i][j] in ('B', 'I'): tot_rec_bi += 1 else: tot_rec_o += 1 if tags[j] in ('B', 'I'): tot_prec_bi += 1 else: tot_prec_o += 1 print sent[j]+"\t"+postags[j]+"\t"+goldtagseqs[i][j]+"\t"+tags[j] print tot += len(tags) sys.stderr.write("accuracy = " + str(acc/tot) + "\n") sys.stderr.write("BI recall = " + str(tp_bi/tot_rec_bi) + "\n") if tot_prec_bi > 0: sys.stderr.write("BI precision = " + str(tp_bi/tot_prec_bi) + "\n") sys.stderr.write("O recall = " + str(tp_o/tot_rec_o) + "\n") if tot_prec_o > 0: sys.stderr.write("O precision = " + str(tp_o/tot_prec_o) + "\n\n") if __name__ == "__main__": testfile = sys.argv[1] weightsfile = sys.argv[2] gazfile = sys.argv[3] brownfile = sys.argv[4] sents, goldtagseqs, postagseqs = read_data(testfile) info = get_maps(sents, postagseqs, gazfile, brownfile) weights = read_weights(weightsfile) decode(sents, goldtagseqs, postagseqs, info, weights)