def main():
    """Fit the POS models on preprocessed data and print the dev-set score.

    Steps, in order:
      1. Preprocess the POS and NPC training files into ``ptrain`` files.
      2. Fit the emission, bigram-transition and trigram-transition models
         on the preprocessed POS training file.
      3. Evaluate ``dev.p5.out`` against ``dev.out`` for each output column
         and print the first value returned by ``tool.evaluate``.

    NOTE(review): the decoding call that writes ``dev.p5.out`` is disabled
    below, so that file has to exist already from an earlier run. The fitted
    transition models are therefore unused here -- confirm this is intended.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    print("with preprocessor")
    e0.compute('../data/POS/ptrain')
    bt0.compute('../data/POS/ptrain')
    tt0.compute('../data/POS/ptrain')

    # Tuning log from earlier runs. Presumably "lambda0 lambda1 lambda2:
    # dev error rate" for viterbi_Nbest -- TODO confirm.
    #   0.5  1.5  0: 0.2574
    #   1    10   1: 0.2422
    #   1    15   1: 0.2422
    #   1    20   1: 0.239
    #   1    25   1: 0.2369
    #   1    30   1: 0.235
    #   1    35   1: 0.2334
    # Separate note left next to the viterbi_best experiment:
    #   "with new smoothing 0.27637"
    #
    # Disabled decoding step that (re)generates dev.p5.out:
    # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in',
    #               '../data/POS/dev.p5.out',
    #               lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)

    # Only column 1 is evaluated; widen the range to score more columns.
    for c in range(1, 2):
        era, _eno = tool.evaluate('../data/POS/dev.p5.out',
                                  '../data/POS/dev.out', col=c, pr=True)
        # Single-argument print gives the same text on Python 2 and 3.
        print("%s :POS, DP2: %s" % (c, era))
def main():
    """Train the preprocessed POS models and report the dev-set result.

    The function preprocesses the POS and NPC training files, fits the
    emission, bigram-transition and trigram-transition models on the
    preprocessed POS data, then scores ``dev.p5.out`` against ``dev.out``
    and prints the first value returned by ``tool.evaluate`` per column.

    NOTE(review): the ``viterbi_Nbest`` call that produces ``dev.p5.out``
    is commented out, so the evaluation reads whatever file a previous run
    left behind -- confirm this is intended.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    print("with preprocessor")
    for model in (e0, bt0, tt0):
        model.compute('../data/POS/ptrain')

    # Earlier tuning results, kept for reference. Presumably the columns
    # are lambda0, lambda1, lambda2 and the resulting dev error rate --
    # TODO confirm.
    #   0.5  1.5  0: 0.2574
    #   1    10   1: 0.2422
    #   1    15   1: 0.2422
    #   1    20   1: 0.239
    #   1    25   1: 0.2369
    #   1    30   1: 0.235
    #   1    35   1: 0.2334
    # Note recorded beside the viterbi_best run: "with new smoothing 0.27637"
    #
    # Disabled decoding step that writes dev.p5.out:
    # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in',
    #               '../data/POS/dev.p5.out',
    #               lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)

    last_col = 1  # highest output column to evaluate
    for c in range(1, last_col + 1):
        era, _eno = tool.evaluate('../data/POS/dev.p5.out',
                                  '../data/POS/dev.out', col=c, pr=True)
        # %-formatting keeps the printed text identical on Python 2 and 3.
        print("%s :POS, DP2: %s" % (c, era))
def main():
    """Run the baseline POS experiment on the raw (unpreprocessed) data.

    Steps, in order:
      1. Preprocess the POS and NPC training files into ``ptrain`` files
         (this function does not read those outputs afterwards).
      2. Fit the emission and transition models on the raw POS training
         file.
      3. Write emission-only predictions to ``dev.p2.out`` and print their
         evaluation against ``dev.out`` (labelled "POS,MLE").
      4. Write ``viterbi_best`` predictions to ``dev.p3.out`` and print
         their evaluation against ``dev.out`` (labelled "POS,DP").

    ``p=False`` presumably tells the helpers that the models were trained
    without preprocessing -- TODO confirm against their definitions.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()

    print("without preprocessor")
    e0.compute('../data/POS/train')
    t0.compute('../data/POS/train')

    e0.predict('../data/POS/dev.in', '../data/POS/dev.p2.out', p=False)
    mle_result = tool.evaluate('../data/POS/dev.p2.out', '../data/POS/dev.out')
    # The result is wrapped in a 1-tuple so a tuple return value is
    # formatted as a whole; the text is the same on Python 2 and 3.
    print("POS,MLE: %s" % (mle_result,))

    viterbi_best(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p3.out',
                 p=False)
    dp_result = tool.evaluate('../data/POS/dev.p3.out', '../data/POS/dev.out')
    print("POS,DP: %s" % (dp_result,))
def main():
    """Train on the raw POS data and print MLE and Viterbi dev-set results.

    The POS and NPC training files are preprocessed first, although the
    resulting ``ptrain`` files are not read by this function. The emission
    and transition models are then fitted on the raw POS training file, and
    two prediction files are produced and evaluated against ``dev.out``:
    ``dev.p2.out`` from ``e0.predict`` (printed as "POS,MLE") and
    ``dev.p3.out`` from ``viterbi_best`` (printed as "POS,DP").

    NOTE(review): ``p=False`` looks like a "no preprocessing" switch on the
    helpers -- confirm against their definitions.
    """
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()

    print("without preprocessor")
    for model in (e0, t0):
        model.compute('../data/POS/train')

    # Emission-only baseline.
    e0.predict('../data/POS/dev.in', '../data/POS/dev.p2.out', p=False)
    score = tool.evaluate('../data/POS/dev.p2.out', '../data/POS/dev.out')
    # A 1-tuple keeps a tuple-valued score intact under %-formatting, and
    # the single-argument print reads the same on Python 2 and 3.
    print("POS,MLE: %s" % (score,))

    # Viterbi decoding with the transition model.
    viterbi_best(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p3.out',
                 p=False)
    score = tool.evaluate('../data/POS/dev.p3.out', '../data/POS/dev.out')
    print("POS,DP: %s" % (score,))