def main():
    """Fit the POS models on the raw training file and score two taggers.

    Steps, in order:

    1. Call ``tool.preprocess`` with the (train, ptrain) path pairs of the
       POS and NPC data sets.  The ``ptrain`` paths are not read again by
       this routine.
    2. Fit the emission and transition models on ``../data/POS/train``.
    3. Run ``e0.predict`` on the dev input (output ``dev.p2.out``) and print
       what ``tool.evaluate`` returns for it, labelled ``POS,MLE:``.
    4. Run ``viterbi_best`` on the dev input (output ``dev.p3.out``) and
       print what ``tool.evaluate`` returns for it, labelled ``POS,DP:``.
    """
    pos_train = '../data/POS/train'
    dev_in = '../data/POS/dev.in'
    dev_gold = '../data/POS/dev.out'
    mle_out = '../data/POS/dev.p2.out'
    dp_out = '../data/POS/dev.p3.out'

    tool.preprocess(pos_train, '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()

    # The print calls below take exactly one argument so that they emit the
    # same text under the Python 2 print statement and the Python 3 print
    # function.
    print("without preprocessor")
    e0.compute(pos_train)
    t0.compute(pos_train)

    e0.predict(dev_in, mle_out, p=False)
    print("POS,MLE: " + str(tool.evaluate(mle_out, dev_gold)))

    viterbi_best(e0, t0, dev_in, dp_out, p=False)
    print("POS,DP: " + str(tool.evaluate(dp_out, dev_gold)))
def main():
    """Fit the POS models on the preprocessed data and score ``viterbi_Nbest``.

    Steps, in order:

    1. Call ``tool.preprocess`` with the (train, ptrain) path pairs of the
       POS and NPC data sets.
    2. Fit the emission, bigram-transition and trigram-transition models on
       ``../data/POS/ptrain``.
    3. Decode ``../data/POS/dev.in`` with ``viterbi_Nbest`` into
       ``../data/POS/dev.p5.out``.
    4. For every evaluated output column, print the first value returned by
       ``tool.evaluate(..., pr=True)``, labelled ``:POS, DP2:``.
    """
    pos_ptrain = '../data/POS/ptrain'
    dev_in = '../data/POS/dev.in'
    dev_gold = '../data/POS/dev.out'
    nbest_out = '../data/POS/dev.p5.out'

    tool.preprocess('../data/POS/train', pos_ptrain)
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    # Single-argument print calls emit the same text under the Python 2
    # print statement and the Python 3 print function.
    print("with preprocessor")
    for model in (e0, bt0, tt0):
        model.compute(pos_ptrain)

    # Figures recorded by the author during earlier experiments.
    # "with new smoothing 0.27637" was noted next to the (since disabled)
    # e0.predict / viterbi_best evaluation on the dev set.
    #
    # Tuning log for viterbi_Nbest; presumably
    # "lambda0 lambda1 lambda2: dev error rate" -- TODO confirm:
    #   0.5  1.5  0: 0.2574
    #   1    10   1: 0.2422
    #   1    15   1: 0.2422
    #   1    20   1: 0.239
    #   1    25   1: 0.2369
    #   1    30   1: 0.235
    #   1    35   1: 0.2334

    # NOTE(review): previously the first line of this call was commented out
    # while its continuation line was still live code, which cannot parse.
    # It is restored as a live call because the evaluation below reads the
    # file it is asked to write.  If the intent was to score an existing
    # dev.p5.out without re-decoding, disable the whole call instead.
    n_best = 1
    viterbi_Nbest(e0, bt0, tt0, dev_in, nbest_out,
                  lambda0=1.0, lambda1=30.0, lambda2=1.0, best=n_best)

    # Columns are numbered from 1; presumably one column per ranked
    # sequence, hence the link to ``n_best`` -- TODO confirm.
    for col in range(1, n_best + 1):
        era, _ = tool.evaluate(nbest_out, dev_gold, col=col, pr=True)
        print("%s :POS, DP2: %s" % (col, era))
def main():
    """Train on the un-preprocessed POS data and report two dev-set scores.

    ``tool.preprocess`` is called first for the POS and NPC training files;
    the resulting ``ptrain`` paths are not used again here.  The emission
    and transition models are then fitted on ``../data/POS/train``.  Two
    predictions are produced for ``../data/POS/dev.in`` and each is compared
    with ``../data/POS/dev.out`` through ``tool.evaluate``:

    * ``e0.predict``   -> ``dev.p2.out``, printed after ``POS,MLE:``
    * ``viterbi_best`` -> ``dev.p3.out``, printed after ``POS,DP:``
    """
    train_path = '../data/POS/train'
    dev_input = '../data/POS/dev.in'
    dev_reference = '../data/POS/dev.out'
    predict_output = '../data/POS/dev.p2.out'
    viterbi_output = '../data/POS/dev.p3.out'

    for raw, processed in (
        (train_path, '../data/POS/ptrain'),
        ('../data/NPC/train', '../data/NPC/ptrain'),
    ):
        tool.preprocess(raw, processed)

    e0 = em.emission()
    t0 = tr.transition()

    # Each print receives one pre-built string, which prints identically
    # with the Python 2 statement and the Python 3 function.
    print("without preprocessor")
    e0.compute(train_path)
    t0.compute(train_path)

    e0.predict(dev_input, predict_output, p=False)
    mle_result = tool.evaluate(predict_output, dev_reference)
    print("POS,MLE: " + str(mle_result))

    viterbi_best(e0, t0, dev_input, viterbi_output, p=False)
    dp_result = tool.evaluate(viterbi_output, dev_reference)
    print("POS,DP: " + str(dp_result))
def main():
    """Train on the preprocessed POS data and evaluate the N-best decoder.

    The routine calls ``tool.preprocess`` for the POS and NPC training
    files, fits the emission, bigram-transition and trigram-transition
    models on ``../data/POS/ptrain``, runs ``viterbi_Nbest`` on
    ``../data/POS/dev.in`` (output ``../data/POS/dev.p5.out``) and prints,
    per evaluated column, the first element of the pair returned by
    ``tool.evaluate(..., pr=True)`` after the label ``:POS, DP2:``.
    """
    ptrain_path = '../data/POS/ptrain'
    dev_input = '../data/POS/dev.in'
    dev_reference = '../data/POS/dev.out'
    decoded_path = '../data/POS/dev.p5.out'

    tool.preprocess('../data/POS/train', ptrain_path)
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()

    # print() with a single argument behaves the same on Python 2 and 3.
    print("with preprocessor")
    e0.compute(ptrain_path)
    bt0.compute(ptrain_path)
    tt0.compute(ptrain_path)

    # Numbers kept from the author's earlier runs.
    # "with new smoothing 0.27637" was written beside the now-disabled
    # e0.predict / viterbi_best dev-set evaluation.
    #
    # viterbi_Nbest tuning log; the three leading numbers are presumably
    # lambda0, lambda1, lambda2 and the last one the dev error rate
    # -- TODO confirm:
    #   0.5  1.5  0: 0.2574
    #   1    10   1: 0.2422
    #   1    15   1: 0.2422
    #   1    20   1: 0.239
    #   1    25   1: 0.2369
    #   1    30   1: 0.235
    #   1    35   1: 0.2334

    # NOTE(review): the opening line of this call used to be commented out
    # while the line carrying the remaining arguments was live, leaving the
    # function unparsable.  The call is re-enabled because the loop below
    # evaluates the file passed to it as output path; comment out the whole
    # statement if only an existing dev.p5.out should be scored.
    best_count = 1
    viterbi_Nbest(
        e0, bt0, tt0, dev_input, decoded_path,
        lambda0=1.0, lambda1=30.0, lambda2=1.0, best=best_count,
    )

    # Column indices start at 1; tying the upper bound to ``best_count``
    # assumes one output column per ranked sequence -- TODO confirm.
    for column in range(1, best_count + 1):
        era, _ = tool.evaluate(decoded_path, dev_reference,
                               col=column, pr=True)
        print("%s :POS, DP2: %s" % (column, era))
def eaSteadyState(toolbox, population, ngen, halloffame=None):
    """Run a steady-state evolutionary algorithm on *population*.

    Individuals whose fitness is not valid are evaluated first.  Then, for
    each of the *ngen* generations:

    1. two parents are selected with ``toolbox.select`` and cloned;
    2. the clones are passed to ``toolbox.mate`` (its return value is
       ignored, so mating has to modify them in place);
    3. one of the two clones is picked at random, mutated with
       ``toolbox.mutate`` and evaluated;
    4. ``toolbox.select`` chooses ``len(population)`` survivors from the
       population plus that child, replacing the population's content;
    5. per-objective minimum, maximum, mean and standard deviation of the
       fitness values are logged at debug level.

    :param toolbox: object providing ``map``, ``evaluate``, ``select``,
        ``clone``, ``mate`` and ``mutate``.
    :param population: list of individuals; it is modified in place.
    :param ngen: number of generations (one child is produced per
        generation).
    :param halloffame: optional object whose ``update`` method receives a
        sequence of individuals.
    :returns: the *population* list that was passed in.
    """
    _logger.info("Start of evolution")

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)

    # Begin the generational process
    for gen in range(ngen):
        _logger.info("Evolving generation %i", gen)

        p1, p2 = toolbox.select(population, 2)
        p1 = toolbox.clone(p1)
        p2 = toolbox.clone(p2)
        toolbox.mate(p1, p2)
        child = random.choice([p1, p2])
        toolbox.mutate(child)

        child.fitness.values = toolbox.evaluate(child)

        if halloffame is not None:
            # update() is given a sequence of individuals above, so the
            # single child is wrapped in a list rather than passed bare
            # (a bare individual would be iterated element by element).
            halloffame.update([child])

        # Select the next generation population
        population[:] = toolbox.select(population + [child], len(population))

        # Gather all the fitnesses in one list and log the stats.
        fits = [ind.fitness.values for ind in population]
        # Transpose to one tuple per objective.  It is materialised as a
        # list because it is traversed several times and zip() is a
        # one-shot iterator on Python 3.
        fits_t = list(zip(*fits))

        # Float divisor: avoids truncating division on integer fitnesses.
        length = float(len(population))
        minimums = [min(column) for column in fits_t]
        maximums = [max(column) for column in fits_t]
        means = [sum(column) / length for column in fits_t]
        squared_means = [sum(x * x for x in column) / length
                         for column in fits_t]
        # abs() guards against a slightly negative variance caused by
        # floating-point rounding.
        std_devs = [abs(sq_mean - mean ** 2) ** 0.5
                    for sq_mean, mean in zip(squared_means, means)]

        _logger.debug("Min %s", ", ".join(map(str, minimums)))
        _logger.debug("Max %s", ", ".join(map(str, maximums)))
        _logger.debug("Avg %s", ", ".join(map(str, means)))
        _logger.debug("Std %s", ", ".join(map(str, std_devs)))

    _logger.info("End of (successful) evolution")
    return population