Example #1
def main():
    # Write preprocessed copies of the POS and NPC training sets.
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()
    print "without preprocessor"
    # Estimate emission and transition parameters from the raw training data.
    e0.compute('../data/POS/train')
    t0.compute('../data/POS/train')
    # Baseline: tag each token with its most likely tag (per-token MLE).
    e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False)
    print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out')
    # Viterbi decoding over whole sentences, combining both models.
    viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False)
    print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out')
Example #2
def main():
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()
    # print "without preprocessor"
    # e0.compute('../data/POS/train')
    # t0.compute('../data/POS/train')
    # e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False)
    # print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out')
    # print "POS,MLE likelihood:", e0.filelikelihood("../data/POS/dev.p2.out",p=False)
    # viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False)
    # print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out')
    # print "POS,DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out", p=False)
    # start = time.clock()
    # viterbi_Nbest(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p4.out', best=1, p=False)
    # print "runtime:",time.clock()-start
    # c = 1
    # while c<=1:
        # print c,":POS, DP2:", tool.evaluate('../data/POS/dev.p4.out', '../data/POS/dev.out',col=c)
        # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",p=False, col=c)
        # c+=1

    print "with preprocessor"
    e0.compute('../data/POS/ptrain')
    bt0.compute('../data/POS/ptrain')
    tt0.compute('../data/POS/ptrain')
    # e0.predict('../data/POS/test.in','../data/POS/test.p1.out')
    # era,eno= tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',col=1,pr=True)
    # print "error rate:",era
    # print "POS, MLE, likelihood:",e0.filelikelihood("../data/POS/dev.p2.out")
    # with new smoothing 0.27637
    # viterbi_best(e0,bt0,'../data/POS/dev.in','../data/POS/dev.p2.out')
    # era,eno = tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',pr=True)
    # print "POS, DP:", era
    # print "POS, DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out")
    # start = time.clock()
    # (presumably lambda0 lambda1 lambda2 : resulting dev-set error rate)
    # 0.5 1.5 0: 0.2574
    # 1 10 1: 0.2422
    # 1 15 1: 0.2422
    # 1 20 1: 0.239
    # 1 25 1: 0.2369
    # 1 30 1: 0.235
    # 1 35 1: 0.2334
    # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', '../data/POS/dev.p5.out',lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)
    # print "runtime:",time.clock() - start
    # Evaluate each output column of the N-best file (here only the 1-best column).
    c = 1
    while c <= 1:
        era, eno = tool.evaluate('../data/POS/dev.p5.out',
                                 '../data/POS/dev.out',
                                 col=c,
                                 pr=True)
        print c, ":POS, DP2:", era  # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",col=c)
        c += 1
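The commented-out grid above (e.g. "1 30 1: 0.235") reads like recorded dev-set error rates for different lambda settings, which suggests viterbi_Nbest linearly interpolates lower- and higher-order transition estimates. Below is a hedged sketch of that kind of interpolation; the function name, dict-based probability tables, and the mapping of each lambda to an n-gram order are assumptions, not the project's bi_transition/tri_transition API.

def interpolated_transition(t2, t1, t, uni, bi, tri,
                            lambda0=1.0, lambda1=30.0, lambda2=1.0):
    """P(t | t2, t1) as a weighted mix of unigram, bigram and trigram estimates."""
    # The defaults mirror the lambda0/lambda1/lambda2 values passed to viterbi_Nbest above.
    total = lambda0 + lambda1 + lambda2
    p_uni = uni.get(t, 0.0)
    p_bi = bi.get((t1, t), 0.0)
    p_tri = tri.get((t2, t1, t), 0.0)
    return (lambda0 * p_uni + lambda1 * p_bi + lambda2 * p_tri) / total

# Toy usage with made-up probabilities.
uni = {'N': 0.4}
bi = {('D', 'N'): 0.8}
tri = {('START', 'D', 'N'): 0.9}
print interpolated_transition('START', 'D', 'N', uni, bi, tri)   # about 0.79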
Example #3
def main():
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    t0 = tr.transition()
    print "without preprocessor"
    e0.compute('../data/POS/train')
    t0.compute('../data/POS/train')
    e0.predict('../data/POS/dev.in', '../data/POS/dev.p2.out', p=False)
    print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out',
                                    '../data/POS/dev.out')
    viterbi_best(e0,
                 t0,
                 '../data/POS/dev.in',
                 '../data/POS/dev.p3.out',
                 p=False)
    print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out',
                                   '../data/POS/dev.out')
Example #4
def main():
    tool.preprocess('../data/POS/train', '../data/POS/ptrain')
    tool.preprocess('../data/NPC/train', '../data/NPC/ptrain')

    e0 = em.emission()
    bt0 = bitr.bi_transition()
    tt0 = tritr.tri_transition()
    # print "without preprocessor"
    # e0.compute('../data/POS/train')
    # t0.compute('../data/POS/train')
    # e0.predict('../data/POS/dev.in','../data/POS/dev.p2.out',p=False)
    # print "POS,MLE:", tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out')
    # print "POS,MLE likelihood:", e0.filelikelihood("../data/POS/dev.p2.out",p=False)
    # viterbi_best(e0,t0,'../data/POS/dev.in','../data/POS/dev.p3.out',p=False)
    # print "POS,DP:", tool.evaluate('../data/POS/dev.p3.out','../data/POS/dev.out')
    # print "POS,DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out", p=False)
    # start = time.clock()
    # viterbi_Nbest(e0, t0, '../data/POS/dev.in', '../data/POS/dev.p4.out', best=1, p=False)
    # print "runtime:",time.clock()-start
    # c = 1
    # while c<=1:
        # print c,":POS, DP2:", tool.evaluate('../data/POS/dev.p4.out', '../data/POS/dev.out',col=c)
        # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",p=False, col=c)
        # c+=1

    print "with preprocessor"
    e0.compute('../data/POS/ptrain')
    bt0.compute('../data/POS/ptrain')
    tt0.compute('../data/POS/ptrain')
    # e0.predict('../data/POS/test.in','../data/POS/test.p1.out')
    # era,eno= tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',col=1,pr=True)
    # print "error rate:",era
    # print "POS, MLE, likelihood:",e0.filelikelihood("../data/POS/dev.p2.out")
    # with new smoothing 0.27637
    # viterbi_best(e0,bt0,'../data/POS/dev.in','../data/POS/dev.p2.out')
    # era,eno = tool.evaluate('../data/POS/dev.p2.out','../data/POS/dev.out',pr=True)
    # print "POS, DP:", era
    # print "POS, DP likelihood:", e0.filelikelihood("../data/POS/dev.p3.out")
    # start = time.clock()
    # 0.5 1.5 0: 0.2574
    # 1 10 1: 0.2422
    # 1 15 1: 0.2422
    # 1 20 1: 0.239
    # 1 25 1: 0.2369
    # 1 30 1: 0.235
    # 1 35 1: 0.2334
    # viterbi_Nbest(e0, bt0, tt0, '../data/POS/dev.in', '../data/POS/dev.p5.out',lambda0=1.0, lambda1=30.0, lambda2=1.0, best=1)
    # print "runtime:",time.clock() - start
    c = 1
    while c <= 1:
        era, eno = tool.evaluate('../data/POS/dev.p5.out', '../data/POS/dev.out',col=c,pr=True)
        print c,":POS, DP2:",era       # print c,":POS, DP2 likelihood:", e0.filelikelihood("../data/POS/dev.p4.out",col=c)
        c += 1
Example #5
def eaSteadyState(toolbox, population, ngen, halloffame=None):
    """The is the steady-state evolutionary algorithm
    """
    _logger.info("Start of evolution")
    
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
    
    if halloffame is not None:
        halloffame.update(population)
    
    # Begin the generational process
    for gen in range(ngen):
        _logger.info("Evolving generation %i", gen)
        
        p1, p2 = toolbox.select(population, 2)
        p1 = toolbox.clone(p1)
        p2 = toolbox.clone(p2)
        toolbox.mate(p1, p2)
        child = random.choice([p1, p2])
        toolbox.mutate(child)
        
        child.fitness.values = toolbox.evaluate(child)
        
        if halloffame is not None:
            halloffame.update([child])  # update() expects an iterable of individuals
        
        # Select the next generation population
        population[:] = toolbox.select(population + [child], len(population))
        
        # Gather all the fitnesses in one list and log the statistics
        fits = [ind.fitness.values for ind in population]
        fits_t = zip(*fits)             # Transpose fitnesses for analysis
        
        minimums = map(min, fits_t)
        maximums = map(max, fits_t)
        length = len(population)
        sums = map(sum, fits_t)
        sums2 = [sum(x*x for x in fit) for fit in fits_t]
        means = [sum_ / length for sum_ in sums]
        std_devs = [abs(sum2 / length - mean**2)**0.5 for sum2, mean in zip(sums2, means)]
        
        _logger.debug("Min %s", ", ".join(map(str, minimums)))
        _logger.debug("Max %s", ", ".join(map(str, maximums)))
        _logger.debug("Avg %s", ", ".join(map(str, means)))
        _logger.debug("Std %s", ", ".join(map(str, std_devs)))

    _logger.info("End of (successful) evolution")
    return population
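eaSteadyState relies on a module-level random import, a configured _logger, and a DEAP toolbox with evaluate, select, clone, mate, mutate and map registered (Toolbox provides clone and map by default). Below is a minimal sketch of such a setup for a OneMax-style problem; the attribute names, genome length and operator choices are illustrative, and the function above is Python 2-era DEAP code (it reuses list-returning zip/map), so run it under Python 2.

import random
from deap import base, creator, tools

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()  # clone and map are registered by default
toolbox.register("attr_bit", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bit, 20)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", lambda ind: (sum(ind),))  # maximise the number of ones
toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

pop = toolbox.population(n=50)
hof = tools.HallOfFame(1)
pop = eaSteadyState(toolbox, pop, ngen=200, halloffame=hof)
print hof[0].fitness.values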