Esempio n. 1
0
def lat_rescore(dag, lmfst, lw=9.5):
    """
    Find the best path through a lattice composed with a language model FST.

    The lattice `dag` is converted to an FST by lat2fsg.build_lattice_fsg,
    which is given the LM's input symbol table and 1/lw as its third
    argument (presumably a score scale derived from the language weight --
    confirm against build_lattice_fsg).  If the LM's input symbol table
    contains the symbol "φ", the two FSTs are composed with phi matchers
    using that label on the LM side; otherwise a plain composition is used.
    The single shortest path of the composition is then walked from its
    start state, always following the first arc of each state.

    Returns a tuple (words, score).  `words` is a list that starts with
    '<s>' and then holds, for every arc on the path whose output label is
    non-zero, the LM input-symbol-table entry for that arc's input label.
    `score` is the negated sum of the arc weights along the path.
    """
    lattice_fst = lat2fsg.build_lattice_fsg(dag, lmfst.InputSymbols(), 1. / lw)

    # Find() yields -1 when the symbol table has no "φ" entry.
    phi_label = lmfst.InputSymbols().Find("φ")
    if phi_label == -1:
        composed = openfst.StdComposeFst(lattice_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        # The lattice side does no matching; the LM side matches on input
        # labels with phi_label as the phi symbol.
        compose_opts.matcher1 = openfst.StdPhiMatcher(lattice_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(lattice_fst, lmfst, compose_opts)

    # Extract the single best path into a concrete FST.
    best_path = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best_path, 1)

    words = ['<s>']
    score = 0
    state = best_path.Start()
    while True:
        # Stop on an invalid state or on a state with no outgoing arcs.
        if state == -1 or not best_path.NumArcs(state):
            break
        arc = best_path.GetArc(state, 0)
        if arc.olabel != 0:
            words.append(lmfst.InputSymbols().Find(arc.ilabel))
        score = score - arc.weight.Value()
        state = arc.nextstate
    return words, score
Esempio n. 2
0
 # Build an FST from the sentence `r` with fstutils.sent2fst.
 # NOTE(review): `r` is bound outside this excerpt -- presumably the
 # reference word sequence; confirm against the enclosing code.
 rfst = fstutils.sent2fst(r)
 # Load the hypothesis lattice named by `c` from `latdir`, trying the file
 # extensions in order: ".lat", then ".lat.gz", then ".slf" (the last one is
 # passed through the htk_file keyword).  An IOError raised by the final
 # attempt is not caught here.
 try:
     l = lattice.Dag(os.path.join(latdir, c + ".lat"))
 except IOError:
     try:
         l = lattice.Dag(os.path.join(latdir, c + ".lat.gz"))
     except IOError:
         l = lattice.Dag(htk_file=os.path.join(latdir, c + ".slf"))
 # Prune only when opts.prune is set; the value is negated before it is
 # handed to posterior_prune.
 # NOTE(review): `!= None` would be more idiomatic as `is not None`.
 if opts.prune != None:
     l.posterior_prune(-opts.prune)
 # Convert the lattice to an FST, passing the sentence FST's output symbol
 # table, then sort its arcs by input label.
 lfst = lat2fsg.build_lattice_fsg(l,
                                  rfst.OutputSymbols(),
                                  addsyms=True,
                                  determinize=False,
                                  baseword=lattice.baseword_noclass)
 openfst.ArcSortInput(lfst)
 # Build the Levenshtein model over the sentence FST's output symbols and
 # sort its arcs by input label.
 errfst = LevenshteinModel(rfst.OutputSymbols())
 openfst.ArcSortInput(errfst)
 # Build the compound word model from the Levenshtein model's output
 # symbols and the lattice FST's input symbols.
 compfst = CompoundWordModel(errfst.OutputSymbols(),
                             lfst.InputSymbols())
 # Precompose the compound word model with the lattice and project the
 # result onto its input labels so compound words are split in the
 # alignment; the result is input-sorted afterwards.
 xlat = openfst.StdVectorFst()
 openfst.Compose(compfst, lfst, xlat)
 openfst.ProjectInput(xlat)
 openfst.ArcSortInput(xlat)
Esempio n. 3
0
 # Drop every item of `r` for which is_filler() is true.
 # NOTE(review): under Python 3 filter() returns a lazy iterator rather
 # than a list -- confirm that fstutils.sent2fst accepts one.
 r = filter(lambda x: not is_filler(x), r)
 # Build an FST from the filtered sentence with fstutils.sent2fst.
 rfst = fstutils.sent2fst(r)
 # Load the hypothesis lattice named by `c` from `latdir`, trying the file
 # extensions in order: ".lat", then ".lat.gz", then ".slf" (the last one is
 # passed through the htk_file keyword).  An IOError raised by the final
 # attempt is not caught here.
 try:
     l = lattice.Dag(os.path.join(latdir, c + ".lat"))
 except IOError:
     try:
         l = lattice.Dag(os.path.join(latdir, c + ".lat.gz"))
     except IOError:
         l = lattice.Dag(htk_file=os.path.join(latdir, c + ".slf"))
 # Prune only when opts.prune is set; the value is negated before it is
 # handed to posterior_prune.
 # NOTE(review): `!= None` would be more idiomatic as `is not None`.
 if opts.prune != None:
     l.posterior_prune(-opts.prune)
 # Convert the lattice to an FST, passing the sentence FST's output symbol
 # table, then sort its arcs by input label.
 lfst = lat2fsg.build_lattice_fsg(l, rfst.OutputSymbols(),
                                  addsyms=True, determinize=False,
                                  baseword=lattice.baseword_noclass)
 openfst.ArcSortInput(lfst)
 # Build the Levenshtein model over the sentence FST's output symbols and
 # sort its arcs by input label.
 errfst = LevenshteinModel(rfst.OutputSymbols())
 openfst.ArcSortInput(errfst)
 # Build the compound word model from the Levenshtein model's output
 # symbols and the lattice FST's input symbols.
 compfst = CompoundWordModel(errfst.OutputSymbols(),
                             lfst.InputSymbols())
 # Precompose the compound word model with the lattice and project the
 # result onto its input labels so compound words are split in the
 # alignment; the result is input-sorted afterwards.
 xlat = openfst.StdVectorFst()
 openfst.Compose(compfst, lfst, xlat)
 openfst.ProjectInput(xlat)
 openfst.ArcSortInput(xlat)
 # Compose everything together