def lat_rescore(dag, lmfst, lw=9.5):
    """
    Rescore a lattice using a language model FST.

    The lattice is turned into an FST with lat2fsg.build_lattice_fsg,
    composed with the language model, and the single best path through
    the composition is read back as a word sequence.

    :param dag: the lattice to rescore.
    :param lmfst: language model FST.  Its input symbol table is used
                  both to build the lattice FST and to map the labels
                  on the best path back to symbols.
    :param lw: language weight; its reciprocal is passed as the third
               argument to build_lattice_fsg.
    :return: a tuple (words, score).  words starts with '<s>' and is
             followed by the symbol of every arc on the best path
             whose output label is non-zero.  score is the negated
             sum of the arc weights along that path (the weight of
             the final state is not read).
    """
    symtab = lmfst.InputSymbols()
    lat_fst = lat2fsg.build_lattice_fsg(dag, symtab, 1.0 / lw)

    # Find() yields -1 when the symbol is absent.  When the LM does
    # define it, compose with phi matchers on the LM side (presumably
    # so that its phi arcs act as back-off transitions -- confirm
    # against how the LM FST was built).
    phi_label = symtab.Find("φ")
    if phi_label == -1:
        composed = openfst.StdComposeFst(lat_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        compose_opts.matcher1 = openfst.StdPhiMatcher(lat_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(lat_fst, lmfst, compose_opts)

    # Extract the 1-best path into a concrete FST.
    best = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best, 1)

    # Follow the first arc out of each state, starting at the start
    # state, until there is no start state (-1) or a state has no
    # outgoing arcs.
    words = ['<s>']
    score = 0
    state = best.Start()
    while state != -1 and best.NumArcs(state):
        arc = best.GetArc(state, 0)
        # Output label 0 is skipped when collecting words, but the
        # arc's weight still contributes to the score.
        if arc.olabel != 0:
            words.append(symtab.Find(arc.ilabel))
        score -= arc.weight.Value()
        state = arc.nextstate
    return words, score
# Turn it into an FSM rfst = fstutils.sent2fst(r) # Get the hypothesis lattice try: l = lattice.Dag(os.path.join(latdir, c + ".lat")) except IOError: try: l = lattice.Dag(os.path.join(latdir, c + ".lat.gz")) except IOError: l = lattice.Dag(htk_file=os.path.join(latdir, c + ".slf")) if opts.prune != None: l.posterior_prune(-opts.prune) # Convert it to an FSM lfst = lat2fsg.build_lattice_fsg(l, rfst.OutputSymbols(), addsyms=True, determinize=False, baseword=lattice.baseword_noclass) openfst.ArcSortInput(lfst) # Apply Levenshtein model to the input errfst = LevenshteinModel(rfst.OutputSymbols()) openfst.ArcSortInput(errfst) # Apply compound word model based on the lattice compfst = CompoundWordModel(errfst.OutputSymbols(), lfst.InputSymbols()) # Precompose and project it to the lattice so compound words # are split in the alignment xlat = openfst.StdVectorFst() openfst.Compose(compfst, lfst, xlat) openfst.ProjectInput(xlat) openfst.ArcSortInput(xlat)
r = filter(lambda x: not is_filler(x), r) # Turn it into an FSM rfst = fstutils.sent2fst(r) # Get the hypothesis lattice try: l = lattice.Dag(os.path.join(latdir, c + ".lat")) except IOError: try: l = lattice.Dag(os.path.join(latdir, c + ".lat.gz")) except IOError: l = lattice.Dag(htk_file=os.path.join(latdir, c + ".slf")) if opts.prune != None: l.posterior_prune(-opts.prune) # Convert it to an FSM lfst = lat2fsg.build_lattice_fsg(l, rfst.OutputSymbols(), addsyms=True, determinize=False, baseword=lattice.baseword_noclass) openfst.ArcSortInput(lfst) # Apply Levenshtein model to the input errfst = LevenshteinModel(rfst.OutputSymbols()) openfst.ArcSortInput(errfst) # Apply compound word model based on the lattice compfst = CompoundWordModel(errfst.OutputSymbols(), lfst.InputSymbols()) # Precompose and project it to the lattice so compound words # are split in the alignment xlat = openfst.StdVectorFst() openfst.Compose(compfst, lfst, xlat) openfst.ProjectInput(xlat) openfst.ArcSortInput(xlat) # Compose everything together