def testComposePhi(self):
    """Check arc weights and targets of a composition that uses a phi matcher.

    Two small FSTs are built by hand.  The ``AddArc`` arguments appear to be
    (source state, input label, output label, weight, destination state).
    ``a`` has arcs labelled 2, 3 and 4 out of state 0.  ``b`` has, out of
    state 0, an arc labelled 1 with weight 1 to state 1 and an arc labelled
    2 with weight 0; out of state 1 it has arcs labelled 3 and 4 with
    weight 0.  Label 1 is handed to the phi matcher on ``b`` as the phi
    label.

    The composed arcs are expected to carry weight 0 and lead to state 1
    for label 2, and to carry weight 1 and lead to state 2 for labels 3
    and 4.
    """
    # Left-hand operand: labels 2, 3 and 4 from state 0 to state 1.
    a = openfst.StdVectorFst()
    a.AddState()
    a.AddState()
    a.AddArc(0, 2, 2, 0, 1)
    a.AddArc(0, 3, 3, 0, 1)
    a.AddArc(0, 4, 4, 0, 1)
    a.SetStart(0)
    a.SetFinal(0, 0)

    # Right-hand operand: label 1 is the phi label (weight 1, to state 1).
    b = openfst.StdVectorFst()
    b.AddState()
    b.AddState()
    b.AddArc(0, 1, 1, 1, 1)
    b.AddArc(0, 2, 2, 0, 0)
    b.AddArc(1, 3, 3, 0, 1)
    b.AddArc(1, 4, 4, 0, 1)
    b.SetStart(0)
    b.SetFinal(0, 0)
    b.SetFinal(1, 0)

    opts = openfst.StdPhiComposeOptions()
    opts.matcher2 = openfst.StdPhiMatcher(b, openfst.MATCH_INPUT, 1)
    # This is necessary for reasons I do not understand
    opts.matcher1 = openfst.StdPhiMatcher(a, openfst.MATCH_NONE)
    c = openfst.StdComposeFst(a, b, opts)

    # assertEqual is used rather than the deprecated assertEquals alias,
    # which no longer exists in current unittest releases.
    for s in c:
        for arc in c.iterarcs(s):
            if arc.ilabel == 2:
                self.assertEqual(arc.weight.Value(), 0)
                self.assertEqual(arc.nextstate, 1)
            elif arc.ilabel == 3 or arc.ilabel == 4:
                self.assertEqual(arc.weight.Value(), 1)
                self.assertEqual(arc.nextstate, 2)
def lat2flat(latfile, fsgfile, lmfst):
    """
    Subset a language model using the vocabulary of a lattice.

    A one-state FST is built with one self-loop per distinct language
    model symbol found among the lattice nodes.  It is composed with
    `lmfst` (through a phi matcher when the LM symbol table contains
    "φ"), determinized, written out via build_fsg_fst() and returned.

    latfile -- lattice file, passed to lattice.Dag()
    fsgfile -- output destination, passed to build_fsg_fst()
    lmfst   -- language model FST; its input symbol table supplies the
               arc labels

    Returns the determinized FST (an openfst.StdVectorFst).
    """
    dag = lattice.Dag(latfile)
    fst = openfst.StdVectorFst()
    fst.SetStart(fst.AddState())
    fst.SetFinal(0, 0)
    syms = lmfst.InputSymbols()
    seen_words = set()  # raw node symbols already looked up
    seen_ids = set()    # LM symbol IDs that already have an arc
    for n in dag.nodes:
        # Skip fillers as they have been "bypassed" by PocketSphinx
        if n.sym.startswith("++") or n.sym == "<sil>":
            continue
        if n.sym in seen_words:
            continue
        seen_words.add(n.sym)
        sym = syms.Find(baseword(n.sym))
        # Skip words the LM does not know.  Also skip IDs that already
        # have an arc: different node symbols can map to the same base
        # word (presumably pronunciation variants -- confirm against
        # baseword()), and a second identical self-loop would only
        # duplicate paths in the composition below.
        if sym == -1 or sym in seen_ids:
            continue
        seen_ids.add(sym)
        fst.AddArc(0, sym, sym, 0, 0)
    fst.SetOutputSymbols(syms)
    phi = syms.Find("φ")
    if phi != -1:
        opts = openfst.StdPhiComposeOptions()
        opts.matcher1 = openfst.StdPhiMatcher(fst, openfst.MATCH_NONE)
        opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT, phi)
        cfst = openfst.StdComposeFst(fst, lmfst, opts)
    else:
        cfst = openfst.StdComposeFst(fst, lmfst)
    outfst = openfst.StdVectorFst()
    openfst.Determinize(cfst, outfst)
    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(outfst, fsgfile)
    return outfst
def lat_rescore(dag, lmfst, lw=9.5):
    """
    Rescore a lattice using a language model FST.

    The lattice is turned into an FST by lat2fsg.build_lattice_fsg(),
    which is given the LM input symbols and 1/lw.  That FST is composed
    with `lmfst`, using a phi matcher when the LM symbol table contains
    "φ".  The shortest path through the result is then walked, always
    following the first arc of each state.

    dag   -- lattice to rescore
    lmfst -- language model FST
    lw    -- language weight; its reciprocal is passed to
             build_lattice_fsg()

    Returns (words, score).  `words` starts with '<s>' and gains one
    entry for every arc on the path whose output label is non-zero;
    `score` is the negated sum of all arc weights on the path.
    """
    symtab = lmfst.InputSymbols()
    latfst = lat2fsg.build_lattice_fsg(dag, symtab, 1. / lw)

    phi_label = symtab.Find("φ")
    if phi_label == -1:
        # No phi symbol in the LM: ordinary composition.
        composed = openfst.StdComposeFst(latfst, lmfst)
    else:
        phi_opts = openfst.StdPhiComposeOptions()
        phi_opts.matcher1 = openfst.StdPhiMatcher(latfst, openfst.MATCH_NONE)
        phi_opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT,
                                                  phi_label)
        composed = openfst.StdComposeFst(latfst, lmfst, phi_opts)

    best = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best, 1)

    words = ['<s>']
    score = 0
    state = best.Start()
    while True:
        # Stop when there is no state, or the state has no outgoing arcs.
        if state == -1 or not best.NumArcs(state):
            break
        arc = best.GetArc(state, 0)
        if arc.olabel != 0:
            words.append(symtab.Find(arc.ilabel))
        score = score - arc.weight.Value()
        state = arc.nextstate
    return words, score
def lat2fsg(lat, fsgfile, lmfst, prune=15):
    """
    Expand a lattice against a language model FST and write it as an FSG.

    lat     -- either a lattice object, used as-is, or a str filename;
               names ending in ".slf" are loaded with
               lattice.Dag(htkfile=...), all others with lattice.Dag(...)
    fsgfile -- output destination, passed to build_fsg_fst()
    lmfst   -- language model FST to compose with
    prune   -- threshold passed to openfst.Prune()

    Returns the pruned FST (an openfst.StdVectorFst).
    """
    if not isinstance(lat, str):
        dag = lat
    elif lat.endswith(".slf"):
        dag = lattice.Dag(htkfile=lat)
    else:
        dag = lattice.Dag(lat)

    latfst = build_lattice_fsg(dag, lmfst.InputSymbols())

    # Compose it (intersect, really) with the language model to get
    # correct N-gram scores (otherwise it is just a unigram LM).  This
    # is the same thing as "lattice expansion".
    phi_label = lmfst.InputSymbols().Find("φ")
    if phi_label == -1:
        composed = openfst.StdComposeFst(latfst, lmfst)
    else:
        phi_opts = openfst.StdPhiComposeOptions()
        phi_opts.matcher1 = openfst.StdPhiMatcher(latfst, openfst.MATCH_NONE)
        phi_opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT,
                                                  phi_label)
        composed = openfst.StdComposeFst(latfst, lmfst, phi_opts)

    # Copy the composition into a vector FST before pruning it.
    result = openfst.StdVectorFst(composed)
    openfst.Prune(result, prune)

    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(result, fsgfile)
    return result
def lmfst_eval(lmfst, sent):
    """
    Score a sentence against a language model FST.

    The sentence is converted to an FST by sent2fst() using the LM input
    symbols, then composed with `lmfst` (through a phi matcher when the
    LM symbol table contains "φ").  The shortest path through the result
    is walked, always following the first arc of each state.

    lmfst -- language model FST
    sent  -- sentence, in whatever form sent2fst() accepts

    Returns the negated sum of the arc weights along that path
    (0 if the path is empty).
    """
    symtab = lmfst.InputSymbols()
    sentence_fst = sent2fst(sent, openfst.StdVectorFst, symtab)

    phi_label = symtab.Find("φ")
    if phi_label == -1:
        composed = openfst.StdComposeFst(sentence_fst, lmfst)
    else:
        phi_opts = openfst.StdPhiComposeOptions()
        phi_opts.matcher1 = openfst.StdPhiMatcher(sentence_fst,
                                                  openfst.MATCH_NONE)
        phi_opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT,
                                                  phi_label)
        composed = openfst.StdComposeFst(sentence_fst, lmfst, phi_opts)

    best = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best, 1)

    total = 0
    state = best.Start()
    while True:
        # Stop when there is no state, or the state has no outgoing arcs.
        if state == -1 or not best.NumArcs(state):
            break
        arc = best.GetArc(state, 0)
        # Debugging aid: the arc's input and output symbols and
        # -arc.weight.Value() / math.log(10) can be printed here to
        # trace the path.
        total = total - arc.weight.Value()
        state = arc.nextstate
    return total