Example #1
0
def lat2flat(latfile, fsgfile, lmfst):
    """
    Restrict a language model to the words that occur in a lattice.

    A one-state acceptor is built with a zero-weight self-loop for each
    distinct, non-filler lattice word that is present in the input
    symbol table of `lmfst`.  That acceptor is composed with `lmfst`
    (through a phi matcher when the table defines "&phi;"), the result
    is determinized, handed to build_fsg_fst() together with `fsgfile`,
    and returned.
    """
    lattice_dag = lattice.Dag(latfile)
    vocab_fst = openfst.StdVectorFst()
    start_state = vocab_fst.AddState()
    vocab_fst.SetStart(start_state)
    vocab_fst.SetFinal(0, 0)
    symtab = lmfst.InputSymbols()
    handled = set()
    for node in lattice_dag.nodes:
        word = node.sym
        # Fillers have been "bypassed" by PocketSphinx, so they never
        # contribute a vocabulary arc; repeated words are added once.
        is_filler = word.startswith("++") or word == "<sil>"
        if is_filler or word in handled:
            continue
        handled.add(word)
        label = symtab.Find(baseword(word))
        if label != -1:
            # Words unknown to the language model (-1) are left out.
            vocab_fst.AddArc(0, label, label, 0, 0)
    vocab_fst.SetOutputSymbols(lmfst.InputSymbols())
    phi_label = lmfst.InputSymbols().Find("&phi;")
    if phi_label == -1:
        # No "&phi;" symbol in the table: ordinary composition.
        composed = openfst.StdComposeFst(vocab_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        compose_opts.matcher1 = openfst.StdPhiMatcher(vocab_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(vocab_fst, lmfst, compose_opts)
    determinized = openfst.StdVectorFst()
    openfst.Determinize(composed, determinized)
    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(determinized, fsgfile)
    return determinized
Example #2
0
def lat2fsg(lat, fsgfile, lmfst, prune=15):
    """
    Expand a lattice against a language model and write it as an FSG.

    `lat` is either an already-loaded lattice object or a file name; a
    name ending in ".slf" is passed to lattice.Dag as `htkfile`, any
    other name is passed positionally.  The lattice FST is composed
    with `lmfst`, copied into a vector FST, pruned with threshold
    `prune`, handed to build_fsg_fst() with `fsgfile`, and returned.
    """
    if not isinstance(lat, str):
        lattice_dag = lat
    elif lat.endswith(".slf"):
        lattice_dag = lattice.Dag(htkfile=lat)
    else:
        lattice_dag = lattice.Dag(lat)
    lattice_fst = build_lattice_fsg(lattice_dag, lmfst.InputSymbols())
    # Compose it (intersect, really) with the language model to get
    # correct N-gram scores (otherwise it is just a unigram LM).  This
    # is the same thing as "lattice expansion".
    phi_label = lmfst.InputSymbols().Find("&phi;")
    if phi_label == -1:
        # No "&phi;" symbol in the table: ordinary composition.
        composed = openfst.StdComposeFst(lattice_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        compose_opts.matcher1 = openfst.StdPhiMatcher(lattice_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(lattice_fst, lmfst, compose_opts)
    expanded = openfst.StdVectorFst(composed)
    openfst.Prune(expanded, prune)
    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(expanded, fsgfile)
    return expanded
def lat_rescore(dag, lmfst, lw=9.5):
    """
    Find the best word sequence in a lattice under a language model FST.

    The lattice is converted with lat2fsg.build_lattice_fsg() (passing
    1/lw as its third argument), composed with `lmfst`, and the single
    shortest path is extracted.  Returns a tuple (words, score): `words`
    starts with '<s>' followed by the symbol of every arc on that path
    whose output label is non-zero, and `score` is the negated sum of
    the arc weights along the path.
    """
    lattice_fst = lat2fsg.build_lattice_fsg(dag, lmfst.InputSymbols(),
                                            1.0 / lw)
    phi_label = lmfst.InputSymbols().Find("&phi;")
    if phi_label == -1:
        # No "&phi;" symbol in the table: ordinary composition.
        composed = openfst.StdComposeFst(lattice_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        compose_opts.matcher1 = openfst.StdPhiMatcher(lattice_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(lattice_fst, lmfst, compose_opts)
    best_path = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best_path, 1)
    words = ['<s>']
    score = 0
    state = best_path.Start()
    # Follow the first arc out of each state until there is no start
    # state (-1) or a state without outgoing arcs is reached.
    while True:
        if state == -1 or not best_path.NumArcs(state):
            break
        arc = best_path.GetArc(state, 0)
        if arc.olabel != 0:
            words.append(lmfst.InputSymbols().Find(arc.ilabel))
        score -= arc.weight.Value()
        state = arc.nextstate
    return words, score
 def testCompose(self):
     """
     Compose two identical two-state transducers and check every arc
     of the result: its destination, its output label, and (for input
     labels 2 and 3) its accumulated weight.
     """
     # NOTE(review): AddArc arguments appear to be
     # (source, ilabel, olabel, weight, nextstate) -- confirm against
     # the binding.
     a = openfst.StdVectorFst()
     a.AddState()
     a.AddState()
     a.AddArc(0, 1, 2, 0, 1)
     a.AddArc(0, 2, 3, 0, 1)
     a.AddArc(0, 3, 3, 1, 1)
     a.SetStart(0)
     a.SetFinal(1, 0)
     b = openfst.StdVectorFst()
     b.AddState()
     b.AddState()
     b.AddArc(0, 1, 2, 0, 1)
     b.AddArc(0, 2, 3, 0, 1)
     b.AddArc(0, 3, 3, 1, 1)
     b.SetStart(0)
     b.SetFinal(1, 0)
     c = openfst.StdComposeFst(a, b)
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated and then removed in Python 3.12.
     for s in c:
         for arc in c.iterarcs(s):
             self.assertEqual(arc.nextstate, 1)
             if arc.ilabel == 1:
                 self.assertEqual(arc.olabel, 3)
             if arc.ilabel == 2:
                 self.assertEqual(arc.olabel, 3)
                 self.assertEqual(arc.weight.Value(), 1)
             if arc.ilabel == 3:
                 self.assertEqual(arc.olabel, 3)
                 self.assertEqual(arc.weight.Value(), 2)
 def testComposePhi(self):
     """
     Compose through a phi matcher, with label 1 in `b` as the phi
     label, and check the weight and destination of the resulting arcs
     for input labels 2, 3 and 4.
     """
     a = openfst.StdVectorFst()
     a.AddState()
     a.AddState()
     a.AddArc(0, 2, 2, 0, 1)
     a.AddArc(0, 3, 3, 0, 1)
     a.AddArc(0, 4, 4, 0, 1)
     a.SetStart(0)
     a.SetFinal(0, 0)
     b = openfst.StdVectorFst()
     b.AddState()
     b.AddState()
     b.AddArc(0, 1, 1, 1, 1)
     b.AddArc(0, 2, 2, 0, 0)
     b.AddArc(1, 3, 3, 0, 1)
     b.AddArc(1, 4, 4, 0, 1)
     b.SetStart(0)
     b.SetFinal(0, 0)
     b.SetFinal(1, 0)
     opts = openfst.StdPhiComposeOptions()
     opts.matcher2 = openfst.StdPhiMatcher(b, openfst.MATCH_INPUT, 1)
     # This is necessary for reasons I do not understand
     opts.matcher1 = openfst.StdPhiMatcher(a, openfst.MATCH_NONE)
     c = openfst.StdComposeFst(a, b, opts)
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated and then removed in Python 3.12.
     for s in c:
         for arc in c.iterarcs(s):
             if arc.ilabel == 2:
                 self.assertEqual(arc.weight.Value(), 0)
                 self.assertEqual(arc.nextstate, 1)
             elif arc.ilabel == 3 or arc.ilabel == 4:
                 self.assertEqual(arc.weight.Value(), 1)
                 self.assertEqual(arc.nextstate, 2)
 def testComposeRho(self):
     """
     Compose through a rho matcher, with label 1 in `b` as the rho
     label, and check that every resulting arc leads to state 1 with
     weight 0 for input label 2 and weight 1 otherwise.
     """
     a = openfst.StdVectorFst()
     a.AddState()
     a.AddState()
     a.AddArc(0, 2, 2, 0, 1)
     a.AddArc(0, 3, 3, 0, 1)
     a.AddArc(0, 4, 4, 0, 1)
     a.SetStart(0)
     a.SetFinal(1, 0)
     # Build an FST that matches 2 with no weight and everything
     # else adding weight 1
     b = openfst.StdVectorFst()
     b.AddState()
     b.AddArc(0, 1, 1, 1, 0)
     b.AddArc(0, 2, 2, 0, 0)
     b.SetStart(0)
     b.SetFinal(0, 0)
     opts = openfst.StdRhoComposeOptions()
     opts.matcher2 = openfst.StdRhoMatcher(b, openfst.MATCH_INPUT, 1)
     # This is necessary for reasons I do not understand
     opts.matcher1 = openfst.StdRhoMatcher(a, openfst.MATCH_NONE)
     c = openfst.StdComposeFst(a, b, opts)
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated and then removed in Python 3.12.
     for s in c:
         for arc in c.iterarcs(s):
             self.assertEqual(arc.nextstate, 1)
             if arc.ilabel == 2:
                 self.assertEqual(arc.weight.Value(), 0)
             else:
                 self.assertEqual(arc.weight.Value(), 1)
Example #7
0
def apply_errfst(fst, errfst):
    """
    Compose `fst` with the error model `errfst` through a sigma matcher.

    The sigma label is the "&sigma;" entry of errfst's input symbol
    table.  The lazy composition is copied into a vector FST, projected
    onto its output side in place, and returned.
    """
    sigma_label = errfst.InputSymbols().Find("&sigma;")
    compose_opts = openfst.StdSigmaComposeOptions()
    compose_opts.matcher1 = openfst.StdSigmaMatcher(fst, openfst.MATCH_NONE)
    compose_opts.matcher2 = openfst.StdSigmaMatcher(
        errfst, openfst.MATCH_INPUT, sigma_label, True)
    lazy_composition = openfst.StdComposeFst(fst, errfst, compose_opts)
    # Materialize the lazy result so it can be projected in place.
    result = openfst.StdVectorFst(lazy_composition)
    openfst.ProjectOutput(result)
    return result
Example #8
0
def build_class_lmfst(lm, probdef, use_phi=False):
    """
    Construct an FST for a class-based language model.

    The value returned is the lazy composition of the class definition
    transducer with the language model FST.  To get the full language
    model, build a VectorFst from the result and project it to its
    input.
    """
    ngram_fst = build_lmfst(lm, use_phi)
    class_fst = build_classfst(probdef, ngram_fst.InputSymbols())
    # Both operands are input-arc-sorted before being composed.
    for machine in (ngram_fst, class_fst):
        openfst.ArcSortInput(machine)
    return openfst.StdComposeFst(class_fst, ngram_fst)
Example #9
0
def lmfst_eval(lmfst, sent):
    """
    Score a sentence with a language model FST.

    `sent` is converted with sent2fst() using the input symbols of
    `lmfst`, composed with `lmfst` (through a phi matcher when the
    table defines "&phi;"), and the single shortest path is extracted.
    Returns the negated sum of the arc weights along that path.
    """
    sentence_fst = sent2fst(sent, openfst.StdVectorFst,
                            lmfst.InputSymbols())
    phi_label = lmfst.InputSymbols().Find("&phi;")
    if phi_label == -1:
        # No "&phi;" symbol in the table: ordinary composition.
        composed = openfst.StdComposeFst(sentence_fst, lmfst)
    else:
        compose_opts = openfst.StdPhiComposeOptions()
        compose_opts.matcher1 = openfst.StdPhiMatcher(sentence_fst,
                                                      openfst.MATCH_NONE)
        compose_opts.matcher2 = openfst.StdPhiMatcher(lmfst,
                                                      openfst.MATCH_INPUT,
                                                      phi_label)
        composed = openfst.StdComposeFst(sentence_fst, lmfst, compose_opts)
    best_path = openfst.StdVectorFst()
    openfst.ShortestPath(composed, best_path, 1)
    total = 0
    state = best_path.Start()
    # Follow the first arc out of each state until there is no start
    # state (-1) or a state without outgoing arcs is reached.
    while True:
        if state == -1 or not best_path.NumArcs(state):
            break
        arc = best_path.GetArc(state, 0)
        total -= arc.weight.Value()
        state = arc.nextstate
    return total
Example #10
0
                                  baseword=lattice.baseword_noclass)
 openfst.ArcSortInput(lfst)
 # Apply Levenshtein model to the input
 errfst = LevenshteinModel(rfst.OutputSymbols())
 openfst.ArcSortInput(errfst)
 # Apply compound word model based on the lattice
 compfst = CompoundWordModel(errfst.OutputSymbols(),
                             lfst.InputSymbols())
 # Precompose and project it to the lattice so compound words
 # are split in the alignment
 xlat = openfst.StdVectorFst()
 openfst.Compose(compfst, lfst, xlat)
 openfst.ProjectInput(xlat)
 openfst.ArcSortInput(xlat)
 # Compose everything together
 cfst = openfst.StdComposeFst(rfst, errfst)
 cfst = openfst.StdComposeFst(cfst, xlat)
 # Do bestpath search
 ofst = openfst.StdVectorFst()
 openfst.ShortestPath(cfst, ofst, 1)
 st = ofst.Start()
 err = 0
 bt = []
 while st != -1 and ofst.NumArcs(st):
     a = ofst.GetArc(st, 0)
     isym = ofst.InputSymbols().Find(a.ilabel)
     osym = ofst.OutputSymbols().Find(a.olabel)
     if isym == '</s>':
         break
     if a.ilabel == openfst.epsilon:
         isym = '*INS*'