def testComposePhi(self):
     a = openfst.StdVectorFst()
     a.AddState()
     a.AddState()
     a.AddArc(0, 2, 2, 0, 1)
     a.AddArc(0, 3, 3, 0, 1)
     a.AddArc(0, 4, 4, 0, 1)
     a.SetStart(0)
     a.SetFinal(0, 0)
     b = openfst.StdVectorFst()
     b.AddState()
     b.AddState()
     b.AddArc(0, 1, 1, 1, 1)
     b.AddArc(0, 2, 2, 0, 0)
     b.AddArc(1, 3, 3, 0, 1)
     b.AddArc(1, 4, 4, 0, 1)
     b.SetStart(0)
     b.SetFinal(0, 0)
     b.SetFinal(1, 0)
     opts = openfst.StdPhiComposeOptions()
     opts.matcher2 = openfst.StdPhiMatcher(b, openfst.MATCH_INPUT, 1)
     # This is necessary for reasons I do not understand
     opts.matcher1 = openfst.StdPhiMatcher(a, openfst.MATCH_NONE)
     c = openfst.StdComposeFst(a, b, opts)
     for s in c:
         for arc in c.iterarcs(s):
             if arc.ilabel == 2:
                 self.assertEquals(arc.weight.Value(), 0)
                 self.assertEquals(arc.nextstate, 1)
             elif arc.ilabel == 3 or arc.ilabel == 4:
                 self.assertEquals(arc.weight.Value(), 1)
                 self.assertEquals(arc.nextstate, 2)
Beispiel #2
0
def lat2flat(latfile, fsgfile, lmfst):
    """
    Subset a language model using the vocabulary of a lattice.
    """
    dag = lattice.Dag(latfile)
    fst = openfst.StdVectorFst()
    fst.SetStart(fst.AddState())
    fst.SetFinal(0, 0)
    syms = lmfst.InputSymbols()
    seen = set()
    for n in dag.nodes:
        # Skip fillers as they have been "bypassed" by PocketSphinx
        if n.sym.startswith("++") or n.sym == "<sil>":
            continue
        if n.sym in seen:
            continue
        seen.add(n.sym)
        sym = syms.Find(baseword(n.sym))
        if sym == -1:
            continue
        fst.AddArc(0, sym, sym, 0, 0)
    fst.SetOutputSymbols(lmfst.InputSymbols())
    phi = lmfst.InputSymbols().Find("&phi;")
    if phi != -1:
        opts = openfst.StdPhiComposeOptions()
        opts.matcher1 = openfst.StdPhiMatcher(fst, openfst.MATCH_NONE)
        opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT, phi)
        cfst = openfst.StdComposeFst(fst, lmfst, opts)
    else:
        cfst = openfst.StdComposeFst(fst, lmfst)
    outfst = openfst.StdVectorFst()
    openfst.Determinize(cfst, outfst)
    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(outfst, fsgfile)
    return outfst
def lat_rescore(dag, lmfst, lw=9.5):
    """
    Rescore a lattice using a language model FST.
    """
    fst = lat2fsg.build_lattice_fsg(dag, lmfst.InputSymbols(), 1. / lw)
    phi = lmfst.InputSymbols().Find("&phi;")
    if phi != -1:
        opts = openfst.StdPhiComposeOptions()
        opts.matcher1 = openfst.StdPhiMatcher(fst, openfst.MATCH_NONE)
        opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT, phi)
        c = openfst.StdComposeFst(fst, lmfst, opts)
    else:
        c = openfst.StdComposeFst(fst, lmfst)
    o = openfst.StdVectorFst()
    openfst.ShortestPath(c, o, 1)
    words = ['<s>']
    st = o.Start()
    score = 0
    while st != -1 and o.NumArcs(st):
        a = o.GetArc(st, 0)
        if a.olabel != 0:
            words.append(lmfst.InputSymbols().Find(a.ilabel))
        score -= a.weight.Value()
        st = a.nextstate
    return words, score
Beispiel #4
0
def lat2fsg(lat, fsgfile, lmfst, prune=15):
    if isinstance(lat, str):
        if lat.endswith(".slf"):
            dag = lattice.Dag(htkfile=lat)
        else:
            dag = lattice.Dag(lat)
    else:
        dag = lat
    fst = build_lattice_fsg(dag, lmfst.InputSymbols())
    # Compose it (intersect, really) with the language model to get
    # correct N-gram scores (otherwise it is just a unigram LM).  This
    # is the same thing as "lattice expansion".
    phi = lmfst.InputSymbols().Find("&phi;")
    if phi != -1:
        opts = openfst.StdPhiComposeOptions()
        opts.matcher1 = openfst.StdPhiMatcher(fst, openfst.MATCH_NONE)
        opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT, phi)
        cfst = openfst.StdComposeFst(fst, lmfst, opts)
    else:
        cfst = openfst.StdComposeFst(fst, lmfst)
    outfst = openfst.StdVectorFst(cfst)
    openfst.Prune(outfst, prune)
    # Write it back out as an FSG for PocketSphinx.
    build_fsg_fst(outfst, fsgfile)
    return outfst
Beispiel #5
0
def lmfst_eval(lmfst, sent):
    sentfst = sent2fst(sent, openfst.StdVectorFst, lmfst.InputSymbols())
    phi = lmfst.InputSymbols().Find("&phi;")
    if phi != -1:
        opts = openfst.StdPhiComposeOptions()
        opts.matcher1 = openfst.StdPhiMatcher(sentfst, openfst.MATCH_NONE)
        opts.matcher2 = openfst.StdPhiMatcher(lmfst, openfst.MATCH_INPUT, phi)
        c = openfst.StdComposeFst(sentfst, lmfst, opts)
    else:
        c = openfst.StdComposeFst(sentfst, lmfst)
    o = openfst.StdVectorFst()
    openfst.ShortestPath(c, o, 1)
    st = o.Start()
    ll = 0
    while st != -1 and o.NumArcs(st):
        a = o.GetArc(st, 0)
        #        print o.InputSymbols().Find(a.ilabel), \
        #              o.OutputSymbols().Find(a.olabel), \
        #              -a.weight.Value() / math.log(10)
        ll -= a.weight.Value()
        st = a.nextstate
    return ll