Code example #1
File: train-features.py Project: arendu/pybrezel
def gradient(theta):
    write_learned_features(theta)
    print 'getting counts...'
    # log-semiring zero accumulators for expected and observed feature counts
    exp_counts = [fst.LogWeight.ZERO] * (len(f_names) + 1)
    obs_counts = [fst.LogWeight.ZERO] * (len(f_names) + 1)
    for idx, (exp_file, obs_chain_file) in enumerate(zip(exp_machines, obs_chain)):
        sys.stdout.write('%d \r' % idx)
        sys.stdout.flush()
        exp = fst.read(path + exp_file)
        obs_c = fst.read(path + obs_chain_file)
        exp_wt = apply_weights(exp, theta)
        (e_counts, o_counts) = get_counts_for_machine(exp_wt, obs_c)
        exp_counts = accumilate_counts(e_counts, exp_counts)
        obs_counts = accumilate_counts(o_counts, obs_counts)

    grad = np.zeros(len(theta))
    for i, o in f_names:  # keys of f_names are (input, output) feature pairs
        k = f_names[i, o]  # index of this feature in the count vectors
        ok = obs_counts[k]
        ek = exp_counts[k]
        # counts are LogWeights storing -log(p); exp(-w) recovers the
        # probability mass, giving observed minus expected counts
        s1 = np.exp(-float(ok))
        s2 = np.exp(-float(ek))
        grad[k] = s1 - s2
        #print grad[k], '=', s2, '-', s1, i, o
        #pdb.set_trace()
    print '\ngrad computed'
    return grad
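The accumilate_counts helper [sic] is not part of this excerpt. A minimal sketch consistent with its use above, assuming pyfst's LogWeight overloads + as the log-semiring plus operation:

def accumilate_counts(new_counts, totals):
    # element-wise log-semiring accumulation of one sentence's count
    # vector into the running totals (both are lists of fst.LogWeight)
    return [n + t for n, t in zip(new_counts, totals)]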
Code example #2
File: train-features.py Project: arendu/pybrezel
def value(theta):
    likelihood = 0.0
    print 'likelihoods'
    for idx, (e_file, o_chain_file) in enumerate(zip(exp_machines, obs_chain)):
        sys.stdout.write('%d \r' % idx)
        sys.stdout.flush()
        #print e_file
        e = fst.read(path + e_file)
        o_chain = fst.read(path + o_chain_file)
        likelihood += get_likelihood(e, o_chain, theta)
    reg = np.linalg.norm(theta, ord=1)  # L1 regularizer, printed for monitoring but not applied
    print 'll', likelihood, 'reg', reg
    return likelihood
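A hypothetical wiring sketch, not from the project: value() and gradient() have the (objective, jacobian) shapes that scipy's gradient-based optimizers consume. Sign conventions would need checking against the LogWeight encoding (-log p) before a real run:

from scipy.optimize import minimize

theta0 = np.zeros(len(f_names) + 1)  # one weight per feature, as in gradient()
result = minimize(value, theta0, jac=gradient, method='L-BFGS-B')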
Code example #3
def get_likelihood(inp_file, E_file, o_chain_file, theta):
    inp = fst.read(path + inp_file)
    E = fst.read(path + E_file)
    o_chain = fst.read(path + o_chain_file)
    E_wt = apply_weights(E, theta)
    exp_wt = inp.compose(E_wt)  #apply_weights(exp, theta)
    #e_wt.write('e_wt.fst', e_wt.isyms, e_wt.osyms)
    exp_wt = renormalize(exp_wt)
    #e_wt.write('e_norm.fst', e_wt.isyms, e_wt.osyms)
    o = exp_wt.compose(o_chain)
    #o.write('obs.after.fst', o.isyms, o.osyms)
    # reverse shortest distance: entry 0 is the total weight of all paths
    # from the start state, i.e. the sentence log-likelihood
    ll = o.shortest_distance(True)[0]
    return float(ll)
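apply_weights is likewise not shown in these excerpts. A hypothetical sketch of its presumed shape, assuming arcs carry feature indices as input labels and theta maps a feature index to a weight:

def apply_weights(machine, theta):
    # hypothetical: re-weight each arc from the current parameters,
    # using the arc's input label as the feature index
    out = machine.copy()
    for state in out:
        for arc in state:
            arc.weight = fst.LogWeight(theta[arc.ilabel])
    return out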
Code example #4
def main(args):
    L = fst.read(args.fst_file)

    for state in L:
        for arc in state:
            # reset every arc to weight 0.0, the identity cost in L's semiring
            arc.weight = L.SEMIRING(0.0)

    L.write(args.fst_file, keep_isyms=True, keep_osyms=True)
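The main(args) checkers in this and the following examples only assume an args.fst_file attribute; a minimal driver sketch:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('fst_file', help='path to a binary FST')
    main(parser.parse_args())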
Code example #5
def main(args):
    L = fst.read(args.fst_file)

    for state in L:
        for arc in state:
            arc.weight = fst.TropicalWeight(0.0)

    L.write(args.fst_file, keep_isyms=True, keep_osyms=True)
Code example #6
def main(args):
    L = fst.read(args.fst_file)

    for state in L:
        for arc in state:
            if arc.weight != fst.TropicalWeight(0.0):
                sys.stderr.write(
                    "Nonzero weight in the fst: node {} arc {}\n".format(state, arc))
                exit(1)
Code example #7
File: fstphrases_messy.py Project: AbeHandler/rookie
def get_fst(name):
    global FSTS
    if name not in FSTS:
        here = os.path.dirname(__file__)
        fstdir = os.path.join(here, "grammar", "compiled_fsts")
        filename = os.path.join(fstdir, "%s.bin" % name)
        assert os.path.exists(filename), "FST file does not exist: " + filename
        FSTS[name] = fst.read(filename)
    return FSTS[name]
Code example #8
File: fstphrases.py Project: AbeHandler/rookie
def get_fst(name):
    global FSTS
    if name not in FSTS:
        here = os.path.dirname(__file__)
        fstdir = os.path.join(here, "grammar", "compiled_fsts")
        filename = os.path.join(fstdir, "%s.bin" % name)
        assert os.path.exists(filename), "FST file does not exist: " + filename
        FSTS[name] = fst.read(filename)
        FSTS[name + "_vocab"] = set(sym for sym,num in FSTS[name].isyms.items())
    return FSTS[name]
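A usage sketch, assuming the module-level cache starts empty; the FST name "np" is hypothetical:

FSTS = {}
np_fst = get_fst("np")            # reads grammar/compiled_fsts/np.bin once
cached = get_fst("np") is np_fst  # True: later calls are served from the cache
vocab = FSTS["np_vocab"]          # input vocabulary cached alongside the FST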
Code example #9
def value(theta):
    likelihood = 0.0
    print 'likelihoods'
    for idx, obs_trellis_file in enumerate(obs_machines):
        sys.stdout.write('%d \r' % idx)
        sys.stdout.flush()
        obs_trellis = fst.read(path + obs_trellis_file)
        likelihood += get_likelihood(obs_trellis, theta)
    #reg = np.linalg.norm(theta, ord=1)
    print 'll', likelihood  #, 'reg', reg
    return likelihood
Code example #10
File: parse_fst.py Project: aahriman/graduate_work
def posibilities(path, words):
    ''' Return the list of possibilities for every word in fstVector '''

    fstVector = fst.read(path)
    fstVector.remove_epsilon()
    posibilities = []
    _posibilities(fstVector, fstVector.start, words, posibilities, 0)
    # drop empty sets; removing items while iterating the same list would skip elements
    posibilities = [p for p in posibilities if len(p) > 0]
    return posibilities
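The recursive _posibilities helper is not shown. One hypothetical shape it could take, assuming the machine is acyclic after remove_epsilon() (the real helper may also use words, which this sketch ignores):

def _posibilities(machine, state_id, words, out, depth):
    # depth-first walk collecting, for each position, the set of arc
    # labels reachable at that depth
    if depth == len(out):
        out.append(set())
    for arc in machine[state_id].arcs:
        out[depth].add(arc.ilabel)
        _posibilities(machine, arc.nextstate, words, out, depth + 1)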
Code example #11
File: autosgt.py Project: shiranD/autosuggestion
def generate_suggestions(prefix):
    """
    To extract suggestions the first step was to traverse the fst
    in fstfile following the charecters of the given prefix. From
    there the state of the final letter of prefix is saved and the next
    part constructs an fst of the branch the grows from the saved state.
    It is done in bds approach. Later, extract all paths from acceptor in
    a dfs manner is done with path weight calculation. Then all paths 
    are sorted by weights and the first three are jsoned.
    INPUT:
       a string
    OUTPUT:
       a json file with up to three values for Suggestion entry
    """    

    fstfile = "/Users/dudy/CSLU/summerIntern/src/prfx_tree.fst"
    sym = fst.read_symbols("/Users/dudy/CSLU/summerIntern/src/syms")
    lm = fst.read(fstfile)
    prefix = prefix.lower()

    # look for subtree given prefix
    stateid = 0
    for ch in prefix:
        state = lm[stateid]
        for arc in state.arcs:
            if sym.find(arc.ilabel) == ch:
                print ch
                stateid = arc.nextstate
                break

    # construct the desired subtree (BFS)
    reduced = bfs(stateid, lm, sym)
    # extract strings (DFS)
    top3 = dfs(reduced, sym)

    # take the first three (if they exist)
    suggest = []
    for (suffix, _) in top3:
        suggest.append(suffix)

    # dict it    
    result = {}
    result["Suggestions:"] = suggest

    # json it
    json_file = "auto.json"
    with open(json_file, "w") as fp:
        json.dump(result, fp)
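A usage sketch (the prefix is arbitrary); the function writes its result to auto.json rather than returning it:

generate_suggestions("th")
with open("auto.json") as fp:
    print json.load(fp)["Suggestions:"]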
Code example #12
def main(args):
    L = fst.read(args.fst_file)

    for state in L:
        ilab = []
        for arc in state:
            ilab.append(arc.ilabel)
        ilabs = set(ilab)
        if 0 in ilabs and len(ilab) != 1:
            sys.stderr.write(
                "Node {} has an epsilon arc that is not the only arc: {}\n".format(
                    state, ilab))
            exit(1)
        if len(ilabs) != len(ilab):
            sys.stderr.write(
                "Node {} has duplicated ilabels on edges: {}\n".format(
                    state, ilab))
            exit(1)
Code example #13
File: kaldi_calibration.py Project: choko/alex
def load_lat(fn):
    lat = fst.read(fn)
    lat = fst.StdVectorFst(lat)

    return lat
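A usage sketch with a hypothetical lattice path: converting to StdVectorFst puts the lattice in the tropical semiring, where shortest_path() yields the 1-best hypothesis:

lat = load_lat("lattice.fst")
best = lat.shortest_path()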
Code example #14
File: align.py Project: arendu/pybrezel
    learned_weights = dict(
        (int(l.split('\t')[0]), float(l.split('\t')[-1])) for l in codecs.open(learned_weight_file, 'r', 'utf-8').readlines())

    filenames = codecs.open(path + 'filenames', 'r', 'utf-8').readlines()[1:]
    nat_sort_filenames = natural_sort(filenames)
    inp_machines, obs_chain, exp_machines = zip(*[tuple(l.split()) for l in nat_sort_filenames])

    obs_trelis = [o.replace('y', 'obs') for o in obs_chain]
    source = [l.split() for l in codecs.open(path + 'en', 'r', 'utf-8').readlines()]
    target = [l.split() for l in codecs.open(path + 'fr', 'r', 'utf-8').readlines()]

    all_alignments = []
    for idx, (ot, s, t) in enumerate(zip(obs_trelis, source, target)[:53]):
        print idx, s, t
        obs_t = fst.read(path + ot)
        sym_features = obs_t.isyms
        sym_targets = obs_t.osyms
        print path + ot
        obs_t.write('obs_t.fst')
        obs_wt = apply_weights(obs_t, learned_weights)
        obs_wt.write('obs_wt.fst', obs_t.isyms, obs_t.osyms)
        # fstmap to_standard converts the log-semiring weights to the tropical
        # (standard) semiring, which shortest_path() requires
        os.system('fstmap --map_type="to_standard" obs_wt.fst > obs_wt.std.fst')
        obs_wt_std = fst.read('obs_wt.std.fst')
        best_path = obs_wt_std.shortest_path()
        best_path.write('best_path.fst', obs_t.isyms, obs_t.osyms)
        all_alignments += do_align(idx + 1, best_path, s, t)
    writer = codecs.open('never.gonna.work.20.alignments.out', 'w')
    writer.write('\n'.join(all_alignments))
    writer.flush()
    writer.close()
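The natural_sort helper is outside this excerpt. A plausible sketch, assuming it orders filenames numerically so that 'x10' sorts after 'x9':

import re

def natural_sort(lines):
    def key(s):
        return [int(tok) if tok.isdigit() else tok for tok in re.split(r'(\d+)', s)]
    return sorted(lines, key=key)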
Code example #15
 def __init__(self, path):
     self.path = path
     self.fst = fst.read(self.path)
     self.isyms = dict(self.fst.isyms.items())
Code example #16
    c.start = c.add_state()
    space_id = syms["<space>"]
    c.add_arc(0, 0, space_id, syms["<eps>"])
    c.add_arc(0, 0, space_id, syms["+C+"])
    c.add_arc(0, 0, space_id, syms["+D+"])
    for word_id in word_ids:
        c.add_arc(0, 0, word_id, word_id)
    c[0].final = True
    return c


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: %s G.fst words.txt" % sys.argv[0], file=sys.stderr)
        sys.exit(1)

    g = fst.read(sys.argv[1])

    syms = {}
    syms_list = []
    for l in open(sys.argv[2]):
        ss = l.split()
        syms[ss[0]] = int(ss[1])
        syms_list.append(ss[0])

    unk_id = syms["<unk>"]
    # Following is needed to avoid line buffering
    while 1:
        l = sys.stdin.readline()
        if not l: break
        unks = []
        words = l.split()
Code example #17
import graphviz
import pydot
# import pywrapfst
import fst
import nltk
import re
import os

LexM = fst.read("lex_model/lex-uw.fst")
LM_expr = "^(.*)\.pru$"
folder_name = "lang_model"
file_list = [os.path.join(folder_name, fname) for fname in os.listdir(folder_name)]
pruned_models = [re.match(LM_expr, filename).group(1) for filename in file_list if re.match(LM_expr, filename)]

i_table = LexM.isyms
o_table = LexM.osyms

mod_name = "lang_model/3-gram-3"

LG = fst.read(mod_name + ".pi").copy()

test_word = fst.Acceptor(syms=i_table)
test_word.add_arc(0, 1, 'HH')
test_word.add_arc(1, 2, 'EY')
test_word[2].final = True

test_comp = test_word >> LG  # `>>` is pyfst composition
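A possible continuation sketch: the best analysis can be read off the shortest path, mapping output labels back through the symbol table. This assumes the composition is non-empty and the machines carry tropical weights, which shortest_path() requires; arc order may need reversing depending on state numbering:

best = test_comp.shortest_path()
hyp = [o_table.find(arc.olabel) for state in best for arc in state if arc.olabel != 0]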
Code example #18
__author__ = 'arenduchintala'
import sys, fst


def parseargs(args):
    try:
        in_fst_path = args[args.index('-in') + 1]
        out_fst_path = args[args.index('-out') + 1]
        n = int(args[args.index('-n') + 1])
        return [in_fst_path, out_fst_path, n]
    except (ValueError, IndexError):
        sys.stderr.write('Usage: -in [name of fst(final)] -out [name of shortest path fst] -n [number of paths]\n')
        exit()


if __name__ == '__main__':
    [in_fst, out_fst, n] = parseargs(sys.argv)
    sym_f = fst.read_symbols('data/symf.bin')
    sym_e = fst.read_symbols('data/syme.bin')
    f = fst.read(in_fst)
    sp = f.shortest_path(n)
    sp.remove_epsilon()
    sp.write(out_fst, sym_f, sym_e)
Code example #19
File: kaldi_calibration.py Project: kangliqiang/alex
def load_lat(fn):
    lat = fst.read(fn)
    lat = fst.StdVectorFst(lat)

    return lat
Code example #20
File: ops.py Project: ZhangAustin/attention-lvcsr
 def load(self):
     self.fst = fst.read(self.path)
     self.isyms = dict(self.fst.isyms.items())