def __init__(self, path, nn_char_map, no_transition_cost=1e12, **kwargs):
    """Build a language model backed by the FST stored at `path`.

    Parameters
    ----------
    path
        Location of the FST; handed unchanged to ``FST``.
    nn_char_map : dict
        Maps every character to the code the network uses for it.
        Must have as many entries as the FST input symbol table has
        once ``'<eps>'`` is removed.
    no_transition_cost : float
        Forwarded to ``FSTTransition``. Presumably the cost charged
        when the FST offers no transition -- confirm in
        ``FSTTransition``.
    **kwargs
        Forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If the FST symbol table (without ``'<eps>'``) and
        `nn_char_map` differ in size.

    """
    # Only FST-based language models are supported so far, which is why
    # no model type is checked here.
    fst = FST(path)
    fst_char_map = dict(fst.fst.isyms.items())
    # '<eps>' is dropped before the table is compared with, and looked
    # up in, `nn_char_map`.
    del fst_char_map['<eps>']
    if len(fst_char_map) != len(nn_char_map):
        raise ValueError(
            "FST input symbol table has {} symbols (excluding '<eps>') "
            "but nn_char_map has {} entries".format(
                len(fst_char_map), len(nn_char_map)))

    # Network code -> FST code, joined on the shared character.
    remap_table = {
        nn_char_map[character]: fst_code
        for character, fst_code in fst_char_map.items()
    }
    transition = FSTTransition(fst, remap_table, no_transition_cost)

    # This SequenceGenerator will be used only in a very limited way.
    # That's why it is sufficient to equip it with a completely
    # fake readout.
    dummy_readout = Readout(
        source_names=['add'],
        readout_dim=len(remap_table),
        merge=Merge(input_names=['costs'], prototype=Identity()),
        post_merge=Identity(),
        emitter=SoftmaxEmitter())

    # Fork every sequence input of the transition except the mask.
    fork = Fork(
        output_names=[name for name in transition.apply.sequences
                      if name != 'mask'],
        prototype=Identity())
    super(LanguageModel, self).__init__(
        transition=transition,
        fork=fork,
        readout=dummy_readout,
        **kwargs)
#!/usr/bin/env python
"""
explain_lm FST UTT

Explain the cost assigned to an utterance UTT by the fst FST.
"""
import sys

from lvsr.ops import FST

# Fail with a usage line instead of a bare IndexError traceback when an
# argument is missing. Extra arguments are still ignored, as before.
if len(sys.argv) < 3:
    sys.exit("usage: explain_lm FST UTT")

fst = FST(sys.argv[1])
s = sys.argv[2]
# The utterance is split character by character below, so the
# multi-character '<noise>' marker is first collapsed to the single
# placeholder '%' and expanded back by `subst`.
s = s.replace('<noise>', '%')
subst = {' ': '<spc>', '%': '<noise>'}
# Characters without an entry in `subst` are passed through unchanged.
fst.explain([subst.get(c, c) for c in s])
def main(fst_path, string):
    """Have the FST at `fst_path` explain `string`.

    `string` is split into single characters. The characters ``^``,
    ``$``, space and ``%`` are replaced by the symbols ``<bol>``,
    ``<eol>``, ``<spc>`` and ``<noise>``; everything else is passed
    through unchanged. A literal ``<noise>`` in `string` is treated as
    one symbol.
    """
    language_model = FST(fst_path)

    # Collapse the multi-character marker to one placeholder character so
    # that it survives the per-character split below.
    collapsed = string.replace("<noise>", "%")

    symbol_for = {
        "^": "<bol>",
        "$": "<eol>",
        " ": "<spc>",
        "%": "<noise>",
    }
    symbols = []
    for char in collapsed:
        symbols.append(symbol_for.get(char, char))

    language_model.explain(symbols)
def main(fst_path, string):
    """Load the FST from `fst_path` and call its ``explain`` on `string`.

    The string is turned into a list of FST symbols, one per character,
    with ``<noise>`` kept as a single symbol and ``^``, ``$`` and space
    spelled as ``<bol>``, ``<eol>`` and ``<spc>``.
    """
    fst = FST(fst_path)
    # '%' stands in for '<noise>' while iterating character by character.
    text = string.replace('<noise>', '%')
    spelled_out = {
        '^': '<bol>',
        '$': '<eol>',
        ' ': '<spc>',
        '%': '<noise>',
    }
    fst.explain([
        spelled_out[ch] if ch in spelled_out else ch
        for ch in text
    ])