Ejemplo n.º 1
0
    def generate_inflections(self, q, lemma):
        """Return a normalized distribution over inflected forms of ``lemma``.

        ``q`` is composed with ``self.lattice`` and then ``self.refiner``; the
        output side of the result is epsilon-removed, determinized and
        minimized.  Every arc leaving the start state is then read as one
        candidate edit.  The arc's input label, looked up in
        ``self.input_syms``, has the form ``"<delete>_<insert>"``: drop
        ``len(delete)`` trailing symbols from ``lemma`` and append ``insert``.

        Args:
            q: FST used as the left operand of the first composition.
            lemma: sliceable sequence of string symbols (a ``str`` or a list
                of ``str``) that the edits are applied to.

        Returns:
            dict mapping each inflected form to its share of the total
            ``exp(-weight)`` mass over the start state's arcs.

        Raises:
            ValueError: if a label does not split into exactly two parts
                on ``"_"``.
            ZeroDivisionError: if arcs exist but their ``exp(-weight)``
                values sum to zero.
        """
        paths = fst.compose(q, self.lattice)
        two_state = fst.compose(paths, self.refiner)
        output = two_state.project(project_output=True)
        output.rmepsilon()
        output = fst.determinize(output)
        output.minimize()

        # Read the candidate edits off the arcs leaving the start state.
        labels = []
        scores = []
        for arc in output.arcs(output.start()):
            labels.append(self.input_syms.find(arc.ilabel))
            # Each arc weight w contributes exp(-w) to the distribution.
            scores.append(math.exp(-float(arc.weight)))
        total = sum(scores)

        inflections = {}
        for label, score in zip(labels, scores):
            delete, insert = label.split("_")
            # Compute the kept prefix length explicitly: ``lemma[:-0]`` is
            # ``lemma[:0]`` (empty), which would discard the whole lemma
            # whenever nothing has to be deleted.
            keep = max(len(lemma) - len(delete), 0)
            inflections["".join(lemma[:keep]) + insert] = score / total
        return inflections
Ejemplo n.º 2
0
 def generate_inflections(self, q, lemma):
     """Return the candidate inflections of ``lemma`` as a printable string.

     ``q`` is composed with ``self.lattice`` and then ``self.refiner``; the
     output side of the result is epsilon-removed, determinized and
     minimized.  Every arc leaving the start state is read as one edit
     whose label (looked up in ``self.input_syms``) has the form
     ``"<delete>_<insert>"``: drop ``len(delete)`` trailing characters
     from ``lemma`` and append ``insert``.

     Args:
         q: FST used as the left operand of the first composition.
         lemma: sliceable sequence that supports ``+ insert``
             (presumably a ``str`` -- confirm against callers).

     Returns:
         ``str()`` of the list of ``(inflected_form, share)`` pairs sorted
         by ascending share, where share is the arc weight divided by the
         sum of all start-state arc weights.

     Raises:
         ValueError: if a label does not split into exactly two parts
             on ``"_"``.
         ZeroDivisionError: if arcs exist but their weights sum to zero.
     """
     paths = fst.compose(q, self.lattice)
     two_state = fst.compose(paths, self.refiner)
     output = two_state.project(project_output=True)
     output.rmepsilon()
     output = fst.determinize(output)
     output.minimize()

     # Read the candidate edits off the arcs leaving the start state.
     labels = []
     weights = []
     for arc in output.arcs(output.start()):
         labels.append(self.input_syms.find(arc.ilabel))
         # NOTE(review): raw arc weights are normalized here, not
         # exp(-weight); confirm this is the intended scoring.
         weights.append(float(arc.weight))
     total = sum(weights)
     shares = [weight / total for weight in weights]

     forms = []
     for label in labels:
         delete, insert = label.split("_")
         # Compute the kept prefix length explicitly: ``lemma[:-0]`` is
         # ``lemma[:0]`` (empty), which would discard the whole lemma
         # whenever nothing has to be deleted.
         keep = max(len(lemma) - len(delete), 0)
         forms.append(lemma[:keep] + insert)
     return str(sorted(zip(forms, shares), key=lambda pair: pair[1]))
Ejemplo n.º 3
0
def apply_fst(elements, automata_op, is_project=True, **kwargs):
    """Apply `automata_op` to the linear automaton built from `elements`.

    The linear automaton is produced by `linear_fst(elements, automata_op,
    **kwargs)` and composed on the left of `automata_op`.

    Args:
        elements (list): ordered list of edge symbols for the linear
            automaton.
        automata_op (Fst): automaton that will be applied.
        is_project (bool, optional): whether to keep only the output labels
            of the composition.
        kwargs: additional arguments forwarded to `linear_fst`.

    Returns:
        The composed automaton; when `is_project` is true it has been
        projected onto its output labels in place before being returned.
    """
    chain = linear_fst(elements, automata_op, **kwargs)
    composed = fst.compose(chain, automata_op)
    if not is_project:
        return composed
    composed.project(project_output=True)
    return composed
Ejemplo n.º 4
0
def main():
    """Build a lexicon FST, compose it with a grammar FST and save both.

    Command-line options (all strings): ``--lexicon``, ``--word``,
    ``--phone``, ``--G``, ``--L`` and ``--LG``.  The symbol tables named by
    ``--word`` and ``--phone`` are loaded and passed, with ``--lexicon``, to
    ``make_fst``; the result is written to ``--L``.  The FST read from
    ``--G`` is then composed on the right of it and the composition is
    written to ``--LG``.
    """
    parser = argparse.ArgumentParser("")
    for option in ("--lexicon", "--word", "--phone", "--G", "--L", "--LG"):
        parser.add_argument(option, type=str)
    opts = parser.parse_args()

    words = load_symbols(opts.word)
    phones = load_symbols(opts.phone)

    # The lexicon FST is written out before the grammar is read.
    lexicon_fst = make_fst(words, phones, opts.lexicon)
    lexicon_fst.write(opts.L)

    grammar_fst = fst.Fst.read(opts.G)
    fst.compose(lexicon_fst, grammar_fst).write(opts.LG)
Ejemplo n.º 5
0
    def make_graph(self,
                   lexicon_file,
                   graphemes_file,
                   grammar_file,
                   sil_symbol,
                   disambig_graphemes_file='graphemes_disambig.txt',
                   words_file='words.txt',
                   graph_file='LG.fst'):
        """Build the decoding graph and write it to disk.

        The lexicon FST (L) and grammar FST (G) are composed, determinized,
        minimized and arc-sorted on input labels.  For graph type 'TLG' the
        token FST is composed on the left of LG and unknown-word arcs are
        removed; for 'LG' unknown-word arcs and disambiguation symbols are
        removed from LG itself.

        Args:
            lexicon_file: lexicon file from word to graphemes sequence
            graphemes_file: graphemes id file
            grammar_file: arpa language model file
            sil_symbol: silence symbol passed to the lexicon builder
            disambig_graphemes_file: output graphemes table from grapheme
                to id, isymbols_table for result WFST
            words_file: output words table from word to id,
                osymbols_table for result WFST
            graph_file: write result graph to graph_file, default: LG.fst

        Returns:
            The graph that was written; it is also stored in ``self.graph``.

        Raises:
            ValueError: if ``self.graph_type`` is neither 'TLG' nor 'LG'.
        """
        # Fail before any table or graph is written: with an unknown type
        # no branch below would build L or self.graph.
        if self.graph_type not in ('TLG', 'LG'):
            raise ValueError('unsupported graph_type %r; expected "TLG" or "LG"'
                             % (self.graph_type, ))

        L = self.lexicon_builder(lexicon_file, graphemes_file, sil_symbol)
        if self.graph_type == 'TLG':
            logging.info('grapheme should be  phones or syllables')
            logging.info(
                'please confirm the sil symbol is "SIL" or some other you defined'
            )
        else:
            logging.info('grapheme should be characters')
            logging.info(
                'please confirm the sil symbol is <space> or some other you defined'
            )
        self.lexicon_builder.write_words_table(words_file)
        self.lexicon_builder.write_disambig_graphemes_table(
            disambig_graphemes_file)
        logging.info('make lexicon fst successfully')

        # The grammar builder reads the words table written just above.
        G = self.grammar_builder(grammar_file, words_file)
        logging.info('make grammar fst successfully')

        LG = fst.compose(L, G)
        logging.info('LG compose successfully')
        LG = fst.determinize(LG)
        logging.info('LG determinize successfully')
        LG.minimize()
        logging.info('LG minimize successfully')
        LG.arcsort(sort_type='ilabel')

        if self.graph_type == 'TLG':
            T = self.token_builder(disambig_graphemes_file)  # specify blank
            self.graph = fst.compose(T, LG)
            logging.info('TLG compose successfully')
            remove_unk_arc(self.graph, self.lexicon_builder.unk_ids)
            logging.info('TLG fst remove unk successfully')
        else:
            self.graph = LG
            remove_unk_arc(self.graph, self.lexicon_builder.unk_ids)
            logging.info('LG fst remove unk successfully')
            remove_disambig_symbol(self.graph,
                                   self.lexicon_builder.disambig_ids)
            logging.info('LG fst remove disambig  successfully')

        self.graph.arcsort(sort_type='ilabel')
        self.graph.write(graph_file)
        logging.info('write graph successfully')
        return self.graph
Ejemplo n.º 6
0
def decode(input_fst, model):
    """Compose `input_fst` with `model` and return the shortest path.

    Args:
        input_fst: FST used as the left operand of the composition.
        model: FST used as the right operand of the composition.

    Returns:
        The result of `fst.shortestpath` applied to the composition.
    """
    return fst.shortestpath(fst.compose(input_fst, model))