Python HfstTransducer Exemples, hfst.HfstTransducer Python Exemples

Exemple #1

0

Afficher le fichier

 def _compile_fst(self) -> None:
     """Build ``self.inv_rules_tr`` and ``self.fst`` from the saved transducers.

     The rules transducer is loaded and an inverted copy of it is kept in
     ``self.inv_rules_tr``.  The lexicon transducer is then loaded, composed
     with the rules, minimized, inverted and converted to
     ``HFST_OLW_TYPE``; the result is kept in ``self.fst``.
     """
     rules = FST.load_transducer(shared.filenames['rules-tr'])
     inverted_rules = hfst.HfstTransducer(rules)
     self.inv_rules_tr = inverted_rules
     inverted_rules.invert()

     log = logging.getLogger('main')
     log.info('Building lexicon transducer...')
     lexicon = FST.load_transducer(shared.filenames['lexicon-tr'])
     composed = hfst.HfstTransducer(lexicon)
     self.fst = composed

     log.info('Composing with rules...')
     composed.compose(rules)
     composed.minimize()
     composed.invert()
     composed.convert(hfst.ImplementationType.HFST_OLW_TYPE)

Exemple #2

0

Afficher le fichier

 def getGenerator(self):
     """Store a lookup-optimized copy of the transducer in ``self.generator``.

     When ``self.transducer`` is falsy, ``self.load_filename(self.fsa)`` is
     called first.  The copy has its epsilons removed before it is
     optimized for lookup.
     """
     if not self.transducer:
         self.load_filename(self.fsa)
     optimized = hfst.HfstTransducer(self.transducer)
     for prepare in (optimized.remove_epsilons, optimized.lookup_optimize):
         prepare()
     self.generator = optimized

Exemple #3

0

Afficher le fichier

def seq_to_transducer(alignment, weight=0.0, type=None, alphabet=None):
    """Build a transducer that follows *alignment* one symbol pair per state.

    alignment -- sequence of (input, output) symbol tuples
    weight -- final weight of the last state
    type -- transducer implementation type; when None it is read from
        the 'transducer_type' option of the 'FST' config section
    alphabet -- optional extra symbols to add to the alphabet

    An (IDENTITY, IDENTITY) pair is expanded into identity transitions and
    per-symbol transitions (for every alphabet symbol not starting with
    '@_'), both into the new state and looping on it.  Any other pair
    yields a single transition.  All transitions carry weight 0.0.
    """
    if type is None:
        type = shared.config['FST'].getint('transducer_type')
    basic = hfst.HfstBasicTransducer()
    extra_symbols = () if alphabet is None else alphabet
    symbols = tuple(sorted(set(extra_symbols) | set(sum(alignment, ()))))
    basic.add_symbols_to_alphabet(symbols)

    def connect(source, target, in_sym, out_sym):
        # Every transition built here is unweighted.
        basic.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    previous = 0
    for (in_sym, out_sym) in alignment:
        current = basic.add_state()
        if (in_sym, out_sym) != (hfst.IDENTITY, hfst.IDENTITY):
            connect(previous, current, in_sym, out_sym)
        else:
            connect(previous, current, hfst.IDENTITY, hfst.IDENTITY)
            connect(current, current, hfst.IDENTITY, hfst.IDENTITY)
            for symbol in basic.get_alphabet():
                if symbol.startswith('@_'):
                    continue
                connect(previous, current, symbol, symbol)
                connect(current, current, symbol, symbol)
        previous = current
    basic.set_final_weight(previous, weight)
    return hfst.HfstTransducer(basic, type)

Exemple #4

0

Afficher le fichier

Fichier : fsa.py Projet : meghavarshini/ml-phonetic-analyser

 def getAnalyser(self):
     """Store a lookup-optimized copy of the transducer in ``self.analyser``.

     When ``self.transducer`` is falsy, ``self.load_filename(self.fsa)`` is
     called first.  The copy has its epsilons removed before it is
     optimized for lookup.
     """
     if not self.transducer:
         self.load_filename(self.fsa)
     lookup_fst = hfst.HfstTransducer(self.transducer)
     lookup_fst.remove_epsilons()
     lookup_fst.lookup_optimize()
     self.analyser = lookup_fst

Exemple #5

0

Afficher le fichier

 def apply(self, line):
     """Tokenize *line* and compose it with the fallback transducer.

     A copy of ``self.fallbackTransducer`` has its weights pushed to the
     end; the tokenized line is turned into a transducer, composed with
     that copy, minimized and returned.
     """
     tokenizer = hfst.HfstTokenizer()
     fallback = hfst.HfstTransducer(self.fallbackTransducer)
     fallback.push_weights_to_end()
     tokens = tokenizer.tokenize(line)
     result = hfst.tokenized_fst(tokens)
     result.compose(fallback)
     result.minimize()
     return result

Exemple #6

0

Afficher le fichier

 def load(filename: str, lexicon, model, **kwargs) -> 'Analyzer':
     """Create an Analyzer whose transducer is read from *filename*.

     The analyzer is constructed with ``compile=False`` (overriding any
     value passed in *kwargs*); its ``fst`` is loaded from *filename* and
     its ``inv_rules_tr`` is an inverted copy of the saved rules
     transducer.

     Returns the configured analyzer.
     """
     # Force compile=False: the transducer comes from the file instead.
     kwargs['compile'] = False
     analyzer = Analyzer(lexicon, model, **kwargs)
     analyzer.fst = FST.load_transducer(filename)
     rules_tr = FST.load_transducer(shared.filenames['rules-tr'])
     analyzer.inv_rules_tr = hfst.HfstTransducer(rules_tr)
     analyzer.inv_rules_tr.invert()
     return analyzer

Exemple #7

0

Afficher le fichier

def read_examples(filename="test.pstr", build_fsts=True):
    """Read the examples from the file called *filename* into ``cfg``.

    The file must contain one example per line and each line consists of
    a whitespace separated sequence of pair-symbols.  Empty lines and
    lines starting with "!" are skipped.

    Each example is appended to ``cfg.example_lst`` and added to
    ``cfg.example_set`` as a single-space separated string; its symbol
    pairs are added to ``cfg.symbol_pair_set``, from which
    ``cfg.input_symbol_set``, ``cfg.output_symbol_set`` and
    ``cfg.pair_symbol_set`` are then filled.

    filename -- name of the example file
    build_fsts -- when true, the examples are also compiled into
        ``cfg.examples_fst``, named after *filename* and carrying the
        sorted symbol pairs in its "x-pair_symbols" property
    """
    if build_fsts:
        import hfst
        examples_bfst = hfst.HfstBasicTransducer()
    # The context manager closes the file even when a line fails to parse.
    with open(filename, "r") as exfile:
        for line_nl in exfile:
            line = line_nl.strip()
            if not line or line.startswith("!"):
                continue
            # Raw string: "\s" in a plain literal is an invalid escape.
            pairsym_lst = re.split(r"\s+", line)
            symbol_pair_lst = [
                cfg.pairsym2sympair(pairsym) for pairsym in pairsym_lst
            ]
            pair_symbol_str = " ".join([
                cfg.sympair2pairsym(insym, outsym)
                for insym, outsym in symbol_pair_lst
            ])
            cfg.example_lst.append(pair_symbol_str)
            cfg.example_set.add(pair_symbol_str)  # spaces normalized
            if build_fsts:
                examples_bfst.disjunct(symbol_pair_lst, 0)
            for insym, outsym in symbol_pair_lst:
                cfg.symbol_pair_set.add((insym, outsym))
    if cfg.verbosity >= 30:
        print("List of examples:", cfg.example_lst)
        print("List of alphabet symbol pairs:", sorted(cfg.symbol_pair_set))
    if build_fsts:
        cfg.examples_fst = hfst.HfstTransducer(examples_bfst)
        cfg.examples_fst.set_name(filename)
        cfg.examples_fst.minimize()
        if cfg.verbosity >= 30:
            twbt.ppfst(cfg.examples_fst, False,
                       title="Example file as FST")
    for insym, outsym in cfg.symbol_pair_set:
        cfg.input_symbol_set.add(insym)
        cfg.output_symbol_set.add(outsym)
    for insym, outsym in cfg.symbol_pair_set:
        pair_symbol = cfg.sympair2pairsym(insym, outsym)
        cfg.pair_symbol_set.add(pair_symbol)
    if build_fsts:
        pair_symbol_lst = [
            insym + ':' + outsym for insym, outsym in cfg.symbol_pair_set
        ]
        pair_symbol_str = " ".join(sorted(pair_symbol_lst))
        cfg.examples_fst.set_property("x-pair_symbols", pair_symbol_str)

Exemple #8

0

Afficher le fichier

Fichier : twbt.py Projet : koskenni/pytwolc

def ppdef(XRC, name, displayed_formula):
    """Compile the definition *name* with *XRC* and pretty-print the result.

    The compiled transducer is converted to an HfstBasicTransducer and
    back, named "<name> = <displayed_formula>" and passed to ppfst().
    """
    compiled = XRC.compile(name)
    rebuilt = hfst.HfstTransducer(hfst.HfstBasicTransducer(compiled))
    title = name + " = " + displayed_formula
    rebuilt.set_name(title)
    ppfst(rebuilt, True)

Exemple #9

0

Afficher le fichier

def tag_acceptor(tag, alphabet):
    """Build a transducer: a loop over the symbols, followed by *tag*.

    State 0 is final and gets an identity self-loop for every element of
    *alphabet* matched by the 'symbol' pattern.  The result is that
    automaton concatenated with the identity sequence of *tag*.
    """
    symbol_loop = hfst.HfstBasicTransducer()
    matching = [sym for sym in alphabet
                if shared.compiled_patterns['symbol'].match(sym)]
    for sym in matching:
        self_loop = hfst.HfstBasicTransition(0, sym, sym, 0.0)
        symbol_loop.add_transition(0, self_loop)
    symbol_loop.set_final_weight(0, 0.0)
    acceptor = hfst.HfstTransducer(symbol_loop)
    tag_pairs = tuple(zip(tag, tag))
    acceptor.concatenate(seq_to_transducer(tag_pairs))
    return acceptor

Exemple #10

0

Afficher le fichier

def create_new_words_acceptor_if_not_exists(filename, analyzer, lexicon):
    """Save an acceptor built from the analyzer, unless *filename* exists.

    The acceptor is the input projection of a copy of ``analyzer.fst``
    (converted to ``TROPICAL_OPENFST_TYPE``) minus the lexicon transducer,
    minimized after each step.
    """
    if file_exists(filename):
        return
    acceptor = hfst.HfstTransducer(analyzer.fst)
    acceptor.convert(hfst.ImplementationType.TROPICAL_OPENFST_TYPE)
    acceptor.input_project()
    acceptor.minimize()
    known_words = lexicon.to_fst()
    acceptor.subtract(known_words)
    acceptor.minimize()
    FST.save_transducer(acceptor, filename)

Exemple #11

0

Afficher le fichier

Fichier : twexamp.py Projet : koskenni/pytwolc

def pairs_to_fst(pair_set):
    """Return an FST that accepts any single symbol pair from *pair_set*.

    Each pair is disjuncted into a basic transducer as a one-element
    path; the result has its epsilons removed and is minimized.
    """
    union_bfst = hfst.HfstBasicTransducer()
    for symbol_pair in pair_set:
        one_pair_path = (symbol_pair, )  # disjunct() takes a tokenized path
        union_bfst.disjunct(one_pair_path, 0)
    result = hfst.HfstTransducer(union_bfst)
    result.remove_epsilons()
    result.minimize()
    return result

Exemple #12

0

Afficher le fichier

Fichier : twbt.py Projet : koskenni/pytwolc

def fsa_to_fst(FSA, separator='^'):
    """Split the joint labels of *FSA* back into input:output pairs.

    For every transition pair, the output symbol is split at *separator*
    into exactly two parts, which become the new input and output symbols.
    """
    basic = hfst.HfstBasicTransducer(FSA)
    substitutions = {}
    for pair in basic.get_transition_pairs():
        _, joint_symbol = pair
        new_in, new_out = joint_symbol.split(separator)
        substitutions[pair] = (new_in, new_out)
    basic.substitute(substitutions)
    return hfst.HfstTransducer(basic)

Exemple #13

0

Afficher le fichier

Fichier : fs.py Projet : koskenni/pytwolc

def string_to_fsa(grapheme_string):
    """Return an FSA accepting the grapheme sequence of *grapheme_string*.

    The string is split with grapheme.graphemes() and each grapheme is
    paired with itself to form the single path of the automaton.
    """
    graphemes = list(grapheme.graphemes(grapheme_string))
    identity_path = tuple((g, g) for g in graphemes)
    if cfg.verbosity >= 10:
        print(graphemes)
        print(identity_path)
    path_bfsa = hfst.HfstBasicTransducer()
    path_bfsa.disjunct(identity_path, 0)
    return hfst.HfstTransducer(path_bfsa)

Exemple #14

0

Afficher le fichier

def fst_to_fsa(FST):
    """Convert *FST* into an FSA by joining each pair into one symbol.

    The input and output symbols of every transition pair are joined with
    the module-level ``mphon_separator``; the joint symbol is used as both
    the input and the output label.
    """
    basic = hfst.HfstBasicTransducer(FST)
    joint_labels = {}
    for pair in basic.get_transition_pairs():
        left, right = pair
        joint = "".join((left, mphon_separator, right))
        joint_labels[pair] = (joint, joint)
    basic.substitute(joint_labels)
    return hfst.HfstTransducer(basic)

Exemple #15

0

Afficher le fichier

def rootgen_transducer(rootdist):
    """Create a one-state weighted automaton for word generation.

    The weights are taken from ``rootdist.features[0].log_probs``.  The
    entry for ('#',) becomes the final weight of state 0; every other
    entry becomes an identity self-loop on state 0 labelled with the
    first element of its key.

    Raises NotImplementedError when the 'rootdist_n' feature option
    is not 1.
    """
    if shared.config['Features'].getint('rootdist_n') != 1:
        raise NotImplementedError('Not implemented for rootdist_n != 1')
    log_probs = rootdist.features[0].log_probs
    end_marker = ('#',)
    automaton = hfst.HfstBasicTransducer()
    automaton.set_final_weight(0, log_probs[end_marker])
    for key, log_prob in log_probs.items():
        if key == end_marker:
            continue
        symbol = key[0]
        automaton.add_transition(
            0, hfst.HfstBasicTransition(0, symbol, symbol, log_prob))
    return hfst.HfstTransducer(automaton)

Exemple #16

0

Afficher le fichier

Fichier : twbt.py Projet : koskenni/pytwolc

def fst_to_fsa(FST, separator='^'):
    """Convert *FST* into an FSA whose labels are "input<separator>output".

    Every transition pair is replaced by an identity pair of the joint
    symbol made from its input symbol, *separator* and its output symbol.
    """
    def joint_pair(pair):
        in_sym, out_sym = pair
        joint = in_sym + separator + out_sym
        return (joint, joint)

    basic = hfst.HfstBasicTransducer(FST)
    replacements = {pair: joint_pair(pair)
                    for pair in basic.get_transition_pairs()}
    basic.substitute(replacements)
    return hfst.HfstTransducer(basic)

Exemple #17

0

Afficher le fichier

Fichier : samplers.py Projet : maciejjan/morle

    def _compute_leaf_prob(self):
        """Fill ``self.leaf_prob`` from the edges derivable from the lexicon.

        ``self.leaf_prob`` starts as a matrix of ones with one row per
        lexicon entry and one column per tag.  The lexicon transducer
        (with the tag generator appended) is composed with the rules
        transducer and saved to 'tr.fsm'; its string pairs are then
        streamed from the external ``hfst-fst2strings`` command.  For every
        pair, the extracted rules that are in the rule set yield edges,
        and each edge multiplies the cell of its source word and tag by
        one minus the model's probability for that edge.
        """
        logging.getLogger('main').info('Computing leaf probabilities...')
        self.leaf_prob = np.ones((len(self.lexicon), len(self.tagset)),
                                 dtype=np.float64)
        # NOTE(review): `lexicon` is neither a parameter nor a local here;
        # presumably `self.lexicon` was intended -- confirm.
        edge_set = EdgeSet(lexicon)

        def _empty_edge_set(edge_set):
            # Fold the collected edges into self.leaf_prob and hand back a
            # fresh, empty edge set over the same lexicon.
            lexicon = edge_set.lexicon
            n = len(edge_set)
            probs = 1 - self.model.edges_prob(edge_set)
            for e_id, edge in enumerate(edge_set):
                word = lexicon.get_by_symstr(''.join(edge.source.word))[0]
                w_id = lexicon.get_id(word)
                t_id = self.tag_idx[edge.source.tag]
                self.leaf_prob[w_id, t_id] *= probs[e_id]
            edge_set = EdgeSet(lexicon)
            print(n)
            return edge_set

        lexicon_tr = self.lexicon.to_fst()
        lexicon_tr.concatenate(FST.generator(self.tagset))
        rules_tr = self.model.rule_set.to_fst()
        tr = hfst.HfstTransducer(lexicon_tr)
        tr.compose(rules_tr)
        tr.determinize()
        tr.minimize()
        FST.save_transducer(tr, 'tr.fsm')

        tr_path = full_path('tr.fsm')
        cmd = ['hfst-fst2strings', tr_path]
        # NOTE(review): the child process is never waited on and its pipes
        # are never closed in this method -- confirm this is acceptable.
        p = subprocess.Popen(cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.DEVNULL,
                             universal_newlines=True,
                             bufsize=1)
        while True:
            # An empty (or whitespace-only) line ends the loop.
            line = p.stdout.readline().strip()
            if line:
                w1, w2 = line.split(':')
                n1 = LexiconEntry(w1)
                n2 = LexiconEntry(w2)
                rules = extract_all_rules(n1, n2)
                for rule in rules:
                    # NOTE(review): `rule_set` is not defined in this method;
                    # presumably `self.model.rule_set` -- confirm.
                    if rule in rule_set:
                        edge_set.add(GraphEdge(n1, n2, rule))
            else:
                break
            # Flush in batches once more than 300000 edges are collected.
            if len(edge_set) > 300000:
                edge_set = _empty_edge_set(edge_set)
        # Flush whatever is left after the stream ends.
        edge_set = _empty_edge_set(edge_set)

Exemple #18

0

Afficher le fichier

def tag_absorber(alphabet):
    """Build a two-state transducer that maps tag symbols to epsilon.

    Elements of *alphabet* matched by the 'symbol' pattern become identity
    self-loops on state 0.  Elements matched by the 'tag' pattern (and not
    by 'symbol') are mapped to epsilon on a transition from state 0 to
    state 1 and on a self-loop on state 1.  Both states are final.
    """
    absorber = hfst.HfstBasicTransducer()

    def arc(source, target, in_sym, out_sym):
        absorber.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    for sym in alphabet:
        if shared.compiled_patterns['symbol'].match(sym):
            arc(0, 0, sym, sym)
            continue
        if shared.compiled_patterns['tag'].match(sym):
            arc(0, 1, sym, hfst.EPSILON)
            arc(1, 1, sym, hfst.EPSILON)
    for final_state in (0, 1):
        absorber.set_final_weight(final_state, 0.0)
    return hfst.HfstTransducer(absorber)

Exemple #19

0

Afficher le fichier

Fichier : alergia.py Projet : maciejjan/morle

 def to_hfst(self) -> hfst.HfstTransducer:
     """Convert the automaton into a weighted HFST transducer.

     Every transition becomes an identity transition whose weight is the
     negative logarithm of its frequency divided by the total frequency of
     its source state.  States with a positive final frequency become
     final, weighted the same way from their final frequency.
     """
     def neg_log_ratio(freq, total):
         return -math.log(freq / total)

     basic = hfst.HfstBasicTransducer()
     for state in self.states.values():
         total = state.get_total_freq()
         for trans in state.transitions.values():
             arc = hfst.HfstBasicTransition(
                 trans.target_state_id, trans.symbol, trans.symbol,
                 neg_log_ratio(trans.freq, total))
             basic.add_transition(state.id, arc)
         if state.final_freq > 0:
             basic.set_final_weight(
                 state.id, neg_log_ratio(state.final_freq, total))
     return hfst.HfstTransducer(basic)

Exemple #20

0

Afficher le fichier

def set_weights(fsa):
    """Set the weight of each transition using mphon_weight().

    *fsa* is converted to an HfstBasicTransducer; for every transition of
    every state, set_weight() is called with mphon_weight() of the
    transition's input symbol.  The basic transducer is converted back
    and returned; it is printed first when cfg.verbosity is at least 20.
    """
    bfsa = hfst.HfstBasicTransducer(fsa)
    for state in bfsa.states():
        for arc in bfsa.transitions(state):
            # Only the input symbol determines the weight.
            arc.set_weight(mphon_weight(arc.get_input_symbol()))
    weighted_fsa = hfst.HfstTransducer(bfsa)
    if cfg.verbosity >= 20:
        print("set_weights:\n", weighted_fsa)
    return weighted_fsa

Exemple #21

0

Afficher le fichier

def delenv(alphabet, max_affix_size, max_infix_size, max_infix_slots,
           deletion_symbol='@_DEL_@', deletion_slot_symbol='@_DELSLOT_@'):
    """Build a transducer made of deletion chains and identity loops.

    The layout is: a deletion chain of up to *max_affix_size* symbols, an
    identity loop, then *max_infix_slots* repetitions of (a deletion chain
    of up to *max_infix_size* symbols followed by an identity loop), and a
    closing deletion chain of up to *max_affix_size* symbols whose last
    state is final.

    A deletion chain starts with an epsilon:*deletion_slot_symbol*
    transition, then maps alphabet symbols to *deletion_symbol*; every
    state of the chain also has an epsilon transition to the chain's last
    state.  EPSILON, IDENTITY and UNKNOWN are never used as labels from
    *alphabet*.  Epsilons are removed and the result is minimized.
    """
    reserved = (hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN)
    basic = hfst.HfstBasicTransducer()

    def arc(source, target, in_sym, out_sym):
        basic.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    def deletion_chain(start, length):
        # Returns the number of the state at which the chain ends.
        arc(start, start + 1, hfst.EPSILON, deletion_slot_symbol)
        for step in range(1, length + 1):
            for sym in alphabet:
                if sym in reserved:
                    continue
                arc(start + step, start + step + 1, sym, deletion_symbol)
        end = start + length + 1
        for step in range(length + 1):
            # Allow leaving the chain early from any of its states.
            arc(start + step, end, hfst.EPSILON, hfst.EPSILON)
        return end

    def identity_loop(start):
        # Returns the number of the looping state.
        loop_state = start + 1
        for sym in alphabet:
            if sym in reserved:
                continue
            arc(start, loop_state, sym, sym)
            arc(loop_state, loop_state, sym, sym)
        return loop_state

    # prefix
    current = deletion_chain(0, max_affix_size)
    current = identity_loop(current)
    # infixes
    for _ in range(max_infix_slots):
        current = deletion_chain(current, max_infix_size)
        current = identity_loop(current)
    # suffix
    current = deletion_chain(current, max_affix_size)
    basic.set_final_weight(current, 0.0)

    compiled = hfst.HfstTransducer(basic)
    compiled.remove_epsilons()
    compiled.minimize()
    return compiled

Exemple #22

0

Afficher le fichier

Fichier : twbt.py Projet : koskenni/pytwolc

def expanded_examples(TR, insyms, symbol_pair_set):
    """Expand the pairs of the given input symbols to all their alternatives.

    For each symbol in *insyms*, all pairs in *symbol_pair_set* with that
    input symbol are collected, and each of them is substituted in a copy
    of *TR* by the whole collection.  The result is named
    "negative and positive together", minimized and returned.
    """
    basic = hfst.HfstBasicTransducer(TR)
    for wanted in insyms:
        alternatives = tuple((ins, outs)
                             for ins, outs in symbol_pair_set
                             if ins == wanted)
        for pair in alternatives:
            basic.substitute(pair, alternatives)
    expanded = hfst.HfstTransducer(basic)
    expanded.set_name("negative and positive together")
    expanded.minimize()
    return expanded

Exemple #23

0

Afficher le fichier

Fichier : possible-edges.py Projet : maciejjan/morle

def compute_possible_edges(lexicon: Lexicon, rule_set: RuleSet) -> EdgeSet:
    """Collect the edges between lexicon entries licensed by *rule_set*.

    The lexicon transducer (with the tag sequences extracted from the
    rules appended, if any) is composed with the rules transducer and then
    with its own inverse, and saved to 'tr.fsm'.  The string pairs of that
    transducer are streamed from the external ``hfst-fst2strings``
    command.  For each pair whose words differ once tags are stripped,
    every extracted rule that is in *rule_set* adds an edge between the
    corresponding tag-less lexicon entries.

    Returns the resulting EdgeSet.
    """
    # build the transducer
    lexicon_tr = lexicon.to_fst()
    tag_seqs = extract_tag_symbols_from_rules(rule_set)
    if tag_seqs:
        lexicon_tr.concatenate(FST.generator(tag_seqs))
    rules_tr = rule_set.to_fst()
    tr = hfst.HfstTransducer(lexicon_tr)
    tr.compose(rules_tr)
    tr.determinize()
    tr.minimize()
    lexicon_tr.invert()
    tr.compose(lexicon_tr)
    tr.determinize()
    tr.minimize()
    FST.save_transducer(tr, 'tr.fsm')

    tr_path = full_path('tr.fsm')
    cmd = ['hfst-fst2strings', tr_path]
    edge_set = EdgeSet(lexicon)
    # The context manager closes the pipes and waits for the child, so no
    # file descriptors or zombie processes are left behind.
    with subprocess.Popen(cmd,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.DEVNULL,
                          universal_newlines=True,
                          bufsize=1) as p:
        while True:
            line = p.stdout.readline().strip()
            if not line:
                # End of output (or an empty line) stops the scan.
                break
            w1, w2 = line.split(':')
            w1_without_tag = re.sub(shared.compiled_patterns['tag'], '', w1)
            w2_without_tag = re.sub(shared.compiled_patterns['tag'], '', w2)
            if w1_without_tag == w2_without_tag:
                continue
            n1 = LexiconEntry(w1)
            n2 = LexiconEntry(w2)
            rules = algorithms.align.extract_all_rules(n1, n2)
            for rule in rules:
                if rule in rule_set:
                    n1_wt = lexicon.get_by_symstr(w1_without_tag)[0]
                    n2_wt = lexicon.get_by_symstr(w2_without_tag)[0]
                    edge_set.add(GraphEdge(n1_wt, n2_wt, rule))
    return edge_set

Exemple #24

0

Afficher le fichier

def contexts_to_condition(*contexts):
    """Convert a list of contexts into a single condition FST.

    Each (left, right) context is converted separately with
    context_to_condition(); the union of the results, minimized after
    every step, is returned.  After each step the result is named
    "<left name>_<right name>" from that context, so the final name comes
    from the last context.
    """
    union_fst = hfst.HfstTransducer()
    for left_fst, right_fst in contexts:
        union_fst.disjunct(context_to_condition(left_fst, right_fst))
        union_fst.minimize()
        combined_name = left_fst.get_name() + "_" + right_fst.get_name()
        union_fst.set_name(combined_name)
    return union_fst

Exemple #25

0

Afficher le fichier

def remove_bad_transitions(fsa):
    """Copy *fsa*, keeping only transitions accepted by mphon_is_valid().

    All states are copied and final states get final weight 0.0.  A
    transition is kept when its input symbol passes mphon_is_valid(); the
    kept transition uses that symbol as both input and output label and
    has weight 0.  The copy is minimized, and printed when cfg.verbosity
    is at least 20.
    """
    source = hfst.HfstBasicTransducer(fsa)
    pruned = hfst.HfstBasicTransducer()
    for state in source.states():
        pruned.add_state(state)
        if source.is_final_state(state):
            pruned.set_final_weight(state, 0.0)
        for arc in source.transitions(state):
            symbol = arc.get_input_symbol()
            if not mphon_is_valid(symbol):
                continue
            target = arc.get_target_state()
            pruned.add_transition(state, target, symbol, symbol, 0)
    result = hfst.HfstTransducer(pruned)
    result.minimize()
    if cfg.verbosity >= 20:
        print("remove_bad_transitions:")
        print(result)
    return result

Exemple #26

0

Afficher le fichier

def delfilter(alphabet, length, deletion_symbol='@_DEL_@',
              deletion_slot_symbol='@_DELSLOT_@'):
    """Build the deletion filter transducer over *alphabet*.

    States 0..length are final.  Reading a printable symbol moves one
    state up in that range and reading *deletion_symbol* (mapped to
    epsilon) moves one state down; *deletion_slot_symbol* loops on every
    one of these states.

    States length+1 onwards are not made final.  State 0 reaches the first
    of them on *deletion_symbol* (mapped to epsilon); each further
    deletion moves one state deeper and each printable symbol moves one
    state back, the first of them returning to state 0.

    Printable symbols are the elements of *alphabet* other than EPSILON,
    IDENTITY, UNKNOWN and *deletion_symbol*.  All weights are 0.0.
    """
    automaton = hfst.HfstBasicTransducer()

    def arc(source, target, in_sym, out_sym):
        automaton.add_transition(
            source, hfst.HfstBasicTransition(target, in_sym, out_sym, 0.0))

    def slot_loop(state):
        arc(state, state, deletion_slot_symbol, deletion_slot_symbol)

    def absorb_deletion(source, target):
        arc(source, target, deletion_symbol, hfst.EPSILON)

    automaton.set_final_weight(0, 0.0)
    slot_loop(0)
    reserved = {hfst.EPSILON, hfst.IDENTITY, hfst.UNKNOWN, deletion_symbol}
    printable_chars = set(alphabet) - reserved

    # Final states 0..length.
    for level in range(length):
        upper = level + 1
        for char in printable_chars:
            arc(level, upper, char, char)
        absorb_deletion(upper, level)
        slot_loop(upper)
        automaton.set_final_weight(upper, 0.0)

    # Non-final states starting at length+1.
    negative_base = length + 1
    absorb_deletion(0, negative_base)
    for char in printable_chars:
        arc(negative_base, 0, char, char)
    for offset in range(length - 1):
        shallow = negative_base + offset
        deep = shallow + 1
        absorb_deletion(shallow, deep)
        slot_loop(deep)
        for char in printable_chars:
            arc(deep, shallow, char, char)
    return hfst.HfstTransducer(automaton)

Exemple #27

0

Afficher le fichier

def accum_input_labels(fst, separator=""):
    """Encode, weight and prune a transducer.

    fst -- transducer to be processed
    separator -- string placed between the input and the output symbol
        when they are concatenated

    Returns a transducer with the same states and final weights.  The
    label of each transition (used as both input and output) is the
    original input symbol, *separator* and the original output symbol
    concatenated.  Transitions whose new label fails
    alphabet.mphon_is_valid() are discarded; the others are weighted with
    alphabet.mphon_weight() of the new label.  The result is minimized.
    """
    if cfg.verbosity >= 10:
        print("to be accumulated:\n", fst)
    source = hfst.HfstBasicTransducer(fst)
    accumulated = hfst.HfstBasicTransducer()
    for state in source.states():
        accumulated.add_state(state)
        if source.is_final_state(state):
            accumulated.set_final_weight(
                state, source.get_final_weight(state))
        for arc in source.transitions(state):
            target = arc.get_target_state()
            in_label = arc.get_input_symbol()
            out_label = arc.get_output_symbol()
            old_weight = arc.get_weight()
            joint_label = in_label + separator + out_label
            if cfg.verbosity >= 25:
                print("arc", state, target, in_label, out_label, old_weight)
            if alphabet.mphon_is_valid(joint_label):
                accumulated.add_transition(
                    state,
                    hfst.HfstBasicTransition(
                        target, joint_label, joint_label,
                        alphabet.mphon_weight(joint_label)))
                if cfg.verbosity >= 25:
                    print("after addition of transition:\n", accumulated)
    result_fst = hfst.HfstTransducer(accumulated)
    result_fst.minimize()
    if cfg.verbosity >= 10:
        print("accumulated fst:\n", result_fst)
    return result_fst

Exemple #28

0

Afficher le fichier

Fichier : pyhguessify.py Projet : flammie/pyhfst-guesser

def make_guesser(fsa: hfst.HfstTransducer, prefix: bool, suffix: bool,
                 substring: bool, verbose: bool):
    """Make a guesser from an automaton.

    The automaton is converted to a basic transducer and passed through
    make_suffix_guesser() when *suffix* is set, otherwise through
    make_prefix_guesser() when *prefix* is set.  When *substring* is set,
    make_substring_guesser() is applied on top of that.  The result is
    converted back to an HfstTransducer and returned.  Progress messages
    are printed when *verbose* is set.
    """
    def announce(message, **print_options):
        if verbose:
            print(message, **print_options)

    announce("Converting...", end=" ")
    basic = hfst.HfstBasicTransducer(fsa)
    if suffix:
        basic = make_suffix_guesser(basic, verbose)
    elif prefix:
        basic = make_prefix_guesser(basic, verbose)
    # substring guesser can combine with affix guesser
    if substring:
        basic = make_substring_guesser(basic, verbose)
    announce("Converting...", end=" ")
    converted = hfst.HfstTransducer(basic)
    announce("done!")
    return converted

Exemple #29

0

Afficher le fichier

Fichier : aligner.py Projet : koskenni/twol

def align_two_words(in_word, out_word, aligner_fst, zero, number):
    """Return the raw paths of the *number* best alignments of two words.

    Both words are turned into transducers in which the pair
    (*zero*, *zero*) is freely inserted.  The first is composed with
    *aligner_fst* and then with the second; the n_best(*number*) paths of
    the minimized result are extracted in 'raw' format.  The paths are
    printed when cfg.verbosity is at least 10.
    """
    def with_free_zeros(word):
        word_fst = hfst.fst(word)
        word_fst.insert_freely((zero, zero))
        word_fst.minimize()
        return word_fst

    upper = with_free_zeros(in_word)
    lower = with_free_zeros(out_word)

    alignments = hfst.HfstTransducer(upper)
    for operand in (aligner_fst, lower):
        alignments.compose(operand)
    alignments.n_best(number)
    alignments.minimize()

    raw_paths = alignments.extract_paths(output='raw')
    if cfg.verbosity >= 10:
        print("raw_paths:", raw_paths)
    return raw_paths

Exemple #30

0

Afficher le fichier

Fichier : hfst-prune-alphabet.py Projet : cval-c4q/hfst

# Read transducers from the default HFST input stream, prune the alphabet
# of each one and write it to the default HFST output stream in the same
# format.  -f/--force switches forced pruning on, -S/--safe switches it off.
import hfst

force = False
from sys import argv
if len(argv) > 3:
    raise RuntimeError(
        'Usage: hfst-prune-alphabet.py [-f|--force] [-S|--safe]')
for arg in argv[1:]:
    if arg in ('-f', '--force'):
        force = True
    elif arg in ('-S', '--safe'):
        force = False
    else:
        raise RuntimeError('unknown option: ' + arg)

istr = hfst.HfstInputStream()
ostr = hfst.HfstOutputStream(type=istr.get_type())

while not istr.is_eof():
    # Pruning is done on the basic (editable) representation.
    tr = hfst.HfstBasicTransducer(istr.read())
    tr.prune_alphabet(force)
    tr = hfst.HfstTransducer(tr, istr.get_type())
    tr.write(ostr)
    ostr.flush()

istr.close()
ostr.close()