def get_all_outputs(self):
    """Return the output string of every path through this object's transducer.

    For each path, the output label of every arc is looked up in the segment
    table's transducer symbol table. Epsilon, morpheme-boundary and
    word-boundary symbols are dropped; the remaining symbols are concatenated
    in path order.

    :return: a list with one string per path, in the order the transducer
             yields its paths.
    """
    fst = self.get_transducer()
    symbol_table = SegmentTable().transducer_symbol_table
    # Symbols that never appear in the surface output.
    skipped_symbols = (u"\u03b5", MORPHEME_BOUNDARY, WORD_BOUNDARY)

    all_outputs = []
    for fst_path in fst.paths():
        path_symbols = (symbol_table.find(arc.olabel) for arc in fst_path)
        all_outputs.append(
            "".join(sym for sym in path_symbols if sym not in skipped_symbols)
        )
    return all_outputs
def get_all_outputs(self, with_noise=True):
    """Return the output string of every path through this object's transducer.

    The transducer is obtained from ``self.get_transducer`` (``with_noise`` is
    forwarded unchanged) and, when the ``MINIMIZE_TRANSDUCER`` configuration
    entry is truthy, passed through ``self.minimize_transducer`` first.

    For each path, the output label of every arc is looked up in the segment
    table's transducer symbol table. Epsilon, morpheme-boundary and
    word-boundary symbols are dropped; the remaining symbols are concatenated
    in path order.

    :param with_noise: forwarded to ``self.get_transducer``.
    :return: a list with one string per path, in the order the transducer
             yields its paths.
    """
    fst = self.get_transducer(with_noise=with_noise)
    if configurations["MINIMIZE_TRANSDUCER"]:
        fst = self.minimize_transducer(fst)

    symbol_table = SegmentTable().transducer_symbol_table
    # Symbols that never appear in the surface output.
    skipped_symbols = (u"\u03b5", MORPHEME_BOUNDARY, WORD_BOUNDARY)

    all_outputs = []
    for fst_path in fst.paths():
        path_symbols = (symbol_table.find(arc.olabel) for arc in fst_path)
        all_outputs.append(
            "".join(sym for sym in path_symbols if sym not in skipped_symbols)
        )
    return all_outputs
def get_transducer_outputs(transducer, limit=float("inf")):
    """Return the output strings of the paths of ``transducer``.

    For each path, the output label of every arc is looked up in the segment
    table's transducer symbol table; epsilon symbols are dropped and the rest
    are concatenated in path order.

    :param transducer: object whose ``paths()`` yields sequences of arcs that
                       carry an ``olabel`` attribute.
    :param limit: maximum number of outputs to return. The default (infinity)
                  enumerates every path. A limit of zero or less returns an
                  empty list.
    :return: a list of at most ``limit`` strings, in the order the transducer
             yields its paths.
    """
    transducer_symbol_table = SegmentTable().transducer_symbol_table
    outputs = []
    if limit <= 0:
        return outputs

    for path in transducer.paths():
        symbols = (transducer_symbol_table.find(arc.olabel) for arc in path)
        outputs.append("".join(sym for sym in symbols if sym != u"\u03b5"))
        # Stop as soon as the quota is met. The previous check
        # (increment, then ``counter > limit``) let one extra output through.
        # Checking here, rather than at the top of the loop, also avoids
        # asking the transducer for a path that would be thrown away.
        if len(outputs) >= limit:
            break
    return outputs
def get_from_pyfst_transducer(cls, transducer):
    """Build a ``ParsingNFA`` that mirrors the states and arcs of ``transducer``.

    State names are the state numbers as strings. The returned NFA carries:

    * ``initial_state`` / ``final_states`` -- names of the states flagged
      ``initial`` / ``final`` in the transducer;
    * ``arcs_dict`` -- ``{state: {output symbol: [next states]}}``;
    * ``probabilities`` -- ``{state: [(output symbol, next state), ...]}``;
    * ``transition_matrix`` -- a square array, one row and column per state,
      filled with ``NO_TRANSITION_IDX``; entry ``[s1, s2]`` holds the output
      label of the arc from ``s1`` to ``s2`` (``NULL_SEGMENT_IDX`` when the
      arc's symbol is the null segment). If several arcs connect the same
      pair of states, the last one visited wins.

    Epsilon output symbols are stored as ``NULL_SEGMENT``. States without
    outgoing arcs get no entry in ``arcs_dict`` or ``probabilities``.

    :param transducer: pyfst transducer to convert.
    :return: the populated ``ParsingNFA``.
    """
    symbol_table = SegmentTable().transducer_symbol_table
    nfa = ParsingNFA()
    nfa.final_states = []
    arcs_by_state = {}
    transitions_by_state = {}
    state_count = len(list(transducer.states))
    matrix = np.ones((state_count, state_count)) * NO_TRANSITION_IDX

    for state in transducer:
        # Get the state number from the string form: "<StdState #x with y arcs>"
        source = re.match(r".*#(\w*).*", str(state)).group(1)
        if state.initial:
            nfa.initial_state = source
        if state.final:
            nfa.final_states.append(source)

        for arc in state:
            target = str(arc.nextstate)
            label = symbol_table.find(arc.olabel)
            if label == u"\u03b5":
                label = NULL_SEGMENT

            # Both per-state containers are created together, on the first arc.
            if source not in arcs_by_state:
                arcs_by_state[source] = {}
                transitions_by_state[source] = []
            arcs_by_state[source].setdefault(label, []).append(target)
            transitions_by_state[source].append((label, target))

            if label == NULL_SEGMENT:
                matrix_value = NULL_SEGMENT_IDX
            else:
                matrix_value = arc.olabel
            matrix[int(source), int(target)] = matrix_value

    nfa.arcs_dict = arcs_by_state
    nfa.probabilities = transitions_by_state
    nfa.transition_matrix = matrix
    return nfa
def pyfst_to_dfa(transducer, alphabet):
    """Convert the output side of a pyfst transducer into a FAdo DFA.

    An FAdo ``NFA`` is built whose states are the transducer's states and
    whose transitions are labelled with the arcs' output symbols (epsilon
    outputs become ``FAdo.common.Epsilon``). The NFA is then determinised
    with ``toDFA()``.

    :param transducer: pyfst transducer to convert.
    :param alphabet: assigned to the NFA's ``Sigma``.
    :return: the DFA produced by ``NFA.toDFA()``.
    """
    transducer_symbol_table = SegmentTable().transducer_symbol_table
    # Extracts the state number from the string form: "<StdState #x with y arcs>"
    state_number_pattern = re.compile(r".*#(\w*).*")

    nfa = NFA()
    nfa.Sigma = alphabet
    nfa.Initial = set()
    delta = {}
    state_names = []
    # Name -> position in ``state_names``. Replaces repeated ``list.index``
    # scans, which made the conversion quadratic in the number of states.
    index_by_name = {}

    # First pass: register every state, so that arcs in the second pass can
    # refer to states that appear later in the iteration order.
    for state in transducer:
        name = state_number_pattern.match(str(state)).group(1)
        state_names.append(name)
        # ``setdefault`` keeps the first position of a name, like ``list.index``.
        state_index = index_by_name.setdefault(name, len(state_names) - 1)
        if state.initial:
            nfa.Initial.add(state_index)
        if state.final:
            nfa.Final.add(state_index)

    # Second pass: translate the arcs into NFA transitions.
    for state in transducer:
        name = state_number_pattern.match(str(state)).group(1)
        source = index_by_name[name]
        for arc in state:
            target = index_by_name[str(arc.nextstate)]
            output_symbol = transducer_symbol_table.find(arc.olabel)
            if output_symbol == u"\u03b5":
                output_symbol = FAdo.common.Epsilon
            delta.setdefault(source, {}).setdefault(output_symbol, set()).add(target)

    nfa.delta = delta
    nfa.States = state_names
    return nfa.toDFA()