def to_wfa(self):
    """Export the stored transitions as a CoreWFAExport object.

    self._trans is walked as a two-level mapping. Each inner value is either
    a single transition or a set of transitions; every transition found is
    copied into a fresh core_wfa.Transition.

    Return: CoreWFAExport built from the copied transitions, self._fin and
    self._ini.
    """
    copied = []
    for inner in self._trans.values():
        for entry in inner.values():
            # Handle a lone transition as a one-element group so both
            # shapes share the same copying code.
            group = entry if isinstance(entry, set) else (entry,)
            for tr in group:
                copied.append(
                    core_wfa.Transition(tr.src, tr.dest, tr.symbol, tr.weight))
    return core_wfa_export.CoreWFAExport(copied, self._fin, self._ini)
def _merge_states(self, partition):
    """Merge states according to a partition on a set of states and create
    a new automaton.

    Return: NFA (with merged states and modified transitions according to
    a partitioning). The states of the result are the blocks of the
    partition. Every final block and the initial block get weight 1.0.
    Only the first initial state yielded by get_starts() is carried over.

    Raises IndexError when the automaton has no initial state and KeyError
    when a state is not covered by the partition.

    Keyword arguments:
    partition -- Set([Frozenset([State])]) partitioning on a set of states.
    """
    # Map every original state to the block that contains it.
    state_map = {}
    for part in partition:
        for state in part:
            state_map[state] = part

    # Collected in a set, as before; whether transitions that coincide
    # after merging collapse depends on Transition's equality/hash
    # (TODO confirm).
    merged = set()
    for trans in self.get_transitions():
        merged.add(core_wfa.Transition(state_map[trans.src],
                                       state_map[trans.dest],
                                       trans.symbol, trans.weight))

    # Iterate the keys directly: works on Python 2 and 3 alike and does
    # not unpack a value that is never used.
    new_finals = {}
    for state in self.get_finals():
        new_finals[state_map[state]] = 1.0

    # list(...)[0] replaces the Python-2-only keys()[0] and keeps the
    # IndexError on an empty set of initial states.
    ini = list(self.get_starts())[0]
    new_initial = {state_map[ini]: 1.0}

    return NFA(list(merged), new_finals, new_initial, self.get_alphabet())
def _convert_to_pa(self, root, reduced=False):
    """Build self._pa from the tree rooted in root.

    A fresh CoreWFAExport is created with root as the only initial state
    (weight 1.0) and filled by _tree_to_pa. Afterwards every sink state in
    self._sinks gets a final weight and one self-loop per symbol of its
    predicate, all with the same weight 1 / (number of symbols + 1).
    Finally the automaton is replaced by its trimmed version.

    Keyword arguments:
    root -- Root node of the tree; becomes the initial state.
    reduced -- Passed through to _tree_to_pa.
    """
    self._pa = core_wfa_export.CoreWFAExport()
    self._pa.set_starts({root: 1.0})
    self._tree_to_pa(root, set([root]), reduced)

    # items() instead of the Python-2-only iteritems().
    for pred, state in self._sinks.items():
        # Same weight for stopping and for each symbol of the predicate.
        un_weight = 1.0 / (len(pred.symbols) + 1.0)
        self._pa.get_finals()[state] = un_weight
        for symbol in pred.symbols:
            self._pa.get_transitions().append(
                core_wfa.Transition(state, state, symbol, un_weight))

    self._pa = self._pa.get_trim_automaton()
    # Re-tag the trimmed automaton as CoreWFAExport (presumably because
    # get_trim_automaton returns a base-class instance -- TODO confirm).
    self._pa.__class__ = core_wfa_export.CoreWFAExport
def _create_path(self, root, pref, string): if len(string) == 0: root.value += 1 root.freq += 1 self._nodes[root] += 1 return new = tree.Tree(0, {}) new.name = pref self._nodes[new] = 0 trans = core_wfa.Transition(root, new, ord(string[0]), 1) root.transitions[ord(string[0])] = trans root.freq += 1 self._edges.append(trans) self._create_path(new, pref + string[1:2], string[1:])
def get_dfa(self):
    """Get the deterministic version of the finite automaton. Implemented
    using the powerset construction.

    Only subsets reachable from the set of initial states are generated. A
    subset is final when it contains at least one final state of this
    automaton. All weights in the result are 1.0.

    Return: NFA (but deterministic)
    Note: States of the returned NFA are frozenset([State])
    """
    base = super(NFA, self)
    alphabet = base.get_alphabet()
    initial = frozenset(base.get_starts().keys())
    finals = frozenset(base.get_finals().keys())
    tr_dict = base.get_dictionary_transitions()

    new_transitions = []
    new_finals = dict()

    remaining = deque([initial])
    # Every subset ever enqueued. One hashed lookup replaces the former
    # "not in completed and not in remaining" test, whose second half
    # scanned the whole deque.
    discovered = set([initial])

    while remaining:
        act = remaining.popleft()
        if act & finals:
            new_finals[act] = 1.0

        # Fresh symbol -> successor-set table for this subset.
        successors = dict((symbol, set()) for symbol in alphabet)
        for state in act:
            for transition in tr_dict[state]:
                successors[transition.symbol].add(transition.dest)

        # items() instead of the Python-2-only iteritems().
        for symbol, dests in successors.items():
            if not dests:
                # No move on this symbol; the DFA is left incomplete.
                continue
            target = frozenset(dests)
            new_transitions.append(
                core_wfa.Transition(act, target, symbol, 1.0))
            if target not in discovered:
                discovered.add(target)
                remaining.append(target)

    return NFA(new_transitions, new_finals, {initial: 1.0}, alphabet)
def _tree_to_pa(self, node, visited, reduced):
    """Recursively copy the subtree under node into self._pa.

    Nodes whose data differs from the top predicate are not expanded. For
    an expanded node with a positive value, the final weight value / freq
    is recorded. Each outgoing transition has its weight divided (in place)
    by the freq of its source node and is then added to self._pa. When
    reduced is set and the destination's data is not the top predicate, a
    new transition leading to the matching state from self._sinks is added
    instead of the original one.

    Keyword arguments:
    node -- Tree node to process.
    visited -- Set of nodes already scheduled; extended in place.
    reduced -- If true, redirect transitions leaving the top predicate to
               sink states.
    """
    if node.data != self._predicates.get_top():
        return

    if node.value > 0:
        self._pa.get_finals()[node] = float(node.value) / node.freq

    assert len(node.transitions) != 0, "TreeToPA -- incomplete tree"

    for edge in node.transitions:
        # Turn the stored weight into a relative weight of the source node.
        edge.weight = float(edge.weight) / edge.src.freq

        to_sink = reduced and (edge.dest.data != self._predicates.get_top())
        if to_sink:
            pa_edge = core_wfa.Transition(edge.src,
                                          self._sinks[edge.dest.data],
                                          edge.symbol, edge.weight)
        else:
            pa_edge = edge
        self._pa.get_transitions().append(pa_edge)

        if edge.dest in visited:
            continue
        visited.add(edge.dest)
        self._tree_to_pa(edge.dest, visited, reduced)
def add_selfloop(self, states):
    """Replace the outgoing transitions of the given states by self-loops
    over the whole alphabet and make those states final.

    All transitions whose source is in states are dropped. For each state
    in states one self-loop with weight 1.0 per alphabet symbol is added,
    and the state's final weight is set to 1.0.

    Keyword arguments:
    states -- Set of states where the self-loops are added.
    """
    base = super(NFA, self)
    alphabet = base.get_alphabet()
    current = base.get_transitions()
    finals = base.get_finals()

    # Keep only transitions that do not start in one of the given states.
    result = [tr for tr in current if tr.src not in states]

    for state in list(states):
        result.extend([core_wfa.Transition(state, state, symbol, 1.0)
                       for symbol in alphabet])
        finals[state] = 1.0

    base.set_transitions(result)
    base.set_finals(finals)
def process_self_loop_state_approx(self, state, sparse=False):
    """Compute approximate state labels of the PA for a state state of the
    NFA. It is assumed that the state state has self-loops (transitions
    from predecessors are ignored).

    A one-state NFA looping on every alphabet symbol is multiplied with a
    copy of the PA started from self._reachable_states[state]. The results
    of _get_pa_states_reachability on the trimmed product are stored in
    self._reachable_states[state] and self._language_sum[state].

    Keyword arguments:
    state -- State of the NFA.
    sparse -- Passed to compute_transition_closure, get_initial_vector and
              get_final_ones.
    """
    # Single-state automaton: state is initial, final and loops on
    # every symbol of the NFA's alphabet.
    loops = [core_wfa.Transition(state, state, sym, 1.0)
             for sym in self._nfa.get_alphabet()]
    loop_nfa = nfa.NFA(loops, {state: 1.0}, {state: 1.0})
    loop_nfa.rename_states()

    # Copy of the PA whose initial states are the current labels of state.
    start_labels = self._reachable_states[state]
    pa_copy = copy.copy(self._pa)
    pa_copy.set_starts(start_labels)

    spa = pa_copy.product(loop_nfa)
    spa = spa.get_trim_automaton()
    spa.rename_states()
    spa.__class__ = matrix_wfa.MatrixWFA

    # Get initial and final vectors and compute the transition closure
    closure = spa.compute_transition_closure(CLOSURE_MODE, sparse,
                                             ITERATIONS)
    initial_vector = pa_copy.get_initial_vector(sparse)
    final_vector = spa.get_final_ones(sparse)
    wfa_wrap = WFAReachabilityWrap(spa, initial_vector, final_vector,
                                   closure)

    # Start from an empty aggregation and zero weight.
    total, labels = self._get_pa_states_reachability(wfa_wrap, dict(), 0.0)

    self._reachable_states[state] = labels
    self._language_sum[state] = total
def _add_transitions(src, dest, symbols):
    """Append to src.transitions one transition from src to dest per
    symbol, each with weight 0.

    Keyword arguments:
    src -- Source node; its transitions container is extended in place.
    dest -- Destination node of all added transitions.
    symbols -- Iterable of symbols labelling the new transitions.
    """
    # Snapshot the symbols first, as the container may be a live view.
    labels = list(symbols)
    src.transitions.extend(
        core_wfa.Transition(src, dest, label, 0) for label in labels)
def get_unambiguous_nfa(self, max_states=None):
    """Convert general NFA into UFA. Algorithm from article
    Mohri: A Disambiguation Algorithm for Finite Automata and Functional
    Transducers.

    States of the result are pairs (state, frozenset([State])) and all
    weights are 1.0. NOTE(review): the original docstring claimed the UFA
    can be exponentially more succinct than the input NFA -- confirm
    against the article.

    Return: instance of NFA, or None when more than max_states states
    would be created.

    Raises NFAOperationException when the automaton does not have exactly
    one initial state.

    Keyword arguments:
    max_states -- Upper bound on the number of generated states (None
                  means unbounded).
    """
    base = super(NFA, self)
    starts = base.get_starts()

    #TODO: Add support for multiple initial states
    if len(starts) != 1:
        raise NFAOperationException("Only NFA with a single initial state can be converted unambiguous automaton.")

    # A set gives constant-time membership on Python 2 as well.
    finals = set(base.get_finals())

    # States of the trimmed self-product; used to filter the subset part.
    b_states = base.product(self).get_trim_automaton().get_states()

    # Exactly one start exists (checked above); next(iter(...)) replaces
    # the Python-2-only keys()[0].
    initial_state = next(iter(starts))
    initial = (initial_state, frozenset([initial_state]))

    queue = deque([initial])
    q_prime = set([initial])
    num_states = 1
    finals_set = set()
    # Symmetric relation on generated states, stored as adjacency sets.
    # The former parallel set of pairs was never read and is dropped.
    relation_dict = {initial: set([initial])}
    new_transitions = set()
    # Generated transitions grouped by their source state.
    new_tr_dic = {}
    tr_dict = base.get_dictionary_transitions()

    while queue:
        current = queue.popleft()
        p, s = current

        # (p, s) becomes final only if no related state is final already.
        if p in finals and not any(
                rel in finals_set
                for rel in relation_dict.get(current, ())):
            finals_set.add(current)

        for transition in tr_dict[p]:
            symbol = transition.symbol

            # Successors of s over the same symbol that form a state of
            # the trimmed product together with transition.dest.
            t_set = frozenset(
                state_tr.dest
                for state in s
                for state_tr in tr_dict[state]
                if state_tr.symbol == symbol
                and (transition.dest, state_tr.dest) in b_states)
            target = (transition.dest, t_set)

            # Skip when a related state already has this very transition.
            # The relation is looked up again here because it may have
            # grown while handling an earlier transition of this state.
            if any((rel, symbol, target) in new_transitions
                   for rel in relation_dict.get(current, ())):
                continue

            if target not in q_prime:
                q_prime.add(target)
                queue.append(target)
                num_states += 1
                if (max_states is not None) and (num_states > max_states):
                    return None

            trans_item = (current, symbol, target)
            new_transitions.add(trans_item)
            new_tr_dic.setdefault(current, []).append(trans_item)

            # Relate the target to the targets of equally labelled
            # transitions leaving related states. Iterate over a snapshot:
            # the adjacency set of the current state may change below.
            for rel in list(relation_dict.get(current, ())):
                for tr_prime in new_tr_dic.get(rel, ()):
                    # Entries of new_tr_dic[rel] all start in rel, so
                    # only the symbol needs checking.
                    if tr_prime[1] == symbol:
                        other = tr_prime[2]
                        relation_dict.setdefault(other, set()).add(target)
                        relation_dict.setdefault(target, set()).add(other)

    transitions = [core_wfa.Transition(src, dst, sym, 1.0)
                   for (src, sym, dst) in new_transitions]
    new_finals = dict((fin, 1.0) for fin in finals_set)

    return NFA(transitions, new_finals, {initial: 1.0}, self.get_alphabet())