def create_nfa(final_states_name, productions):
    """Build an NFA from a list of final-state names and grammar productions.

    The alphabet handed to ``NFA`` is every character from ``' '`` to ``'~'``
    inclusive.

    :param final_states_name: iterable of state names; each gets a node whose
        ``data`` is ``{'token': <name>}`` and which is added to
        ``nfa.final_state``.
    :param productions: iterable of mappings read through the keys ``'left'``,
        ``'right'``, ``'terminate'`` and ``'epsilon'``.
    :return: the populated NFA, with the name -> node mapping attached as
        ``nfa.name_to_state_dict``.
    """
    printable = {chr(code) for code in range(ord(' '), ord('~') + 1)}
    nfa = NFA(printable)
    name_to_state_dict = {}

    def get_state_by_name(name):
        # Nodes are created lazily, the first time a name is seen.
        if name in name_to_state_dict:
            return name_to_state_dict[name]
        name_to_state_dict[name] = nfa.create_node()
        return name_to_state_dict[name]

    for token_name in final_states_name:
        final_node = get_state_by_name(token_name)
        final_node.data = {'token': token_name}
        nfa.final_state.add(final_node)

    for production in productions:
        # The left-hand state is resolved first, before the epsilon check.
        target = get_state_by_name(production['left'])
        if production['epsilon']:
            nfa.add_transfer(nfa.S, Epsilon, target)
            continue

        # Walk from the right-hand state (or the NFA's S state when there is
        # none) towards the left-hand state.
        current = (
            get_state_by_name(production['right'])
            if production['right'] is not None
            else nfa.S
        )
        # Every terminal except the last gets a fresh anonymous node.
        for symbol in production['terminate'][:-1]:
            step = nfa.create_node()
            nfa.add_transfer(current, symbol, step)
            current = step
        # The final hop is labelled with the one-element slice, which (unlike
        # indexing) does not raise on an empty 'terminate'.
        nfa.add_transfer(current, production['terminate'][-1:], target)

    nfa.name_to_state_dict = name_to_state_dict
    return nfa
def build_concatenation_nfa(first: NFA, second: NFA) -> NFA:
    """Return an NFA for ``first`` followed by ``second``.

    Every accepting state of ``first`` receives an epsilon transition to the
    initial state of ``second`` (the states of ``first`` are modified in
    place). The result starts at ``first.initial_state`` and accepts in
    ``second.accepting_states``.
    """
    for bridge in first.accepting_states:
        bridge.add_epsilon_transition(second.initial_state)

    combined_states = first.states.union(second.states)
    combined_alphabet = first.alphabet.union(second.alphabet)
    return NFA(
        states=combined_states,
        alphabet=combined_alphabet,
        initial_state=first.initial_state,
        accepting_states=second.accepting_states,
    )
def build_symbol_nfa(s: str) -> NFA:
    """Return a two-state NFA with a single transition labelled ``s``.

    The states are named ``initial_<s>`` and ``accepting_<s>``; the alphabet
    is ``{s}``.
    """
    start = State("initial_{symbol}".format(symbol=s))
    finish = State("accepting_{symbol}".format(symbol=s))
    start.add_transition(s, finish)
    return NFA(
        states={start, finish},
        alphabet={s},
        initial_state=start,
        accepting_states={finish},
    )
def build_closure_nfa(source: NFA) -> NFA:
    """Wrap ``source`` with new initial and accepting states joined by epsilons.

    The new initial state has epsilon transitions to ``source.initial_state``
    and to the new accepting state. Each accepting state of ``source`` gets
    the same pair of epsilon transitions (``source``'s states are modified in
    place).
    """
    closure_end = State("accepting_closure")
    closure_start = State(
        "initial_closure",
        epsilon_transitions=[source.initial_state, closure_end],
    )

    for old_accepting in source.accepting_states:
        # A fresh list per call, exactly as many lists as there are states.
        old_accepting.add_epsilon_transitions([source.initial_state, closure_end])

    return NFA(
        states=source.states.union({closure_start, closure_end}),
        alphabet=source.alphabet,
        initial_state=closure_start,
        accepting_states={closure_end},
    )
def build_union_nfa(first: NFA, second: NFA) -> NFA:
    """Return an NFA accepting whatever ``first`` or ``second`` accepts.

    A new initial state has epsilon transitions to both operands' initial
    states, and every accepting state of either operand gets an epsilon
    transition to a single new accepting state (the operands' states are
    modified in place).

    :param first: left operand.
    :param second: right operand.
    :return: NFA over the union of both state sets and alphabets plus the two
        new states.
    """
    initial = State(
        "initial_union",
        epsilon_transitions=[first.initial_state, second.initial_state],
    )
    accepting = State("accepting_union")

    # Walk each operand separately: pairing them with zip() would stop at the
    # shorter collection and leave the remaining accepting states of the other
    # operand unconnected to the new accepting state.
    for state in first.accepting_states:
        state.add_epsilon_transition(accepting)
    for state in second.accepting_states:
        state.add_epsilon_transition(accepting)

    return NFA(
        states=first.states.union(second.states).union({initial, accepting}),
        alphabet=first.alphabet.union(second.alphabet),
        initial_state=initial,
        accepting_states={accepting},
    )
def test_accepts(self):
    """Check ``NFA.accepts`` against the automaton in ``resources/nfa.json``."""
    accepted_words = ["ab", "aab"]
    rejected_words = ["aba", "abab", "aabaab", "invalid"]

    # The fixture sits in a "resources" directory next to this test module.
    fixture = Path(__file__).parent / "resources" / "nfa.json"
    nfa = NFA.from_json_file(fixture)

    for word in accepted_words:
        with self.subTest("Should have validated the word.", w=word):
            self.assertTrue(nfa.accepts(word))

    for word in rejected_words:
        with self.subTest("Should not have validated the word", w=word):
            self.assertFalse(nfa.accepts(word))
def getAutomata(self):
    """Build an automaton object from the collected information.

    The result is an ``NFA`` when there is more than one initial state, when
    ``"@epsilon"`` is among ``self.states``, or when some (state, symbol)
    pair has more than one target. Otherwise it is a ``DFCA`` if ``self.eq``
    has an ``"l"`` entry, else a ``DFA``.

    :return: the populated automaton.
    """
    table = self.transitions
    nondeterministic = (
        len(self.initials) > 1
        or "@epsilon" in self.states
        # Short-circuits on the first (state, symbol) pair with several targets.
        or any(len(table[src][sym]) > 1 for src in table for sym in table[src])
    )

    if nondeterministic:
        fa = NFA()
    elif "l" in self.eq.keys():
        fa = DFCA()
        # NOTE(review): this assigns an attribute named setLength rather than
        # calling a method -- confirm against the DFCA API.
        fa.setLength = self.eq["l"]
    else:
        fa = DFA()

    for name in self.states:
        fa.addState(name)
    fa.setFinal(fa.indexList(self.finals))

    if nondeterministic:
        fa.setInitial(fa.indexList(self.initials))
        for src in table:
            for sym in table[src]:
                for dst in fa.indexList(table[src][sym]):
                    fa.addTransition(fa.stateIndex(src), sym, dst)
    else:
        # Initials and each target collection are reduced to a single state
        # name via common.uSet before the index lookup.
        fa.setInitial(fa.stateIndex(common.uSet(self.initials)))
        for src in table:
            for sym in table[src]:
                fa.addTransition(
                    fa.stateIndex(src),
                    sym,
                    fa.stateIndex(common.uSet(table[src][sym])),
                )
    return fa
def startNFASemRule(self, lst, context=None):
    """Turn the collected states and transitions into an NFA and store it.

    Drains ``self.states``, ``self.initials``, ``self.finals`` and
    ``self.transitions`` (each is popped until empty), appends the new NFA to
    ``self.theList`` and then calls ``self.initLocal()``.

    :param lst: not used by this method.
    :param context: not used by this method.
    """
    automaton = NFA()
    automaton.Sigma = self.alphabet

    while self.states:
        automaton.addState(self.states.pop())

    while self.initials:
        automaton.addInitial(automaton.stateIndex(self.initials.pop()))

    while self.finals:
        automaton.addFinal(automaton.stateIndex(self.finals.pop()))

    while self.transitions:
        source, label, target = self.transitions.pop()
        automaton.addTransition(
            automaton.stateIndex(source), label, automaton.stateIndex(target)
        )

    self.theList.append(automaton)
    self.initLocal()
def compile(self, grammar_type="regex"):
    """
    Compile ``self.lexs`` according to the grammar type, producing a DFA.

    ``"regex"`` means the rules are regular expressions; ``"regular"`` means
    they are productions of a regular grammar. Any other value falls through
    both branches and leaves ``self.lex_dfa`` untouched.

    :param grammar_type: grammar type, ``"regex"`` or ``"regular"``
    :return: None; the resulting DFA is stored in ``self.lex_dfa``
    """
    if grammar_type == 'regex':
        nfas = []
        for le in self.lexs:
            # le[1] is compiled as the pattern; le[0] is attached as ``type``.
            nfas.append(Regex.compile_nfa(le[1], extend=True, type=le[0]))
        nfa = NFA.combine(*nfas)
        self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
        return
    elif grammar_type == "regular":
        # English rendering of the original author's note in the string
        # statement below: "I had not originally planned to parse type-3
        # grammars; since the parser also does grammar parsing, this should
        # be merged with that code."
        """ 本来没有想到会做三型文法解析, 由于parser里也有文法解析.. 此处应该跟那边合并.. """
        nfas = []
        # grammar: left-hand symbol -> list of right-hand sides (lists of symbols).
        grammar = defaultdict(list)
        # g_in counts how often a symbol occurs on a right-hand side,
        # g_out how often it occurs as a left-hand side.
        g_in, g_out = defaultdict(int), defaultdict(int)
        all_symbol = set()
        for l_hand, r_hand in self.lexs:
            # Drop the first and last character of every symbol
            # (presumably delimiters such as <...> -- TODO confirm).
            l_hand = l_hand[1:-1]
            # Alternatives are separated by '|', symbols by whitespace.
            r_hands = [[x[1:-1] for x in r.strip().split()] for r in r_hand.split('|')]
            for hand in r_hands:
                for h in hand:
                    g_in[h] += 1
                    all_symbol.add(h)
            g_out[l_hand] += 1
            all_symbol.add(l_hand)
            grammar[l_hand].extend(r_hands)
        # Built-in productions for the symbol 'limit': space, tab, newline.
        grammar['limit'] = [[' '], ['\t'], ['\n']]
        ter, not_ter = [], []
        for sym in all_symbol:
            # Never seen on a right-hand side: used below as a token type.
            if g_in[sym] == 0:
                not_ter.append(sym)
            # Never seen as a left-hand side; ``ter`` is collected but not
            # read again in this method.
            if g_out[sym] == 0:
                ter.append(sym)
        nfas = []
        for token_type in not_ter:
            # One NFA per token type, with a single shared end node that
            # carries the token type.
            nfa = NFA()
            nfa.start = NFANode(r_name=token_type)
            end_node = NFANode(type=token_type)
            end_node.end = True
            nfa.end = {end_node}
            # Symbol name -> node, private to this token type's NFA.
            vis = {token_type: nfa.start}
            def get_node(name):
                # Return the node for ``name``, creating it on first use.
                if name in vis:
                    return vis[name]
                vis[name] = NFANode(r_name=name)
                return vis[name]
            # Breadth-first expansion of the productions reachable from token_type.
            que = Queue()
            que.put(token_type)
            while not que.empty():
                t = que.get()
                node = get_node(t)
                # meta['vis'] marks nodes whose productions were already expanded.
                if node.meta.get('vis', 0) > 0:
                    continue
                node.meta['vis'] = node.meta.get('vis', 0) + 1
                for r_hand in grammar[t]:
                    # r_hand[0] is used as the edge label.
                    node.next.setdefault(r_hand[0], set())
                    if len(r_hand) == 2:
                        # Two symbols: the edge leads to the node of the second one.
                        node.next[r_hand[0]].add(get_node(r_hand[1]))
                        que.put(r_hand[1])
                    else:
                        # Any other length: the edge leads straight to the end node.
                        node.next[r_hand[0]].add(end_node)
            nfas.append(nfa)
        nfa = NFA.combine(*nfas)
        self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
        return
def compile_nfa(pattern):
    """Recursively build an NFA for a regular-expression string.

    The pattern is matched against these shapes, in order: a base symbol,
    an alternation ``r|s``, a concatenation ``rs``, then the suffix forms
    ``r*``, ``r+``, ``r?`` and finally a parenthesised ``(r)``. Epsilon edges
    are stored under the key ``"ep"`` in each node's ``next`` mapping.

    :param pattern: the regular expression (must be a ``str``)
    :return: the NFA for ``pattern``
    :raises Exception: if ``pattern`` matches none of the shapes above
    """
    assert isinstance(pattern, str)

    # Base symbol: start --pattern--> end.
    if is_base(pattern):
        if pattern in Regex.meta_bases:
            # Drop the first character of a meta base
            # (presumably an escape prefix -- TODO confirm).
            pattern = pattern[1:]
        nfa = NFA()
        enode = NFANode()
        enode.end = True
        nfa.start.next[pattern] = {enode}
        nfa.end.add(enode)
        return nfa

    # Alternation r|s: try every '|' that splits the pattern into two regexes.
    for i in range(1, len(pattern)):
        s1, s2 = pattern[:i], pattern[i + 1:]
        if pattern[i] == '|' and is_regex(s1) and is_regex(s2):
            nfa1, nfa2 = map(compile_nfa, [s1, s2])
            nfa = NFA()
            nfa.start.next["ep"] = {nfa1.start, nfa2.start}
            enode = NFANode()
            enode.end = True
            nfa.end.add(enode)
            # Both branches now finish in the shared end node.
            for node in nfa1.end | nfa2.end:
                node.next.setdefault("ep", set()).add(enode)
                node.end = False
            nfa1.end, nfa2.end = set(), set()
            return nfa

    # Concatenation rs: try every split point yielding two regexes.
    for i in range(1, len(pattern)):
        s1, s2 = pattern[:i], pattern[i:]
        if is_regex(s1) and is_regex(s2):
            nfa1, nfa2 = map(compile_nfa, [s1, s2])
            nfa = NFA()
            snode = nfa.start
            enode = NFANode()
            enode.end = True
            nfa.end = {enode}
            for node in nfa1.end:
                node.end = False
                node.next.setdefault("ep", set()).add(nfa2.start)
            for node in nfa2.end:
                node.end = False
                node.next.setdefault("ep", set()).add(enode)
            # Original author's note: reusing nfa1.start as the start would
            # probably work too, but this follows the textbook construction.
            snode.next["ep"] = {nfa1.start}
            return nfa

    # r*: the new start may skip straight to the end; old ends loop back.
    if pattern[-1] == '*' and is_regex(pattern[:-1]):
        nfa0 = compile_nfa(pattern[:-1])
        nfa = NFA()
        snode = nfa.start
        enode = NFANode()
        enode.end = True
        nfa.end.add(enode)
        snode.next["ep"] = {enode, nfa0.start}
        for node in nfa0.end:
            node.next.setdefault("ep", set()).update([nfa0.start, enode])
            node.end = False
        nfa0.end = set()
        return nfa

    # r+: the ends loop back to the start; the inner NFA is reused.
    if pattern[-1] == '+' and is_regex(pattern[:-1]):
        nfa0 = compile_nfa(pattern[:-1])
        for node in nfa0.end:
            node.next.setdefault("ep", set()).add(nfa0.start)
        return nfa0

    # r?: the start may jump directly to any end; the inner NFA is reused.
    if pattern[-1] == '?' and is_regex(pattern[:-1]):
        nfa0 = compile_nfa(pattern[:-1])
        nfa0.start.next.setdefault("ep", set()).update(nfa0.end)
        return nfa0

    # (r): strip the outer parentheses.
    if pattern[-1] == ')' and pattern[0] == '(' and is_regex(pattern):
        return compile_nfa(pattern[1:-1])

    # Parenthesised print parses on Python 2 and 3 alike; the old print
    # statement was a SyntaxError on Python 3.
    print('Excuse me? What a f*****g regex exp?')
    raise Exception('cannot compile regex: %r' % (pattern,))
import sys
from fa import NFA, DFA

# Read an NFA description from a text file, print it, convert it to a DFA
# and print the result.
filename = "test2.txt"
# The context manager closes the file even if reading raises.
with open(filename, 'r') as infile:
    lines = infile.readlines()

nfa = NFA()
dfa = DFA()

nfa.construct_nfa_from_lines(lines)
nfa.print_nfa()
print()  # blank line between the two dumps

dfa.convert_from_nfa(nfa)
dfa.print_dfa()