Example #1
0
def create_nfa(final_states_name, productions):
    """Build an NFA over printable ASCII from named final states and productions.

    :param final_states_name: iterable of state names that become final states;
        each final node's data records its token name.
    :param productions: dicts with keys 'left', 'right', 'epsilon', 'terminate'
        describing grammar productions (right of None means the start state).
    :return: the constructed NFA, with a name_to_state_dict attribute attached.
    """
    # Alphabet is every printable ASCII character, space through tilde.
    alphabet = {chr(code) for code in range(ord(' '), ord('~') + 1)}
    nfa = NFA(alphabet)
    name_to_state_dict = {}

    def get_state_by_name(name):
        # Lazily allocate exactly one NFA node per distinct state name.
        try:
            return name_to_state_dict[name]
        except KeyError:
            state = nfa.create_node()
            name_to_state_dict[name] = state
            return state

    for final_name in final_states_name:
        state = get_state_by_name(final_name)
        state.data = {'token': final_name}
        nfa.final_state.add(state)

    for production in productions:
        target = get_state_by_name(production['left'])
        if production['epsilon']:
            # Epsilon production: connect the NFA start state directly.
            nfa.add_transfer(nfa.S, Epsilon, target)
            continue
        if production['right'] is None:
            current = nfa.S
        else:
            current = get_state_by_name(production['right'])
        # Chain a fresh intermediate node for each terminal but the last.
        for symbol in production['terminate'][:-1]:
            intermediate = nfa.create_node()
            nfa.add_transfer(current, symbol, intermediate)
            current = intermediate
        nfa.add_transfer(current, production['terminate'][-1:], target)
    nfa.name_to_state_dict = name_to_state_dict
    return nfa
Example #2
0
def build_concatenation_nfa(first: NFA, second: NFA) -> NFA:
    """Concatenate two NFAs (Thompson construction).

    Every accepting state of *first* gains an epsilon transition into
    *second*'s initial state; the result starts at *first* and accepts
    wherever *second* accepted.
    """
    for accepting_state in first.accepting_states:
        accepting_state.add_epsilon_transition(second.initial_state)

    combined_states = first.states.union(second.states)
    combined_alphabet = first.alphabet.union(second.alphabet)
    return NFA(states=combined_states,
               alphabet=combined_alphabet,
               initial_state=first.initial_state,
               accepting_states=second.accepting_states)
Example #3
0
def build_symbol_nfa(s: str) -> NFA:
    """Build the elementary NFA that recognizes exactly the single symbol *s*."""
    start = State("initial_{symbol}".format(symbol=s))
    end = State("accepting_{symbol}".format(symbol=s))

    # One transition on the symbol itself connects the two states.
    start.add_transition(s, end)

    return NFA(states={start, end},
               alphabet={s},
               initial_state=start,
               accepting_states={end})
Example #4
0
def build_closure_nfa(source: NFA) -> NFA:
    """Kleene-star closure of *source* (Thompson construction).

    The fresh initial state can skip straight to the fresh accepting state
    (zero repetitions) or enter *source*; old accepting states loop back to
    *source*'s start or exit to the new accepting state.
    """
    new_accepting = State("accepting_closure")
    new_initial = State("initial_closure",
                        epsilon_transitions=[source.initial_state, new_accepting])

    for old_accepting in source.accepting_states:
        old_accepting.add_epsilon_transitions([source.initial_state, new_accepting])

    return NFA(states=source.states.union({new_initial, new_accepting}),
               alphabet=source.alphabet,
               initial_state=new_initial,
               accepting_states={new_accepting})
Example #5
0
def build_union_nfa(first: NFA, second: NFA) -> NFA:
    """Union (alternation) of two NFAs via Thompson's construction.

    A fresh initial state epsilon-branches into both operands' initial
    states, and every accepting state of either operand epsilon-links into
    a fresh single accepting state.
    """
    initial = State("initial_union",
                    epsilon_transitions=[first.initial_state, second.initial_state])
    accepting = State("accepting_union")

    # BUG FIX: the original zip(first.accepting_states, second.accepting_states)
    # truncated to the shorter collection, so when the operands had different
    # numbers of accepting states some of them were never linked to the new
    # accepting state. Link every accepting state of BOTH operands.
    for s in first.accepting_states:
        s.add_epsilon_transition(accepting)
    for s in second.accepting_states:
        s.add_epsilon_transition(accepting)

    return NFA(states=first.states.union(second.states).union(
        {initial, accepting}),
               alphabet=first.alphabet.union(second.alphabet),
               initial_state=initial,
               accepting_states={accepting})
Example #6
0
    def test_accepts(self):
        """NFA loaded from resources/nfa.json accepts exactly the listed words."""
        test_data = {
            "valid_words": ["ab", "aab"],
            "invalid_words": ["aba", "abab", "aabaab", "invalid"],
        }

        nfa = NFA.from_json_file(
            Path(__file__).parent.joinpath("resources", "nfa.json"))

        for word in test_data["valid_words"]:
            with self.subTest("Should have validated the word.", w=word):
                self.assertTrue(nfa.accepts(word))
        for word in test_data["invalid_words"]:
            with self.subTest("Should not have validated the word", w=word):
                self.assertFalse(nfa.accepts(word))
Example #7
0
 def getAutomata(self):
     """Materialize the collected description into a finite automaton.

     Returns a DFA (or a DFCA when a length bound "l" was collected in
     self.eq) if the description is deterministic, otherwise an NFA.
     """
     # Deterministic iff there is a single initial state, no epsilon
     # transitions, and at most one target per (state, symbol) pair.
     isDeterministic = True
     if len(self.initials) > 1 or "@epsilon" in self.states:
         isDeterministic = False
     else:
         for s in self.transitions:
             for c in self.transitions[s]:
                 if len(self.transitions[s][c]) > 1:
                     isDeterministic = False
                     break
             if not isDeterministic:
                 break
     if isDeterministic:
         if "l" in self.eq.keys():
             fa = DFCA()
             # NOTE(review): this assigns over the setLength attribute; if
             # setLength is a method on DFCA, this likely should be
             # fa.setLength(self.eq["l"]) — verify against the DFCA API.
             fa.setLength = self.eq["l"]
         else:
             fa = DFA()
     else:
         fa = NFA()
     for s in self.states:
         fa.addState(s)
     fa.setFinal(fa.indexList(self.finals))
     if isDeterministic:
         # Deterministic case: single initial state, single target per
         # (state, symbol) — common.uSet unwraps the one-element sets.
         fa.setInitial(fa.stateIndex(common.uSet(self.initials)))
         for s1 in self.transitions:
             for c in self.transitions[s1]:
                 fa.addTransition(
                     fa.stateIndex(s1), c,
                     fa.stateIndex(common.uSet(self.transitions[s1][c])))
     else:
         # Nondeterministic case: a set of initials and possibly several
         # targets per transition.
         fa.setInitial(fa.indexList(self.initials))
         for s1 in self.transitions:
             for c in self.transitions[s1]:
                 for s2 in fa.indexList(self.transitions[s1][c]):
                     fa.addTransition(fa.stateIndex(s1), c, s2)
     return fa
Example #8
0
    def startNFASemRule(self, lst, context=None):
        """Assemble an NFA from the locally collected pieces and queue it.

        Destructively drains self.states, self.initials, self.finals and
        self.transitions into a new NFA, appends it to self.theList, then
        resets the local collection state.

        :param lst:
        :param context:"""
        automaton = NFA()
        automaton.Sigma = self.alphabet
        # States must be registered before initials/finals/transitions so
        # that stateIndex can resolve their names.
        while self.states:
            automaton.addState(self.states.pop())
        while self.initials:
            automaton.addInitial(automaton.stateIndex(self.initials.pop()))
        while self.finals:
            automaton.addFinal(automaton.stateIndex(self.finals.pop()))
        while self.transitions:
            src, symbol, dst = self.transitions.pop()
            automaton.addTransition(
                automaton.stateIndex(src), symbol, automaton.stateIndex(dst))
        self.theList.append(automaton)
        self.initLocal()
Example #9
0
    def compile(self, grammar_type="regex"):
        """Compile the lexical rules into a DFA according to the grammar type.

        "regex" means the rules are regular expressions; "regular" means the
        rules form a regular (type-3) grammar.

        :param grammar_type: grammar type, "regex" or "regular"
        :return:
        """
        if grammar_type == 'regex':
            nfas = []
            for le in self.lexs:
                # le is presumably (token_type, pattern) — verify against caller.
                nfas.append(Regex.compile_nfa(le[1], extend=True, type=le[0]))
            # Merge the per-rule NFAs, then determinize into the lexer DFA.
            nfa = NFA.combine(*nfas)
            self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
            return
        elif grammar_type == "regular":
            """
            Did not originally expect to implement type-3 grammar parsing;
            the parser module also parses grammars, so this should be merged
            with that code.
            """
            nfas = []
            grammar = defaultdict(list)
            # g_in/g_out count right-hand/left-hand occurrences of each symbol:
            # symbols never produced (g_in == 0) are start symbols, symbols
            # that never produce (g_out == 0) are terminals.
            g_in, g_out = defaultdict(int), defaultdict(int)
            all_symbol = set()
            for l_hand, r_hand in self.lexs:
                # Strip the surrounding delimiter characters from each symbol.
                l_hand = l_hand[1:-1]
                r_hands = [[x[1:-1] for x in r.strip().split()]
                           for r in r_hand.split('|')]
                for hand in r_hands:
                    for h in hand:
                        g_in[h] += 1
                        all_symbol.add(h)
                g_out[l_hand] += 1
                all_symbol.add(l_hand)
                grammar[l_hand].extend(r_hands)
            # Built-in whitespace productions under the reserved name 'limit'.
            grammar['limit'] = [[' '], ['\t'], ['\n']]
            ter, not_ter = [], []
            for sym in all_symbol:
                if g_in[sym] == 0:
                    not_ter.append(sym)
                if g_out[sym] == 0:
                    ter.append(sym)
            nfas = []
            for token_type in not_ter:
                # Build one NFA per start symbol (token type), expanding its
                # productions breadth-first.
                nfa = NFA()
                nfa.start = NFANode(r_name=token_type)
                end_node = NFANode(type=token_type)
                end_node.end = True
                nfa.end = {end_node}
                vis = {token_type: nfa.start}

                def get_node(name):
                    # One NFA node per nonterminal name, created on demand.
                    if name in vis:
                        return vis[name]
                    vis[name] = NFANode(r_name=name)
                    return vis[name]

                que = Queue()
                que.put(token_type)
                while not que.empty():
                    t = que.get()
                    node = get_node(t)
                    if node.meta.get('vis', 0) > 0:
                        continue  # already expanded this nonterminal
                    node.meta['vis'] = node.meta.get('vis', 0) + 1
                    for r_hand in grammar[t]:
                        node.next.setdefault(r_hand[0], set())
                        if len(r_hand) == 2:
                            # A -> a B : transition on terminal a to node B.
                            node.next[r_hand[0]].add(get_node(r_hand[1]))
                            que.put(r_hand[1])
                        else:
                            # A -> a : transition on terminal a to the end state.
                            node.next[r_hand[0]].add(end_node)
                nfas.append(nfa)
            nfa = NFA.combine(*nfas)
            self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
            return
Example #10
0
def compile_nfa(pattern):
    """Compile a regular-expression string into an NFA (Thompson-style).

    Note: this is Python 2 code (see the print statement in the error branch).

    :param pattern: the regular expression
    :return: NFA
    """
    # print 'compile nfa [%s]' % (pattern, )
    assert isinstance(pattern, str)
    if is_base(pattern):
        # Base case: a single symbol; meta bases carry an escape prefix
        # that is stripped before use.
        if pattern in Regex.meta_bases:
            pattern = pattern[1:]
        nfa = NFA()
        enode = NFANode()
        enode.end = True
        nfa.start.next[pattern] = {enode}
        nfa.end.add(enode)
        return nfa
    # r1|r2 alternation: try every split point whose pivot is '|'.
    for i in range(1, len(pattern)):
        s1, s2 = pattern[:i], pattern[i+1:]
        if pattern[i]=='|' and is_regex(s1) and is_regex(s2):
            nfa1, nfa2 = map(compile_nfa, [s1, s2])
            nfa = NFA()
            # The new start epsilon-branches into both operand NFAs.
            nfa.start.next["ep"] = set()
            nfa.start.next["ep"].update([nfa1.start, nfa2.start])
            enode = NFANode()
            enode.end = True
            nfa.end.add(enode)
            # Every old accepting state epsilon-links into the new end.
            for node in nfa1.end | nfa2.end:
                if "ep" not in node.next:
                    node.next["ep"] = set()
                node.next["ep"].add(enode)
                node.end = False
            nfa1.end, nfa2.end = set(), set()
            return nfa
    # r1 r2 concatenation: try every split into two sub-regexes.
    for i in range(1, len(pattern)):
        s1, s2 = pattern[:i], pattern[i:]
        if is_regex(s1) and is_regex(s2):
            # concatenation case (r s)
            nfa1, nfa2 = map(compile_nfa, [s1, s2])
            nfa = NFA()
            snode = nfa.start
            enode = NFANode()
            enode.end = True
            nfa.end = {enode}
            # Old accepting states of nfa1 epsilon-link into nfa2's start...
            for node in nfa1.end:
                node.end = False
                if "ep" not in node.next:
                    node.next["ep"] = set()
                node.next["ep"].add(nfa2.start)
            # ...and nfa2's accepting states epsilon-link into the new end.
            for node in nfa2.end:
                node.end = False
                if "ep" not in node.next:
                    node.next["ep"] = set()
                node.next["ep"].add(enode)
            snode.next["ep"] = {nfa1.start}   # I think nfa.start = {nfa1.start} would also work, but follow the textbook
            return nfa
    if pattern[-1] == '*' and is_regex(pattern[:-1]):
        # r* (Kleene star) case
        nfa0 = compile_nfa(pattern[:-1])
        nfa = NFA()
        snode = nfa.start
        enode = NFANode()
        enode.end = True
        nfa.end.add(enode)
        # Epsilon straight to the end allows matching zero repetitions.
        snode.next["ep"] = {enode, nfa0.start}
        for node in nfa0.end:
            if "ep" not in node.next:
                node.next["ep"] = set()
            # Loop back for repetition, or exit to the new end state.
            node.next["ep"].update([nfa0.start, enode])
            node.end = False
        nfa0.end = set()
        return nfa
    elif pattern[-1] == '+' and is_regex(pattern[:-1]):
        # r+ case: loop accepting states back to the start; they stay accepting.
        nfa0 = compile_nfa(pattern[:-1])
        for node in nfa0.end:
            if "ep" not in node.next:
                node.next["ep"] = set()
            node.next["ep"].add(nfa0.start)
        return nfa0
    elif pattern[-1] == '?' and is_regex(pattern[:-1]):
        # r? case: epsilon from the start directly to the accepting states.
        nfa0 = compile_nfa(pattern[:-1])
        if "ep" not in nfa0.start.next:
            nfa0.start.next["ep"] = set()
        nfa0.start.next["ep"].update(nfa0.end)
        return nfa0
    elif pattern[-1] == ')' and pattern[0] == '(' and is_regex(pattern):
        # (r) case: strip the outer parentheses and recurse.
        return compile_nfa(pattern[1:-1])
    else:
        # Pattern failed to parse under every rule above.
        print 'Excuse me? What a f*****g regex exp?'
        #print Regex._cache
        #raise RegexError()
        raise Exception()
Example #11
0
    def startNFASemRule(self, lst, context=None):
        """Build an NFA from the collected pieces and append it to theList.

        Destructively drains self.states, self.initials, self.finals and
        self.transitions, then resets the local collection state.

        :param lst:
        :param context:"""
        new = NFA()
        new.Sigma = self.alphabet
        # States must be registered first so stateIndex can resolve names.
        while self.states:
            x = self.states.pop()
            new.addState(x)
        while self.initials:
            x = self.initials.pop()
            new.addInitial(new.stateIndex(x))
        while self.finals:
            x = self.finals.pop()
            new.addFinal(new.stateIndex(x))
        while self.transitions:
            (x1, x2, x3) = self.transitions.pop()
            new.addTransition(new.stateIndex(x1), x2, new.stateIndex(x3))
        self.theList.append(new)
        self.initLocal()
Example #12
0
import sys
from fa import NFA, DFA

filename = "test2.txt"

# Use a context manager so the handle is closed even if readlines() raises,
# and avoid shadowing the builtin name `file` (a builtin in Python 2).
with open(filename, 'r') as input_file:
    lines = input_file.readlines()

nfa = NFA()
dfa = DFA()

# Build the NFA from the textual description, show it, then determinize.
nfa.construct_nfa_from_lines(lines)

nfa.print_nfa()
print()

dfa.convert_from_nfa(nfa)

dfa.print_dfa()
Example #13
0
    def compile(self, grammar_type="regex"):
        """Compile the lexical rules into a DFA according to the grammar type.

        "regex" means the rules are regular expressions; "regular" means the
        rules form a regular (type-3) grammar.

        :param grammar_type: grammar type, "regex" or "regular"
        :return:
        """
        if grammar_type == 'regex':
            nfas = []
            for le in self.lexs:
                # le is presumably (token_type, pattern) — verify against caller.
                nfas.append(Regex.compile_nfa(le[1], extend=True, type=le[0]))
            # Merge the per-rule NFAs, then determinize into the lexer DFA.
            nfa = NFA.combine(*nfas)
            self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
            return
        elif grammar_type == "regular":
            """
            Did not originally expect to implement type-3 grammar parsing;
            the parser module also parses grammars, so this should be merged
            with that code.
            """
            nfas = []
            grammar = defaultdict(list)
            # g_in/g_out count right-hand/left-hand occurrences of each symbol:
            # symbols never produced (g_in == 0) are start symbols, symbols
            # that never produce (g_out == 0) are terminals.
            g_in, g_out = defaultdict(int), defaultdict(int)
            all_symbol = set()
            for l_hand, r_hand in self.lexs:
                # Strip the surrounding delimiter characters from each symbol.
                l_hand = l_hand[1:-1]
                r_hands = [[x[1:-1] for x in r.strip().split()] for r in r_hand.split('|')]
                for hand in r_hands:
                    for h in hand:
                        g_in[h] += 1
                        all_symbol.add(h)
                g_out[l_hand] += 1
                all_symbol.add(l_hand)
                grammar[l_hand].extend(r_hands)
            # Built-in whitespace productions under the reserved name 'limit'.
            grammar['limit'] = [[' '], ['\t'], ['\n']]
            ter, not_ter = [], []
            for sym in all_symbol:
                if g_in[sym] == 0:
                    not_ter.append(sym)
                if g_out[sym] == 0:
                    ter.append(sym)
            nfas = []
            for token_type in not_ter:
                # Build one NFA per start symbol (token type), expanding its
                # productions breadth-first.
                nfa = NFA()
                nfa.start = NFANode(r_name=token_type)
                end_node = NFANode(type=token_type)
                end_node.end = True
                nfa.end = {end_node}
                vis = {token_type: nfa.start}

                def get_node(name):
                    # One NFA node per nonterminal name, created on demand.
                    if name in vis:
                        return vis[name]
                    vis[name] = NFANode(r_name=name)
                    return vis[name]

                que = Queue()
                que.put(token_type)
                while not que.empty():
                    t = que.get()
                    node = get_node(t)
                    if node.meta.get('vis', 0) > 0:
                        continue  # already expanded this nonterminal
                    node.meta['vis'] = node.meta.get('vis', 0) + 1
                    for r_hand in grammar[t]:
                        node.next.setdefault(r_hand[0], set())
                        if len(r_hand) == 2:
                            # A -> a B : transition on terminal a to node B.
                            node.next[r_hand[0]].add(get_node(r_hand[1]))
                            que.put(r_hand[1])
                        else:
                            # A -> a : transition on terminal a to the end state.
                            node.next[r_hand[0]].add(end_node)
                nfas.append(nfa)
            nfa = NFA.combine(*nfas)
            self.lex_dfa = nfa.convert_dfa(copy_meta=["type"])
            return