예제 #1
0
def main():
    """Command-line entry point: build an automaton and save it via Timbuk.

    Options (parsed from ``sys.argv[1:]``):

    * ``-h`` / ``--help``     -- show the usage text and exit.
    * ``-i`` / ``--input``    -- input file with PCRE patterns (required).
    * ``-o`` / ``--output``   -- output file for the automaton (required).
    * ``-d`` / ``--display``  -- optional file passed to ``fsm.show``.
    * ``-s`` / ``--simple``   -- call ``remove_char_classes`` before saving.
    * ``-S`` / ``--style``    -- ``nfa`` (default), ``pa`` or ``dpa``;
      compared case-insensitively.

    Exits with status 1 on a malformed command line, on a missing
    input/output file name, or on an unsupported style.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:S:",
            ["help", "output=", "display=", "simple", "input=", "style="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        # The long form "--simple" is declared in the getopt spec above, so
        # it has to be handled here as well as the short "-s".
        if o in ("-s", "--simple"):
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        elif o in ("-S", "--style"):
            fsm_style = a.lower()
        else:
            # Every option in the spec is handled above; reaching this
            # branch means the spec and the chain got out of sync.
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print("Input file with PCRE patterns and output file for FSM "
              "must be specified!")
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    # Convert the parsed automaton according to the requested style.
    if fsm_style == "nfa":
        fsm.remove_epsilons()
    elif fsm_style == "pa":
        fsm.thompson2glushkov()
    elif fsm_style == "dpa":
        fsm.thompson2reverse_glushkov()
    else:
        print("Unsupported automaton type: " + fsm_style)
        print("Supported automata styles are: nfa, pa, dpa")
        print("See " + sys.argv[0] + " -h for details")
        sys.exit(1)

    if simple:
        fsm.remove_char_classes()
    fsm.save_to_timbuk(output_file, simple)
    if display_file is not None:
        fsm.show(display_file, " ")
예제 #2
0
    def _replace_length_restriction_with_a_closure(self, NFA):
        """
            Rewrite exact-count transitions as a closure plus a flag.

            Each transition whose symbol is a counting constraint with
            equal bounds (m == n) becomes a self-loop on its source state
            plus an epsilon transition to the original target state. The
            count is stored, as a string, in self.flags_cnt under the
            source state. The counting symbol is then replaced in the
            alphabet by a character class and the epsilon transitions are
            removed through a temporary b_Automaton.

            :param NFA: NFA (its transitions and alphabet are modified)
            :type NFA: nfa_data
            :returns: automaton held by the temporary b_Automaton after
                      epsilon removal
            :rtype: nfa_data
        """
        # Collect first, mutate later: NFA.transitions is changed below.
        exact_counting = [
            tr for tr in NFA.transitions
            if NFA.alphabet[tr[1]].ctype == io_mapper["b_Sym_cnt_constr"]
            and NFA.alphabet[tr[1]].m == NFA.alphabet[tr[1]].n
        ]

        # Swap each collected transition for "loop on source" followed by
        # "epsilon (symbol id -1) to target", remembering the count.
        for tr in exact_counting:
            NFA.transitions.remove(tr)
            source, counted_id, target = tr[0], tr[1], tr[2]

            self.flags_cnt[source] = str(NFA.alphabet[counted_id].m)

            NFA.transitions.add((source, counted_id, source))
            NFA.transitions.add((source, -1, target))

        # Turn the counting symbols into character classes and make sure
        # the epsilon symbol is present in the alphabet.
        for tr in exact_counting:
            sym_id = tr[1]
            snapshot = copy.deepcopy(NFA.alphabet[sym_id])
            # A symbol shared by several transitions is converted once;
            # afterwards its ctype no longer matches and it is skipped.
            if snapshot.ctype == io_mapper["b_Sym_cnt_constr"]:
                chars = snapshot.symbol
                if isinstance(chars, str):
                    # A single character is wrapped into a one-item class.
                    chars = set([chars])
                NFA.alphabet[sym_id] = b_Sym_char_class(
                    new_text=snapshot._text,
                    charClass=chars,
                    new_id=snapshot._id)

            if -1 not in NFA.alphabet:
                NFA.alphabet[-1] = b_Sym_char("Epsilon", "", -1)

        # Eliminate the epsilon transitions introduced above.
        helper = b_Automaton()
        helper._automaton = NFA
        helper.remove_epsilons()

        return helper._automaton
예제 #3
0
def main():
    """Command-line entry point: build an automaton and save it via Timbuk.

    Options (parsed from ``sys.argv[1:]``):

    * ``-h`` / ``--help``     -- show the usage text and exit.
    * ``-i`` / ``--input``    -- input file with PCRE patterns (required).
    * ``-o`` / ``--output``   -- output file for the automaton (required).
    * ``-d`` / ``--display``  -- optional file passed to ``fsm.show``.
    * ``-s`` / ``--simple``   -- call ``remove_char_classes`` before saving.
    * ``-S`` / ``--style``    -- ``nfa`` (default), ``pa`` or ``dpa``;
      compared case-insensitively.

    Exits with status 1 on a malformed command line, on a missing
    input/output file name, or on an unsupported style.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:S:",
            ["help", "output=", "display=", "simple", "input=", "style="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        # The long form "--simple" is declared in the getopt spec above, so
        # it has to be handled here as well as the short "-s".
        if o in ("-s", "--simple"):
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        elif o in ("-S", "--style"):
            fsm_style = a.lower()
        else:
            # Every option in the spec is handled above; reaching this
            # branch means the spec and the chain got out of sync.
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print("Input file with PCRE patterns and output file for FSM "
              "must be specified!")
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    # Convert the parsed automaton according to the requested style.
    if fsm_style == "nfa":
        fsm.remove_epsilons()
    elif fsm_style == "pa":
        fsm.thompson2glushkov()
    elif fsm_style == "dpa":
        fsm.thompson2reverse_glushkov()
    else:
        print("Unsupported automaton type: " + fsm_style)
        print("Supported automata styles are: nfa, pa, dpa")
        print("See " + sys.argv[0] + " -h for details")
        sys.exit(1)

    if simple:
        fsm.remove_char_classes()
    fsm.save_to_timbuk(output_file, simple)
    if display_file is not None:
        fsm.show(display_file, " ")
예제 #4
0
def main():
    """Command-line entry point: build an NFA and save it in FA format.

    Options (parsed from ``sys.argv[1:]``):

    * ``-h`` / ``--help``     -- show the usage text and exit.
    * ``-i`` / ``--input``    -- input file with PCRE patterns (required).
    * ``-o`` / ``--output``   -- output file for the automaton (required).
    * ``-d`` / ``--display``  -- optional file passed to ``fsm.show``.
    * ``-s`` / ``--simple``   -- call ``remove_char_classes`` before saving.
    * ``-S`` / ``--style``    -- accepted and recorded, but nothing in this
      function reads the value.

    Exits with status 1 on a malformed command line or when the input or
    output file name is missing.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:S:",
            ["help", "output=", "display=", "simple", "input=", "style="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        # The long form "--simple" is declared in the getopt spec above, so
        # it has to be handled here as well as the short "-s".
        if o in ("-s", "--simple"):
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        elif o in ("-S", "--style"):
            # The spec above accepts this option, so it must not trip the
            # assertion below. NOTE(review): the value is unused here --
            # confirm whether the option should be dropped from the spec.
            fsm_style = a.lower()
        else:
            # Every option in the spec is handled above; reaching this
            # branch means the spec and the chain got out of sync.
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print("Input file with PCRE patterns and output file for FSM "
              "must be specified!")
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    fsm.remove_epsilons()

    # Single-argument print works on Python 2 and 3; the two spaces
    # reproduce the output of the former `print "State num: ", n`.
    print("State num:  " + str(fsm.get_state_num()))
    if simple:
        fsm.remove_char_classes()
    fsm.save_to_FA_format(output_file)
    if display_file is not None:
        fsm.show(display_file, " ")
예제 #5
0
def main():
    """Command-line entry point: build an NFA and save it in FA format.

    Options (parsed from ``sys.argv[1:]``):

    * ``-h`` / ``--help``     -- show the usage text and exit.
    * ``-i`` / ``--input``    -- input file with PCRE patterns (required).
    * ``-o`` / ``--output``   -- output file for the automaton (required).
    * ``-d`` / ``--display``  -- optional file passed to ``fsm.show``.
    * ``-s`` / ``--simple``   -- call ``remove_char_classes`` before saving.
    * ``-S`` / ``--style``    -- accepted and recorded, but nothing in this
      function reads the value.

    Exits with status 1 on a malformed command line or when the input or
    output file name is missing.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:S:",
            ["help", "output=", "display=", "simple", "input=", "style="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        # The long form "--simple" is declared in the getopt spec above, so
        # it has to be handled here as well as the short "-s".
        if o in ("-s", "--simple"):
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        elif o in ("-S", "--style"):
            # The spec above accepts this option, so it must not trip the
            # assertion below. NOTE(review): the value is unused here --
            # confirm whether the option should be dropped from the spec.
            fsm_style = a.lower()
        else:
            # Every option in the spec is handled above; reaching this
            # branch means the spec and the chain got out of sync.
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print("Input file with PCRE patterns and output file for FSM "
              "must be specified!")
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    fsm.remove_epsilons()

    # Single-argument print works on Python 2 and 3; the two spaces
    # reproduce the output of the former `print "State num: ", n`.
    print("State num:  " + str(fsm.get_state_num()))
    if simple:
        fsm.remove_char_classes()
    fsm.save_to_FA_format(output_file)
    if display_file is not None:
        fsm.show(display_file, " ")
예제 #6
0
def ProcessRule(parser):
    """Build a minimised DFA from the parser's NFA and log its size.

    The NFA returned by ``parser.get_nfa()`` is loaded into a b_Automaton,
    handed to a PHF_DFA, determinised (state limit 10000) and minimised.
    One line is appended to ``out.out`` holding three
    ``<states>/<transitions>|`` records: after minimisation, after the
    first ``stride_2()`` call and after the second one.

    ``Show`` is called with "test_NFA.dot" and "test_min_dfa.dot";
    presumably it writes those files -- confirm against nfa_data.

    :param parser: object providing ``get_nfa()``.
    """
    def _size_record(automaton):
        # One "<states>/<transitions>|" cell of the statistics line.
        return (str(automaton.get_state_num()) + "/" +
                str(automaton.get_trans_num()) + "|")

    # The context manager closes out.out even when one of the automaton
    # operations below raises, so the handle is never leaked.
    with open("out.out", "a") as f:
        N_Automaton = b_Automaton()
        N_Automaton.create_from_nfa_data(parser.get_nfa())
        N_Automaton.get_automaton().Show("test_NFA.dot")
        print("Automata joined")
        D_Automaton = PHF_DFA()
        D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
        print("Determinising...")
        D_Automaton.resolve_alphabet()
        # Disabled steps kept for reference:
        # D_Automaton.removeCharClasses()
        # D_Automaton.reduce_alphabet()
        D_Automaton.determinise(states_limit=10000)
        print("Minimising...")
        D_Automaton.minimise()   # What does this actually print?
        D_Automaton.get_automaton().Show("test_min_dfa.dot")
        # Size of the minimised automaton before any stride_2() call.
        f.write(_size_record(D_Automaton))
        D_Automaton.stride_2()
        f.write(_size_record(D_Automaton))
        D_Automaton.stride_2()
        f.write(_size_record(D_Automaton))
        # A third stride_2() measurement is disabled:
        # D_Automaton.stride_2()
        # f.write(_size_record(D_Automaton))
        f.write("\n")
    # Further disabled experiments kept for reference:
    # D_Automaton.set_table_parameters((5,13))
    # if(D_Automaton.generate_PHF_table() == False):
    #    print("Failed")
    #    exit()
    # D_Automaton.search("ahojdfgffskf sdfgsdfgs dfg jkgsdgcjgfsdfg admin_root=php")
    # D_Automaton.search(Data)
    # D_Automaton.printConfFile("/home/galloth/configuration");
    print("")
예제 #7
0
from netbench.pattern_match import sym_char, sym_char_class
from netbench.pattern_match import pcre_parser
from netbench.pattern_match.b_automaton import b_Automaton
from netbench.pattern_match.nfa_data import nfa_data
#from netbench.pattern_match.b_dfa import b_dfa
from phf_dfa import PHF_DFA

# Script: load PCRE rules from FileName, build an NFA, convert it to a
# minimised DFA (PHF_DFA) and save the result.

# Alternative rule set, currently disabled:
#FileName= "../../rules/Moduly/web-client.rules.pcre"
FileName= "../../rules/Moduly/web-php.rules.pcre"
parser = pcre_parser.pcre_parser()
parser.load_file(FileName)

print("Joining...")
# Wrap the NFA returned by the parser in a b_Automaton.
N_Automaton = b_Automaton()
N_Automaton.create_from_nfa_data(parser.get_nfa())

# Disabled: joining the NFAs of the remaining parser lines one by one.
#while (parser.next_line()):
#   N_Automaton.join(parser.get_nfa())

# NOTE(review): Show() presumably writes the automaton to the named .dot
# file -- confirm against nfa_data.
N_Automaton.get_automaton().Show("test_NFA.dot")
print("Automata joined")
# Hand the NFA data over to the PHF_DFA for determinisation.
D_Automaton = PHF_DFA()
D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
print("Determinising...")
D_Automaton.determinise(states_limit = 10000)
print("Minimising...")
D_Automaton.minimise()   # What does this actually print?
D_Automaton.get_automaton().Show("test_min_dfa.dot")

# NOTE(review): SaveToFile() presumably serialises the minimised automaton
# to "temp_automaton" -- confirm the format against nfa_data.
D_Automaton.get_automaton().SaveToFile("temp_automaton")
예제 #8
0
    def test_get_nfa(self):
        """get_nfa()

        First checks that a parser without text reports a negative
        position and returns None. Then, for a table of regular
        expressions, compares the epsilon-free automaton built from
        get_nfa() with a reference automaton stored under test_data/.
        """
        # If attribute _position < 0, check returning None.
        parser = pcre_parser()
        self.assertTrue(parser._position < 0)
        self.assertTrue(parser.get_nfa() is None)

        # Try method on a few regular expressions.
        # The results obtained compare with the manually completed machines.
        # (Recommend to compare after the elimination of epsilon transition)
        #
        # Columns: label, pcre_parser keyword arguments, text handed to
        # set_text(), number of next_line() calls, reference file.
        cases = (
            ("1) concatenation",
             {}, "/first/", 0,
             "test_data/(1)pcre_get_nfa.nfa_data"),
            ("2) branch (automat create char class), iteration *",
             {}, "/[ab]cd*/", 0,
             "test_data/(2)pcre_get_nfa.nfa_data"),
            ("3) try second RE (move to next line)",
             {}, "/abc/\n/ABC/\n", 1,
             "test_data/(3)pcre_get_nfa.nfa_data"),
            ("4) basic counting constraint",
             {}, "/ab{5}c/", 0,
             "test_data/(4)pcre_get_nfa.nfa_data"),
            ("5) branch, iteration +, harder counting constraint",
             {}, "/a[bc]+d{2,3}/", 0,
             "test_data/(5)pcre_get_nfa.nfa_data"),
            ("6) basic counting constraint, create_cnt_constr = True",
             {"create_cnt_constr": True}, "/ab{5}c/", 0,
             "test_data/(6)pcre_get_nfa.nfa_data"),
            ("7) branch, iteration +, harder counting constraint, "
             "create_cnt_constr = True",
             {"create_cnt_constr": True}, "/a[bc]+d{2,3}/", 0,
             "test_data/(7)pcre_get_nfa.nfa_data"),
            # Without "$" the expected automaton is the one of case 1.
            ("8) concatenation, create_eof_symbols = True, no $",
             {"create_eof_symbols": True}, "/first/", 0,
             "test_data/(1)pcre_get_nfa.nfa_data"),
            ("9) concatenation, create_eof_symbols = True, $",
             {"create_eof_symbols": True}, "/first$/", 0,
             "test_data/(9)pcre_get_nfa.nfa_data"),
            ("10) branch, iteration +, harder counting constraint, "
             "create_eof_symbols = True, create_cnt_constr = True",
             {"create_eof_symbols": True, "create_cnt_constr": True},
             "/a[bc]+d{2,3}$/", 0,
             "test_data/(10)pcre_get_nfa.nfa_data"),
        )

        for label, options, text, line_skips, reference in cases:
            parser = pcre_parser(**options)
            parser.set_text(text)
            for _ in range(line_skips):
                parser.next_line()
            self._assert_get_nfa_matches(parser, reference, label)

    def _assert_get_nfa_matches(self, parser, reference, label):
        """Compare parser.get_nfa(), epsilon-free, with a reference file.

        :param parser: pcre_parser positioned on the expression to test.
        :param reference: path of the stored reference nfa_data.
        :param label: case description used in failure messages.
        """
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(reference)

        # Only the state identifiers are compared, not the state objects.
        self.assertEqual(
            sorted(cp.states.keys()), sorted(result.states.keys()),
            label + ": states")
        self.assertEqual(cp.alphabet, result.alphabet, label + ": alphabet")
        self.assertEqual(cp.start, result.start, label + ": start")
        self.assertEqual(cp.final, result.final, label + ": final")
        self.assertEqual(
            cp.transitions, result.transitions, label + ": transitions")
        self.assertEqual(cp.Flags, result.Flags, label + ": Flags")
예제 #9
0
from netbench.pattern_match import sym_char, sym_char_class
from netbench.pattern_match import pcre_parser
from netbench.pattern_match.b_automaton import b_Automaton
from netbench.pattern_match.nfa_data import nfa_data
#from netbench.pattern_match.b_dfa import b_dfa
from phf_dfa import PHF_DFA

# Script: load PCRE rules from FileName, build an NFA, convert it to a
# minimised DFA (PHF_DFA) and save the result.

# Alternative rule set, currently disabled:
#FileName= "../../rules/Moduly/web-client.rules.pcre"
FileName = "../../rules/Moduly/web-php.rules.pcre"
parser = pcre_parser.pcre_parser()
parser.load_file(FileName)

print("Joining...")
# Wrap the NFA returned by the parser in a b_Automaton.
N_Automaton = b_Automaton()
N_Automaton.create_from_nfa_data(parser.get_nfa())

# Disabled: joining the NFAs of the remaining parser lines one by one.
#while (parser.next_line()):
#   N_Automaton.join(parser.get_nfa())

# NOTE(review): Show() presumably writes the automaton to the named .dot
# file -- confirm against nfa_data.
N_Automaton.get_automaton().Show("test_NFA.dot")
print("Automata joined")
# Hand the NFA data over to the PHF_DFA for determinisation.
D_Automaton = PHF_DFA()
D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
print("Determinising...")
D_Automaton.determinise(states_limit=10000)
print("Minimising...")
D_Automaton.minimise()  # What does this actually print?
D_Automaton.get_automaton().Show("test_min_dfa.dot")

# NOTE(review): SaveToFile() presumably serialises the minimised automaton
# to "temp_automaton" -- confirm the format against nfa_data.
D_Automaton.get_automaton().SaveToFile("temp_automaton")
예제 #10
0
    def test_get_nfa(self):
        """get_nfa()

        First checks that a parser without text reports a negative
        position and returns None. Then, for a table of regular
        expressions, compares the epsilon-free automaton built from
        get_nfa() with a reference automaton stored under test_data/.
        """
        # If attribute _position < 0, check returning None.
        parser = pcre_parser()
        self.assertTrue(parser._position < 0)
        self.assertTrue(parser.get_nfa() is None)

        # Try method on a few regular expressions.
        # The results obtained compare with the manually completed machines.
        # (Recommend to compare after the elimination of epsilon transition)
        #
        # Columns: label, pcre_parser keyword arguments, text handed to
        # set_text(), number of next_line() calls, reference file.
        cases = (
            ("1) concatenation",
             {}, "/first/", 0,
             "test_data/(1)pcre_get_nfa.nfa_data"),
            ("2) branch (automat create char class), iteration *",
             {}, "/[ab]cd*/", 0,
             "test_data/(2)pcre_get_nfa.nfa_data"),
            ("3) try second RE (move to next line)",
             {}, "/abc/\n/ABC/\n", 1,
             "test_data/(3)pcre_get_nfa.nfa_data"),
            ("4) basic counting constraint",
             {}, "/ab{5}c/", 0,
             "test_data/(4)pcre_get_nfa.nfa_data"),
            ("5) branch, iteration +, harder counting constraint",
             {}, "/a[bc]+d{2,3}/", 0,
             "test_data/(5)pcre_get_nfa.nfa_data"),
            ("6) basic counting constraint, create_cnt_constr = True",
             {"create_cnt_constr": True}, "/ab{5}c/", 0,
             "test_data/(6)pcre_get_nfa.nfa_data"),
            ("7) branch, iteration +, harder counting constraint, "
             "create_cnt_constr = True",
             {"create_cnt_constr": True}, "/a[bc]+d{2,3}/", 0,
             "test_data/(7)pcre_get_nfa.nfa_data"),
            # Without "$" the expected automaton is the one of case 1.
            ("8) concatenation, create_eof_symbols = True, no $",
             {"create_eof_symbols": True}, "/first/", 0,
             "test_data/(1)pcre_get_nfa.nfa_data"),
            ("9) concatenation, create_eof_symbols = True, $",
             {"create_eof_symbols": True}, "/first$/", 0,
             "test_data/(9)pcre_get_nfa.nfa_data"),
            ("10) branch, iteration +, harder counting constraint, "
             "create_eof_symbols = True, create_cnt_constr = True",
             {"create_eof_symbols": True, "create_cnt_constr": True},
             "/a[bc]+d{2,3}$/", 0,
             "test_data/(10)pcre_get_nfa.nfa_data"),
        )

        for label, options, text, line_skips, reference in cases:
            parser = pcre_parser(**options)
            parser.set_text(text)
            for _ in range(line_skips):
                parser.next_line()
            self._assert_get_nfa_matches(parser, reference, label)

    def _assert_get_nfa_matches(self, parser, reference, label):
        """Compare parser.get_nfa(), epsilon-free, with a reference file.

        :param parser: pcre_parser positioned on the expression to test.
        :param reference: path of the stored reference nfa_data.
        :param label: case description used in failure messages.
        """
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(reference)

        # Only the state identifiers are compared, not the state objects.
        self.assertEqual(
            sorted(cp.states.keys()), sorted(result.states.keys()),
            label + ": states")
        self.assertEqual(cp.alphabet, result.alphabet, label + ": alphabet")
        self.assertEqual(cp.start, result.start, label + ": start")
        self.assertEqual(cp.final, result.final, label + ": final")
        self.assertEqual(
            cp.transitions, result.transitions, label + ": transitions")
        self.assertEqual(cp.Flags, result.Flags, label + ": Flags")
예제 #11
0
    def _replace_length_restriction_with_a_closure(self, NFA):
        """
            Rewrite exact-count transitions as a closure plus a flag.

            Each transition whose symbol is a counting constraint with
            equal bounds (m == n) becomes a self-loop on its source state
            plus an epsilon transition to the original target state. The
            count is stored, as a string, in self.flags_cnt under the
            source state. The counting symbol is then replaced in the
            alphabet by a character class and the epsilon transitions are
            removed through a temporary b_Automaton.

            :param NFA: NFA (its transitions and alphabet are modified)
            :type NFA: nfa_data
            :returns: automaton held by the temporary b_Automaton after
                      epsilon removal
            :rtype: nfa_data
        """
        # Collect first, mutate later: NFA.transitions is changed below.
        exact_counting = [
            tr for tr in NFA.transitions
            if NFA.alphabet[tr[1]].ctype == io_mapper["b_Sym_cnt_constr"]
            and NFA.alphabet[tr[1]].m == NFA.alphabet[tr[1]].n
        ]

        # Swap each collected transition for "loop on source" followed by
        # "epsilon (symbol id -1) to target", remembering the count.
        for tr in exact_counting:
            NFA.transitions.remove(tr)
            source, counted_id, target = tr[0], tr[1], tr[2]

            self.flags_cnt[source] = str(NFA.alphabet[counted_id].m)

            NFA.transitions.add((source, counted_id, source))
            NFA.transitions.add((source, -1, target))

        # Turn the counting symbols into character classes and make sure
        # the epsilon symbol is present in the alphabet.
        for tr in exact_counting:
            sym_id = tr[1]
            snapshot = copy.deepcopy(NFA.alphabet[sym_id])
            # A symbol shared by several transitions is converted once;
            # afterwards its ctype no longer matches and it is skipped.
            if snapshot.ctype == io_mapper["b_Sym_cnt_constr"]:
                chars = snapshot.symbol
                if isinstance(chars, str):
                    # A single character is wrapped into a one-item class.
                    chars = set([chars])
                NFA.alphabet[sym_id] = b_Sym_char_class(
                    new_text=snapshot._text,
                    charClass=chars,
                    new_id=snapshot._id)

            if -1 not in NFA.alphabet:
                NFA.alphabet[-1] = b_Sym_char("Epsilon", "", -1)

        # Eliminate the epsilon transitions introduced above.
        helper = b_Automaton()
        helper._automaton = NFA
        helper.remove_epsilons()

        return helper._automaton