Example #1
    def test_compute(self):
        """compute()"""
        # Method compute(input_file_name):
        # Check the correctness of the logical machine output over
        # self.assertTrue on individual automaton items + focus on the
        # properties of H-FA (transitions, flags, counters)

        # 1) /abcd/ ; test with an expression that does not use properties
        # of History FA
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() +
                       "/algorithms/j_history_fa/test_data/his_fa_1.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/j_history_fa/test_data/his_fa_1.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)

        # 2) /ab.*cd/ ; test with an expression that contains .*
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() +
                       "/algorithms/j_history_fa/test_data/his_fa_2.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/j_history_fa/test_data/his_fa_2.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)

        # 3) /ab[^1234]*cd|efg/; test with an expression where the first
        # alternative contains [^1234]* and the second does not
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() +
                       "/algorithms/j_history_fa/test_data/his_fa_3.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/j_history_fa/test_data/his_fa_3.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
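
The six-field comparison above is repeated verbatim for every pattern; a minimal sketch of a helper assertion that could fold the checks into one call (assert_same_automaton is a hypothetical name, not part of the test suite):

    def assert_same_automaton(self, copy, result):
        # Field-by-field comparison used inline after each compute() call above.
        self.assertEqual(sorted(copy.states.keys()), sorted(result.states.keys()))
        self.assertEqual(copy.alphabet, result.alphabet)
        self.assertEqual(copy.start, result.start)
        self.assertEqual(copy.final, result.final)
        self.assertEqual(copy.transitions, result.transitions)
        self.assertEqual(copy.Flags, result.Flags)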
Example #2
    def _test_compute_4(self):
        """compute()"""
        """
            Test with several regular expressions, where the computed automaton
            has some NFA tails
        """

        hyfa = hybrid_fa()
        
        parser = pcre_parser()
        parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_4_pattern.re")
        hyfa.create_by_parser(parser)
        
        hyfa.set_special_min_depth(2)
        hyfa.set_max_head_size(-1)
        hyfa.set_max_tx(-1)

        hyfa.compute()

        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        hd = hyfa.dfa.get_automaton()
        hn0 = hyfa.nfas[0].get_automaton()
        hn1 = hyfa.nfas[1].get_automaton()
        d = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_4_dfa.nfa_data")
        n0 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa0.nfa_data")
        n1 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa1.nfa_data")

        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertTrue(len(hd.final) == 0)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])
        
        # two NFA tails
        self.assertEqual(len(hyfa.nfas), 2)
        self.assertEqual(hyfa.tran_aut, {0:4, 1:5})
        
        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n0.states.keys())
        self.assertEqual(hn0.alphabet, n0.alphabet)
        self.assertEqual(hn0.start, n0.start)
        self.assertEqual(hn0.final, n0.final)
        self.assertEqual(hn0.transitions, n0.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])

        # test of NFA part #1
        self.assertEqual(hn1.states.keys(), n1.states.keys())
        self.assertEqual(hn1.alphabet, n1.alphabet)
        self.assertEqual(hn1.start, n1.start)
        self.assertEqual(hn1.final, n1.final)
        self.assertEqual(hn1.transitions, n1.transitions)
        self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
Example #3
    def _test_compute_5(self):
        """compute()"""
        """
            Test with several blow-up REs
        """
        hyfa = JHybridFA()
        
        parser = pcre_parser()
        hyfa.set_parser(parser)
        hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5.re")
        
        hyfa.compute()
        
        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        hd = hyfa.dfa.get_automaton(False)
        hn0 = hyfa.nfas[0].get_automaton(False)
        hn1 = hyfa.nfas[1].get_automaton(False)
        d = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_dfa.nfa_data")
        n0 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_nfa0.nfa_data")
        n1 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_nfa1.nfa_data")

        # test with several blow-up REs
        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertEqual(len(hd.final), 3)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])
        
        self.assertEqual(len(hyfa.nfas), 2)
        self.assertEqual({0:0, 1: 10}, hyfa.tran_aut)
        
        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n0.states.keys())
        self.assertEqual(hn0.alphabet, n0.alphabet)
        self.assertEqual(hn0.start, n0.start)
        self.assertEqual(hn0.final, n0.final)
        self.assertEqual(hn0.transitions, n0.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])

        # test of NFA part #1
        self.assertEqual(hn1.states.keys(), n1.states.keys())
        self.assertEqual(hn1.alphabet, n1.alphabet)
        self.assertEqual(hn1.start, n1.start)
        self.assertEqual(hn1.final, n1.final)
        self.assertEqual(hn1.transitions, n1.transitions)
        self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
Example #4
    def test_compute(self):
        """compute()"""
        # Method compute(input_file_name):
        # Check the correctness of the logical machine output over
        # self.assertTrue on individual automaton items + focus on the
        # properties of H-FA (transitions, flags, counters)

        # 1) /abcd/ ; test with an expression that does not use properties
        # of History FA
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_1.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_1.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)

        # 2) /ab.*cd/ ; test with an expression that contains .*
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_2.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_2.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
        
        # 3) /ab[^1234]*cd|efg/; test with an expression where the first
        # alternative contains [^1234]* and the second does not
        his_fa = history_fa()
        his_fa.compute(aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_3.RE")
        copy = his_fa.get_automaton(False)
        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() + "/algorithms/j_history_fa/test_data/his_fa_3.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
Example #5
    def _test_compute_4(self):
        """compute()"""
        """
            Test with several patterns, some of which blow up
        """
        hyfa = JHybridFA()

        parser = pcre_parser()
        hyfa.set_parser(parser)
        hyfa.load_file(aux_func.getPatternMatchDir() +
                       "/algorithms/j_hybrid_fa/tests_data/test_compute_4.re")

        hyfa.compute()

        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        hd = hyfa.dfa.get_automaton(False)
        hn0 = hyfa.nfas[0].get_automaton(False)
        d = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/j_hybrid_fa/tests_data/test_compute_4_dfa.nfa_data")
        n = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/j_hybrid_fa/tests_data/test_compute_4_nfa0.nfa_data")

        # Test with several patterns, some of which blow up
        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertEqual(len(hd.final), 2)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])

        self.assertEqual(len(hyfa.nfas), 1)
        self.assertEqual({0: 0}, hyfa.tran_aut)

        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n.states.keys())
        self.assertEqual(hn0.alphabet, n.alphabet)
        self.assertEqual(hn0.start, n.start)
        self.assertEqual(hn0.final, n.final)
        self.assertEqual(hn0.transitions, n.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
Example #6
    def test_disable_fallback_state(self):
        """disable_fallback_state()"""
        # Test if the variables _compute, fallback and fallback_state were set
        # to the default values.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(warning=False)
        aut.compute()

        aut.disable_fallback_state()
        self.assertFalse(aut.get_compute())
        self.assertFalse(aut.fallback)
        self.assertEqual(aut.fallback_state, -1)
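
The four-state /abc/-style nfa_data fixture built inline here reappears in most of the PHF_DFA tests below; a minimal sketch of a factory that could build it once (make_linear_abc_nfa_data is a hypothetical helper, assuming the same nfa_data, b_State and b_Sym_char classes):

def make_linear_abc_nfa_data():
    # Linear automaton 0 -a-> 1 -b-> 2 -c-> 3 with state 3 final, exactly as
    # the PHF_DFA tests construct it inline.
    data = nfa_data()
    for i in range(3):
        data.states[i] = b_State(i, set())
    data.states[3] = b_State(3, set([0]))
    for i, char in enumerate("abc"):
        data.alphabet[i] = b_Sym_char(char, char, i)
    data.start = 0
    for t in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        data.transitions.add(t)
    data.final.add(3)
    return data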
Example #7
    def test_get_trans_num(self):
        """get_trans_num()"""
        # Simple regression test for small automaton.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.get_trans_num(), 3)
        
        # Test after removing fallback transitions
        aut.enable_fallback_state(1, warning=False)
        aut.remove_fallback_transitions() 
        self.assertEqual(aut.get_trans_num(), 2)
Example #8
    def test__replace_length_restriction_with_a_closure(self):
        """_replace_length_restriction_with_a_closure(NFA)"""
        # /ab.{4}cd/; test with an expression that contains .{4}
        par = pcre_parser(create_cnt_constr=True)
        par.set_text("/ab.{4}cd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        NFA_without_cnt = history._replace_length_restriction_with_a_closure(
            NFA)
        copy = NFA_without_cnt

        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")

        self.assertTrue(history.flags_cnt == {4: "4"})

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
Example #9
    def test_report_memory_naive(self):
        """report_memory_naive()"""
        # Simple regression test for small automaton.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.report_memory_naive(), 12)
        
        # Test after removing fallback transitions. report_memory_naive depends on
        # the number of states and symbols, not on transitions, so nothing changes
        aut.enable_fallback_state(1, warning=False)
        aut.remove_fallback_transitions()
        self.assertEqual(aut.report_memory_naive(), 12)

        # Manually remove symbol and state from _automaton1
        del aut._automaton1.states[2]
        del aut._automaton1.alphabet[2]
        self.assertEqual(aut.report_memory_naive(), 6)
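
A worked reading of the expected values, assuming (an assumption, not taken from the PHF_DFA source) that the naive table stores one cell per (state, symbol) pair:

states, symbols = 4, 3
assert states * symbols == 12              # value asserted before the deletions
assert (states - 1) * (symbols - 1) == 6   # after removing one state and one symbol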
Example #10
File: test_ddfa.py  Project: 4sp1r3/appreal
 def _test_compute2(self):
     
     delay_dfa = DELAY_DFA()
     
     parser = pcre_parser()
     parser.set_text("/^(a|b)+/")
     delay_dfa.create_by_parser(parser)
     
     delay_dfa.compute()
     self.assertTrue(delay_dfa.get_compute())
     
     a = delay_dfa.get_automaton()
     b = nfa_data()
     
     b.add_symbols(b_Sym_char("a","a",0))
     b.add_symbols(b_Sym_char("b","b",1))
     b.add_symbols(DEF_SYMBOLS("default", 2))
     
     b.add_states(b_State(0,set()))
     b.add_states(b_State(1,set([0])))
     b.start = 0
     b.final = set([1])
     
     b.add_transitions( (0,0,1) )
     b.add_transitions( (0,1,1) )
     b.add_transitions( (1,2,0) )
     
     self.assertEqual(a.states.keys(), b.states.keys())
     self.assertEqual(a.start, b.start)
     self.assertEqual(a.final, b.final)
     self.assertEqual(a.alphabet, b.alphabet)
     self.assertEqual(a.transitions, b.transitions)
     self.assertTrue(a.Flags["Delay DFA"])
Example #11
    def _test_compute2(self):

        delay_dfa = DELAY_DFA()

        parser = pcre_parser()
        parser.set_text("/^(a|b)+/")
        delay_dfa.create_by_parser(parser)

        delay_dfa.compute()
        self.assertTrue(delay_dfa.get_compute())

        a = delay_dfa.get_automaton()
        b = nfa_data()

        b.add_symbols(b_Sym_char("a", "a", 0))
        b.add_symbols(b_Sym_char("b", "b", 1))
        b.add_symbols(DEF_SYMBOLS("default", 2))

        b.add_states(b_State(0, set()))
        b.add_states(b_State(1, set([0])))
        b.start = 0
        b.final = set([1])

        b.add_transitions((0, 0, 1))
        b.add_transitions((0, 1, 1))
        b.add_transitions((1, 2, 0))

        self.assertEqual(a.states.keys(), b.states.keys())
        self.assertEqual(a.start, b.start)
        self.assertEqual(a.final, b.final)
        self.assertEqual(a.alphabet, b.alphabet)
        self.assertEqual(a.transitions, b.transitions)
        self.assertTrue(a.Flags["Delay DFA"])
Example #12
    def test_get_alpha_num(self):
        """get_alpha_num()"""
        # Simple regression test for small automaton.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.get_alpha_num(), 3)
        
        # Manually remove symbol from _automaton1
        del aut._automaton1.alphabet[2]
        self.assertEqual(aut.get_alpha_num(), 2)
Example #13
    def _test_compute3(self):
        # Get test directory
        tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"

        delay_dfa = DELAY_DFA()

        nfaData = nfa_data().load_from_file(tdir +
                                            "test_data/text_ddfa.nfa_data")

        delay_dfa.create_from_nfa_data(nfaData)
        delay_dfa.determinise()
        delay_dfa.compute(False)
        self.assertTrue(delay_dfa.get_compute())

        a = delay_dfa.get_automaton()
        b = nfa_data()

        b.add_symbols(b_Sym_char("a", "a", 0))
        b.add_symbols(b_Sym_char("b", "b", 1))
        b.add_symbols(b_Sym_char("c", "c", 2))
        b.add_symbols(b_Sym_char("d", "d", 3))
        b.add_symbols(DEF_SYMBOLS("default", 4))

        b.add_states(b_State(0, set()))
        b.add_states(b_State(1, set([0])))
        b.add_states(b_State(2, set()))
        b.add_states(b_State(3, set([0])))
        b.add_states(b_State(4, set([0])))
        b.start = 0
        b.final = set([1, 3, 4])

        b.add_transitions((0, 2, 0))
        b.add_transitions((0, 0, 1))
        b.add_transitions((0, 1, 2))
        b.add_transitions((0, 3, 3))
        b.add_transitions((1, 4, 0))
        b.add_transitions((2, 2, 4))
        b.add_transitions((2, 4, 0))
        b.add_transitions((3, 4, 0))
        b.add_transitions((4, 4, 0))

        self.assertEqual(a.states.keys(), b.states.keys())
        self.assertEqual(a.start, b.start)
        self.assertEqual(a.final, b.final)
        self.assertEqual(a.alphabet, b.alphabet)
        self.assertEqual(a.transitions, b.transitions)
        self.assertTrue(a.Flags["Delay DFA"])
Example #14
File: test_ddfa.py  Project: 4sp1r3/appreal
 def _test_compute3(self):
     # Get test directory 
     tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
     
     delay_dfa = DELAY_DFA()
     
     nfaData = nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data")
     
     delay_dfa.create_from_nfa_data(nfaData)
     delay_dfa.determinise()
     delay_dfa.compute(False)
     self.assertTrue(delay_dfa.get_compute())
     
     a = delay_dfa.get_automaton()
     b = nfa_data()
     
     b.add_symbols(b_Sym_char("a","a",0))
     b.add_symbols(b_Sym_char("b","b",1))
     b.add_symbols(b_Sym_char("c","c",2))
     b.add_symbols(b_Sym_char("d","d",3))
     b.add_symbols(DEF_SYMBOLS("default", 4))
     
     b.add_states(b_State(0,set()))
     b.add_states(b_State(1,set([0])))
     b.add_states(b_State(2,set()))
     b.add_states(b_State(3,set([0])))
     b.add_states(b_State(4,set([0])))
     b.start = 0
     b.final = set([1,3,4])
     
     b.add_transitions( (0,2,0) )
     b.add_transitions( (0,0,1) )
     b.add_transitions( (0,1,2) )
     b.add_transitions( (0,3,3) )
     b.add_transitions( (1,4,0) )
     b.add_transitions( (2,2,4) )
     b.add_transitions( (2,4,0) )
     b.add_transitions( (3,4,0) )
     b.add_transitions( (4,4,0) )
     
     self.assertEqual(a.states.keys(), b.states.keys())
     self.assertEqual(a.start, b.start)
     self.assertEqual(a.final, b.final)
     self.assertEqual(a.alphabet, b.alphabet)
     self.assertEqual(a.transitions, b.transitions)
     self.assertTrue(a.Flags["Delay DFA"])
Example #15
    def test__identify_fading_states(self):
        """_identify_fading_states(nfa_closure_states)"""
        history = HistoryFA()
        history._state_representation = [ set([0]),
                                          set([0,1]),
                                          set([0,2]),
                                          set([0,3]),
                                          set([0,4]),
                                          set([0,5]),
                                          set([0,6]),
                                          set([0,2,4]),
                                          set([0,2,5]),
                                          set([0,2,6])
        ]
        self.assertTrue(history._identify_fading_states([2]) == [2, 7, 8, 9])

        act = nfa_data()
        act.states[0] = b_State(0,set())
        act.states[1] = b_State(1,set())
        act.states[2] = b_State(2,set())
        act.states[3] = b_State(3,set([0]))
        act.states[4] = b_State(4,set())
        act.states[5] = b_State(5,set())
        act.states[6] = b_State(6,set([1]))
        act.alphabet[0] = b_Sym_char("a", "a", 0)
        act.alphabet[1] = b_Sym_char("b", "b", 1)
        act.alphabet[2] = b_Sym_char("c", "c", 2)
        act.alphabet[3] = b_Sym_char("d", "d", 3)
        act.alphabet[4] = b_Sym_char("e", "e", 4)
        act.alphabet[5] = b_Sym_char("f", "f", 5)
        star = set()
        for ord_char in range(0, 256):
            star.add(chr(ord_char))
        act.alphabet[6] = b_Sym_char_class("*", star, 6)
        mimo_a = set()
        for ord_char in range(0, 256):
            mimo_a.add(chr(ord_char))
        mimo_a.remove('a')
        act.alphabet[7] = b_Sym_char_class("^a", mimo_a, 7)
        act.start = 0
        act.final.add(3)
        act.final.add(6)
        act.transitions.add( (0, 6, 0) )
        act.transitions.add( (0, 0, 1) )
        act.transitions.add( (1, 1, 2) )
        act.transitions.add( (2, 7, 2) )
        act.transitions.add( (2, 2, 3) )
        act.transitions.add( (0, 3, 4) )
        act.transitions.add( (4, 4, 5) )
        act.transitions.add( (5, 5, 6) )
        history = HistoryFA()
        history._automaton = act
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table = True)
        nfa_closure_states = history._discover_closure_states(NFA)
        self.assertTrue(history._identify_fading_states(nfa_closure_states) ==
            [5, 7, 8, 9])
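
The first assertion can be read straight off _state_representation: the fading states are exactly the DFA states whose NFA-state set contains the closure state 2. A short sketch over the test data above (an observation about this data, not the library's implementation):

representation = [set([0]), set([0, 1]), set([0, 2]), set([0, 3]),
                  set([0, 4]), set([0, 5]), set([0, 6]),
                  set([0, 2, 4]), set([0, 2, 5]), set([0, 2, 6])]
fading = [i for i, nfa_states in enumerate(representation) if 2 in nfa_states]
assert fading == [2, 7, 8, 9]   # same list as the expected result above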
Example #16
    def test_enable_fallback_state(self):
        """enable_fallback_state()"""
        # Test that fallback and fallback_state are set accordingly, _compute is
        # set to False, and a warning is or is not printed on stdout depending on
        # the value of the warning parameter.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        # redirect stdout to file
        tmp = sys.stdout
        f = open("stdout.output", 'w')
        sys.stdout = f
        
        aut.enable_fallback_state(2, warning=False)
        f.close()
        e = open("stdout.output", 'r')
        line = e.readline()
        # warning was set to False, stdout should be empty
        self.assertFalse(line)
        # check if the fallback_state was set
        self.assertEqual(aut.fallback_state, 2)
        self.assertFalse(aut.get_compute())
        self.assertTrue(aut.fallback)

        f = open("stdout.output", 'w')
        sys.stdout = f
        aut.enable_fallback_state()
        f.close()
        e = open("stdout.output", 'r')
        line = e.readline()
        # warning should be printed by default
        self.assertTrue(line)
        # check if the fallback_state was chosen correctly
        self.assertEqual(aut.fallback_state, 1)
        self.assertFalse(aut.get_compute())
        self.assertTrue(aut.fallback)
        # restore sys.stdout
        sys.stdout = tmp
        os.remove("stdout.output")
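
The temporary-file redirection could also be done in memory; a minimal alternative sketch assuming Python 2's StringIO module and the aut instance from this test (not how the test is actually written):

from StringIO import StringIO   # Python 2 standard library

tmp, sys.stdout = sys.stdout, StringIO()
try:
    aut.enable_fallback_state(2, warning=False)
    captured = sys.stdout.getvalue()
finally:
    sys.stdout = tmp            # always restore stdout
assert captured == ""           # warning=False, so nothing was printed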
Example #17
    def _test_compute_3(self):
        """compute()"""
        """
            Test with several patterns, one of which blows up at the start of the RE
        """
        hyfa = JHybridFA()
        
        parser = pcre_parser()
        hyfa.set_parser(parser)
        hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3.re")
        
        hyfa.compute()
        
        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        hd = hyfa.dfa.get_automaton(False)
        hn0 = hyfa.nfas[0].get_automaton(False)
        d = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3_dfa.nfa_data")
        n = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3_nfa0.nfa_data")

        # Test with several patterns, one of which blows up at the start of the RE
        # test of DFA part
        self.assertEqual(sorted(hd.states.keys()), sorted(d.states.keys()))
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertEqual(len(hd.final), 3)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])
        
        self.assertEqual(len(hyfa.nfas), 1)
        self.assertEqual(hyfa.tran_aut, {0: 8})
        
        # test of NFA part #0
        self.assertEqual(sorted(hn0.states.keys()), sorted(n.states.keys()))
        self.assertEqual(hn0.alphabet, n.alphabet)
        self.assertEqual(hn0.start, n.start)
        self.assertEqual(hn0.final, n.final)
        self.assertEqual(hn0.transitions, n.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
Example #18
    def _test_compute_2(self):
        """compute()"""
        """
            Test with one regular expression, where the computed automaton
            has one NFA tail
        """

        hyfa = hybrid_fa()

        parser = pcre_parser()
        parser.set_text("/abcd/")
        hyfa.create_by_parser(parser)

        hyfa.set_special_min_depth(2)
        hyfa.set_max_head_size(-1)
        hyfa.set_max_tx(-1)

        hyfa.compute()

        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        parser_dfa = pcre_parser()
        parser_dfa.set_text("/ab/")
        dfa = b_dfa()
        dfa.create_by_parser(parser_dfa)
        dfa.determinise()

        hd = hyfa.dfa.get_automaton()
        hn0 = hyfa.nfas[0].get_automaton()
        d = dfa.get_automaton()
        n = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")

        # test on an automaton with one NFA tail
        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertEqual(len(hd.final), 0)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])

        self.assertEqual(len(hyfa.nfas), 1)
        self.assertEqual(hyfa.tran_aut, {0: 2})

        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n.states.keys())
        self.assertEqual(hn0.alphabet, n.alphabet)
        self.assertEqual(hn0.start, n.start)
        self.assertEqual(hn0.final, n.final)
        self.assertEqual(hn0.transitions, n.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
Example #19
    def test_validate_transition(self):
        """validate_transition()"""
        # Test correct transition validation for both faulty and non-faulty
        # transition table.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        for t in aut._automaton1.transitions: # all transitions must be valid
            self.assertTrue(aut.validate_transition(aut._transition_rep(t)))
        # some nonexistent transitions -> invalid
        t = (0,2,0)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (1,0,2)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (len(aut._automaton1.states), len(aut._automaton1.alphabet), 0)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (0, len(aut._automaton1.alphabet), 0)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (len(aut._automaton1.states), 0, 0)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        # faulty transitions
        aut.enable_faulty_transitions(32)
        aut.compute()
        for t in aut._automaton1.transitions: # all transitions must be valid
            self.assertTrue(aut.validate_transition(aut._transition_rep(t)))
        # some nonexistent transitions -> invalid, collisions are improbable
        t = (0,2,0)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (1,0,2)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (10,10,1)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (11,11,1)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
        t = (12,12,1)
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
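
The comment "collisions are improbable" can be made concrete: with a 32-bit compress hash, a nonexistent transition should only be wrongly validated when its hash collides with a stored one. A rough estimate, assuming a uniform hash (an illustration, not library code):

# Probability that one random nonexistent transition passes validation when a
# 32-bit compress hash is used (uniform-hash assumption).
false_positive_rate = 2.0 ** -32
assert false_positive_rate < 1e-9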
Example #20
    def _test_compute_2(self):
        """compute()"""
        """
            Test with one regular expression, where the computed automaton
            has one NFA tail
        """
        
        hyfa = hybrid_fa()
        
        parser = pcre_parser()
        parser.set_text("/abcd/")
        hyfa.create_by_parser(parser)
        
        hyfa.set_special_min_depth(2)
        hyfa.set_max_head_size(-1)
        hyfa.set_max_tx(-1)
        
        hyfa.compute()

        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        parser_dfa = pcre_parser()
        parser_dfa.set_text("/ab/")
        dfa = b_dfa()
        dfa.create_by_parser(parser_dfa)
        dfa.determinise()

        hd = hyfa.dfa.get_automaton()
        hn0 = hyfa.nfas[0].get_automaton()
        d = dfa.get_automaton()
        n = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")

        # test on an automaton with one NFA tail
        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertEqual(len(hd.final), 0)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])
        
        self.assertEqual(len(hyfa.nfas), 1)
        self.assertEqual(hyfa.tran_aut, {0: 2})
        
        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n.states.keys())
        self.assertEqual(hn0.alphabet, n.alphabet)
        self.assertEqual(hn0.start, n.start)
        self.assertEqual(hn0.final, n.final)
        self.assertEqual(hn0.transitions, n.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
Example #21
    def test_report_memory_real(self):
        """report_memory_real()"""
        # Few simple regression tests for different sizes of PHF table, state
        # and symbol representations and faulty transitions.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(8)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.report_memory_real(), 120)

        aut.set_table_parameters((4,6))        
        self.assertEqual(aut.report_memory_real(), 48)

        aut.set_table_parameters((4,7))
        self.assertEqual(aut.report_memory_real(), 72)

        a.set_limit(5)
        aut.set_PHF_class(a)
        aut.compute()
        self.assertEqual(aut.report_memory_real(), 45)

        aut.enable_faulty_transitions(10)
        self.assertEqual(aut.report_memory_real(), 30)
        
        aut.enable_faulty_transitions(19)
        self.assertEqual(aut.report_memory_real(), 60)
Example #22
 def test__discover_closure_states(self):
     """_discover_closure_states(NFA)"""
     act = nfa_data()
     act.states[0] = b_State(0,set())
     act.states[1] = b_State(1,set())
     act.states[2] = b_State(2,set())
     act.states[3] = b_State(3,set([0]))
     act.states[4] = b_State(4,set())
     act.states[5] = b_State(5,set())
     act.states[6] = b_State(6,set([1]))
     act.alphabet[0] = b_Sym_char("a", "a", 0)
     act.alphabet[1] = b_Sym_char("b", "b", 1)
     act.alphabet[2] = b_Sym_char("c", "c", 2)
     act.alphabet[3] = b_Sym_char("d", "d", 3)
     act.alphabet[4] = b_Sym_char("e", "e", 4)
     act.alphabet[5] = b_Sym_char("f", "f", 5)
     star = set()
     for ord_char in range(0, 256):
         star.add(chr(ord_char))
     act.alphabet[6] = b_Sym_char_class("*", star, 6)
     mimo_a = set()
     for ord_char in range(0, 256):
         mimo_a.add(chr(ord_char))
     mimo_a.remove('a')
     act.alphabet[7] = b_Sym_char_class("^a", mimo_a, 7)
     act.start = 0
     act.final.add(3)
     act.final.add(6)
     act.transitions.add( (0, 6, 0) )
     act.transitions.add( (0, 0, 1) )
     act.transitions.add( (1, 1, 2) )
     act.transitions.add( (2, 7, 2) )
     act.transitions.add( (2, 2, 3) )
     act.transitions.add( (0, 3, 4) )
     act.transitions.add( (4, 4, 5) )
     act.transitions.add( (5, 5, 6) )
     history = HistoryFA()
     history._automaton = act
     history.remove_epsilons()
     NFA = history.get_automaton(True)
     self.assertTrue(history._discover_closure_states(NFA) == [2])
Example #23
    def test_get_default_trans_num(self):
        """get_default_trans_num"""

        #Tests with regular expressions from test_compute

        delay_dfa1 = DELAY_DFA()

        parser = pcre_parser()
        parser.set_text("/^abcd/")
        delay_dfa1.create_by_parser(parser)

        delay_dfa1.compute()
        self.assertTrue(delay_dfa1.get_compute())

        delay_dfa2 = DELAY_DFA()

        parser = pcre_parser()
        parser.set_text("/^(a|b)+/")
        delay_dfa2.create_by_parser(parser)

        delay_dfa2.compute()
        self.assertTrue(delay_dfa2.get_compute())

        delay_dfa3 = DELAY_DFA()

        # Get test directory
        tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"

        nfaData = nfa_data().load_from_file(tdir +
                                            "test_data/text_ddfa.nfa_data")

        delay_dfa3.create_from_nfa_data(nfaData)
        delay_dfa3.determinise()
        delay_dfa3.compute(False)
        self.assertTrue(delay_dfa3.get_compute())

        self.assertEqual(delay_dfa1.get_default_trans_num(), 0)
        self.assertEqual(delay_dfa2.get_default_trans_num(), 1)
        self.assertEqual(delay_dfa3.get_default_trans_num(), 4)
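
The expected counts line up with the default-symbol transitions built in the earlier Delay DFA examples; a hedged sketch, assuming get_default_trans_num() simply counts transitions labelled with the DEF_SYMBOLS symbol:

def count_default_transitions(automaton, default_symbol_id):
    # Count transitions labelled with the DEF_SYMBOLS symbol id (assumption
    # about what get_default_trans_num() reports).
    return len([t for t in automaton.transitions if t[1] == default_symbol_id])

Applied to the expected automata of _test_compute2 and _test_compute3 above (default symbol ids 2 and 4), this yields 1 and 4, matching the assertions for delay_dfa2 and delay_dfa3.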
Example #24
File: test_ddfa.py  Project: 4sp1r3/appreal
 def test_get_default_trans_num(self):
     """get_default_trans_num"""
     
     #Tests with regular expressions from test_compute
     
     delay_dfa1 = DELAY_DFA()
     
     parser = pcre_parser()
     parser.set_text("/^abcd/")
     delay_dfa1.create_by_parser(parser)
     
     delay_dfa1.compute()
     self.assertTrue(delay_dfa1.get_compute())
     
     delay_dfa2 = DELAY_DFA()
     
     parser = pcre_parser()
     parser.set_text("/^(a|b)+/")
     delay_dfa2.create_by_parser(parser)
     
     delay_dfa2.compute()
     self.assertTrue(delay_dfa2.get_compute())
     
     delay_dfa3 = DELAY_DFA()
     
     # Get test directory 
     tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
     
     nfaData = nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data")
     
     delay_dfa3.create_from_nfa_data(nfaData)
     delay_dfa3.determinise()
     delay_dfa3.compute(False)
     self.assertTrue(delay_dfa3.get_compute())
     
     self.assertEqual(delay_dfa1.get_default_trans_num(),0)
     self.assertEqual(delay_dfa2.get_default_trans_num(),1)
     self.assertEqual(delay_dfa3.get_default_trans_num(),4)
Example #25
    def test__replace_length_restriction_with_a_closure(self):
        """_replace_length_restriction_with_a_closure(NFA)"""
        # /ab.{4}cd/; test with an expression that contains .{4}
        par = pcre_parser(create_cnt_constr = True)
        par.set_text("/ab.{4}cd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        NFA_without_cnt = history._replace_length_restriction_with_a_closure(NFA)
        copy = NFA_without_cnt

        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")

        self.assertTrue(history.flags_cnt == {4: "4"})

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
Example #26
    def test_remove_fallback_transitions(self):
        """remove_fallback_transitions()"""
        # 1. /abc/, state -1 (automatically chosen 0) - 4 transitions removed
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (0,1,0) )
        nfaData.transitions.add( (0,2,0) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (1,0,1) )
        nfaData.transitions.add( (1,2,0) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.transitions.add( (2,0,1) )
        nfaData.transitions.add( (2,1,0) )
        nfaData.transitions.add( (3,0,3) )
        nfaData.transitions.add( (3,1,3) )
        nfaData.transitions.add( (3,2,3) )
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), 8) # 4 removed transitions
        for i in cp.transitions: # no transitions to fallback_state
            self.assertNotEqual(i[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))

        # 2. /abc/, state 1 - 3 transitions removed
        aut._automaton1 = aut._automaton
        aut.enable_fallback_state(1, False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), 9) # 3 removed transitions
        for i in cp.transitions: # no transitions to fallback_state
            self.assertNotEqual(i[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))
        
        # 3. /^abc/, state 0 - automaton does not change
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(0, warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), len(result.transitions))
        for i in cp.transitions: # no transitions to fallback_state
            self.assertNotEqual(i[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))
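
The removed-transition counts in cases 1 and 2 follow directly from the incoming transitions of the chosen fallback state; a short check over the 12 transitions built above (an illustration, not library code):

transitions = set([(0, 0, 1), (0, 1, 0), (0, 2, 0), (1, 1, 2), (1, 0, 1),
                   (1, 2, 0), (2, 2, 3), (2, 0, 1), (2, 1, 0), (3, 0, 3),
                   (3, 1, 3), (3, 2, 3)])

def incoming(state):
    # Transitions that end in the given state and are dropped when that state
    # becomes the fallback state.
    return [t for t in transitions if t[2] == state]

assert len(incoming(0)) == 4   # case 1: 12 - 4 = 8 transitions remain
assert len(incoming(1)) == 3   # case 2: 12 - 3 = 9 transitions remain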
Example #27
    def _test_compute_4(self):
        """compute()"""
        """
            Test with several regular expressions, where the computed automaton
            has some NFA tails
        """

        hyfa = hybrid_fa()

        parser = pcre_parser()
        parser.load_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/hybrid_fa/tests_data/test_compute_4_pattern.re")
        hyfa.create_by_parser(parser)

        hyfa.set_special_min_depth(2)
        hyfa.set_max_head_size(-1)
        hyfa.set_max_tx(-1)

        hyfa.compute()

        # self.get_compute() has to be True
        self.assertTrue(hyfa.get_compute())

        hd = hyfa.dfa.get_automaton()
        hn0 = hyfa.nfas[0].get_automaton()
        hn1 = hyfa.nfas[1].get_automaton()
        d = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/hybrid_fa/tests_data/test_compute_4_dfa.nfa_data")
        n0 = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa0.nfa_data")
        n1 = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa1.nfa_data")

        # test of DFA part
        self.assertEqual(hd.states.keys(), d.states.keys())
        self.assertEqual(hd.alphabet, d.alphabet)
        self.assertEqual(hd.start, d.start)
        self.assertTrue(len(hd.final) == 0)
        self.assertEqual(hd.transitions, d.transitions)
        self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
        self.assertTrue(hd.Flags['Deterministic'])

        # two NFA tails
        self.assertEqual(len(hyfa.nfas), 2)
        self.assertEqual(hyfa.tran_aut, {0: 4, 1: 5})

        # test of NFA part #0
        self.assertEqual(hn0.states.keys(), n0.states.keys())
        self.assertEqual(hn0.alphabet, n0.alphabet)
        self.assertEqual(hn0.start, n0.start)
        self.assertEqual(hn0.final, n0.final)
        self.assertEqual(hn0.transitions, n0.transitions)
        self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])

        # test of NFA part #1
        self.assertEqual(hn1.states.keys(), n1.states.keys())
        self.assertEqual(hn1.alphabet, n1.alphabet)
        self.assertEqual(hn1.start, n1.start)
        self.assertEqual(hn1.final, n1.final)
        self.assertEqual(hn1.transitions, n1.transitions)
        self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
Example #28
    def test_compute(self):
        """compute()"""
        # Check the correctness of the logical machine output over
        # self.assertTrue on individual items + focus on the properties
        # of HistoryCountingFA (transitions, flags, counters)

        # /abcd/; test with an expression that does not use properties
        # of HistoryCountingFA
        par = pcre_parser(create_cnt_constr=True)
        par.set_text("/abcd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        history.determinise(create_table=True)

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/history_counting_fa/test_data/test_data_2.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)

        # /ab.{3}cd/; test with an expression that contains .{X}
        par = pcre_parser(create_cnt_constr=True)
        par.set_text("/ab.{3}cd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        NFA_without_cnt = \
            history._replace_length_restriction_with_a_closure(NFA)
        NFA = history.get_automaton(True)
        history._automaton = NFA_without_cnt
        history.determinise(create_table=True)

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/history_counting_fa/test_data/test_data_3.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)

        # /ab[^1234]{3}cd|efg/; test with an expression where the first
        # alternative contains [^1234]{3} and the second does not
        par = pcre_parser(create_cnt_constr=True)
        par.set_text("/ab[^1234]{3}cd|efg/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        NFA_without_cnt = \
            history._replace_length_restriction_with_a_closure(NFA)
        NFA = history.get_automaton(True)
        history._automaton = NFA_without_cnt
        history.determinise(create_table=True)

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/history_counting_fa/test_data/test_data_4.nfa_data")

        self.assertTrue(
            sorted(copy.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
Example #29
    def test_compute(self):
        """compute()"""
        # 1. /^abc/ - automaton does not change, PHF table is created
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), len(result.transitions))
        self.assertEqual(len(cp.final), len(result.final))
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())

        # 2. determinization of /^ab|ac/, PHF table is created
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set([0]))
        nfaData.states[3] = b_State(3,set())
        nfaData.states[4] = b_State(4,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (0,0,3) )
        nfaData.transitions.add( (3,2,4) )
        nfaData.final.add(2)
        nfaData.final.add(4)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), 3)
        self.assertEqual(len(cp.alphabet), 3)
        self.assertEqual(len(cp.transitions), 3)
        self.assertEqual(len(cp.final), 1)
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())
        
        # 3. resolve alphabet - /^[a-c][b-d]/, PHF table is created
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set([0]))
        nfaData.alphabet[0] = b_Sym_char_class("ch0", set(['a', 'b', 'c']), 0)
        nfaData.alphabet[1] = b_Sym_char_class("ch1", set(['b', 'c', 'd']), 1)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.final.add(2)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1
        
        self.assertEqual(len(cp.states), 3)
        self.assertEqual(len(cp.alphabet), 3)
        self.assertEqual(len(cp.transitions), 4)
        self.assertEqual(len(cp.final), 1)
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())

        # 4. /abc/ and enable_fallback_state - some transitions are removed
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (0,1,0) )
        nfaData.transitions.add( (0,2,0) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (1,0,1) )
        nfaData.transitions.add( (1,2,0) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.transitions.add( (2,0,1) )
        nfaData.transitions.add( (2,1,0) )
        nfaData.transitions.add( (3,0,3) )
        nfaData.transitions.add( (3,1,3) )
        nfaData.transitions.add( (3,2,3) )
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertTrue(len(cp.transitions) < len(result.transitions))
        self.assertEqual(len(cp.final), len(result.final))
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())
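        # Interpretation of case 4 (based on the assertions above, not on the
        # PHF_DFA documentation): with the fallback state enabled, transitions
        # leading into the fallback state are presumably left out of the PHF
        # table, so fewer than the original 12 transitions are stored while
        # the states, alphabet and final states stay the same.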
Example #30
File: compare.py  Project: vhavlena/appreal
def test():
    """
        Run searching using pcregrep and PHF_DFA. Prints out the results.
    """
    # parse options
    usage = "usage: %prog rules.pcre pcap_dir/ [options]"
    optparser = OptionParser(usage=usage)
    optparser.add_option("-O", "--outputfile", dest="resultfile", help="output file for results, default is stdout")
    optparser.add_option("-P", "--PerPacket", dest="PerPacket", action="store_true", default=False,
                      help="compare nonfaulty matching for flows and packets, faulty algorithm is used only with flows")
    optparser.add_option("-s", "--showprogress", dest="progress", action="store_true", default=False,
                      help="show progress of computation")
    optparser.add_option("-C", "--count", dest="maxiter", type="int", default="1", help="number of test iterations")
    optparser.add_option("-F", "--faulty", dest="FAULTY", type="int", default="0", help="number of bits for compress hash, default is 0 (no faulty transitions)")
    optparser.add_option("-D", "--debuglevel", dest="DEBUG", type="int", default="0", help="debug output level (0-2)")
    optparser.add_option("-S", "--savefile", dest="savefile", default="", metavar="FILE", help="save nfa_data in FILE")
    optparser.add_option("-L", "--loadfile", dest="autfile", default="", metavar="FILE", help="load nfa_data from FILE")
    optparser.add_option("-N", "--nonfaulty", dest="NonFaulty", action="store_true", default=False,
                      help="try to generate PHF table without collisions, therefore ensure nonfaulty matching. Experimental code. "
                            "May take a long time with small compress hash output.")
    (options, args) = optparser.parse_args()
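    # Example invocation (hypothetical file names, shown only to illustrate
    # the options parsed above):
    #
    #   python compare.py rules.pcre pcaps/ -C 3 -F 8 -s -O results.txt
    #
    # would run three iterations with an 8-bit compress hash, show progress
    # on stderr and append the results to results.txt.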

    global FAULTY, DEBUG
    if len(args) != 2:
       print "You must specify rules.pcre and pcap_dir/"
       optparser.print_usage()
       exit(1)
    rulesfile, inputdir = args
    PerPacket, resultfile,  maxiter, autfile, savefile, FAULTY, DEBUG = options.PerPacket, options.resultfile, options.maxiter, options.autfile, options.savefile, options.FAULTY, options.DEBUG
    progress = options.progress
    NonFaulty = options.NonFaulty
    
    if inputdir[-1] == "/":
        inputdir = inputdir[:-1] # remove '/' from the end
    rules = open(rulesfile, 'rb')
    if PerPacket:
        packetdir = inputdir + "/packets"
        inputdir = inputdir + "/flows"
    if resultfile:
        sys.stdout = open(resultfile, 'a')
    totalhits, totalfp, totalfn = (0, 0, 0)
    iter = 0    
    while iter != maxiter:
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r',"pcregrep",
        if not iter:
            # prepare pcregrep
            p = subprocess.Popen("cd pcre-8.20/ && make pcregrep", shell=True, stdout=subprocess.PIPE)
            p.wait()
            results = dict()
        file_list = list()
        rule_count = len(open(rulesfile).readlines())
        for root, dirs, files in os.walk(inputdir):
            for i in files:
                i = os.path.join(root, i)
                file_list.append(i)
                if not iter:
                    results[i] = [rule_count*[0],rule_count*[0],rule_count*[0]]
                else:
                    results[i][0] = rule_count*[0]
        #results = init_results
        rule_num = 0
        grep_reg_exp = "grep_reg_exp." + str(os.getpid())
        for rule in rules:
            if not iter:
                if DEBUG:
                    print rule,
                (grep_rule, grep_params) = parse_rule(rule)
                f = open(grep_reg_exp, 'w')
                f.write(grep_rule)
                f.close()
                p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + inputdir, shell=True, stdout=subprocess.PIPE)
                p.wait()
                for out in p.stdout:
                    item = out.split()[0]
                    results[item][1][rule_num] = 1
                if PerPacket:
                    p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + packetdir, shell=True, stdout=subprocess.PIPE)
                    p.wait()
                    for out in p.stdout:
                        item =  inputdir + "/" + out.split()[0].split("-")[1].replace("_", "/")
                        results[item][2][rule_num] = 1

            rule_num += 1
        try:
            os.remove(grep_reg_exp)
        except:
            pass
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "create automaton",
        #aut = b_Automaton()
        aut = PHF_DFA()
        if autfile:
            aut.create_from_nfa_data(nfa_data().load_from_file(autfile))
        else:
            par = parser("pcre_parser")
            #par.set_text(rule)
            par.load_file(rulesfile)
            aut.create_by_parser(par)
            if DEBUG:
                aut.show("NFA.dot")
            #aut.remove_epsilons()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "resolve alphabet",
            aut.resolve_alphabet()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "determinise",
            aut.determinise()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "minimise",
            aut.minimise()
            if DEBUG:
                aut.show("DFA.dot")
            if savefile:
                aut._automaton.save_to_file(savefile)
        aut._automaton1 = aut._automaton
        aut.set_table_parameters((20,10))
        if DEBUG > 1:
            print "Without fallback state:"
            print "Symbols:", len(aut._automaton.alphabet)
            print "States:", len(aut._automaton.states)
            print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        if isinstance(aut, PHF_DFA):
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "generate PHF",
            if aut.get_trans_num() == (aut.get_state_num() * aut.get_alpha_num()):
                aut.enable_fallback_state(warning=False)
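            # The fallback state is only enabled when the DFA is complete
            # (every state has an outgoing transition for every symbol);
            # presumably the transitions leading into the fallback state can
            # then be left out of the PHF table and restored implicitly when
            # a lookup misses.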
            if FAULTY:
                aut.enable_faulty_transitions(FAULTY)
                if NonFaulty:
                    aut.enable_faulty_check()
            aut.compute()
            if DEBUG:
                print "Fallback state:", aut.fallback_state
                print "Symbols:", len(aut._automaton.alphabet)
                print "States:", len(aut._automaton.states)
                print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        count = 1
        all = len(file_list)
        if progress:
            print >> sys.stderr, '\r' + 80*' ' + '\r',
        for f in file_list:
            # progress
            if progress:
                print >> sys.stderr, '\r',
                print >> sys.stderr, str(iter+1)+'/'+str(maxiter)+ ":", count, '/', all,
#                sys.stderr.flush()
            count += 1
            data = open(f, 'rb').read()
            results[f][0] = aut.search(data)
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "compare results",
        if isinstance(aut, PHF_DFA) and DEBUG:
            if DEBUG > 1:
                print "List of collisions:"
                print aut.collisions
                for tran, i in aut.collisions.iteritems():
                    #print tran, i
                    print  BitArray(bytes=tran[0], length=aut.symbol_bits).uint, BitArray(bytes=tran[1], length=aut.state_bits).uint, i
                    print "SYM:", aut._automaton.alphabet[BitArray(bytes=tran[0], length=aut.symbol_bits).uint]
            print "Bad transitions:", aut.bad_transitions
            print "Collisions:", len(aut.collisions)
            print "Compress bits:", aut.compress_bits
        stats = compare_results(results)
        stats = list(stats)
        if stats[0] == 0:
            print "Zero hits, cannot compute F-measure!"
            stats[0] = 1
        if DEBUG:
            print "Total number of searched packets/flows:", stats[3]
        print "Hits:", stats[0]
        totalhits += stats[0]
        totalfp += stats[1]
        totalfn += stats[2]
        precis = float(stats[0])/(stats[1]+stats[0])
        recall = float(stats[0])/(stats[0]+stats[2])
        fmeas = 2* precis * recall / (precis + recall)
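        # Worked example of the measures computed above: with 90 hits,
        # 10 false positives and 30 false negatives, precision = 90/100 = 0.9,
        # recall = 90/120 = 0.75 and F-measure = 2*0.9*0.75/(0.9+0.75) ~ 0.818.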
        print "False positives:", stats[1], precis*100, "%"
        print "False negatives:", stats[2], recall*100, "%"
        print "F-measure:", fmeas*100, "%"
        if PerPacket:
            print "Per packet errors:", stats[4], stats[5]
        print '-'*80
        iter += 1
    print "Total stats:"
    precis = float(totalhits)/(totalfp + totalhits)
    recall = float(totalhits)/(totalfn + totalhits)
    fmeas = 2* precis * recall / (precis + recall)
    print "Hits:", totalhits
    print "False positives:", totalfp, precis*100, "%"
    print "False negatives:", totalfn, recall*100, "%"
    print "F-measure:", fmeas*100, "%"
    print "_"*80
Example #31
    def test_compute(self):
        """compute()"""
        # Check the correctness of the computed automaton using
        # self.assertTrue on individual items, with a focus on the properties
        # of HistoryCountingFA (transitions, flags, counters)

        # /abcd/; test with an expression that does not use properties
        # of HistoryCountingFA
        par = pcre_parser(create_cnt_constr = True)
        par.set_text("/abcd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        history.determinise(create_table = True)

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_counting_fa/test_data/test_data_2.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        
        # /ab.{3}cd/; test with an expression that contains .{X}
        par = pcre_parser(create_cnt_constr = True)
        par.set_text("/ab.{3}cd/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        NFA_without_cnt = \
            history._replace_length_restriction_with_a_closure(NFA)
        NFA = history.get_automaton(True)
        history._automaton = NFA_without_cnt
        history.determinise(create_table = True)
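        # Sketch of the intended flow, as inferred from the calls above: the
        # counting constraint .{3} is replaced by a closure so the automaton
        # can be determinised, while the original NFA (still containing the
        # counting constraint) is passed to compute() so that the counting
        # information can be reintroduced via counters.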

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_counting_fa/test_data/test_data_3.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)

        # /ab[^1234]{3}cd|efg/; test with an alternation whose first branch
        # contains [^1234]{3} and whose second branch does not
        par = pcre_parser(create_cnt_constr = True)
        par.set_text("/ab[^1234]{3}cd|efg/")
        history = HistoryCountingFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)

        NFA_without_cnt = \
            history._replace_length_restriction_with_a_closure(NFA)
        NFA = history.get_automaton(True)
        history._automaton = NFA_without_cnt
        history.determinise(create_table = True)

        history.compute(NFA)

        copy = history.get_automaton()

        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_counting_fa/test_data/test_data_4.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
Example #32
N_Automaton.get_automaton().Show("test_NFA.dot")
print("Automata joined")
D_Automaton = PHF_DFA()
D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
print("Determinising...")
D_Automaton.determinise(states_limit = 10000)
print("Minimising...")
D_Automaton.minimise()   # What does this actually print?
D_Automaton.get_automaton().Show("test_min_dfa.dot")

D_Automaton.get_automaton().SaveToFile("temp_automaton")

print("striding...")
D_Automaton.reduce_alphabet()
D_Automaton.stride_2()
D_Automaton.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton.generate_PHF_table()

D_Automaton1 = PHF_DFA()
Temp = nfa_data()
Temp = Temp.LoadFromFile("temp_automaton")
D_Automaton1.create_from_nfa_data(Temp)
D_Automaton1.reduce_alphabet()
D_Automaton1.stride_2()
D_Automaton1.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton1.generate_PHF_table()
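# The second block above reloads the automaton saved to "temp_automaton" and
# repeats the alphabet reduction, stride_2 transformation and PHF table
# generation, presumably to check that saving and loading the nfa_data
# preserves the automaton.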

Example #33
    def test_get_nfa(self):
        """get_nfa()"""
        # If attribute _position < 0, check that None is returned.
        parser = pcre_parser()
        self.assertTrue(parser._position < 0)
        self.assertTrue(parser.get_nfa() == None)

        # Try the method on a few regular expressions.
        # Compare the results obtained with manually constructed machines.
        # (It is recommended to compare after the elimination of epsilon
        # transitions.)
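        # (A likely reason for this recommendation: the parser builds the NFA
        # with epsilon transitions whose placement and numbering may differ
        # from a hand-built machine, so the automata are easier to compare
        # once remove_epsilons() has been applied.)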
        # 1) concatenation
        parser = pcre_parser()
        parser.set_text("/first/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(1)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 2) branch (automat create char class), iteration *
        parser = pcre_parser()
        parser.set_text("/[ab]cd*/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(2)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 3) try second RE (move to next line)
        parser = pcre_parser()
        parser.set_text("/abc/\n/ABC/\n")
        parser.next_line()
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(3)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 4) basic counting constraint
        parser = pcre_parser()
        parser.set_text("/ab{5}c/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(4)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 5) branch, iteration +, harder counting constraint
        parser = pcre_parser()
        parser.set_text("/a[bc]+d{2,3}/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(5)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 6) basic counting constraint, use param create_cnt_constr = True
        parser = pcre_parser(create_cnt_constr=True)
        parser.set_text("/ab{5}c/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(6)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 7) branch, iteration +, harder counting constraint,
        #    use param create_cnt_constr = True
        parser = pcre_parser(create_cnt_constr=True)
        parser.set_text("/a[bc]+d{2,3}/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(7)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 8) concatenation, with create_eof_symbols = True, no $
        parser = pcre_parser(create_eof_symbols=True)
        parser.set_text("/first/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(1)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 9) concatenation, with create_eof_symbols = True, $
        parser = pcre_parser(create_eof_symbols=True)
        parser.set_text("/first$/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(9)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 10) branch, iteration +, harder counting constraint
        # create_eof_symbols = True, create_cnt_constr = True
        parser = pcre_parser(create_eof_symbols=True, create_cnt_constr=True)
        parser.set_text("/a[bc]+d{2,3}$/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(
            "test_data/(10)pcre_get_nfa.nfa_data")

        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)
Example #34
    def test_generate_PHF_table(self):
        """generate_PHF_table()"""
        # Test of PHF table generation - the right size of the table, every
        # transition is exactly once in the table and on the right index.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (0,1,0) )
        nfaData.transitions.add( (0,2,0) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (1,0,1) )
        nfaData.transitions.add( (1,2,0) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.transitions.add( (2,0,1) )
        nfaData.transitions.add( (2,1,0) )
        nfaData.transitions.add( (3,0,3) )
        nfaData.transitions.add( (3,1,3) )
        nfaData.transitions.add( (3,2,3) )
        nfaData.final.add(3)
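        # Note: this is the complete (unanchored) DFA for /abc/ over the
        # alphabet {a, b, c}; 4 states x 3 symbols gives the 12 transitions
        # listed above.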

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut._automaton1 =  nfaData
        aut.generate_PHF_table()
        # transition table size
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, 384)
        # count number of unique lines in transition table
        tranCount = dict()
        for l in aut.trans_table:
            tranCount.setdefault(l[1], 0)
            tranCount[l[1]] += 1
        # test if every automaton transition is just once in the table
        for t in aut._automaton1.transitions:
            self.assertEqual(tranCount[aut._transition_rep(t)], 1)
        t = ([2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0])
        # rest of trans are the nonexistent transitions
        self.assertEqual(tranCount[aut._transition_rep(t)], aut.ran - len(aut._automaton1.transitions))
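        # Worked arithmetic for the assertion above: the PHF table has
        # aut.ran == 384 slots and the automaton has 12 real transitions,
        # so 384 - 12 = 372 slots hold the nonexistent-transition sentinel.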
        # check if each transition is on its index returned by hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep, aut.trans_table[aut.hash_function.hash(rep)][1])
        # test the representation in faulty table
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep), aut.trans_table[aut.hash_function.hash(rep)][3])

        # change the size of PHF table and repeat tests
        aut = PHF_DFA()
        a = bdz()
        a.set_ratio(6.0)
        a.set_iteration_limit(10)
        aut.set_PHF_class(a)
        aut._automaton1 = nfaData
        aut.generate_PHF_table()
        # transition table size
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, 72)
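        # With set_ratio(6.0) the table size presumably scales with the number
        # of keys: 6.0 * 12 transitions = 72 slots, matching the assertion.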
        # count number of unique lines in transition table
        tranCount = dict()
        for l in aut.trans_table:
            tranCount.setdefault(l[1], 0)
            tranCount[l[1]] += 1
        # test if every automaton transition is just once in the table
        for t in aut._automaton1.transitions:
            self.assertEqual(tranCount[aut._transition_rep(t)], 1)
        t = ([2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0])
        # rest of trans are the nonexistent transitions
        self.assertEqual(tranCount[aut._transition_rep(t)], aut.ran - len(aut._automaton1.transitions))
        # check if each transition is on its index returned by hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep, aut.trans_table[aut.hash_function.hash(rep)][1])
        # test the representation in faulty table
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep), aut.trans_table[aut.hash_function.hash(rep)][3])

        # RE /#include.*>/ and enable fallback_state
        par = pcre_parser()
        par.set_text("/#include.*>/s")
        aut = PHF_DFA()
        a = bdz()
        a.set_ratio(2.5)
        a.set_iteration_limit(10)
        aut.set_PHF_class(a)
        aut.create_by_parser(par)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        # transition table size
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, 90)
        # count number of unique lines in transition table
        tranCount = dict()
        for l in aut.trans_table:
            tranCount.setdefault(l[1], 0)
            tranCount[l[1]] += 1
        # test if every automaton transition is just once in the table
        for t in aut._automaton1.transitions:
            self.assertEqual(tranCount[aut._transition_rep(t)], 1)
        t = ([2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0])
        # rest of trans are the nonexistent transitions
        self.assertEqual(tranCount[aut._transition_rep(t)], aut.ran - len(aut._automaton1.transitions))
        # check if each transition is on its index returned by hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep, aut.trans_table[aut.hash_function.hash(rep)][1])
        # test the representation in faulty table
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep), aut.trans_table[aut.hash_function.hash(rep)][3])
        # disable fallback_state
        aut.disable_fallback_state()
        aut.compute()
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, 252)
        # count number of unique lines in transition table
        tranCount = dict()
        for l in aut.trans_table:
            tranCount.setdefault(l[1], 0)
            tranCount[l[1]] += 1
        # test if every automaton transition is just once in the table
        for t in aut._automaton1.transitions:
            self.assertEqual(tranCount[aut._transition_rep(t)], 1)
        t = ([2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0])
        # rest of trans are the nonexistent transitions
        self.assertEqual(tranCount[aut._transition_rep(t)], aut.ran - len(aut._automaton1.transitions))
        # check if each transition is on its index returned by hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep, aut.trans_table[aut.hash_function.hash(rep)][1])
        # test the representation in faulty table
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep), aut.trans_table[aut.hash_function.hash(rep)][3])
Example #35
#   N_Automaton.join(parser.get_nfa())

N_Automaton.get_automaton().Show("test_NFA.dot")
print("Automata joined")
D_Automaton = PHF_DFA()
D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
print("Determinising...")
D_Automaton.determinise(states_limit=10000)
print("Minimising...")
D_Automaton.minimise()  # What does this actually print?
D_Automaton.get_automaton().Show("test_min_dfa.dot")

D_Automaton.get_automaton().SaveToFile("temp_automaton")

print("striding...")
D_Automaton.reduce_alphabet()
D_Automaton.stride_2()
D_Automaton.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton.generate_PHF_table()

D_Automaton1 = PHF_DFA()
Temp = nfa_data()
Temp = Temp.LoadFromFile("temp_automaton")
D_Automaton1.create_from_nfa_data(Temp)
D_Automaton1.reduce_alphabet()
D_Automaton1.stride_2()
D_Automaton1.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton1.generate_PHF_table()
Example #36
    def test_search(self):
        """search()"""
        # 1. RE /^abc/
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        self.assertEqual(aut.search("abc"), [1])
        self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [0])
        self.assertEqual(aut.search("ccccbbbabc"), [0])
        self.assertEqual(aut.search("ababc"), [0])
        self.assertEqual(aut.search("d"), [0])
        self.assertEqual(aut.search("cbabbacba"), [0])

        # 2. RE /abc/
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (0,1,0) )
        nfaData.transitions.add( (0,2,0) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (1,0,1) )
        nfaData.transitions.add( (1,2,0) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.transitions.add( (2,0,1) )
        nfaData.transitions.add( (2,1,0) )
        nfaData.transitions.add( (3,0,3) )
        nfaData.transitions.add( (3,1,3) )
        nfaData.transitions.add( (3,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        self.assertEqual(aut.search("abc"), [1])
        self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [1])
        self.assertEqual(aut.search("ccccbbbabc"), [1])
        self.assertEqual(aut.search("ababc"), [1])
        self.assertEqual(aut.search("d"), [0])
        self.assertEqual(aut.search("cbabbacba"), [0])

        # 2a. same test with faulty transitions
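        # With faulty transitions enabled, the table presumably stores only a
        # 32-bit compress hash of each transition instead of its full
        # representation; hash collisions could then cause false transitions,
        # but with 32 bits they are unlikely, so the expected results are the
        # same as in case 2.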
        aut.enable_faulty_transitions(32)
        aut.compute()
        self.assertEqual(aut.search("abc"), [1])
        self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [1])
        self.assertEqual(aut.search("ccccbbbabc"), [1])
        self.assertEqual(aut.search("ababc"), [1])
        self.assertEqual(aut.search("d"), [0])
        self.assertEqual(aut.search("cbabbacba"), [0])

        # 3. RE /#include.*>/ with enable_fallback_state
        par = pcre_parser()
        par.set_text("/#include.*>/")
        aut = PHF_DFA()
        a = bdz()
        a.set_ratio(2.5)
        a.set_iteration_limit(10)
        aut.set_PHF_class(a)
        aut.create_by_parser(par)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        self.assertEqual(aut.search("#include <stdio.h>"), [1])
        self.assertEqual(aut.search("#include <stdlib.h>"), [1])
        self.assertEqual(aut.search("#include <stdio.h>bba"), [1])
        self.assertEqual(aut.search('#include "pcre.h"'), [0])
        self.assertEqual(aut.search('asdf#include <stdio.h>'), [1])
Example #37
    def test_get_nfa(self):
        """get_nfa()"""
        # If attribute _position < 0, check that None is returned.
        parser = pcre_parser()
        self.assertTrue(parser._position < 0)
        self.assertTrue(parser.get_nfa() == None)
            
        # Try the method on a few regular expressions.
        # Compare the results obtained with manually constructed machines.
        # (It is recommended to compare after the elimination of epsilon
        # transitions.)
        # 1) concatenation
        parser = pcre_parser()
        parser.set_text("/first/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(1)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 2) branch (automat create char class), iteration *
        parser = pcre_parser()
        parser.set_text("/[ab]cd*/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(2)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 3) try second RE (move to next line)
        parser = pcre_parser()
        parser.set_text("/abc/\n/ABC/\n")
        parser.next_line()
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(3)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 4) basic counting constraint
        parser = pcre_parser()
        parser.set_text("/ab{5}c/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(4)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 5) branch, iteration +, harder counting constraint
        parser = pcre_parser()
        parser.set_text("/a[bc]+d{2,3}/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(5)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)
        
        # 6) basic counting constraint, use param create_cnt_constr = True
        parser = pcre_parser(create_cnt_constr = True)
        parser.set_text("/ab{5}c/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(6)pcre_get_nfa.nfa_data")
        
        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

        # 7) branch, iteration +, harder counting constraint, 
        #    use param create_cnt_constr = True
        parser = pcre_parser(create_cnt_constr = True)
        parser.set_text("/a[bc]+d{2,3}/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(7)pcre_get_nfa.nfa_data")
        
        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)
        
        # 8) concatenation, with create_eof_symbols = True, no $
        parser = pcre_parser(create_eof_symbols = True)
        parser.set_text("/first/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(1)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)
        
        # 9) concatenation, with create_eof_symbols = True, $
        parser = pcre_parser(create_eof_symbols = True)
        parser.set_text("/first$/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(9)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)
        
        # 10) branch, iteration +, harder counting constraint
        # create_eof_symbols = True, create_cnt_constr = True
        parser = pcre_parser(create_eof_symbols = True, create_cnt_constr = True)
        parser.set_text("/a[bc]+d{2,3}$/")
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file("test_data/(10)pcre_get_nfa.nfa_data")

        self.assertTrue(sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)