def test_compute(self):
    """compute()"""
    # Method compute(input_file_name):
    # Check the correctness of the logical machine output via
    # self.assertTrue on individual automaton items, with focus on the
    # H-FA specific properties (transitions, flags, counters).
    #
    # Fixtures exercised:
    #   his_fa_1: /abcd/           - no History FA specific features
    #   his_fa_2: /ab.*cd/         - contains .*
    #   his_fa_3: /ab[^1234]*cd|efg/ - one alternative has [^1234]*,
    #                                  the other does not
    data_dir = aux_func.getPatternMatchDir() + \
        "/algorithms/j_history_fa/test_data/"
    for base in ("his_fa_1", "his_fa_2", "his_fa_3"):
        his_fa = history_fa()
        his_fa.compute(data_dir + base + ".RE")
        produced = his_fa.get_automaton(False)
        reference = nfa_data().load_from_file(data_dir + base + ".nfa_data")
        self.assertTrue(sorted(produced.states.keys())
                        == sorted(reference.states.keys()))
        self.assertTrue(produced.alphabet == reference.alphabet)
        self.assertTrue(produced.start == reference.start)
        self.assertTrue(produced.final == reference.final)
        self.assertTrue(produced.transitions == reference.transitions)
        self.assertTrue(produced.Flags == reference.Flags)
def _test_compute_4(self):
    """compute()"""
    # Several regular expressions at once; the computed hybrid automaton
    # is expected to have two NFA tails hanging off the DFA head.
    tdata = aux_func.getPatternMatchDir() + \
        "/algorithms/hybrid_fa/tests_data/"
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.load_file(tdata + "test_compute_4_pattern.re")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    # self.get_compute() has to be True
    self.assertTrue(hyfa.get_compute())
    head = hyfa.dfa.get_automaton()
    tail0 = hyfa.nfas[0].get_automaton()
    tail1 = hyfa.nfas[1].get_automaton()
    exp_head = nfa_data().load_from_file(
        tdata + "test_compute_4_dfa.nfa_data")
    exp_tail0 = nfa_data().load_from_file(
        tdata + "test_compute_4_nfa0.nfa_data")
    exp_tail1 = nfa_data().load_from_file(
        tdata + "test_compute_4_nfa1.nfa_data")
    # test of DFA part
    self.assertEqual(head.states.keys(), exp_head.states.keys())
    self.assertEqual(head.alphabet, exp_head.alphabet)
    self.assertEqual(head.start, exp_head.start)
    self.assertTrue(len(head.final) == 0)
    self.assertEqual(head.transitions, exp_head.transitions)
    self.assertTrue(head.Flags['Hybrid FA - DFA part'])
    self.assertTrue(head.Flags['Deterministic'])
    # two NFA tails, attached at DFA states 4 and 5
    self.assertEqual(len(hyfa.nfas), 2)
    self.assertEqual(hyfa.tran_aut, {0: 4, 1: 5})
    # test of NFA part #0
    self.assertEqual(tail0.states.keys(), exp_tail0.states.keys())
    self.assertEqual(tail0.alphabet, exp_tail0.alphabet)
    self.assertEqual(tail0.start, exp_tail0.start)
    self.assertEqual(tail0.final, exp_tail0.final)
    self.assertEqual(tail0.transitions, exp_tail0.transitions)
    self.assertTrue(tail0.Flags['Hybrid FA - one NFA part'])
    # test of NFA part #1
    self.assertEqual(tail1.states.keys(), exp_tail1.states.keys())
    self.assertEqual(tail1.alphabet, exp_tail1.alphabet)
    self.assertEqual(tail1.start, exp_tail1.start)
    self.assertEqual(tail1.final, exp_tail1.final)
    self.assertEqual(tail1.transitions, exp_tail1.transitions)
    self.assertTrue(tail1.Flags['Hybrid FA - one NFA part'])
def _test_compute_5(self):
    """compute()"""
    # Several REs where more than one blows up: the J-Hybrid FA must end
    # up with a DFA head (3 final states) and two NFA tails attached at
    # DFA states 0 and 10.
    tdata = aux_func.getPatternMatchDir() + \
        "/algorithms/j_hybrid_fa/tests_data/"
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(tdata + "test_compute_5.re")
    hyfa.compute()
    # self.get_compute() has to be True
    # (the original asserted this twice; the duplicate was removed)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    hn1 = hyfa.nfas[1].get_automaton(False)
    d = nfa_data().load_from_file(tdata + "test_compute_5_dfa.nfa_data")
    n0 = nfa_data().load_from_file(tdata + "test_compute_5_nfa0.nfa_data")
    n1 = nfa_data().load_from_file(tdata + "test_compute_5_nfa1.nfa_data")
    # test of DFA part
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 3)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 2)
    self.assertEqual({0: 0, 1: 10}, hyfa.tran_aut)
    # test of NFA part #0
    self.assertEqual(hn0.states.keys(), n0.states.keys())
    self.assertEqual(hn0.alphabet, n0.alphabet)
    self.assertEqual(hn0.start, n0.start)
    self.assertEqual(hn0.final, n0.final)
    self.assertEqual(hn0.transitions, n0.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
    # test of NFA part #1
    self.assertEqual(hn1.states.keys(), n1.states.keys())
    self.assertEqual(hn1.alphabet, n1.alphabet)
    self.assertEqual(hn1.start, n1.start)
    self.assertEqual(hn1.final, n1.final)
    self.assertEqual(hn1.transitions, n1.transitions)
    self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
def test_compute(self):
    """compute()"""
    # Method compute(input_file_name):
    # Compare the computed History FA against stored reference automata,
    # checking states, alphabet, start/final states, transitions and the
    # H-FA specific flags.
    data_dir = aux_func.getPatternMatchDir() + \
        "/algorithms/j_history_fa/test_data/"

    def check(re_file, nfa_file):
        # Build the History FA from one RE file and compare it item by
        # item against the stored reference automaton.
        machine = history_fa()
        machine.compute(data_dir + re_file)
        produced = machine.get_automaton(False)
        reference = nfa_data().load_from_file(data_dir + nfa_file)
        self.assertTrue(sorted(produced.states.keys())
                        == sorted(reference.states.keys()))
        self.assertTrue(produced.alphabet == reference.alphabet)
        self.assertTrue(produced.start == reference.start)
        self.assertTrue(produced.final == reference.final)
        self.assertTrue(produced.transitions == reference.transitions)
        self.assertTrue(produced.Flags == reference.Flags)

    # 1) /abcd/ - expression without History FA specific features
    check("his_fa_1.RE", "his_fa_1.nfa_data")
    # 2) /ab.*cd/ - expression containing .*
    check("his_fa_2.RE", "his_fa_2.nfa_data")
    # 3) /ab[^1234]*cd|efg/ - one alternative contains [^1234]*,
    #    the other does not
    check("his_fa_3.RE", "his_fa_3.nfa_data")
def _test_compute_4(self):
    """compute()"""
    # More patterns where some blow up: expect a DFA head with 2 final
    # states and a single NFA tail attached at DFA state 0.
    tdata = aux_func.getPatternMatchDir() + \
        "/algorithms/j_hybrid_fa/tests_data/"
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(tdata + "test_compute_4.re")
    hyfa.compute()
    # self.get_compute() has to be True
    # (the original asserted this twice; the duplicate was removed)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    d = nfa_data().load_from_file(tdata + "test_compute_4_dfa.nfa_data")
    n = nfa_data().load_from_file(tdata + "test_compute_4_nfa0.nfa_data")
    # test of DFA part
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 2)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual({0: 0}, hyfa.tran_aut)
    # test of NFA part #0
    self.assertEqual(hn0.states.keys(), n.states.keys())
    self.assertEqual(hn0.alphabet, n.alphabet)
    self.assertEqual(hn0.start, n.start)
    self.assertEqual(hn0.final, n.final)
    self.assertEqual(hn0.transitions, n.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
def test_disable_fallback_state(self):
    """disable_fallback_state()"""
    # After disabling the fallback state, _compute, fallback and
    # fallback_state must all be back at their default values.
    nfa = nfa_data()
    for sid in (0, 1, 2):
        nfa.states[sid] = b_State(sid, set())
    nfa.states[3] = b_State(3, set([0]))
    for idx, ch in enumerate("abc"):
        nfa.alphabet[idx] = b_Sym_char(ch, ch, idx)
    nfa.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        nfa.transitions.add(trans)
    nfa.final.add(3)
    aut = PHF_DFA()
    phf = bdz()
    phf.set_limit(128)
    aut.set_PHF_class(phf)
    aut.create_from_nfa_data(nfa)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    aut.disable_fallback_state()
    self.assertFalse(aut.get_compute())
    self.assertFalse(aut.fallback)
    self.assertEqual(aut.fallback_state, -1)
def test_get_trans_num(self):
    """get_trans_num()"""
    # Simple regression test for a small automaton: three transitions
    # initially, two after removing the fallback transitions.
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set())
    data.states[3] = b_State(3, set([0]))
    data.alphabet[0] = b_Sym_char("a", "a", 0)
    data.alphabet[1] = b_Sym_char("b", "b", 1)
    data.alphabet[2] = b_Sym_char("c", "c", 2)
    data.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        data.transitions.add(trans)
    data.final.add(3)
    aut = PHF_DFA()
    hasher = bdz()
    hasher.set_limit(128)
    aut.set_PHF_class(hasher)
    aut.create_from_nfa_data(data)
    aut.compute()
    self.assertEqual(aut.get_trans_num(), 3)
    # Test after removing fallback transitions
    aut.enable_fallback_state(1, warning=False)
    aut.remove_fallback_transitions()
    self.assertEqual(aut.get_trans_num(), 2)
def test__replace_length_restriction_with_a_closure(self):
    """_replace_length_restriction_with_a_closure(NFA)"""
    # /ab.{4}cd/ - the .{4} length restriction is replaced by a closure
    # and the counter is recorded in flags_cnt as {4: "4"}.
    parser = pcre_parser(create_cnt_constr=True)
    parser.set_text("/ab.{4}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(parser)
    history.remove_epsilons()
    automaton = history.get_automaton(True)
    stripped = history._replace_length_restriction_with_a_closure(
        automaton)
    expected = nfa_data().load_from_file(
        aux_func.getPatternMatchDir()
        + "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")
    self.assertTrue(history.flags_cnt == {4: "4"})
    self.assertTrue(sorted(stripped.states.keys())
                    == sorted(expected.states.keys()))
    self.assertTrue(stripped.alphabet == expected.alphabet)
    self.assertTrue(stripped.start == expected.start)
    self.assertTrue(stripped.final == expected.final)
    self.assertTrue(stripped.transitions == expected.transitions)
    self.assertTrue(stripped.Flags == expected.Flags)
def test_report_memory_naive(self):
    """report_memory_naive()"""
    # Simple regression test for a small automaton.
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set())
    data.states[3] = b_State(3, set([0]))
    for idx, ch in enumerate("abc"):
        data.alphabet[idx] = b_Sym_char(ch, ch, idx)
    data.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        data.transitions.add(trans)
    data.final.add(3)
    aut = PHF_DFA()
    phf = bdz()
    phf.set_limit(128)
    aut.set_PHF_class(phf)
    aut.create_from_nfa_data(data)
    aut.compute()
    self.assertEqual(aut.report_memory_naive(), 12)
    # report_memory_naive depends on the number of states and symbols,
    # not transitions, so removing fallback transitions changes nothing.
    aut.enable_fallback_state(1, warning=False)
    aut.remove_fallback_transitions()
    self.assertEqual(aut.report_memory_naive(), 12)
    # Manually drop a state and a symbol from _automaton1.
    del aut._automaton1.states[2]
    del aut._automaton1.alphabet[2]
    self.assertEqual(aut.report_memory_naive(), 6)
def _test_compute2(self):
    # Delay DFA for /^(a|b)+/: two states, with one default transition
    # from the accepting state back to the start state.
    ddfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    ddfa.create_by_parser(parser)
    ddfa.compute()
    self.assertTrue(ddfa.get_compute())
    computed = ddfa.get_automaton()
    expected = nfa_data()
    expected.add_symbols(b_Sym_char("a", "a", 0))
    expected.add_symbols(b_Sym_char("b", "b", 1))
    expected.add_symbols(DEF_SYMBOLS("default", 2))
    expected.add_states(b_State(0, set()))
    expected.add_states(b_State(1, set([0])))
    expected.start = 0
    expected.final = set([1])
    for trans in ((0, 0, 1), (0, 1, 1), (1, 2, 0)):
        expected.add_transitions(trans)
    self.assertEqual(computed.states.keys(), expected.states.keys())
    self.assertEqual(computed.start, expected.start)
    self.assertEqual(computed.final, expected.final)
    self.assertEqual(computed.alphabet, expected.alphabet)
    self.assertEqual(computed.transitions, expected.transitions)
    self.assertTrue(computed.Flags["Delay DFA"])
def _test_compute2(self):
    # Build the Delay DFA for /^(a|b)+/ and compare it against a
    # hand-built model automaton.
    machine = DELAY_DFA()
    p = pcre_parser()
    p.set_text("/^(a|b)+/")
    machine.create_by_parser(p)
    machine.compute()
    self.assertTrue(machine.get_compute())
    got = machine.get_automaton()
    want = nfa_data()
    want.add_symbols(b_Sym_char("a", "a", 0))
    want.add_symbols(b_Sym_char("b", "b", 1))
    want.add_symbols(DEF_SYMBOLS("default", 2))
    want.add_states(b_State(0, set()))
    want.add_states(b_State(1, set([0])))
    want.start = 0
    want.final = set([1])
    want.add_transitions((0, 0, 1))
    want.add_transitions((0, 1, 1))
    want.add_transitions((1, 2, 0))
    self.assertEqual(got.states.keys(), want.states.keys())
    self.assertEqual(got.start, want.start)
    self.assertEqual(got.final, want.final)
    self.assertEqual(got.alphabet, want.alphabet)
    self.assertEqual(got.transitions, want.transitions)
    self.assertTrue(got.Flags["Delay DFA"])
def test_get_alpha_num(self):
    """get_alpha_num()"""
    # Simple regression test: three symbols initially, two after one is
    # deleted directly from _automaton1.
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set())
    data.states[3] = b_State(3, set([0]))
    for idx, ch in enumerate("abc"):
        data.alphabet[idx] = b_Sym_char(ch, ch, idx)
    data.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        data.transitions.add(trans)
    data.final.add(3)
    aut = PHF_DFA()
    phf = bdz()
    phf.set_limit(128)
    aut.set_PHF_class(phf)
    aut.create_from_nfa_data(data)
    aut.compute()
    self.assertEqual(aut.get_alpha_num(), 3)
    # Manually remove a symbol from _automaton1.
    del aut._automaton1.alphabet[2]
    self.assertEqual(aut.get_alpha_num(), 2)
def _test_compute3(self):
    # Delay DFA built from a stored nfa_data file; after determinise()
    # and compute(False) it must match the hand-built model below.
    tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    ddfa = DELAY_DFA()
    ddfa.create_from_nfa_data(
        nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data"))
    ddfa.determinise()
    ddfa.compute(False)
    self.assertTrue(ddfa.get_compute())
    produced = ddfa.get_automaton()
    model = nfa_data()
    for idx, ch in enumerate("abcd"):
        model.add_symbols(b_Sym_char(ch, ch, idx))
    model.add_symbols(DEF_SYMBOLS("default", 4))
    model.add_states(b_State(0, set()))
    model.add_states(b_State(1, set([0])))
    model.add_states(b_State(2, set()))
    model.add_states(b_State(3, set([0])))
    model.add_states(b_State(4, set([0])))
    model.start = 0
    model.final = set([1, 3, 4])
    for trans in ((0, 2, 0), (0, 0, 1), (0, 1, 2), (0, 3, 3),
                  (1, 4, 0), (2, 2, 4), (2, 4, 0), (3, 4, 0), (4, 4, 0)):
        model.add_transitions(trans)
    self.assertEqual(produced.states.keys(), model.states.keys())
    self.assertEqual(produced.start, model.start)
    self.assertEqual(produced.final, model.final)
    self.assertEqual(produced.alphabet, model.alphabet)
    self.assertEqual(produced.transitions, model.transitions)
    self.assertTrue(produced.Flags["Delay DFA"])
def _test_compute3(self):
    # Load an automaton from a stored nfa_data file, determinise it,
    # run compute(False) and compare with the expected Delay DFA.
    test_dir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    machine = DELAY_DFA()
    source = nfa_data().load_from_file(
        test_dir + "test_data/text_ddfa.nfa_data")
    machine.create_from_nfa_data(source)
    machine.determinise()
    machine.compute(False)
    self.assertTrue(machine.get_compute())
    got = machine.get_automaton()
    want = nfa_data()
    want.add_symbols(b_Sym_char("a", "a", 0))
    want.add_symbols(b_Sym_char("b", "b", 1))
    want.add_symbols(b_Sym_char("c", "c", 2))
    want.add_symbols(b_Sym_char("d", "d", 3))
    want.add_symbols(DEF_SYMBOLS("default", 4))
    for sid, final_spec in ((0, set()), (1, set([0])), (2, set()),
                            (3, set([0])), (4, set([0]))):
        want.add_states(b_State(sid, final_spec))
    want.start = 0
    want.final = set([1, 3, 4])
    for trans in ((0, 2, 0), (0, 0, 1), (0, 1, 2), (0, 3, 3),
                  (1, 4, 0), (2, 2, 4), (2, 4, 0), (3, 4, 0), (4, 4, 0)):
        want.add_transitions(trans)
    self.assertEqual(got.states.keys(), want.states.keys())
    self.assertEqual(got.start, want.start)
    self.assertEqual(got.final, want.final)
    self.assertEqual(got.alphabet, want.alphabet)
    self.assertEqual(got.transitions, want.transitions)
    self.assertTrue(got.Flags["Delay DFA"])
def test__identify_fading_states(self):
    """_identify_fading_states(nfa_closure_states)"""
    # Part 1: with a hand-made state representation, closure state 2
    # must be identified in every composite state that contains it.
    history = HistoryFA()
    history._state_representation = [
        set([0]), set([0, 1]), set([0, 2]), set([0, 3]), set([0, 4]),
        set([0, 5]), set([0, 6]), set([0, 2, 4]), set([0, 2, 5]),
        set([0, 2, 6]),
    ]
    self.assertTrue(history._identify_fading_states([2]) == [2, 7, 8, 9])
    # Part 2: build a small NFA, run the pipeline and identify the
    # fading states from the discovered closure states.
    act = nfa_data()
    for sid in (0, 1, 2, 4, 5):
        act.states[sid] = b_State(sid, set())
    act.states[3] = b_State(3, set([0]))
    act.states[6] = b_State(6, set([1]))
    for idx, ch in enumerate("abcdef"):
        act.alphabet[idx] = b_Sym_char(ch, ch, idx)
    star = set(chr(code) for code in range(0, 256))
    act.alphabet[6] = b_Sym_char_class("*", star, 6)
    not_a = set(chr(code) for code in range(0, 256))
    not_a.remove('a')
    act.alphabet[7] = b_Sym_char_class("^a", not_a, 7)
    act.start = 0
    act.final.add(3)
    act.final.add(6)
    for trans in ((0, 6, 0), (0, 0, 1), (1, 1, 2), (2, 7, 2),
                  (2, 2, 3), (0, 3, 4), (4, 4, 5), (5, 5, 6)):
        act.transitions.add(trans)
    history = HistoryFA()
    history._automaton = act
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    history.determinise(create_table=True)
    closure_states = history._discover_closure_states(NFA)
    self.assertTrue(
        history._identify_fading_states(closure_states) == [5, 7, 8, 9])
def test_enable_fallback_state(self):
    """enable_fallback_state()"""
    # Test that fallback and fallback_state are set accordingly, that
    # _compute is reset to False, and that a warning is / is not printed
    # on stdout depending on the warning parameter.
    nfaData = nfa_data()
    nfaData.states[0] = b_State(0, set())
    nfaData.states[1] = b_State(1, set())
    nfaData.states[2] = b_State(2, set())
    nfaData.states[3] = b_State(3, set([0]))
    nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
    nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
    nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
    nfaData.start = 0
    nfaData.transitions.add((0, 0, 1))
    nfaData.transitions.add((1, 1, 2))
    nfaData.transitions.add((2, 2, 3))
    nfaData.final.add(3)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut.create_from_nfa_data(nfaData)
    aut.compute()
    # BUG FIX: the original leaked the read handles and never restored
    # sys.stdout if an assertion failed; use "with" for the files and a
    # try/finally for stdout restoration and cleanup.
    tmp = sys.stdout
    try:
        # redirect stdout to a file; warning=False -> nothing printed
        with open("stdout.output", 'w') as f:
            sys.stdout = f
            aut.enable_fallback_state(2, warning=False)
        with open("stdout.output", 'r') as e:
            line = e.readline()
        # warning was set to False, stdout should be empty
        self.assertFalse(line)
        # check if the fallback_state was set
        self.assertEqual(aut.fallback_state, 2)
        self.assertFalse(aut.get_compute())
        self.assertTrue(aut.fallback)
        # default call: warning should be printed
        with open("stdout.output", 'w') as f:
            sys.stdout = f
            aut.enable_fallback_state()
        with open("stdout.output", 'r') as e:
            line = e.readline()
        self.assertTrue(line)
        # check if the fallback_state was chosen correctly
        self.assertEqual(aut.fallback_state, 1)
        self.assertFalse(aut.get_compute())
        self.assertTrue(aut.fallback)
    finally:
        # restore sys.stdout and remove the scratch file in all cases
        sys.stdout = tmp
        os.remove("stdout.output")
def _test_compute_3(self):
    """compute()"""
    # More patterns, one of which blows up at the start of its RE:
    # expect a DFA head with 3 final states and one NFA tail attached
    # at DFA state 8.
    tdata = aux_func.getPatternMatchDir() + \
        "/algorithms/j_hybrid_fa/tests_data/"
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(tdata + "test_compute_3.re")
    hyfa.compute()
    # self.get_compute() has to be True
    # (the original asserted this twice; the duplicate was removed)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    d = nfa_data().load_from_file(tdata + "test_compute_3_dfa.nfa_data")
    n = nfa_data().load_from_file(tdata + "test_compute_3_nfa0.nfa_data")
    # test of DFA part
    # BUG FIX: the original compared hd.states.keys().sort() with
    # d.states.keys().sort(); list.sort() returns None, so that
    # assertion compared None == None and could never fail. Compare
    # sorted copies instead.
    self.assertEqual(sorted(hd.states.keys()), sorted(d.states.keys()))
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 3)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual(hyfa.tran_aut, {0: 8})
    # test of NFA part #0 (same .sort() bug fixed here)
    self.assertEqual(sorted(hn0.states.keys()), sorted(n.states.keys()))
    self.assertEqual(hn0.alphabet, n.alphabet)
    self.assertEqual(hn0.start, n.start)
    self.assertEqual(hn0.final, n.final)
    self.assertEqual(hn0.transitions, n.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
def _test_compute_2(self):
    """compute()"""
    # Single RE /abcd/ with special_min_depth 2: the DFA head equals
    # the determinised automaton for /ab/ and one NFA tail remains.
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.set_text("/abcd/")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    # self.get_compute() has to be True
    self.assertTrue(hyfa.get_compute())
    # reference head: determinised /ab/
    head_parser = pcre_parser()
    head_parser.set_text("/ab/")
    ref_dfa = b_dfa()
    ref_dfa.create_by_parser(head_parser)
    ref_dfa.determinise()
    head = hyfa.dfa.get_automaton()
    tail = hyfa.nfas[0].get_automaton()
    exp_head = ref_dfa.get_automaton()
    exp_tail = nfa_data().load_from_file(
        aux_func.getPatternMatchDir()
        + "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")
    # test of DFA part
    self.assertEqual(head.states.keys(), exp_head.states.keys())
    self.assertEqual(head.alphabet, exp_head.alphabet)
    self.assertEqual(head.start, exp_head.start)
    self.assertEqual(len(head.final), 0)
    self.assertEqual(head.transitions, exp_head.transitions)
    self.assertTrue(head.Flags['Hybrid FA - DFA part'])
    self.assertTrue(head.Flags['Deterministic'])
    # exactly one NFA tail attached at DFA state 2
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual(hyfa.tran_aut, {0: 2})
    # test of NFA part #0
    self.assertEqual(tail.states.keys(), exp_tail.states.keys())
    self.assertEqual(tail.alphabet, exp_tail.alphabet)
    self.assertEqual(tail.start, exp_tail.start)
    self.assertEqual(tail.final, exp_tail.final)
    self.assertEqual(tail.transitions, exp_tail.transitions)
    self.assertTrue(tail.Flags['Hybrid FA - one NFA part'])
def test_validate_transition(self):
    """validate_transition()"""
    # Validate transitions against both the exact and the faulty
    # (hash-only) transition table.
    nfa = nfa_data()
    nfa.states[0] = b_State(0, set())
    nfa.states[1] = b_State(1, set())
    nfa.states[2] = b_State(2, set())
    nfa.states[3] = b_State(3, set([0]))
    nfa.alphabet[0] = b_Sym_char("a", "a", 0)
    nfa.alphabet[1] = b_Sym_char("b", "b", 1)
    nfa.alphabet[2] = b_Sym_char("c", "c", 2)
    nfa.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        nfa.transitions.add(trans)
    nfa.final.add(3)
    aut = PHF_DFA()
    phf = bdz()
    phf.set_limit(128)
    aut.set_PHF_class(phf)
    aut.create_from_nfa_data(nfa)
    aut.compute()
    # every real transition must be valid
    for t in aut._automaton1.transitions:
        self.assertTrue(aut.validate_transition(aut._transition_rep(t)))
    # nonexistent transitions (including out-of-range state/symbol
    # indices) must be invalid
    state_cnt = len(aut._automaton1.states)
    sym_cnt = len(aut._automaton1.alphabet)
    for t in ((0, 2, 0), (1, 0, 2), (state_cnt, sym_cnt, 0),
              (0, sym_cnt, 0), (state_cnt, 0, 0)):
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
    # the same with faulty transitions enabled
    aut.enable_faulty_transitions(32)
    aut.compute()
    # all real transitions must still be valid
    for t in aut._automaton1.transitions:
        self.assertTrue(aut.validate_transition(aut._transition_rep(t)))
    # nonexistent transitions -> invalid, collisions are improbable
    for t in ((0, 2, 0), (1, 0, 2), (10, 10, 1), (11, 11, 1),
              (12, 12, 1)):
        self.assertFalse(aut.validate_transition(aut._transition_rep(t)))
def _test_compute_2(self):
    """compute()"""
    # One regular expression /abcd/; with min depth 2 the computed
    # hybrid automaton keeps a /ab/-equivalent DFA part plus one NFA
    # tail loaded from a stored fixture.
    hybrid = hybrid_fa()
    re_parser = pcre_parser()
    re_parser.set_text("/abcd/")
    hybrid.create_by_parser(re_parser)
    hybrid.set_special_min_depth(2)
    hybrid.set_max_head_size(-1)
    hybrid.set_max_tx(-1)
    hybrid.compute()
    # self.get_compute() has to be True
    self.assertTrue(hybrid.get_compute())
    dfa_parser = pcre_parser()
    dfa_parser.set_text("/ab/")
    reference_dfa = b_dfa()
    reference_dfa.create_by_parser(dfa_parser)
    reference_dfa.determinise()
    dfa_part = hybrid.dfa.get_automaton()
    nfa_part = hybrid.nfas[0].get_automaton()
    expected_dfa = reference_dfa.get_automaton()
    expected_nfa = nfa_data().load_from_file(
        aux_func.getPatternMatchDir()
        + "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")
    # DFA part checks
    self.assertEqual(dfa_part.states.keys(), expected_dfa.states.keys())
    self.assertEqual(dfa_part.alphabet, expected_dfa.alphabet)
    self.assertEqual(dfa_part.start, expected_dfa.start)
    self.assertEqual(len(dfa_part.final), 0)
    self.assertEqual(dfa_part.transitions, expected_dfa.transitions)
    self.assertTrue(dfa_part.Flags['Hybrid FA - DFA part'])
    self.assertTrue(dfa_part.Flags['Deterministic'])
    self.assertEqual(len(hybrid.nfas), 1)
    self.assertEqual(hybrid.tran_aut, {0: 2})
    # NFA part #0 checks
    self.assertEqual(nfa_part.states.keys(), expected_nfa.states.keys())
    self.assertEqual(nfa_part.alphabet, expected_nfa.alphabet)
    self.assertEqual(nfa_part.start, expected_nfa.start)
    self.assertEqual(nfa_part.final, expected_nfa.final)
    self.assertEqual(nfa_part.transitions, expected_nfa.transitions)
    self.assertTrue(nfa_part.Flags['Hybrid FA - one NFA part'])
def test_report_memory_real(self):
    """report_memory_real()"""
    # Regression values for several PHF table limits, table parameter
    # pairs and faulty-transition widths.
    nfa = nfa_data()
    nfa.states[0] = b_State(0, set())
    nfa.states[1] = b_State(1, set())
    nfa.states[2] = b_State(2, set())
    nfa.states[3] = b_State(3, set([0]))
    for idx, ch in enumerate("abc"):
        nfa.alphabet[idx] = b_Sym_char(ch, ch, idx)
    nfa.start = 0
    for trans in ((0, 0, 1), (1, 1, 2), (2, 2, 3)):
        nfa.transitions.add(trans)
    nfa.final.add(3)
    aut = PHF_DFA()
    phf = bdz()
    phf.set_limit(8)
    aut.set_PHF_class(phf)
    aut.create_from_nfa_data(nfa)
    aut.compute()
    self.assertEqual(aut.report_memory_real(), 120)
    aut.set_table_parameters((4, 6))
    self.assertEqual(aut.report_memory_real(), 48)
    aut.set_table_parameters((4, 7))
    self.assertEqual(aut.report_memory_real(), 72)
    # smaller PHF limit -> smaller table
    phf.set_limit(5)
    aut.set_PHF_class(phf)
    aut.compute()
    self.assertEqual(aut.report_memory_real(), 45)
    # faulty transitions with different hash widths
    aut.enable_faulty_transitions(10)
    self.assertEqual(aut.report_memory_real(), 30)
    aut.enable_faulty_transitions(19)
    self.assertEqual(aut.report_memory_real(), 60)
def test__discover_closure_states(self):
    """_discover_closure_states(NFA)"""
    # Build a small NFA; state 2 loops on the "^a" character class
    # (transition (2, 7, 2)) and must be the only closure state found.
    act = nfa_data()
    for sid in (0, 1, 2, 4, 5):
        act.states[sid] = b_State(sid, set())
    act.states[3] = b_State(3, set([0]))
    act.states[6] = b_State(6, set([1]))
    for idx, ch in enumerate("abcdef"):
        act.alphabet[idx] = b_Sym_char(ch, ch, idx)
    full_class = set(chr(code) for code in range(0, 256))
    act.alphabet[6] = b_Sym_char_class("*", full_class, 6)
    without_a = set(chr(code) for code in range(0, 256))
    without_a.remove('a')
    act.alphabet[7] = b_Sym_char_class("^a", without_a, 7)
    act.start = 0
    act.final.add(3)
    act.final.add(6)
    for trans in ((0, 6, 0), (0, 0, 1), (1, 1, 2), (2, 7, 2),
                  (2, 2, 3), (0, 3, 4), (4, 4, 5), (5, 5, 6)):
        act.transitions.add(trans)
    history = HistoryFA()
    history._automaton = act
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    self.assertTrue(history._discover_closure_states(NFA) == [2])
def test_get_default_trans_num(self):
    """get_default_trans_num"""
    # Reuse the automata from the compute tests and check how many
    # default (failure) transitions each Delay DFA ends up with.
    ddfa1 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^abcd/")
    ddfa1.create_by_parser(parser)
    ddfa1.compute()
    self.assertTrue(ddfa1.get_compute())
    ddfa2 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    ddfa2.create_by_parser(parser)
    ddfa2.compute()
    self.assertTrue(ddfa2.get_compute())
    # third automaton comes from a stored nfa_data file
    tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    ddfa3 = DELAY_DFA()
    ddfa3.create_from_nfa_data(
        nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data"))
    ddfa3.determinise()
    ddfa3.compute(False)
    self.assertTrue(ddfa3.get_compute())
    self.assertEqual(ddfa1.get_default_trans_num(), 0)
    self.assertEqual(ddfa2.get_default_trans_num(), 1)
    self.assertEqual(ddfa3.get_default_trans_num(), 4)
def test_get_default_trans_num(self):
    """get_default_trans_num"""
    # NOTE(review): this method appears to duplicate an identical
    # test_get_default_trans_num definition earlier in the file; if both
    # live in the same class, only the later definition runs — confirm
    # whether they belong to different TestCase classes.
    # Tests with regular expressions from test_compute.
    delay_dfa1 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^abcd/")
    delay_dfa1.create_by_parser(parser)
    delay_dfa1.compute()
    self.assertTrue(delay_dfa1.get_compute())

    delay_dfa2 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    delay_dfa2.create_by_parser(parser)
    delay_dfa2.compute()
    self.assertTrue(delay_dfa2.get_compute())

    delay_dfa3 = DELAY_DFA()
    # Get test directory
    tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    nfaData = nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data")
    delay_dfa3.create_from_nfa_data(nfaData)
    delay_dfa3.determinise()
    delay_dfa3.compute(False)
    self.assertTrue(delay_dfa3.get_compute())

    # Expected default-transition counts for the three automata.
    self.assertEqual(delay_dfa1.get_default_trans_num(), 0)
    self.assertEqual(delay_dfa2.get_default_trans_num(), 1)
    self.assertEqual(delay_dfa3.get_default_trans_num(), 4)
def test__replace_length_restriction_with_a_closure(self):
    """_replace_length_restriction_with_a_closure(NFA)"""
    # /ab.{4}cd/; the .{4} length restriction should be replaced with a
    # closure and the counter value recorded in flags_cnt.
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab.{4}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    computed = history._replace_length_restriction_with_a_closure(NFA)
    expected = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")
    # The counter for flag 4 must be registered with value "4".
    self.assertTrue(history.flags_cnt == {4: "4"})
    self.assertTrue(sorted(computed.states.keys()) ==
                    sorted(expected.states.keys()))
    self.assertTrue(computed.alphabet == expected.alphabet)
    self.assertTrue(computed.start == expected.start)
    self.assertTrue(computed.final == expected.final)
    self.assertTrue(computed.transitions == expected.transitions)
    self.assertTrue(computed.Flags == expected.Flags)
def test_remove_fallback_transitions(self):
    """remove_fallback_transitions()"""
    # 1. /abc/, state -1 (automatically chosen 0) - 4 transitions removed
    dfa = nfa_data()
    dfa.states[0] = b_State(0, set())
    dfa.states[1] = b_State(1, set())
    dfa.states[2] = b_State(2, set())
    dfa.states[3] = b_State(3, set([0]))
    for idx, char in enumerate("abc"):
        dfa.alphabet[idx] = b_Sym_char(char, char, idx)
    dfa.start = 0
    # Fully defined DFA for /abc/: every state has a transition for
    # every symbol (fallbacks go to state 0 or 1).
    for tran in [(0, 0, 1), (0, 1, 0), (0, 2, 0),
                 (1, 1, 2), (1, 0, 1), (1, 2, 0),
                 (2, 2, 3), (2, 0, 1), (2, 1, 0),
                 (3, 0, 3), (3, 1, 3), (3, 2, 3)]:
        dfa.transitions.add(tran)
    dfa.final.add(3)
    reference = copy.deepcopy(dfa)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut.create_from_nfa_data(dfa)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    cp = aut._automaton1
    self.assertEqual(len(cp.states), len(reference.states))
    self.assertEqual(len(cp.alphabet), len(reference.alphabet))
    self.assertEqual(len(cp.transitions), 8)  # 4 removed transitions
    for tran in cp.transitions:
        # no transitions to fallback_state
        self.assertNotEqual(tran[2], aut.fallback_state)
    self.assertEqual(len(cp.final), len(reference.final))
    # 2. /abc/, state 1 - 3 transitions removed
    aut._automaton1 = aut._automaton
    aut.enable_fallback_state(1, False)
    aut.compute()
    cp = aut._automaton1
    self.assertEqual(len(cp.states), len(reference.states))
    self.assertEqual(len(cp.alphabet), len(reference.alphabet))
    self.assertEqual(len(cp.transitions), 9)  # 3 removed transitions
    for tran in cp.transitions:
        # no transitions to fallback_state
        self.assertNotEqual(tran[2], aut.fallback_state)
    self.assertEqual(len(cp.final), len(reference.final))
    # 3. /^abc/, state 0 - automaton does not change
    dfa = nfa_data()
    dfa.states[0] = b_State(0, set())
    dfa.states[1] = b_State(1, set())
    dfa.states[2] = b_State(2, set())
    dfa.states[3] = b_State(3, set([0]))
    for idx, char in enumerate("abc"):
        dfa.alphabet[idx] = b_Sym_char(char, char, idx)
    dfa.start = 0
    for tran in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
        dfa.transitions.add(tran)
    dfa.final.add(3)
    reference = copy.deepcopy(dfa)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut.create_from_nfa_data(dfa)
    aut.enable_fallback_state(0, warning=False)
    aut.compute()
    cp = aut._automaton1
    self.assertEqual(len(cp.states), len(reference.states))
    self.assertEqual(len(cp.alphabet), len(reference.alphabet))
    self.assertEqual(len(cp.transitions), len(reference.transitions))
    for tran in cp.transitions:
        # no transitions to fallback_state
        self.assertNotEqual(tran[2], aut.fallback_state)
    self.assertEqual(len(cp.final), len(reference.final))
def _test_compute_4(self):
    """compute()"""
    # Test with more regular expressions, where the computed automaton
    # has some NFA tails.
    # FIX: state-key comparisons are wrapped in sorted() so the test does
    # not depend on Python 2 dict key ordering — keys() returns a list in
    # arbitrary order, and the other tests in this file already compare
    # sorted key lists.
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.load_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_4_pattern.re")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    # self.get_compute() has to be True
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton()
    hn0 = hyfa.nfas[0].get_automaton()
    hn1 = hyfa.nfas[1].get_automaton()
    d = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_4_dfa.nfa_data")
    n0 = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa0.nfa_data")
    n1 = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_4_nfa1.nfa_data")
    # test of DFA part
    self.assertEqual(sorted(hd.states.keys()), sorted(d.states.keys()))
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    # DFA head has no final states — matches finish in the NFA tails.
    self.assertTrue(len(hd.final) == 0)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    # two NFA tails
    self.assertEqual(len(hyfa.nfas), 2)
    self.assertEqual(hyfa.tran_aut, {0: 4, 1: 5})
    # test of NFA part #0
    self.assertEqual(sorted(hn0.states.keys()), sorted(n0.states.keys()))
    self.assertEqual(hn0.alphabet, n0.alphabet)
    self.assertEqual(hn0.start, n0.start)
    self.assertEqual(hn0.final, n0.final)
    self.assertEqual(hn0.transitions, n0.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
    # test of NFA part #1
    self.assertEqual(sorted(hn1.states.keys()), sorted(n1.states.keys()))
    self.assertEqual(hn1.alphabet, n1.alphabet)
    self.assertEqual(hn1.start, n1.start)
    self.assertEqual(hn1.final, n1.final)
    self.assertEqual(hn1.transitions, n1.transitions)
    self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
def test_compute(self):
    """compute()"""
    # Check the correctness of the logical machine output over
    # self.assertTrue on individual items + focus on the properties
    # of HistoryCountingFA (transitions, flags, counters).
    # FIX: the local result variable was named "copy", shadowing the
    # copy module that other tests in this file use (copy.deepcopy);
    # renamed to "computed" to remove the hazard.

    def check_against(computed, data_file):
        # Compare a computed automaton with a stored reference nfa_data.
        expected = nfa_data().load_from_file(
            aux_func.getPatternMatchDir() +
            "/algorithms/history_counting_fa/test_data/" + data_file)
        self.assertTrue(sorted(computed.states.keys()) ==
                        sorted(expected.states.keys()))
        self.assertTrue(computed.alphabet == expected.alphabet)
        self.assertTrue(computed.start == expected.start)
        self.assertTrue(computed.final == expected.final)
        self.assertTrue(computed.transitions == expected.transitions)

    # /abcd/; test with an expression that does not use properties
    # of HistoryCountingFA
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/abcd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    history.determinise(create_table=True)
    history.compute(NFA)
    check_against(history.get_automaton(), "test_data_2.nfa_data")
    # /ab.{3}cd/; test with an expression that contains .{X}
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab.{3}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    NFA = history.get_automaton(True)
    history._automaton = NFA_without_cnt
    history.determinise(create_table=True)
    history.compute(NFA)
    check_against(history.get_automaton(), "test_data_3.nfa_data")
    # /ab[^1234]{3}cd|efg/; test with an expression containing one
    # alternation [^1234]{3}, the second is not
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab[^1234]{3}cd|efg/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    NFA = history.get_automaton(True)
    history._automaton = NFA_without_cnt
    history.determinise(create_table=True)
    history.compute(NFA)
    check_against(history.get_automaton(), "test_data_4.nfa_data")
def test_compute(self):
    """compute()"""

    def make_phf(nfa):
        # Wrap an nfa_data into a PHF_DFA using a bdz hash limited to 128.
        aut = PHF_DFA()
        h = bdz()
        h.set_limit(128)
        aut.set_PHF_class(h)
        aut.create_from_nfa_data(nfa)
        return aut

    # 1. /^abc/ - automaton does not change, PHF table is created
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set())
    data.states[3] = b_State(3, set([0]))
    for idx, char in enumerate("abc"):
        data.alphabet[idx] = b_Sym_char(char, char, idx)
    data.start = 0
    for tran in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
        data.transitions.add(tran)
    data.final.add(3)
    reference = copy.deepcopy(data)
    aut = make_phf(data)
    aut.compute()
    cp = aut._automaton1
    self.assertEqual(len(cp.states), len(reference.states))
    self.assertEqual(len(cp.alphabet), len(reference.alphabet))
    self.assertEqual(len(cp.transitions), len(reference.transitions))
    self.assertEqual(len(cp.final), len(reference.final))
    self.assertNotEqual(aut.trans_table, None)
    self.assertTrue(aut.get_compute())
    # 2. determinization of /^ab|ac/, PHF table is created
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set([0]))
    data.states[3] = b_State(3, set())
    data.states[4] = b_State(4, set([0]))
    for idx, char in enumerate("abc"):
        data.alphabet[idx] = b_Sym_char(char, char, idx)
    data.start = 0
    for tran in [(0, 0, 1), (1, 1, 2), (0, 0, 3), (3, 2, 4)]:
        data.transitions.add(tran)
    data.final.add(2)
    data.final.add(4)
    aut = make_phf(data)
    aut.compute()
    cp = aut._automaton1
    # Determinisation merges the two 'a' branches: 3 states remain.
    self.assertEqual(len(cp.states), 3)
    self.assertEqual(len(cp.alphabet), 3)
    self.assertEqual(len(cp.transitions), 3)
    self.assertEqual(len(cp.final), 1)
    self.assertNotEqual(aut.trans_table, None)
    self.assertTrue(aut.get_compute())
    # 3. resolve alphabet - /^[a-c][b-d]/, PHF table is created
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set([0]))
    data.alphabet[0] = b_Sym_char_class("ch0", set(['a', 'b', 'c']), 0)
    data.alphabet[1] = b_Sym_char_class("ch1", set(['b', 'c', 'd']), 1)
    data.start = 0
    data.transitions.add((0, 0, 1))
    data.transitions.add((1, 1, 2))
    data.final.add(2)
    aut = make_phf(data)
    aut.compute()
    cp = aut._automaton1
    # Overlapping classes are split into 3 disjoint alphabet symbols.
    self.assertEqual(len(cp.states), 3)
    self.assertEqual(len(cp.alphabet), 3)
    self.assertEqual(len(cp.transitions), 4)
    self.assertEqual(len(cp.final), 1)
    self.assertNotEqual(aut.trans_table, None)
    self.assertTrue(aut.get_compute())
    # 4. /abc/ and enable_fallback_state - some transitions are removed
    data = nfa_data()
    data.states[0] = b_State(0, set())
    data.states[1] = b_State(1, set())
    data.states[2] = b_State(2, set())
    data.states[3] = b_State(3, set([0]))
    for idx, char in enumerate("abc"):
        data.alphabet[idx] = b_Sym_char(char, char, idx)
    data.start = 0
    for tran in [(0, 0, 1), (0, 1, 0), (0, 2, 0),
                 (1, 1, 2), (1, 0, 1), (1, 2, 0),
                 (2, 2, 3), (2, 0, 1), (2, 1, 0),
                 (3, 0, 3), (3, 1, 3), (3, 2, 3)]:
        data.transitions.add(tran)
    data.final.add(3)
    reference = copy.deepcopy(data)
    aut = make_phf(data)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    cp = aut._automaton1
    self.assertEqual(len(cp.states), len(reference.states))
    self.assertEqual(len(cp.alphabet), len(reference.alphabet))
    self.assertTrue(len(cp.transitions) < len(reference.transitions))
    self.assertEqual(len(cp.final), len(reference.final))
    self.assertNotEqual(aut.trans_table, None)
    self.assertTrue(aut.get_compute())
def test():
    """ Run searching using pcregrep and PHF_DFA. Prints out the results. """
    # NOTE(review): Python 2 code. The source was whitespace-mangled;
    # block nesting below was reconstructed from the statement order and
    # data flow — confirm against version history where marked.
    # parse options
    usage = "usage: %prog rules.pcre pcap_dir/ [options]"
    optparser = OptionParser(usage=usage)
    optparser.add_option("-O", "--outputfile", dest="resultfile", help="output file for results, default is stdout")
    optparser.add_option("-P", "--PerPacket", dest="PerPacket", action="store_true", default=False, help="compare nonfaulty matching for flows and packets, faulty algorithm is used only with flows")
    optparser.add_option("-s", "--showprogress", dest="progress", action="store_true", default=False, help="show progress of computation")
    optparser.add_option("-C", "--count", dest="maxiter", type="int", default="1", help="number of test iterations")
    optparser.add_option("-F", "--faulty", dest="FAULTY", type="int", default="0", help="number of bits for compress hash, default is 0 (no faulty transitions)")
    optparser.add_option("-D", "--debuglevel", dest="DEBUG", type="int", default="0", help="debug output level (0-2)")
    optparser.add_option("-S", "--savefile", dest="savefile", default="", metavar="FILE", help="save nfa_data in FILE")
    optparser.add_option("-L", "--loadfile", dest="autfile", default="", metavar="FILE", help="load nfa_data from FILE")
    optparser.add_option("-N", "--nonfaulty", dest="NonFaulty", action="store_true", default=False, help="try to generate PHF table without collisions, therefore ensure nonfaulty matching. Experimental code. " "May take a long time with small compress hash output.")
    (options, args) = optparser.parse_args()
    # FAULTY and DEBUG are module-level globals written from the options.
    global FAULTY, DEBUG
    if len(args) != 2:
        print "You must specify rules.pcre and pcap_dir/"
        optparser.print_usage()
        exit(1)
    rulesfile, inputdir = args
    PerPacket, resultfile, maxiter, autfile, savefile, FAULTY, DEBUG = options.PerPacket, options.resultfile, options.maxiter, options.autfile, options.savefile, options.FAULTY, options.DEBUG
    progress = options.progress
    NonFaulty = options.NonFaulty
    if inputdir[-1] == "/":
        inputdir = inputdir[:-1]  # remove '/' from the end
    rules = open(rulesfile, 'rb')
    if PerPacket:
        # Per-packet mode expects pcap_dir/ to contain flows/ and packets/.
        packetdir = inputdir + "/packets"
        inputdir = inputdir + "/flows"
    if resultfile:
        # Redirect all subsequent prints into the result file.
        sys.stdout = open(resultfile, 'a')
    totalhits, totalfp, totalfn = (0, 0, 0)
    iter = 0
    while iter != maxiter:
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "pcregrep",
        if not iter:
            # prepare pcregrep (first iteration only)
            p = subprocess.Popen("cd pcre-8.20/ && make pcregrep", shell=True, stdout=subprocess.PIPE)
            p.wait()
            results = dict()
        file_list = list()
        rule_count = len(open(rulesfile).readlines())
        # results[file] = [PHF_DFA hits, pcregrep flow hits, pcregrep
        # packet hits], one slot per rule. Only slot 0 is recomputed on
        # later iterations; pcregrep reference results are kept.
        for root, dirs, files in os.walk(inputdir):
            for i in files:
                i = os.path.join(root, i)
                file_list.append(i)
                if not iter:
                    results[i] = [rule_count*[0], rule_count*[0], rule_count*[0]]
                else:
                    results[i][0] = rule_count*[0]
        #results = init_results
        rule_num = 0
        # Temp file for the current rule, unique per process.
        grep_reg_exp = "grep_reg_exp." + str(os.getpid())
        for rule in rules:
            if not iter:
                # Reference run: one pcregrep invocation per rule.
                if DEBUG:
                    print rule,
                (grep_rule, grep_params) = parse_rule(rule)
                f = open(grep_reg_exp, 'w')
                f.write(grep_rule)
                f.close()
                p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + inputdir, shell=True, stdout=subprocess.PIPE)
                p.wait()
                for out in p.stdout:
                    item = out.split()[0]
                    results[item][1][rule_num] = 1
                if PerPacket:
                    p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + packetdir, shell=True, stdout=subprocess.PIPE)
                    p.wait()
                    for out in p.stdout:
                        # Map packet file name back to its flow file path.
                        item = inputdir + "/" + out.split()[0].split("-")[1].replace("_", "/")
                        results[item][2][rule_num] = 1
            rule_num += 1
        # Best-effort cleanup of the temp rule file.
        try:
            os.remove(grep_reg_exp)
        except:
            pass
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "create automaton",
        #aut = b_Automaton()
        aut = PHF_DFA()
        if autfile:
            # Pre-built automaton supplied on the command line.
            aut.create_from_nfa_data(nfa_data().load_from_file(autfile))
        else:
            # Build DFA from the rule file: parse -> resolve alphabet ->
            # determinise -> minimise.
            par = parser("pcre_parser")
            #par.set_text(rule)
            par.load_file(rulesfile)
            aut.create_by_parser(par)
            if DEBUG:
                aut.show("NFA.dot")
            #aut.remove_epsilons()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "resolve alphabet",
            aut.resolve_alphabet()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "determinise",
            aut.determinise()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "minimise",
            aut.minimise()
            if DEBUG:
                aut.show("DFA.dot")
            if savefile:
                aut._automaton.save_to_file(savefile)
        aut._automaton1 = aut._automaton
        aut.set_table_parameters((20, 10))
        if DEBUG > 1:
            print "Without fallback state:"
            print "Symbols:", len(aut._automaton.alphabet)
            print "States:", len(aut._automaton.states)
            print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        if isinstance(aut, PHF_DFA):
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "generate PHF",
            # Full transition table -> a fallback state can be elided.
            if aut.get_trans_num() == (aut.get_state_num() * aut.get_alpha_num()):
                aut.enable_fallback_state(warning=False)
            if FAULTY:
                aut.enable_faulty_transitions(FAULTY)
            if NonFaulty:
                aut.enable_faulty_check()
            aut.compute()
            if DEBUG:
                print "Fallback state:", aut.fallback_state
                print "Symbols:", len(aut._automaton.alphabet)
                print "States:", len(aut._automaton.states)
                print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        count = 1
        all = len(file_list)
        if progress:
            print >> sys.stderr, '\r' + 80*' ' + '\r',
        for f in file_list:
            # progress
            if progress:
                print >> sys.stderr, '\r',
                print >> sys.stderr, str(iter+1)+'/'+str(maxiter) + ":", count, '/', all,
                # sys.stderr.flush()
            count += 1
            data = open(f, 'rb').read()
            results[f][0] = aut.search(data)
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "compare results",
        if isinstance(aut, PHF_DFA) and DEBUG:
            if DEBUG > 1:
                print "List of collisions:"
                print aut.collisions
                for tran, i in aut.collisions.iteritems():
                    #print tran, i
                    print BitArray(bytes=tran[0], length=aut.symbol_bits).uint, BitArray(bytes=tran[1], length=aut.state_bits).uint, i
                    print "SYM:", aut._automaton.alphabet[BitArray(bytes=tran[0], length=aut.symbol_bits).uint]
            print "Bad transitions:", aut.bad_transitions
            print "Collisions:", len(aut.collisions)
            print "Compress bits:", aut.compress_bits
        # stats = (hits, false positives, false negatives, total, ...)
        stats = compare_results(results)
        stats = list(stats)
        if stats[0] == 0:
            # Avoid division by zero in precision/recall below.
            print "Zero hits, cannot compute F-measure!"
            stats[0] = 1
        if DEBUG:
            print "Total number of searched packets/flows:", stats[3]
        print "Hits:", stats[0]
        totalhits += stats[0]
        totalfp += stats[1]
        totalfn += stats[2]
        precis = float(stats[0])/(stats[1]+stats[0])
        recall = float(stats[0])/(stats[0]+stats[2])
        fmeas = 2 * precis * recall / (precis + recall)
        print "False positives:", stats[1], precis*100, "%"
        print "False negatives:", stats[2], recall*100, "%"
        print "F-measure:", fmeas*100, "%"
        if PerPacket:
            print "Per packet errors:", stats[4], stats[5]
        print '-'*80
        iter += 1
    # NOTE(review): if totalhits is 0 here (every iteration printed the
    # "Zero hits" warning but the per-iteration clamp does not propagate),
    # these divisions can still raise ZeroDivisionError — verify.
    print "Total stats:"
    precis = float(totalhits)/(totalfp + totalhits)
    recall = float(totalhits)/(totalfn + totalhits)
    fmeas = 2 * precis * recall / (precis + recall)
    print "Hits:", totalhits
    print "False positives:", totalfp, precis*100, "%"
    print "False negatives:", totalfn, recall*100, "%"
    print "F-measure:", fmeas*100, "%"
    print "_"*80
def test_compute(self):
    """compute()"""
    # Check the correctness of the logical machine output over
    # self.assertTrue on individual items + focus on the properties
    # of HistoryCountingFA (transitions, flags, counters).
    # NOTE(review): this method looks identical to a test_compute defined
    # earlier in the file — confirm the two live in different TestCase
    # classes, otherwise one silently shadows the other.
    # FIX: the local result variable was named "copy", shadowing the
    # copy module; renamed to "computed".
    # /abcd/; test with an expression that does not use properties
    # of HistoryCountingFA
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/abcd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    history.determinise(create_table=True)
    history.compute(NFA)
    computed = history.get_automaton()
    result = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/history_counting_fa/test_data/test_data_2.nfa_data")
    self.assertTrue(sorted(computed.states.keys()) ==
                    sorted(result.states.keys()))
    self.assertTrue(computed.alphabet == result.alphabet)
    self.assertTrue(computed.start == result.start)
    self.assertTrue(computed.final == result.final)
    self.assertTrue(computed.transitions == result.transitions)
    # /ab.{3}cd/; test with an expression that contains .{X}
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab.{3}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    NFA = history.get_automaton(True)
    history._automaton = NFA_without_cnt
    history.determinise(create_table=True)
    history.compute(NFA)
    computed = history.get_automaton()
    result = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/history_counting_fa/test_data/test_data_3.nfa_data")
    self.assertTrue(sorted(computed.states.keys()) ==
                    sorted(result.states.keys()))
    self.assertTrue(computed.alphabet == result.alphabet)
    self.assertTrue(computed.start == result.start)
    self.assertTrue(computed.final == result.final)
    self.assertTrue(computed.transitions == result.transitions)
    # /ab[^1234]{3}cd|efg/; test with an expression containing one
    # alternation [^1234]{3}, the second is not
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab[^1234]{3}cd|efg/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    NFA = history.get_automaton(True)
    history._automaton = NFA_without_cnt
    history.determinise(create_table=True)
    history.compute(NFA)
    computed = history.get_automaton()
    result = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/history_counting_fa/test_data/test_data_4.nfa_data")
    self.assertTrue(sorted(computed.states.keys()) ==
                    sorted(result.states.keys()))
    self.assertTrue(computed.alphabet == result.alphabet)
    self.assertTrue(computed.start == result.start)
    self.assertTrue(computed.final == result.final)
    self.assertTrue(computed.transitions == result.transitions)
# Dump the joined NFA for visual inspection (Graphviz .dot output).
# NOTE(review): N_Automaton is defined earlier in this script (not visible
# in this chunk). The CamelCase calls Show/SaveToFile/LoadFromFile differ
# from the snake_case API used elsewhere in this codebase
# (show/save_to_file/load_from_file) — verify these method names exist.
N_Automaton.get_automaton().Show("test_NFA.dot")
print("Automata joined")
# Build a PHF DFA from the joined NFA, determinise and minimise it.
D_Automaton = PHF_DFA()
D_Automaton.create_from_nfa_data(N_Automaton.get_automaton())
print("Determinising...")
D_Automaton.determinise(states_limit = 10000)
print("Minimising...")
D_Automaton.minimise()
# What does this actually print?  (translated from Czech original)
D_Automaton.get_automaton().Show("test_min_dfa.dot")
# Save the minimised DFA so it can be reloaded for the second run below.
D_Automaton.get_automaton().SaveToFile("temp_automaton")
print("striding...")
# Alphabet reduction + 2-striding before PHF table generation.
D_Automaton.reduce_alphabet()
D_Automaton.stride_2()
D_Automaton.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton.generate_PHF_table()
# Second run: reload the saved automaton and repeat the same pipeline,
# presumably to check that save/load round-trips — TODO confirm intent.
D_Automaton1 = PHF_DFA()
Temp = nfa_data()
Temp = Temp.LoadFromFile("temp_automaton")
D_Automaton1.create_from_nfa_data(Temp)
D_Automaton1.reduce_alphabet()
D_Automaton1.stride_2()
D_Automaton1.get_automaton().Show("test_multi_dfa.dot")
print("generating PHF...")
D_Automaton1.generate_PHF_table()
def test_get_nfa(self):
    """get_nfa()"""

    def build(parser_obj):
        # Wrap the parser's NFA in a b_Automaton.
        automat = b_Automaton()
        automat._automaton = parser_obj.get_nfa()
        return automat

    def verify(automat, data_file):
        # Eliminate epsilons, then compare every automaton component
        # against a manually prepared reference nfa_data file.
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(data_file)
        self.assertTrue(sorted(cp.states.keys()) ==
                        sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

    # If attribute _position < 0, check returning None.
    parser = pcre_parser()
    self.assertTrue(parser._position < 0)
    self.assertTrue(parser.get_nfa() == None)
    # Try the method on a few regular expressions; results are compared
    # with manually completed machines (after epsilon elimination).
    # 1) concatenation
    parser = pcre_parser()
    parser.set_text("/first/")
    verify(build(parser), "test_data/(1)pcre_get_nfa.nfa_data")
    # 2) branch (automaton creates char class), iteration *
    parser = pcre_parser()
    parser.set_text("/[ab]cd*/")
    verify(build(parser), "test_data/(2)pcre_get_nfa.nfa_data")
    # 3) try second RE (move to next line)
    parser = pcre_parser()
    parser.set_text("/abc/\n/ABC/\n")
    parser.next_line()
    verify(build(parser), "test_data/(3)pcre_get_nfa.nfa_data")
    # 4) basic counting constraint
    parser = pcre_parser()
    parser.set_text("/ab{5}c/")
    verify(build(parser), "test_data/(4)pcre_get_nfa.nfa_data")
    # 5) branch, iteration +, harder counting constraint
    parser = pcre_parser()
    parser.set_text("/a[bc]+d{2,3}/")
    verify(build(parser), "test_data/(5)pcre_get_nfa.nfa_data")
    # 6) basic counting constraint, with create_cnt_constr = True
    parser = pcre_parser(create_cnt_constr=True)
    parser.set_text("/ab{5}c/")
    verify(build(parser), "test_data/(6)pcre_get_nfa.nfa_data")
    # 7) branch, iteration +, harder counting constraint,
    #    with create_cnt_constr = True
    parser = pcre_parser(create_cnt_constr=True)
    parser.set_text("/a[bc]+d{2,3}/")
    verify(build(parser), "test_data/(7)pcre_get_nfa.nfa_data")
    # 8) concatenation, with create_eof_symbols = True, no $
    parser = pcre_parser(create_eof_symbols=True)
    parser.set_text("/first/")
    verify(build(parser), "test_data/(1)pcre_get_nfa.nfa_data")
    # 9) concatenation, with create_eof_symbols = True, $
    parser = pcre_parser(create_eof_symbols=True)
    parser.set_text("/first$/")
    verify(build(parser), "test_data/(9)pcre_get_nfa.nfa_data")
    # 10) branch, iteration +, harder counting constraint,
    #     create_eof_symbols = True, create_cnt_constr = True
    parser = pcre_parser(create_eof_symbols=True, create_cnt_constr=True)
    parser.set_text("/a[bc]+d{2,3}$/")
    verify(build(parser), "test_data/(10)pcre_get_nfa.nfa_data")
def test_generate_PHF_table(self):
    """generate_PHF_table()

    Checks that the generated PHF transition table has the right size,
    that every automaton transition occurs in the table exactly once and
    on the index returned by the hash function, that the remaining lines
    hold the nonexistent-transition filler, and that the faulty
    (compressed) representation is stored correctly.  The same battery
    of checks is run for several table configurations and for an
    automaton built from a PCRE.
    """

    def check_table(aut, expected_size):
        """Run the full battery of PHF-table checks on *aut*.

        Verifies the table size against *expected_size*, the uniqueness
        and hash-index placement of every transition, the filler count,
        and finally re-generates the table with faulty transitions
        enabled (8 bits) and checks the compressed representation.
        Note: faulty transitions stay enabled on *aut* afterwards, which
        matches the original statement order.
        """
        # transition table size
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, expected_size)
        # count number of occurrences of each unique line in the table
        tran_count = dict()
        for line in aut.trans_table:
            tran_count.setdefault(line[1], 0)
            tran_count[line[1]] += 1
        # every automaton transition is in the table exactly once
        for t in aut._automaton1.transitions:
            self.assertEqual(tran_count[aut._transition_rep(t)], 1)
        # the rest of the lines hold the nonexistent-transition filler
        filler = [2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0]
        self.assertEqual(tran_count[aut._transition_rep(filler)],
                         aut.ran - len(aut._automaton1.transitions))
        # each transition sits on the index returned by the hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep,
                             aut.trans_table[aut.hash_function.hash(rep)][1])
        # the representation in the faulty (compressed) table
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep),
                             aut.trans_table[aut.hash_function.hash(rep)][3])

    # A complete automaton for /abc/ over the alphabet {a, b, c}.
    nfaData = nfa_data()
    nfaData.states[0] = b_State(0, set())
    nfaData.states[1] = b_State(1, set())
    nfaData.states[2] = b_State(2, set())
    nfaData.states[3] = b_State(3, set([0]))
    nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
    nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
    nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
    nfaData.start = 0
    for trans in [(0, 0, 1), (0, 1, 0), (0, 2, 0),
                  (1, 1, 2), (1, 0, 1), (1, 2, 0),
                  (2, 2, 3), (2, 0, 1), (2, 1, 0),
                  (3, 0, 3), (3, 1, 3), (3, 2, 3)]:
        nfaData.transitions.add(trans)
    nfaData.final.add(3)

    # 1) bdz hash with a fixed limit of 128 -> table of 384 lines
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut._automaton1 = nfaData
    aut.generate_PHF_table()
    check_table(aut, 384)

    # 2) change the size of the PHF table (ratio 6.0) and repeat tests
    aut = PHF_DFA()
    a = bdz()
    a.set_ratio(6.0)
    a.set_iteration_limit(10)
    aut.set_PHF_class(a)
    aut._automaton1 = nfaData
    aut.generate_PHF_table()
    check_table(aut, 72)

    # 3) RE /#include.*>/ and enable fallback_state
    par = pcre_parser()
    par.set_text("/#include.*>/s")
    aut = PHF_DFA()
    a = bdz()
    a.set_ratio(2.5)
    a.set_iteration_limit(10)
    aut.set_PHF_class(a)
    aut.create_by_parser(par)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    check_table(aut, 90)

    # 4) disable fallback_state and repeat tests
    aut.disable_fallback_state()
    aut.compute()
    check_table(aut, 252)
# N_Automaton.join(parser.get_nfa()) N_Automaton.get_automaton().Show("test_NFA.dot") print("Automata joined") D_Automaton = PHF_DFA() D_Automaton.create_from_nfa_data(N_Automaton.get_automaton()) print("Determinising...") D_Automaton.determinise(states_limit=10000) print("Minimising...") D_Automaton.minimise() #Co to vlastne vypisuje? D_Automaton.get_automaton().Show("test_min_dfa.dot") D_Automaton.get_automaton().SaveToFile("temp_automaton") print("striding...") D_Automaton.reduce_alphabet() D_Automaton.stride_2() D_Automaton.get_automaton().Show("test_multi_dfa.dot") print("generating PHF...") D_Automaton.generate_PHF_table() D_Automaton1 = PHF_DFA() Temp = nfa_data() Temp = Temp.LoadFromFile("temp_automaton") D_Automaton1.create_from_nfa_data(Temp) D_Automaton1.reduce_alphabet() D_Automaton1.stride_2() D_Automaton1.get_automaton().Show("test_multi_dfa.dot") print("generating PHF...") D_Automaton1.generate_PHF_table()
def test_search(self):
    """search()"""

    def build_machine(transition_list):
        """Assemble a 4-state automaton over {a, b, c} (state 3 final)
        with the given transitions, wrap it in a PHF_DFA configured with
        a bdz hash (limit 128) and return the computed machine."""
        data = nfa_data()
        for idx in range(4):
            data.states[idx] = b_State(idx, set([0]) if idx == 3 else set())
        for idx, char in enumerate("abc"):
            data.alphabet[idx] = b_Sym_char(char, char, idx)
        data.start = 0
        for trans in transition_list:
            data.transitions.add(trans)
        data.final.add(3)
        machine = PHF_DFA()
        phf = bdz()
        phf.set_limit(128)
        machine.set_PHF_class(phf)
        machine.create_from_nfa_data(data)
        machine.compute()
        return machine

    # 1. RE /^abc/ -- anchored: only an immediate "abc" prefix matches
    machine = build_machine([(0, 0, 1), (1, 1, 2), (2, 2, 3)])
    self.assertEqual(machine.search("abc"), [1])
    for payload in ("aaaaaaaaaaaaaabc", "ccccbbbabc", "ababc",
                    "d", "cbabbacba"):
        self.assertEqual(machine.search(payload), [0])

    # 2. RE /abc/ -- complete DFA with a transition for every symbol
    machine = build_machine([
        (0, 0, 1), (0, 1, 0), (0, 2, 0),
        (1, 1, 2), (1, 0, 1), (1, 2, 0),
        (2, 2, 3), (2, 0, 1), (2, 1, 0),
        (3, 0, 3), (3, 1, 3), (3, 2, 3),
    ])
    for payload in ("abc", "aaaaaaaaaaaaaabc", "ccccbbbabc", "ababc"):
        self.assertEqual(machine.search(payload), [1])
    self.assertEqual(machine.search("d"), [0])
    self.assertEqual(machine.search("cbabbacba"), [0])

    # 2a. same test with faulty (compressed) transitions, 32 bits
    machine.enable_faulty_transitions(32)
    machine.compute()
    for payload in ("abc", "aaaaaaaaaaaaaabc", "ccccbbbabc", "ababc"):
        self.assertEqual(machine.search(payload), [1])
    self.assertEqual(machine.search("d"), [0])
    self.assertEqual(machine.search("cbabbacba"), [0])

    # 3. RE /#include.*>/ parsed from PCRE, with enable_fallback_state
    par = pcre_parser()
    par.set_text("/#include.*>/")
    machine = PHF_DFA()
    phf = bdz()
    phf.set_ratio(2.5)
    phf.set_iteration_limit(10)
    machine.set_PHF_class(phf)
    machine.create_by_parser(par)
    machine.enable_fallback_state(warning=False)
    machine.compute()
    self.assertEqual(machine.search("#include <stdio.h>"), [1])
    self.assertEqual(machine.search("#include <stdlib.h>"), [1])
    self.assertEqual(machine.search("#include <stdio.h>bba"), [1])
    self.assertEqual(machine.search('#include "pcre.h"'), [0])
    self.assertEqual(machine.search('asdf#include <stdio.h>'), [1])
def test_get_nfa(self):
    """get_nfa()

    First checks that get_nfa() returns None while _position < 0, then
    parses a series of regular expressions and compares each resulting
    automaton (after epsilon elimination) with a manually completed
    reference machine loaded from test_data/.
    """
    # If attribute _position < 0, check returning None.
    parser = pcre_parser()
    self.assertTrue(parser._position < 0)
    self.assertTrue(parser.get_nfa() == None)

    def check(pattern, data_file, skip_first_line=False, **parser_kwargs):
        """Parse *pattern*, remove epsilon transitions and compare the
        result with the reference automaton stored in *data_file*.

        *skip_first_line* advances the parser to the second RE line
        before building; extra keyword arguments are forwarded to the
        pcre_parser constructor.
        """
        parser = pcre_parser(**parser_kwargs)
        parser.set_text(pattern)
        if skip_first_line:
            parser.next_line()
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        cp = automat.get_automaton()
        result = nfa_data().load_from_file(data_file)
        # compare after the elimination of epsilon transitions
        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

    # 1) concatenation
    check("/first/", "test_data/(1)pcre_get_nfa.nfa_data")
    # 2) branch (automat create char class), iteration *
    check("/[ab]cd*/", "test_data/(2)pcre_get_nfa.nfa_data")
    # 3) try second RE (move to next line)
    check("/abc/\n/ABC/\n", "test_data/(3)pcre_get_nfa.nfa_data",
          skip_first_line=True)
    # 4) basic counting constraint
    check("/ab{5}c/", "test_data/(4)pcre_get_nfa.nfa_data")
    # 5) branch, iteration +, harder counting constraint
    check("/a[bc]+d{2,3}/", "test_data/(5)pcre_get_nfa.nfa_data")
    # 6) basic counting constraint, use param create_cnt_constr = True
    check("/ab{5}c/", "test_data/(6)pcre_get_nfa.nfa_data",
          create_cnt_constr=True)
    # 7) branch, iteration +, harder counting constraint,
    #    use param create_cnt_constr = True
    check("/a[bc]+d{2,3}/", "test_data/(7)pcre_get_nfa.nfa_data",
          create_cnt_constr=True)
    # 8) concatenation, with create_eof_symbols = True, no $
    check("/first/", "test_data/(1)pcre_get_nfa.nfa_data",
          create_eof_symbols=True)
    # 9) concatenation, with create_eof_symbols = True, $
    check("/first$/", "test_data/(9)pcre_get_nfa.nfa_data",
          create_eof_symbols=True)
    # 10) branch, iteration +, harder counting constraint
    #     create_eof_symbols = True, create_cnt_constr = True
    check("/a[bc]+d{2,3}$/", "test_data/(10)pcre_get_nfa.nfa_data",
          create_eof_symbols=True, create_cnt_constr=True)