def _test_search3(self):
    """search() - the Delay DFA must match a plain DFA on every input.

    Three REs are compiled into both a Delay DFA and a b_dfa; the
    search results for several inputs must be identical.
    """
    re = "/a+b*c+/\n/b+c*d+/\n/c+d*e/"
    delay_dfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text(re)
    delay_dfa.create_by_parser(parser)
    delay_dfa.compute()
    self.assertTrue(delay_dfa.get_compute())
    reference = b_dfa()
    parser = pcre_parser()
    parser.set_text(re)
    reference.create_by_parser(parser)
    reference.compute()
    for data in ("abcd", "abcd abc", "gabc", "ac bd ce",
                 "ab123a bcd cde", "bce", "abe"):
        self.assertEqual(delay_dfa.search(data), reference.search(data))
def _test_search_4(self):
    """search()

    Many REs where the computed automaton has many NFA parts: a plain
    NFA and a Jumping Hybrid FA are built from the same rule file and
    must report identical matches for one input.
    """
    rules = aux_func.getPatternMatchDir() + "/rules/Moduly/web-cgi.rules.pcre"
    parser = pcre_parser()
    parser.load_file(rules)
    reference = b_nfa()
    reference.create_by_parser(parser)
    reference.compute()
    hyfa = JHybridFA()
    hyfa.set_parser(pcre_parser())
    hyfa.load_file(rules)
    hyfa.compute()
    input_data = "/awstats.pl?---configdir=| /calendar-admin.pl /db4web_c.exe/aaaa: /itemid=123f /ShellExample.cgi?*"
    self.assertEqual(reference.search(input_data), hyfa.search(input_data))
def _test_search_3(self):
    """search()

    Many REs where the computed automaton has many NFA parts: a plain
    NFA and a Hybrid FA are built from the same Snort rule file and
    must report identical matches for one input.
    """
    rules = aux_func.getPatternMatchDir() + "/rules/Snort/web-cgi.rules.pcre"
    parser = pcre_parser()
    parser.load_file(rules)
    reference = b_nfa()
    reference.create_by_parser(parser)
    reference.compute()
    parser = pcre_parser()
    parser.load_file(rules)
    hyfa = hybrid_fa()
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(0)
    hyfa.set_max_tx(0)
    hyfa.compute()
    input_data = "/awstats.pl?---configdir=| /calendar-admin.pl /db4web_c.exe/aaaa: /itemid=123f /ShellExample.cgi?aaaaa*"
    self.assertEqual(reference.search(input_data), hyfa.search(input_data))
def _test_compute_1(self):
    """compute()

    Without blow-up patterns the Jumping Hybrid FA is its DFA head
    alone: the head must equal a directly computed b_dfa and there
    must be no NFA tails.
    """
    pattern = aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_1.re"
    hyfa = JHybridFA()
    hyfa.set_parser(pcre_parser())
    hyfa.load_file(pattern)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    parser = pcre_parser()
    parser.load_file(pattern)
    dfa = b_dfa()
    dfa.create_by_parser(parser)
    dfa.compute()
    a = hyfa.dfa.get_automaton(False)
    b = dfa.get_automaton(False)
    self.assertEqual(a.states.keys(), b.states.keys())
    self.assertEqual(a.alphabet, b.alphabet)
    self.assertEqual(a.start, b.start)
    self.assertEqual(a.final, b.final)
    self.assertEqual(a.transitions, b.transitions)
    self.assertTrue(a.Flags['Hybrid FA - DFA part'])
    self.assertTrue(a.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 0)
    self.assertEqual(hyfa.tran_aut, {})
def _test_search2(self):
    """search() - the Delay DFA must match a plain DFA on every input."""
    re = "/a+b*c+d/\n/abc/"
    ddfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text(re)
    ddfa.create_by_parser(parser)
    ddfa.compute()
    self.assertTrue(ddfa.get_compute())
    parser = pcre_parser()
    parser.set_text(re)
    reference = b_dfa()
    reference.create_by_parser(parser)
    reference.compute()
    for data in ("abcd", "abcd abc", "abc", "acd", "abd"):
        self.assertEqual(ddfa.search(data), reference.search(data))
def _test_compute_2(self):
    """compute()

    One RE whose Hybrid FA has a single NFA tail.  The DFA head must
    equal the determinised automaton for /ab/ (with no final states)
    and the tail must equal a stored reference automaton.
    """
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.set_text("/abcd/")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    parser_dfa = pcre_parser()
    parser_dfa.set_text("/ab/")
    dfa = b_dfa()
    dfa.create_by_parser(parser_dfa)
    dfa.determinise()
    hd = hyfa.dfa.get_automaton()
    hn0 = hyfa.nfas[0].get_automaton()
    d = dfa.get_automaton()
    n = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")
    # DFA head.
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 0)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual(hyfa.tran_aut, {0: 2})
    # NFA tail #0.
    self.assertEqual(hn0.states.keys(), n.states.keys())
    self.assertEqual(hn0.alphabet, n.alphabet)
    self.assertEqual(hn0.start, n.start)
    self.assertEqual(hn0.final, n.final)
    self.assertEqual(hn0.transitions, n.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
def _test_compute_2(self):
    """compute()

    One RE whose Hybrid FA has a single NFA tail; head and tail are
    each checked against a reference automaton.
    """
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.set_text("/abcd/")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    parser_dfa = pcre_parser()
    parser_dfa.set_text("/ab/")
    ref_dfa = b_dfa()
    ref_dfa.create_by_parser(parser_dfa)
    ref_dfa.determinise()
    head = hyfa.dfa.get_automaton()
    tail0 = hyfa.nfas[0].get_automaton()
    expected_head = ref_dfa.get_automaton()
    expected_tail = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/hybrid_fa/tests_data/test_compute_2_nfa0.nfa_data")
    # The DFA head equals the determinised /ab/, but keeps no finals.
    self.assertEqual(head.states.keys(), expected_head.states.keys())
    self.assertEqual(head.alphabet, expected_head.alphabet)
    self.assertEqual(head.start, expected_head.start)
    self.assertEqual(len(head.final), 0)
    self.assertEqual(head.transitions, expected_head.transitions)
    self.assertTrue(head.Flags['Hybrid FA - DFA part'])
    self.assertTrue(head.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual(hyfa.tran_aut, {0: 2})
    # The single NFA tail equals the stored reference.
    self.assertEqual(tail0.states.keys(), expected_tail.states.keys())
    self.assertEqual(tail0.alphabet, expected_tail.alphabet)
    self.assertEqual(tail0.start, expected_tail.start)
    self.assertEqual(tail0.final, expected_tail.final)
    self.assertEqual(tail0.transitions, expected_tail.transitions)
    self.assertTrue(tail0.Flags['Hybrid FA - one NFA part'])
def _test_search_2(self):
    """search()

    Several REs with no blow-up patterns: the match vector for each
    input must be exactly as expected.
    """
    hyfa = JHybridFA()
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_2.re")
    hyfa.set_parser(pcre_parser())
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    cases = (
        ("0123", [1, 0, 0, 0]),
        ("uvwx", [0, 0, 1, 0]),
        ("abcd uvwx", [0, 1, 1, 0]),
        ("cdefgh", [0, 0, 0, 0]),
    )
    for data, expected in cases:
        self.assertEqual(hyfa.search(data), expected)
def _test_search_2(self):
    """search() with several non-blow-up REs; verify the match vectors."""
    hyfa = JHybridFA()
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_2.re")
    hyfa.set_parser(pcre_parser())
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    self.assertEqual(hyfa.search("0123"), [1, 0, 0, 0])
    self.assertEqual(hyfa.search("uvwx"), [0, 0, 1, 0])
    self.assertEqual(hyfa.search("abcd uvwx"), [0, 1, 1, 0])
    self.assertEqual(hyfa.search("cdefgh"), [0, 0, 0, 0])
def _test_search_2(self):
    """search()

    Several REs where the computed Hybrid FA has NFA parts; the match
    vector for each input must be exactly as expected.
    """
    parser = pcre_parser()
    parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_search_1_pattern.re")
    hyfa = hybrid_fa()
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    for data, expected in (("abcd", [1, 0]),
                           ("bce", [0, 1]),
                           ("cdefgh", [0, 0])):
        self.assertEqual(hyfa.search(data), expected)
def _test_search_3(self):
    """search()

    Several REs, some with blow-up patterns at their start; the match
    vector for each input must be exactly as expected.
    """
    hyfa = JHybridFA()
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_3.re")
    hyfa.set_parser(pcre_parser())
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    cases = (
        ("0123", [1, 0, 0, 0]),
        ("uvwx", [0, 0, 1, 0]),
        ("abcd uvwx", [0, 1, 1, 1]),
        ("abcd agcd", [0, 1, 0, 1]),
        ("cdefgh", [0, 0, 0, 0]),
    )
    for data, expected in cases:
        self.assertEqual(hyfa.search(data), expected)
def _test_search_1(self):
    """search()

    Several REs where the computed automaton has NFA parts; the match
    vector for each input must be exactly as expected.
    """
    hyfa = JHybridFA()
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_1.re")
    hyfa.set_parser(pcre_parser())
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    for data, expected in (("0123 uvwx", [1, 0, 1, 0]),
                           ("abcd abgggcd", [0, 1, 0, 1]),
                           ("aaaaa", [0, 0, 0, 0])):
        self.assertEqual(hyfa.search(data), expected)
def _test_compute2(self):
    """compute() for /^(a|b)+/.

    The resulting Delay DFA must equal a hand-built reference
    automaton containing one default transition back to the start.
    """
    ddfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    ddfa.create_by_parser(parser)
    ddfa.compute()
    self.assertTrue(ddfa.get_compute())
    a = ddfa.get_automaton()
    # Hand-built expected automaton.
    b = nfa_data()
    b.add_symbols(b_Sym_char("a", "a", 0))
    b.add_symbols(b_Sym_char("b", "b", 1))
    b.add_symbols(DEF_SYMBOLS("default", 2))
    b.add_states(b_State(0, set()))
    b.add_states(b_State(1, set([0])))
    b.start = 0
    b.final = set([1])
    for transition in ((0, 0, 1), (0, 1, 1), (1, 2, 0)):
        b.add_transitions(transition)
    self.assertEqual(a.states.keys(), b.states.keys())
    self.assertEqual(a.start, b.start)
    self.assertEqual(a.final, b.final)
    self.assertEqual(a.alphabet, b.alphabet)
    self.assertEqual(a.transitions, b.transitions)
    self.assertTrue(a.Flags["Delay DFA"])
def test__replace_length_restriction_with_a_closure(self):
    """_replace_length_restriction_with_a_closure(NFA)

    /ab.{4}cd/ - test with an expression that contains .{4}.  The
    counting constraint must be replaced by a closure and the result
    must equal a stored reference automaton.
    """
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab.{4}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    # Named `converted` (not `copy`) so the stdlib copy module is not
    # shadowed inside this method.
    converted = history._replace_length_restriction_with_a_closure(NFA)
    result = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")
    # assertEqual shows both operands on failure, unlike the previous
    # assertTrue(a == b), which only reports "False is not true".
    self.assertEqual(history.flags_cnt, {4: "4"})
    self.assertEqual(sorted(converted.states.keys()),
                     sorted(result.states.keys()))
    self.assertEqual(converted.alphabet, result.alphabet)
    self.assertEqual(converted.start, result.start)
    self.assertEqual(converted.final, result.final)
    self.assertEqual(converted.transitions, result.transitions)
    self.assertEqual(converted.Flags, result.Flags)
def _test_search_2(self):
    """search()

    Several REs where the computed Hybrid FA has NFA parts; verify the
    match vector for each input.
    """
    parser = pcre_parser()
    parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_search_1_pattern.re")
    automaton = hybrid_fa()
    automaton.create_by_parser(parser)
    automaton.set_special_min_depth(2)
    automaton.set_max_head_size(-1)
    automaton.set_max_tx(-1)
    automaton.compute()
    self.assertTrue(automaton.get_compute())
    self.assertEqual(automaton.search("abcd"), [1, 0])
    self.assertEqual(automaton.search("bce"), [0, 1])
    self.assertEqual(automaton.search("cdefgh"), [0, 0])
def _test_search_1(self):
    """search() on a Jumping Hybrid FA with NFA parts; verify the
    match vector for each input."""
    automaton = JHybridFA()
    automaton.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_1.re")
    automaton.set_parser(pcre_parser())
    automaton.compute()
    self.assertTrue(automaton.get_compute())
    self.assertEqual(automaton.search("0123 uvwx"), [1, 0, 1, 0])
    self.assertEqual(automaton.search("abcd abgggcd"), [0, 1, 0, 1])
    self.assertEqual(automaton.search("aaaaa"), [0, 0, 0, 0])
def _test_compute2(self):
    """compute() for /^(a|b)+/ against a hand-built reference."""
    delay_dfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    delay_dfa.create_by_parser(parser)
    delay_dfa.compute()
    self.assertTrue(delay_dfa.get_compute())
    actual = delay_dfa.get_automaton()
    # Build the expected automaton: states 0 and 1, symbols a/b plus
    # the default symbol, and a default transition from 1 back to 0.
    expected = nfa_data()
    expected.add_symbols(b_Sym_char("a", "a", 0))
    expected.add_symbols(b_Sym_char("b", "b", 1))
    expected.add_symbols(DEF_SYMBOLS("default", 2))
    expected.add_states(b_State(0, set()))
    expected.add_states(b_State(1, set([0])))
    expected.start = 0
    expected.final = set([1])
    expected.add_transitions((0, 0, 1))
    expected.add_transitions((0, 1, 1))
    expected.add_transitions((1, 2, 0))
    self.assertEqual(actual.states.keys(), expected.states.keys())
    self.assertEqual(actual.start, expected.start)
    self.assertEqual(actual.final, expected.final)
    self.assertEqual(actual.alphabet, expected.alphabet)
    self.assertEqual(actual.transitions, expected.transitions)
    self.assertTrue(actual.Flags["Delay DFA"])
def _test_compute1(self):
    """compute() for /^abcd/.

    The Delay DFA must equal the plain DFA once the default symbol is
    appended to the reference alphabet.
    """
    ddfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^abcd/")
    ddfa.create_by_parser(parser)
    ddfa.compute()
    self.assertTrue(ddfa.get_compute())
    dfa = b_dfa()
    dfa.create_by_parser(parser)
    dfa.compute()
    a = ddfa.get_automaton()
    b = dfa.get_automaton()
    # Append the default symbol so the alphabets are comparable.
    b.add_symbols(DEF_SYMBOLS("default", len(b.alphabet.keys())))
    self.assertEqual(a.states.keys(), b.states.keys())
    self.assertEqual(a.start, b.start)
    self.assertEqual(a.final, b.final)
    self.assertEqual(a.alphabet, b.alphabet)
    self.assertEqual(a.transitions, b.transitions)
    self.assertTrue(a.Flags["Delay DFA"])
def _test_search_3(self):
    """search() with REs that blow up at their start; verify the
    match vector for each input."""
    automaton = JHybridFA()
    automaton.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_search_3.re")
    automaton.set_parser(pcre_parser())
    automaton.compute()
    self.assertTrue(automaton.get_compute())
    self.assertEqual(automaton.search("0123"), [1, 0, 0, 0])
    self.assertEqual(automaton.search("uvwx"), [0, 0, 1, 0])
    self.assertEqual(automaton.search("abcd uvwx"), [0, 1, 1, 1])
    self.assertEqual(automaton.search("abcd agcd"), [0, 1, 0, 1])
    self.assertEqual(automaton.search("cdefgh"), [0, 0, 0, 0])
def _test_compute_3(self):
    """compute()

    Several REs where the computed Hybrid FA has only a DFA part and
    no NFA tails: it must equal the directly determinised automaton.
    """
    pattern = aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_3_pattern.re"
    parser = pcre_parser()
    parser.load_file(pattern)
    hyfa = hybrid_fa()
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(10)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    parser = pcre_parser()
    parser.load_file(pattern)
    dfa = b_dfa()
    dfa.create_by_parser(parser)
    dfa.determinise()
    hd = hyfa.dfa.get_automaton()
    d = dfa.get_automaton()
    # DFA part equals the reference automaton.
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(hd.final, d.final)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    # No NFA tails were created.
    self.assertEqual(len(hyfa.nfas), 0)
    self.assertEqual(hyfa.tran_aut, {})
def test__is_special(self):
    """_is_special()

    Checks the border-state decision for the three configuration
    axes: minimum depth, maximum head size and maximum outgoing
    transitions.
    """
    parser = pcre_parser()
    parser.set_text("/abcd/")
    hyfa = hybrid_fa()
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    # Depth threshold 2: some states are borders, some are not.
    for state in (0, 2):
        self.assertFalse(hyfa._is_special(state))
    for state in (4, 6, 8):
        self.assertTrue(hyfa._is_special(state))
    # Depth threshold 6: no state is a border.
    hyfa.set_special_min_depth(6)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    for state in (0, 2, 4, 6, 8):
        self.assertFalse(hyfa._is_special(state))
    # Head-size limit 3.
    hyfa.set_special_min_depth(-1)
    hyfa.set_max_head_size(3)
    hyfa.set_max_tx(-1)
    hyfa._head_size = 2  # head not full -> state is not a border
    self.assertFalse(hyfa._is_special(0))
    hyfa._head_size = 4  # head full -> all states are borders
    self.assertTrue(hyfa._is_special(0))
    # Outgoing-transition limit 1.
    hyfa.set_max_tx(1)
    hyfa.set_max_head_size(-1)
    hyfa.set_special_min_depth(-1)
    self.assertTrue(hyfa._is_special(0))
    self.assertTrue(hyfa._is_special(2))
def _test_compute_4(self):
    """compute()

    Several REs producing a Hybrid FA with two NFA tails; the DFA head
    and both tails are compared against stored reference automata.
    """
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_4_pattern.re")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    tests_data = aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data"
    hd = hyfa.dfa.get_automaton()
    d = nfa_data().load_from_file(tests_data + "/test_compute_4_dfa.nfa_data")
    # DFA head.
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertTrue(len(hd.final) == 0)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    # Two NFA tails.
    self.assertEqual(len(hyfa.nfas), 2)
    self.assertEqual(hyfa.tran_aut, {0: 4, 1: 5})
    for index, name in ((0, "/test_compute_4_nfa0.nfa_data"),
                        (1, "/test_compute_4_nfa1.nfa_data")):
        tail = hyfa.nfas[index].get_automaton()
        ref = nfa_data().load_from_file(tests_data + name)
        self.assertEqual(tail.states.keys(), ref.states.keys())
        self.assertEqual(tail.alphabet, ref.alphabet)
        self.assertEqual(tail.start, ref.start)
        self.assertEqual(tail.final, ref.final)
        self.assertEqual(tail.transitions, ref.transitions)
        self.assertTrue(tail.Flags['Hybrid FA - one NFA part'])
def _test_compute_3(self):
    """compute() where the Hybrid FA is a pure DFA with no NFA tails."""
    parser = pcre_parser()
    parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_3_pattern.re")
    hyfa = hybrid_fa()
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(10)
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    ref_parser = pcre_parser()
    ref_parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_compute_3_pattern.re")
    ref = b_dfa()
    ref.create_by_parser(ref_parser)
    ref.determinise()
    head = hyfa.dfa.get_automaton()
    expected = ref.get_automaton()
    # The whole Hybrid FA equals the determinised reference automaton.
    self.assertEqual(head.states.keys(), expected.states.keys())
    self.assertEqual(head.alphabet, expected.alphabet)
    self.assertEqual(head.start, expected.start)
    self.assertEqual(head.final, expected.final)
    self.assertEqual(head.transitions, expected.transitions)
    self.assertTrue(head.Flags['Hybrid FA - DFA part'])
    self.assertTrue(head.Flags['Deterministic'])
    # No NFA tails.
    self.assertEqual(len(hyfa.nfas), 0)
    self.assertEqual(hyfa.tran_aut, {})
def _test_compute_5(self):
    """compute()

    Several blow-up REs: the Jumping Hybrid FA must have two NFA
    parts, each equal to a stored reference automaton.
    """
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5.re")
    hyfa.compute()
    # self.get_compute() has to be True (the original asserted this
    # twice in a row; once is enough)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    hn1 = hyfa.nfas[1].get_automaton(False)
    d = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_dfa.nfa_data")
    n0 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_nfa0.nfa_data")
    n1 = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_5_nfa1.nfa_data")
    # test of DFA part
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 3)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 2)
    self.assertEqual({0: 0, 1: 10}, hyfa.tran_aut)
    # test of NFA part #0
    self.assertEqual(hn0.states.keys(), n0.states.keys())
    self.assertEqual(hn0.alphabet, n0.alphabet)
    self.assertEqual(hn0.start, n0.start)
    self.assertEqual(hn0.final, n0.final)
    self.assertEqual(hn0.transitions, n0.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
    # test of NFA part #1
    self.assertEqual(hn1.states.keys(), n1.states.keys())
    self.assertEqual(hn1.alphabet, n1.alphabet)
    self.assertEqual(hn1.start, n1.start)
    self.assertEqual(hn1.final, n1.final)
    self.assertEqual(hn1.transitions, n1.transitions)
    self.assertTrue(hn1.Flags['Hybrid FA - one NFA part'])
def _test_search1(self):
    """search() - the Delay DFA must match a plain DFA on every input.

    Fix: the reference b_dfa was never compute()d before search(),
    unlike every sibling search test (_test_search2/_test_search3);
    compute() is now called.
    """
    re = "/a+b*c.*a|bc+/"
    delay_dfa = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text(re)
    delay_dfa.create_by_parser(parser)
    delay_dfa.compute()
    self.assertTrue(delay_dfa.get_compute())
    aut = b_dfa()
    parser = pcre_parser()
    parser.set_text(re)
    aut.create_by_parser(parser)
    aut.compute()  # was missing; consistent with the other search tests
    self.assertEqual(delay_dfa.search("ac123ac"), aut.search("ac123ac"))
    self.assertEqual(delay_dfa.search("aacac"), aut.search("aacac"))
    self.assertEqual(delay_dfa.search("abbb"), aut.search("abbb"))
def test__discover_closure_states(self):
    """_discover_closure_states(NFA)

    For /ab[^a]{4}c|def/ the closure introduced for the counting
    constraint must be discovered at state 12.
    """
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab[^a]{4}c|def/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    # assertEqual gives a useful diff on failure, unlike
    # assertTrue(a == b).
    self.assertEqual(history._discover_closure_states(NFA_without_cnt),
                     [12])
def test__discover_closure_states(self):
    """_discover_closure_states(NFA)

    For /ab[^a]{4}c|def/ the closure introduced for the counting
    constraint must be discovered at state 12.
    """
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab[^a]{4}c|def/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    # assertEqual instead of assertTrue(a == b): failures then report
    # both the actual and the expected list.
    self.assertEqual(history._discover_closure_states(NFA_without_cnt),
                     [12])
def test_get_default_trans_num(self):
    """get_default_trans_num()

    Counts default transitions for the two automata from the
    test_compute cases plus one loaded from an nfa_data file.
    """
    ddfa1 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^abcd/")
    ddfa1.create_by_parser(parser)
    ddfa1.compute()
    self.assertTrue(ddfa1.get_compute())
    ddfa2 = DELAY_DFA()
    parser = pcre_parser()
    parser.set_text("/^(a|b)+/")
    ddfa2.create_by_parser(parser)
    ddfa2.compute()
    self.assertTrue(ddfa2.get_compute())
    # Third automaton is created from stored nfa_data, determinised
    # here and computed without the final create step argument.
    ddfa3 = DELAY_DFA()
    tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    nfaData = nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data")
    ddfa3.create_from_nfa_data(nfaData)
    ddfa3.determinise()
    ddfa3.compute(False)
    self.assertTrue(ddfa3.get_compute())
    self.assertEqual(ddfa1.get_default_trans_num(), 0)
    self.assertEqual(ddfa2.get_default_trans_num(), 1)
    self.assertEqual(ddfa3.get_default_trans_num(), 4)
def test_get_default_trans_num(self):
    """get_default_trans_num() for the automata built in test_compute."""
    # Two automata built from RE text.
    automata = []
    for pattern in ("/^abcd/", "/^(a|b)+/"):
        aut = DELAY_DFA()
        parser = pcre_parser()
        parser.set_text(pattern)
        aut.create_by_parser(parser)
        aut.compute()
        self.assertTrue(aut.get_compute())
        automata.append(aut)
    # Third automaton built from stored nfa_data.
    aut3 = DELAY_DFA()
    tdir = aux_func.getPatternMatchDir() + "/algorithms/delay_dfa/"
    aut3.create_from_nfa_data(
        nfa_data().load_from_file(tdir + "test_data/text_ddfa.nfa_data"))
    aut3.determinise()
    aut3.compute(False)
    self.assertTrue(aut3.get_compute())
    automata.append(aut3)
    for automaton, expected in zip(automata, (0, 1, 4)):
        self.assertEqual(automaton.get_default_trans_num(), expected)
def _test_compute_1(self):
    """compute()

    Without blow-up patterns the Jumping Hybrid FA consists of the
    DFA head only; it must match a directly computed b_dfa and there
    must be no NFA tails.
    """
    hyfa = JHybridFA()
    hyfa.set_parser(pcre_parser())
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_1.re")
    hyfa.compute()
    self.assertTrue(hyfa.get_compute())
    ref_parser = pcre_parser()
    ref_parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_1.re")
    ref = b_dfa()
    ref.create_by_parser(ref_parser)
    ref.compute()
    head = hyfa.dfa.get_automaton(False)
    expected = ref.get_automaton(False)
    self.assertEqual(head.states.keys(), expected.states.keys())
    self.assertEqual(head.alphabet, expected.alphabet)
    self.assertEqual(head.start, expected.start)
    self.assertEqual(head.final, expected.final)
    self.assertEqual(head.transitions, expected.transitions)
    self.assertTrue(head.Flags['Hybrid FA - DFA part'])
    self.assertTrue(head.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 0)
    self.assertEqual(hyfa.tran_aut, {})
def test_set_parser(self):
    """set_parser()

    A pcre_parser instance is stored in _parser; any object that is
    not a parser makes set_parser raise unknown_parser.
    """
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    self.assertEqual(parser, hyfa._parser)
    # A plain string is not a parser.
    self.assertRaises(unknown_parser, hyfa.set_parser, "not_parser")
def _test_compute_4(self):
    """compute()

    More patterns where some blow up: the DFA head and the single NFA
    part are compared against stored reference automata.
    """
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_4.re")
    hyfa.compute()
    # self.get_compute() has to be True (the original asserted this
    # twice in a row; once is enough)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    d = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/j_hybrid_fa/tests_data/test_compute_4_dfa.nfa_data")
    n = nfa_data().load_from_file(
        aux_func.getPatternMatchDir() +
        "/algorithms/j_hybrid_fa/tests_data/test_compute_4_nfa0.nfa_data")
    # test of DFA part
    self.assertEqual(hd.states.keys(), d.states.keys())
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 2)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual({0: 0}, hyfa.tran_aut)
    # test of NFA part #0
    self.assertEqual(hn0.states.keys(), n.states.keys())
    self.assertEqual(hn0.alphabet, n.alphabet)
    self.assertEqual(hn0.start, n.start)
    self.assertEqual(hn0.final, n.final)
    self.assertEqual(hn0.transitions, n.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
def _test_get_trans_num_1(self):
    """get_trans_num()

    Automaton with only a DFA part must report 102 transitions.
    (The original docstring said get_state_num(), which was wrong.)
    """
    hyfa = JHybridFA()
    hyfa.set_parser(pcre_parser())
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_get_xxx_num_1.re")
    hyfa.compute()
    self.assertEqual(hyfa.get_trans_num(), 102)
def _test_report_memory_naive_2(self):
    """report_memory_naive()

    Automaton with NFA parts must report a naive memory size of 390.
    """
    hyfa = JHybridFA()
    hyfa.set_parser(pcre_parser())
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_get_xxx_num_2.re")
    hyfa.compute()
    self.assertEqual(hyfa.report_memory_naive(), 390)
def _test_get_trans_num_1(self):
    """get_trans_num() on an automaton with only a DFA part."""
    automaton = JHybridFA()
    parser = pcre_parser()
    automaton.set_parser(parser)
    automaton.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_get_xxx_num_1.re")
    automaton.compute()
    # Expected transition count for this pattern set.
    self.assertEqual(automaton.get_trans_num(), 102)
def _test_report_memory_naive_2(self):
    """report_memory_naive() on an automaton with NFA parts."""
    automaton = JHybridFA()
    parser = pcre_parser()
    automaton.set_parser(parser)
    automaton.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_get_xxx_num_2.re")
    automaton.compute()
    # Expected naive memory size for this pattern set.
    self.assertEqual(automaton.report_memory_naive(), 390)
def test__identify_fading_states(self):
    """_identify_fading_states(nfa_closure_states)

    After determinising the closure-converted automaton for
    /ab[^a]{4}c|def/, the fading states must be [5, 7, 8, 9].
    """
    par = pcre_parser(create_cnt_constr=True)
    par.set_text("/ab[^a]{4}c|def/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    NFA_without_cnt = \
        history._replace_length_restriction_with_a_closure(NFA)
    history._automaton = copy.deepcopy(NFA_without_cnt)
    history.determinise(create_table=True)
    nfa_closure_states = history._discover_closure_states(NFA_without_cnt)
    # assertEqual shows both lists on failure, unlike assertTrue(a == b).
    self.assertEqual(history._identify_fading_states(nfa_closure_states),
                     [5, 7, 8, 9])
def _test_compute_3(self):
    """compute()

    More patterns, one with a blow-up at the start of the RE: the DFA
    head and the single NFA part are compared against stored
    reference automata.

    Bug fix: list.sort() returns None, so the original
    assertEqual(a.keys().sort(), b.keys().sort()) compared
    None == None and always passed; sorted() is used instead.
    """
    hyfa = JHybridFA()
    parser = pcre_parser()
    hyfa.set_parser(parser)
    hyfa.load_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3.re")
    hyfa.compute()
    # self.get_compute() has to be True (the original asserted this
    # twice in a row; once is enough)
    self.assertTrue(hyfa.get_compute())
    hd = hyfa.dfa.get_automaton(False)
    hn0 = hyfa.nfas[0].get_automaton(False)
    d = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3_dfa.nfa_data")
    n = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/j_hybrid_fa/tests_data/test_compute_3_nfa0.nfa_data")
    # test of DFA part
    self.assertEqual(sorted(hd.states.keys()), sorted(d.states.keys()))
    self.assertEqual(hd.alphabet, d.alphabet)
    self.assertEqual(hd.start, d.start)
    self.assertEqual(len(hd.final), 3)
    self.assertEqual(hd.transitions, d.transitions)
    self.assertTrue(hd.Flags['Hybrid FA - DFA part'])
    self.assertTrue(hd.Flags['Deterministic'])
    self.assertEqual(len(hyfa.nfas), 1)
    self.assertEqual(hyfa.tran_aut, {0: 8})
    # test of NFA part #0
    self.assertEqual(sorted(hn0.states.keys()), sorted(n.states.keys()))
    self.assertEqual(hn0.alphabet, n.alphabet)
    self.assertEqual(hn0.start, n.start)
    self.assertEqual(hn0.final, n.final)
    self.assertEqual(hn0.transitions, n.transitions)
    self.assertTrue(hn0.Flags['Hybrid FA - one NFA part'])
def _test_report_memory_naive_2(self):
    """report_memory_naive()

    Test with more regular expressions where the computed automaton has
    some NFA parts.
    """
    # NOTE(review): an identical method with this name is defined again
    # later in this file; Python keeps only the last definition, so this
    # copy is shadowed -- confirm and drop one of the duplicates.
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.load_file(aux_func.getPatternMatchDir() + "/algorithms/hybrid_fa/tests_data/test_get_state_num_2.re")
    hyfa.create_by_parser(parser)
    hyfa.set_special_min_depth(2)
    # -1 presumably means "no limit" on head size / transitions --
    # verify against hybrid_fa's setter documentation
    hyfa.set_max_head_size(-1)
    hyfa.set_max_tx(-1)
    hyfa.compute()
    # expected naive memory footprint for this rule set
    self.assertEqual(hyfa.report_memory_naive(), 58)
def test__identify_fading_states(self):
    """_identify_fading_states(nfa_closure_states)

    Expect states 5, 7, 8 and 9 to fade for /ab[^a]{4}c|def/ once the
    {4} restriction is replaced by a closure and the result is
    determinised.
    """
    pattern_parser = pcre_parser(create_cnt_constr=True)
    pattern_parser.set_text("/ab[^a]{4}c|def/")
    history = HistoryCountingFA()
    history.create_by_parser(pattern_parser)
    history.remove_epsilons()
    counting_nfa = history.get_automaton(True)
    # replace the {4} length restriction with an ordinary closure
    closure_nfa = history._replace_length_restriction_with_a_closure(
        counting_nfa)
    # determinise a deep copy, keeping the closure NFA intact
    history._automaton = copy.deepcopy(closure_nfa)
    history.determinise(create_table=True)
    closure_states = history._discover_closure_states(closure_nfa)
    fading_states = history._identify_fading_states(closure_states)
    self.assertTrue(fading_states == [5, 7, 8, 9])
def _test_report_memory_naive_1(self):
    """report_memory_naive()

    Test with a single regular expression where the computed automaton
    has only a DFA part; its naive memory report must equal 40.
    """
    automaton = hybrid_fa()
    re_parser = pcre_parser()
    re_parser.set_text("/abcd/")
    automaton.create_by_parser(re_parser)
    # generous limits keep the whole automaton in the DFA head
    automaton.set_special_min_depth(10)
    automaton.set_max_head_size(10)
    automaton.set_max_tx(10)
    automaton.compute()
    self.assertEqual(automaton.report_memory_naive(), 40)
def test_report_memory_naive(self): """report_memory_naive()""" # /ab[^1234]*cd|efg/; test with an expression containing one # alternation [^1234]*, the second is not par = pcre_parser(create_cnt_constr = True) par.set_text("/ab[^1234]{3}cd|efg/") history = HistoryCountingFA() history.create_by_parser(par) history.remove_epsilons() NFA = history.get_automaton(True) NFA_without_cnt = \ history._replace_length_restriction_with_a_closure(NFA) NFA = history.get_automaton(True) history._automaton = NFA_without_cnt history.determinise(create_table = True) history.compute(NFA) self.assertTrue(history.report_memory_naive() == 208)
def test_report_memory_naive(self): """report_memory_naive()""" # /ab[^1234]*cd|efg/; test with an expression containing one # alternation [^1234]*, the second is not par = pcre_parser(create_cnt_constr=True) par.set_text("/ab[^1234]{3}cd|efg/") history = HistoryCountingFA() history.create_by_parser(par) history.remove_epsilons() NFA = history.get_automaton(True) NFA_without_cnt = \ history._replace_length_restriction_with_a_closure(NFA) NFA = history.get_automaton(True) history._automaton = NFA_without_cnt history.determinise(create_table=True) history.compute(NFA) self.assertTrue(history.report_memory_naive() == 208)
def _test_report_memory_naive_2(self):
    """report_memory_naive()

    Test with several regular expressions where the computed hybrid
    automaton keeps some NFA parts; its naive memory report must
    equal 58.
    """
    automaton = hybrid_fa()
    re_parser = pcre_parser()
    rules_path = (aux_func.getPatternMatchDir()
                  + "/algorithms/hybrid_fa/tests_data/test_get_state_num_2.re")
    re_parser.load_file(rules_path)
    automaton.create_by_parser(re_parser)
    automaton.set_special_min_depth(2)
    automaton.set_max_head_size(-1)
    automaton.set_max_tx(-1)
    automaton.compute()
    self.assertEqual(automaton.report_memory_naive(), 58)
def _test_compute_1(self):
    """compute()

    Test with one regular expression, where the computed automaton has
    only a DFA part without any NFA tails.
    """
    hyfa = hybrid_fa()
    parser = pcre_parser()
    parser.set_text("/abcd/")
    hyfa.create_by_parser(parser)
    # generous limits keep everything in the DFA head
    hyfa.set_special_min_depth(10)
    hyfa.set_max_head_size(10)
    hyfa.set_max_tx(10)
    hyfa.compute()
    # self.get_compute() has to be True
    self.assertTrue(hyfa.get_compute())
    # NOTE(review): the same parser instance is reused here after
    # hyfa.create_by_parser() already consumed it; other tests in this
    # file re-create the parser before building a second automaton --
    # confirm pcre_parser is rewindable/stateless in this usage.
    dfa = b_dfa()
    dfa.create_by_parser(parser)
    dfa.determinise()
    a = hyfa.dfa.get_automaton()
    b = dfa.get_automaton()
    # test on automaton where is only DFA part without NFA tails
    self.assertEqual(a.states.keys(), b.states.keys())
    self.assertEqual(a.alphabet, b.alphabet)
    self.assertEqual(a.start, b.start)
    self.assertEqual(a.final, b.final)
    self.assertEqual(a.transitions, b.transitions)
    self.assertTrue(a.Flags['Hybrid FA - DFA part'])
    self.assertTrue(a.Flags['Deterministic'])
    # no NFA tails and no DFA->NFA transition map expected
    self.assertEqual(len(hyfa.nfas), 0)
    self.assertEqual(hyfa.tran_aut, {})
def test__replace_length_restriction_with_a_closure(self):
    """_replace_length_restriction_with_a_closure(NFA)

    Test with /ab.{4}cd/, an expression that contains .{4}: after the
    replacement the automaton must match the reference stored on disk
    and the counting flags must record the {4} restriction.
    """
    par = pcre_parser(create_cnt_constr = True)
    par.set_text("/ab.{4}cd/")
    history = HistoryCountingFA()
    history.create_by_parser(par)
    history.remove_epsilons()
    NFA = history.get_automaton(True)
    # FIX: the original bound this result to a local named `copy`,
    # shadowing the stdlib `copy` module that this file uses elsewhere
    # (copy.deepcopy); renamed to a descriptive local.
    without_cnt = history._replace_length_restriction_with_a_closure(NFA)
    result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_counting_fa/test_data/test_data_1.nfa_data")
    # the {4} restriction must have been recorded in the flags table
    self.assertTrue(history.flags_cnt == {4: "4"})
    self.assertTrue(sorted(without_cnt.states.keys()) == sorted(result.states.keys()))
    self.assertTrue(without_cnt.alphabet == result.alphabet)
    self.assertTrue(without_cnt.start == result.start)
    self.assertTrue(without_cnt.final == result.final)
    self.assertTrue(without_cnt.transitions == result.transitions)
    self.assertTrue(without_cnt.Flags == result.Flags)
# EXAMPLE of use for PHF_DFA class if __name__ == '__main__': print("-------------------------------------------------------------------") print(" Example of use: PHF DFA ") print("-------------------------------------------------------------------") print(" Ruleset: /#include.*>/ ") print(" Faulty Table: No ") print(" State bits: 10 ") print(" Symbol bits: 12 ") print(" Fallback State: No ") print("-------------------------------------------------------------------") # create parser and load RE parser = pcre_parser() parser.set_text("/#include.*>/") # create phf_dfa automaton aut = PHF_DFA() aut.create_by_parser(parser) # redefine default PHF class so table generation won't fail in this script # it's not important right now, more about that later a = bdz() a.set_ratio(2.0) aut.set_PHF_class(a) # compute dfa and PHF table aut.compute()
def test_search(self):
    """search()

    Exercises PHF_DFA.search() on a hand-built anchored DFA, a
    hand-built unanchored DFA (plain and with faulty transitions), and
    a parser-built automaton with a fallback state.
    """
    # 1. RE /^abc/ -- no restart transitions, so only input that
    # begins with "abc" can reach the final state.
    nfaData = nfa_data()
    nfaData.states[0] = b_State(0,set())
    nfaData.states[1] = b_State(1,set())
    nfaData.states[2] = b_State(2,set())
    nfaData.states[3] = b_State(3,set([0]))
    nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
    nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
    nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
    nfaData.start = 0
    nfaData.transitions.add( (0,0,1) )
    nfaData.transitions.add( (1,1,2) )
    nfaData.transitions.add( (2,2,3) )
    nfaData.final.add(3)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut.create_from_nfa_data(nfaData)
    aut.compute()
    # [1] = match, [0] = no match
    self.assertEqual(aut.search("abc"), [1])
    self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [0])
    self.assertEqual(aut.search("ccccbbbabc"), [0])
    self.assertEqual(aut.search("ababc"), [0])
    self.assertEqual(aut.search("d"), [0])
    self.assertEqual(aut.search("cbabbacba"), [0])
    # 2. RE /abc/ -- extra transitions restart matching after a
    # mismatch, and state 3 loops on every symbol once reached.
    nfaData = nfa_data()
    nfaData.states[0] = b_State(0,set())
    nfaData.states[1] = b_State(1,set())
    nfaData.states[2] = b_State(2,set())
    nfaData.states[3] = b_State(3,set([0]))
    nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
    nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
    nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
    nfaData.start = 0
    nfaData.transitions.add( (0,0,1) )
    nfaData.transitions.add( (0,1,0) )
    nfaData.transitions.add( (0,2,0) )
    nfaData.transitions.add( (1,1,2) )
    nfaData.transitions.add( (1,0,1) )
    nfaData.transitions.add( (1,2,0) )
    nfaData.transitions.add( (2,2,3) )
    nfaData.transitions.add( (2,0,1) )
    nfaData.transitions.add( (2,1,0) )
    nfaData.transitions.add( (3,0,3) )
    nfaData.transitions.add( (3,1,3) )
    nfaData.transitions.add( (3,2,3) )
    nfaData.final.add(3)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut.create_from_nfa_data(nfaData)
    aut.compute()
    self.assertEqual(aut.search("abc"), [1])
    self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [1])
    self.assertEqual(aut.search("ccccbbbabc"), [1])
    self.assertEqual(aut.search("ababc"), [1])
    self.assertEqual(aut.search("d"), [0])
    self.assertEqual(aut.search("cbabbacba"), [0])
    # 2a. same test with faulty transitions
    aut.enable_faulty_transitions(32)
    aut.compute()
    self.assertEqual(aut.search("abc"), [1])
    self.assertEqual(aut.search("aaaaaaaaaaaaaabc"), [1])
    self.assertEqual(aut.search("ccccbbbabc"), [1])
    self.assertEqual(aut.search("ababc"), [1])
    self.assertEqual(aut.search("d"), [0])
    self.assertEqual(aut.search("cbabbacba"), [0])
    # 3. RE /#include.*>/ with enable_fallback_state
    par = pcre_parser()
    par.set_text("/#include.*>/")
    aut = PHF_DFA()
    a = bdz()
    a.set_ratio(2.5)
    a.set_iteration_limit(10)
    aut.set_PHF_class(a)
    aut.create_by_parser(par)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    self.assertEqual(aut.search("#include <stdio.h>"), [1])
    self.assertEqual(aut.search("#include <stdlib.h>"), [1])
    self.assertEqual(aut.search("#include <stdio.h>bba"), [1])
    self.assertEqual(aut.search('#include "pcre.h"'), [0])
    self.assertEqual(aut.search('asdf#include <stdio.h>'), [1])
def test_generate_PHF_table(self):
    """generate_PHF_table()

    Verifies PHF table generation: the table has the expected size,
    every automaton transition appears exactly once and at the index
    returned by the hash function, the remaining rows hold the
    "nonexistent transition" sentinel, and the faulty (compressed)
    representation round-trips.  The same checks run over four
    configurations; the original copy-pasted this verification block
    four times, so it is factored into nested helpers here.
    """
    def check_table(aut, expected_size):
        # Table must contain exactly `ran` rows, `ran` == expected_size.
        self.assertEqual(aut.ran, len(aut.trans_table))
        self.assertEqual(aut.ran, expected_size)
        # count number of unique lines in transition table
        tranCount = dict()
        for l in aut.trans_table:
            tranCount.setdefault(l[1], 0)
            tranCount[l[1]] += 1
        # every automaton transition is in the table exactly once
        for t in aut._automaton1.transitions:
            self.assertEqual(tranCount[aut._transition_rep(t)], 1)
        # rest of the rows are the all-ones nonexistent transition
        t = ([2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0])
        self.assertEqual(tranCount[aut._transition_rep(t)],
                         aut.ran - len(aut._automaton1.transitions))
        # each transition sits at the index returned by the hash function
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(rep,
                             aut.trans_table[aut.hash_function.hash(rep)][1])

    def check_faulty_representation(aut):
        # With faulty transitions enabled, column 3 of each row must
        # hold the compressed-hash representation of the transition.
        aut.enable_faulty_transitions(8)
        aut.generate_PHF_table()
        for t in aut._automaton1.transitions:
            rep = aut._transition_rep(t)
            self.assertEqual(aut.compress_hash.hash(rep),
                             aut.trans_table[aut.hash_function.hash(rep)][3])

    # Hand-built 4-state automaton over {a, b, c} used by the first two
    # configurations.
    nfaData = nfa_data()
    nfaData.states[0] = b_State(0, set())
    nfaData.states[1] = b_State(1, set())
    nfaData.states[2] = b_State(2, set())
    nfaData.states[3] = b_State(3, set([0]))
    nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
    nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
    nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
    nfaData.start = 0
    nfaData.transitions.add((0, 0, 1))
    nfaData.transitions.add((0, 1, 0))
    nfaData.transitions.add((0, 2, 0))
    nfaData.transitions.add((1, 1, 2))
    nfaData.transitions.add((1, 0, 1))
    nfaData.transitions.add((1, 2, 0))
    nfaData.transitions.add((2, 2, 3))
    nfaData.transitions.add((2, 0, 1))
    nfaData.transitions.add((2, 1, 0))
    nfaData.transitions.add((3, 0, 3))
    nfaData.transitions.add((3, 1, 3))
    nfaData.transitions.add((3, 2, 3))
    nfaData.final.add(3)

    # 1) default-sized table (limit 128 -> 384 rows)
    aut = PHF_DFA()
    a = bdz()
    a.set_limit(128)
    aut.set_PHF_class(a)
    aut._automaton1 = nfaData
    aut.generate_PHF_table()
    check_table(aut, 384)
    check_faulty_representation(aut)

    # 2) change the size of the PHF table and repeat the tests
    aut = PHF_DFA()
    a = bdz()
    a.set_ratio(6.0)
    a.set_iteration_limit(10)
    aut.set_PHF_class(a)
    aut._automaton1 = nfaData
    aut.generate_PHF_table()
    check_table(aut, 72)
    check_faulty_representation(aut)

    # 3) RE /#include.*>/s with the fallback state enabled
    par = pcre_parser()
    par.set_text("/#include.*>/s")
    aut = PHF_DFA()
    a = bdz()
    a.set_ratio(2.5)
    a.set_iteration_limit(10)
    aut.set_PHF_class(a)
    aut.create_by_parser(par)
    aut.enable_fallback_state(warning=False)
    aut.compute()
    check_table(aut, 90)
    check_faulty_representation(aut)

    # 4) same automaton with the fallback state disabled
    aut.disable_fallback_state()
    aut.compute()
    check_table(aut, 252)
    check_faulty_representation(aut)
def test_get_nfa(self):
    """get_nfa()

    If attribute _position < 0, get_nfa() must return None.  Otherwise
    the produced NFAs (compared after epsilon-transition elimination,
    as recommended) must match manually completed reference automata.
    The original repeated an identical parse/compare section ten times;
    it is factored into a data table plus nested helpers here.
    """
    def build_automaton(parser):
        # Turn the parser's current RE into an epsilon-free nfa_data.
        automat = b_Automaton()
        automat._automaton = parser.get_nfa()
        automat.remove_epsilons()
        return automat.get_automaton()

    def assert_equals_reference(cp, reference_file):
        # Compare a produced automaton against a stored reference.
        result = nfa_data().load_from_file(reference_file)
        self.assertTrue(
            sorted(cp.states.keys()) == sorted(result.states.keys()))
        self.assertTrue(cp.alphabet == result.alphabet)
        self.assertTrue(cp.start == result.start)
        self.assertTrue(cp.final == result.final)
        self.assertTrue(cp.transitions == result.transitions)
        self.assertTrue(cp.Flags == result.Flags)

    # If attribute _position < 0, check returning None.
    parser = pcre_parser()
    self.assertTrue(parser._position < 0)
    self.assertTrue(parser.get_nfa() == None)

    # (parser kwargs, RE text, advance to second line?, reference file)
    cases = [
        # 1) concatenation
        ({}, "/first/", False, "test_data/(1)pcre_get_nfa.nfa_data"),
        # 2) branch (automat create char class), iteration *
        ({}, "/[ab]cd*/", False, "test_data/(2)pcre_get_nfa.nfa_data"),
        # 3) try second RE (move to next line)
        ({}, "/abc/\n/ABC/\n", True, "test_data/(3)pcre_get_nfa.nfa_data"),
        # 4) basic counting constraint
        ({}, "/ab{5}c/", False, "test_data/(4)pcre_get_nfa.nfa_data"),
        # 5) branch, iteration +, harder counting constraint
        ({}, "/a[bc]+d{2,3}/", False,
         "test_data/(5)pcre_get_nfa.nfa_data"),
        # 6) basic counting constraint, with create_cnt_constr = True
        ({"create_cnt_constr": True}, "/ab{5}c/", False,
         "test_data/(6)pcre_get_nfa.nfa_data"),
        # 7) harder counting constraint, with create_cnt_constr = True
        ({"create_cnt_constr": True}, "/a[bc]+d{2,3}/", False,
         "test_data/(7)pcre_get_nfa.nfa_data"),
        # 8) concatenation, create_eof_symbols = True, no $
        #    (same reference as case 1)
        ({"create_eof_symbols": True}, "/first/", False,
         "test_data/(1)pcre_get_nfa.nfa_data"),
        # 9) concatenation, create_eof_symbols = True, with $
        ({"create_eof_symbols": True}, "/first$/", False,
         "test_data/(9)pcre_get_nfa.nfa_data"),
        # 10) branch, iteration +, harder counting constraint,
        #     create_eof_symbols = True and create_cnt_constr = True
        ({"create_eof_symbols": True, "create_cnt_constr": True},
         "/a[bc]+d{2,3}$/", False,
         "test_data/(10)pcre_get_nfa.nfa_data"),
    ]
    for kwargs, text, advance, reference_file in cases:
        parser = pcre_parser(**kwargs)
        parser.set_text(text)
        if advance:
            parser.next_line()
        assert_equals_reference(build_automaton(parser), reference_file)