def test_jumptargets():
    """Check which parsed-and-relabelled instructions are flagged as jump targets."""
    # Four instructions; the "-> 13" and "-> 37" references name the labels
    # of the second and third lines, so only those two should be flagged.
    bytecode_text = (
        "01: char '2f'\n"
        "13: span [(30-39)]\n"
        "37: testcode [(1)(3)(39)] -> 13\n"
        "42: jmp -> 37"
    )
    instructions = relabel(parse(bytecode_text))
    expected_flags = [False, True, True, False]
    for position, should_be_target in enumerate(expected_flags):
        assert bool(instructions[position].isjumptarget) == should_be_target
def test_json(self):
    """Run the pattern from examples/jsonpattern on the text from examples/jsoninput."""
    with open("examples/jsonpattern") as pattern_file:
        pattern_source = pattern_file.read()
    with open("examples/jsoninput") as input_file:
        subject = input_file.read()
    # Same pipeline as the other tests: pattern -> bytecode text -> instructions.
    bytecode_text = runpattern(pattern_source)
    instructions = relabel(parse(bytecode_text))
    self.run_string(instructions, subject)
def test_span(self):
    """Run a letters-then-digits pattern over a long matching input."""
    # The pattern is a run of characters in "a".."z" followed by a run of
    # characters in "0".."9" (LPeg: R = character range, ^0 = zero or more).
    pattern = 'lpeg.R("az")^0 * lpeg.R("09")^0'
    letters = "abbac" * 100 + "c"
    digits = "012312312445598" * 10
    instructions = relabel(parse(runpattern(pattern)))
    self.run_string(instructions, letters + digits)
def test_optimize_testset(self):
    """Run a grammar starting with the set "hH" with both optimisation flags on."""
    pattern = 'lpeg.P{lpeg.S"hH" * lpeg.P"allo" + 1 * lpeg.V(1)}^0'
    bytecode = relabel(parse(runpattern(pattern)))
    # Leading filler without any "allo", then text containing both
    # "Hallo" and "hallo" between other characters.
    filler = "z" * 100
    haystack = "Halloyxxddcccxxddffhallojjjjjjgfffdssadgh" * 50
    self.run_string(
        bytecode,
        filler + haystack,
        optimize_char=True,
        optimize_testchar=True,
    )
def test_optimize_char(self):
    """Run a grammar searching for the literal "Hallo" with both optimisation flags on."""
    pattern = 'lpeg.P{lpeg.P"Hallo" + 1 * lpeg.V(1)}^0'
    bytecode = relabel(parse(runpattern(pattern)))
    # 100 non-matching characters followed by 50 back-to-back "Hallo"s.
    subject = "z" * 100 + "Hallo" * 50
    self.run_string(
        bytecode,
        subject,
        optimize_char=True,
        optimize_testchar=True,
    )
def test_optimize_char_1(self):
    """Smoke-test look_for_chars and match_many_chars on a literal pattern.

    Only checks that the helpers run in principle on the bytecode for the
    five-character literal "Hallo".
    """
    # Imported inside the test, as the original does.
    from pypeg.vm import look_for_chars, match_many_chars

    pattern = 'lpeg.P"Hallo"'
    bytecode = relabel(parse(runpattern(pattern)))
    # Starting at instruction 0 the helper is expected to report 5
    # (presumably one per character of "Hallo" -- TODO confirm in pypeg.vm).
    assert look_for_chars(bytecode, 0) == 5
    assert match_many_chars(bytecode, 0, 5, "Hallo", 0)
def test_url(self):
    """Run a URL-capturing grammar over a long text containing a URL."""
    # Grammar with start rule "S"; the adjacent literals concatenate to one
    # single-line grammar string.
    pattern = (
        'lpeg.P{ "S"; '
        'S = lpeg.V("URL") + lpeg.P(1) * lpeg.V("S"), '
        'URL = lpeg.Cp()*lpeg.C(lpeg.P"http" * lpeg.V("urlchar")^3), '
        'urlchar = lpeg.R("az","AZ","09") + lpeg.S("-._~:/?#@!$&*+,;=")}^0'
    )
    bytecode = relabel(parse(runpattern(pattern)))
    # One sentence holding an https URL and, later, a dotted number sequence
    # that does not start with "http"; repeated 100 times.
    sentence = "das hier ist eine url:https://www3.hhu.de/stups/downloads/pdf/BoCuFiRi09_246.pdf und das hier nicht 192.168.13.37"
    self.run_string(bytecode, sentence * 100)
def test_relabel():
    """Check that relabel() renumbers labels and jump targets from zero."""
    bytecode_text = (
        "01: char '2f'\n"
        "13: span [(30-39)]\n"
        "37: testcode [(1)(3)(39)] -> 13"
    )
    output = relabel(parse(bytecode_text))
    # Labels 01/13/37 are expected to become 0/1/2, and the "-> 13" reference
    # to become goto=1.  Character codes in the text are hexadecimal
    # ('2f' is "/", 30-39 is "0".."9").
    expected = [
        Instruction(label=0, name="char", character="/"),
        Instruction(label=1, name="span", charlist=[CharRange("0", "9")]),
        Instruction(
            label=2,
            name="testcode",
            charlist=[SingleChar(chr(code)) for code in (0x1, 0x3, 0x39)],
            goto=1,
        ),
    ]
    for index, wanted in enumerate(expected):
        assert output[index] == wanted
def make_instructionlist(pattern):
    """Build a relabelled instruction list for *pattern*.

    Chains runpattern -> parse -> relabel and returns relabel's result.
    """
    return relabel(parse(runpattern(pattern)))
def test_set(self):
    """Run a recursive grammar that skips "a"/"z" characters until a "c"."""
    pattern = 'lpeg.P{lpeg.P"c" + (lpeg.P"a"+lpeg.P"z") * lpeg.V(1)}'
    bytecode = relabel(parse(runpattern(pattern)))
    # 800 characters drawn from "a" and "z", then the terminating "c".
    prefix = "azaaazaz" * 100
    self.run_string(bytecode, prefix + "c")
def test_email(self):
    """Run an address-capturing grammar (alnum+ "@" alnum+ ".de") over a long text."""
    pattern = '(lpeg.P{ lpeg.C(lpeg.R("az","AZ","09")^1*lpeg.P("@")*lpeg.R("az","AZ","09")^1*lpeg.P(".de")) + 1 * lpeg.V(1)})^0'
    bytecode = relabel(parse(runpattern(pattern)))
    # NOTE(review): the "[email protected]" tokens look like placeholders left by
    # an address scrubber rather than real addresses -- confirm against the
    # original fixture.  The text is reproduced unchanged here.
    sentence = " und es endet mit noch ner mail: [email protected] kommt was:, asdfasdf [email protected]"
    self.run_string(bytecode, sentence * 100)
def test_complex(self):
    """Run a repeated two-way choice ("aa" or "zz") over a long matching input."""
    pattern = '(lpeg.P"aa"+lpeg.P"zz")^0'
    bytecode = relabel(parse(runpattern(pattern)))
    # A block of "aa" pairs, a block of "zz" pairs, then two more "aa" pairs.
    pieces = ["aa" * 100, "zz" * 50, "aaaa"]
    self.run_string(bytecode, "".join(pieces))