def test_3(self):
    """Verify that non-ASCII string literals are generated unchanged.

    Every sample is wrapped in a single-quoted ``root`` definition, and the
    text returned by ``generate()`` must equal the sample itself.
    """
    samples = (
        "ü",
        "Ⱥ",
        "Ω≈ç√∫˜µ≤≥÷",
        "åß∂ƒ©˙∆˚¬…æ",
        "œ∑´®†¥¨ˆøπ“‘",
        "¡™£¢∞§¶•ªº–≠",
        "¸˛Ç◊ı˜Â¯˘¿",
        "ÅÍÎÏ˝ÓÔÒÚÆ☃",
        "Œ„´‰ˇÁ¨ˆØ∏”’",
        "`⁄€‹›fifl‡°·‚—±",
        "⅛⅜⅝⅞",
        "ЁЂЃЄЅІЇЈЉЊтуфхцчшщъыьэюя",
        "٠١٢٣٤٥٦٧٨٩",
        "田中さんにあげて下さい",
        "𠜎𠜱𠝹𠱓𠱸𠲖𠳏",
        "ثم نفس سقطت وبالتحديد،,",
        "בְּרֵאשִׁית, בָּרָא",
        "ﷺ",
        "̡͓̞ͅI̗c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̯̹̞͓G̻O̭̗̮",
        "\U0001f300\U0001f5ff",
    )
    for sample in samples:
        grammar = Grammar("root '%s'" % sample)
        produced = grammar.generate()
        self.assertEqual(produced, sample)
def test_0(self):
    """Check quoting and escaping inside grammar string literals.

    Each pair holds a grammar source and the exact text that generating
    from it must return. Both single- and double-quoted literals are
    covered, including escaped quotes and escaped backslashes.
    """
    cases = (
        ("root '\\\\'", "\\"),
        ("root \"\\\\\"", "\\"),
        ("root '\\''", "'"),
        ("root \"\\\"\"", "\""),
        ("root '\\'some'", "'some"),
        ("root \"\\\"some\"", "\"some"),
        ("root 'some\\''", "some'"),
        ("root \"some\\\"\"", "some\""),
        (r"root '\\\\\\\'\\'", "\\\\\\'\\"),
        (r'root "\\\\\\\"\\"', "\\\\\\\"\\"),
        ("root \"'some\"", "'some"),
        ("root '\"some'", "\"some"),
        ("root \"'\"", "'"),
        ("root \"''\"", "''"),
        ("root \"'''\"", "'''"),
        ("root '\"'", "\""),
        ("root '\"\"'", "\"\""),
        ("root '\"\"\"'", "\"\"\""),
    )
    for source, wanted in cases:
        grammar = Grammar(source)
        self.assertEqual(grammar.generate(), wanted)
def test_buildin_eval(self):
    """Exercise the built-in ``eval`` grammar function.

    The grammar picks a name (``a`` or ``b``) and evaluates the symbol
    ``<name>-value``. Each generated ``name:value`` pair must be
    consistent, and both names must show up at least once. A second
    grammar without the ``unused`` symbol is expected to be rejected.
    """
    # XXX: test eval of non-existent symbol
    # XXX: test eval with <>
    # XXX: test that references work within eval (ie, a-value could use an
    #      @ reference from outside the eval)
    source = ("root decl unused{0}\n"
              "decl (name) ':' eval(@1 '-value')\n"
              "name 1 'a'\n"
              " 1 'b'\n"
              "a-value 'AAA'\n"
              "b-value 'BBB'\n"
              "unused a-value b-value")
    rounds = 1000
    grammar = Grammar(source)
    wanted = {'a': 'AAA', 'b': 'BBB'}
    seen = {'a': 0, 'b': 0}
    for _ in range(rounds):
        key, val = grammar.generate().split(':')
        self.assertEqual(val, wanted[key])
        seen[key] += 1
    for key in ('a', 'b'):
        self.assertGreater(seen[key], 0)

    # test eval with unused (will raise for now ... should fix that?)
    without_unused = ("root decl\n"
                      "decl (name) ':' eval(@1 '-value')\n"
                      "name 1 'a'\n"
                      " 1 'b'\n"
                      "a-value 'AAA'\n"
                      "b-value 'BBB'")
    with self.assertRaisesRegex(IntegrityError, r'^Unused symbols:'):
        Grammar(without_unused)
def test_funcs(self):
    """Check that user-supplied Python functions can be called from a grammar.

    ``zero`` and ``alpha`` are passed to ``Grammar`` as keyword arguments.
    Every generated line is matched against the pattern for its prefix.
    """
    def zero(inp):
        return inp.replace("0", "z")

    def alpha(inp, rep):
        return "%s/%s" % (rep, inp.replace("a", rep))

    source = ("root func{100}\n"
              "func 1 'z' zero(nuvar) '\\n'\n"
              " 1 'a' alpha(alvar , '*,' rep) '\\n'\n"
              " 1 nuvar '\\n'\n"
              " 1 alvar '\\n'\n"
              "nuvar 'n' /[0-9]{6}/\n"
              "alvar 'c' /[a-z]{6}/\n"
              "rep /[0-9]/")
    grammar = Grammar(source, zero=zero, alpha=alpha)

    # Ordered (prefix, pattern) pairs; only the first matching prefix applies.
    checks = (
        ("zn", r"^[1-9z]{6}$"),
        ("a", r"^(\*,[0-9])/c(\1|[b-z]){6}$"),
        ("n", r"^[0-9]{6}$"),
        ("c", r"^[a-z]{6}$"),
    )
    for line in grammar.generate().splitlines():
        self.assertTrue(line.startswith("zn") or line[0] in "anc")
        for prefix, pattern in checks:
            if line.startswith(prefix):
                self.assertRegex(line[len(prefix):], pattern)
                break
def test_file(self):
    """Build a grammar from a UTF-8 text file object and check its output."""
    with io.open('a.gmr', 'w+', encoding='utf-8') as handle:
        handle.write('root "aü"')
        # Rewind so Grammar reads from the start of what was just written.
        handle.seek(0)
        grammar = Grammar(handle)
        self.assertEqual(grammar.generate(), 'aü')
def test_binfile(self):
    """Build a grammar from a file object opened in binary mode."""
    with open('a.gmr', 'w+b') as handle:
        handle.write(b'root "a"')
        # Rewind so Grammar reads from the start of what was just written.
        handle.seek(0)
        grammar = Grammar(handle)
        self.assertEqual(grammar.generate(), 'a')
def test_import_with_unicode(self):
    """Import a grammar file whose content holds a UTF-8 encoded character."""
    payload = 'a "ü"'.encode("utf-8")
    with open('a.gmr', 'wb') as handle:
        handle.write(payload)
    # The file is closed before the importing grammar is parsed.
    grammar = Grammar("b import('a.gmr')\n"
                      "root b.a")
    self.assertEqual(grammar.generate(), 'ü')
def test_simple(self):
    """Import a symbol from another grammar file and generate from it."""
    with open('a.gmr', 'w') as handle:
        handle.write('a "A"')
    # The file is closed before the importing grammar is parsed.
    grammar = Grammar("b import('a.gmr')\n"
                      "root b.a")
    produced = grammar.generate()
    self.assertEqual(produced, 'A')
def test_notfound_import(self):
    """Check the errors raised for malformed or unresolvable imports."""
    # (expected exception, expected message pattern, grammar source)
    failures = (
        (ParseError, r'^Error parsing string', "a import()"),
        (IntegrityError, r'^Could not find imported grammar', "a import('')"),
    )
    for exc_type, pattern, source in failures:
        with self.assertRaisesRegex(exc_type, pattern):
            Grammar(source)
def test_builtin_id(self):
    """Exercise the built-in ``id`` grammar function.

    Two consecutive generations must both return ``"0 1 2"``, and calling
    ``id`` with an argument must fail at generation time.
    """
    grammar = Grammar("root id() ' ' id() ' ' id()")
    for _ in range(2):
        self.assertEqual(grammar.generate(), "0 1 2")

    bad_call = Grammar("root id('')")
    with self.assertRaisesRegex(GenerationError,
                                r'^TypeError: id\(\) takes 0 arguments \(1 given\)'):
        bad_call.generate()
def test_2(self):
    """Exercise the repeat-sample operator (``<min,max>``).

    Covers sources that must be rejected, sources with one fixed output,
    a weighted two-way choice, and a choice whose alternatives are equal.
    """
    # Sources expected to fail the "exactly one ChoiceSymbol" check.
    rejected = (
        'root "A" <1,10>',
        'root (a a) <1,10>\n'
        'a 1 "A"',
    )
    for source in rejected:
        with self.assertRaisesRegex(IntegrityError, r'^Expecting exactly one ChoiceSymbol'):
            Grammar(source)

    # Sources that must always generate the same text.
    fixed_output = (
        ('root a<1,10>\n'
         'a 1 "A"', "A"),
        ('root ("a" a)<1,10>\n'
         'a 1 "A"', "aA"),
        ('root a<1,10>\n'
         'a "a" b\n'
         'b 1 "A"', "aA"),
    )
    for source, wanted in fixed_output:
        grammar = Grammar(source)
        for _ in range(100):
            self.assertEqual(grammar.generate(), wanted)

    # Weighted two-way choice: tally each possible output.
    grammar = Grammar('root a <1,10>\n'
                      'a .9 "A"\n'
                      ' .1 "B"')
    tally = dict.fromkeys(("A", "B", "BA", "AB"), 0)
    for _ in range(1000):
        tally[grammar.generate()] += 1
    self.assertGreater(tally["AB"] + tally["BA"], tally["A"] + tally["B"])
    self.assertGreater(tally["AB"], tally["BA"])
    self.assertGreater(tally["A"], tally["B"])

    grammar = Grammar('root ("A"|"A")<1,10>')
    for _ in range(100):
        self.assertIn(grammar.generate(), {"A", "AA"})
def test_7(self):
    """Check that weights inside a nested choice are ignored.

    Regression test: this has gone wrong before. The share of ``'a'``
    among 10000 repeats must be within ``DELTA`` of the remaining share.
    """
    grammar = Grammar("root a {10000}\n"
                      "b .9 'b'\n"
                      "a .1 'a'\n"
                      " .1 b")
    text = grammar.generate()
    a_share = sum(1 for ch in text if ch == 'a') / 10000.0
    b_share = 1.0 - a_share
    self.assertAlmostEqual(a_share, b_share, delta=DELTA)
def test_limit(self):
    """Check that output generated with ``limit=10`` is at most 10 characters."""
    source = ("root foo bar\n"
              "bar (@foo bar) {1}\n"
              "foo 'i0'")
    grammar = Grammar(source, limit=10)
    produced = grammar.generate()
    self.assertLessEqual(len(produced), 10)
def test_2(self):
    """Tracked symbol passed as the second argument of a function.

    ``esc`` returns only its first argument, so the second output line
    must be the literal ``not``.
    """
    source = ("root id '\\n' esc('not', @id)\n"
              "id 'id' /[0-9]/")
    grammar = Grammar(source, esc=lambda x, y: x)
    first, second = grammar.generate().splitlines()
    self.assertRegex(first, r"^id[0-9]$")
    self.assertEqual(second, "not")
def test_2(self):
    """Check that invalid choice weights are rejected with IntegrityError."""
    # NOTE(review): the second pattern ends in ``\(*``, which matches zero or
    # more parentheses and so constrains nothing after "defined ".
    # Possibly ``\(.*`` was intended -- confirm.
    failures = (
        (r'^Invalid weight value for choice.*',
         "root 1 '1'\n"
         " 2 '2'\n"),
        (r'^Symbol -1 used but not defined \(*',
         "root -1 '1'\n"),
    )
    for pattern, source in failures:
        with self.assertRaisesRegex(IntegrityError, pattern):
            Grammar(source)
def balanced_choice(self, grammar, values, iters=2000):
    """Assert that ``grammar`` produces each of ``values`` equally often.

    A ``Grammar`` is built from ``grammar`` and generated ``iters`` times.
    Outputs are tallied per value; an output not in ``values`` raises
    ``KeyError``. Each observed share must be within ``DELTA`` of
    ``1 / len(values)``.
    """
    tally = dict.fromkeys(values, 0)
    generator = Grammar(grammar)
    for _ in range(iters):
        produced = generator.generate()
        tally[produced] += 1
    log.debug("balanced_choice(%s) -> %s", values, tally)
    for hits in tally.values():
        observed = float(hits) / iters
        self.assertAlmostEqual(observed, 1.0 / len(values), delta=DELTA)
def test_imported_choice(self):
    """Check that repeat sample (``<*>``) works on an imported choice."""
    with open('a.gmr', 'w') as handle:
        handle.write('a 1 "A"')
    # The file is closed before the importing grammar is parsed.
    source = ("b import('a.gmr')\n"
              "root a<*>\n"
              "a b.a")
    grammar = Grammar(source)
    self.assertEqual(grammar.generate(), 'A')
def test_2(self):
    """Check that a literal whose closing quote is escaped is rejected."""
    # One single-quoted and one double-quoted variant.
    unterminated = (
        r"root '\\\\\\\'",
        r'root "\\\\\\\"',
    )
    for source in unterminated:
        with self.assertRaisesRegex(ParseError, r'^Unterminated string literal'):
            Grammar(source)
def test_unused_import_sym(self):
    """Check that an imported file may define symbols the importer never uses."""
    imported = ('a "A"\n'
                'b "B"')
    with open('a.gmr', 'w') as handle:
        handle.write(imported)
    # Only ``a.a`` is referenced; ``b`` in the imported file stays unused.
    grammar = Grammar('a import("a.gmr")\n'
                      'root a.a')
    self.assertEqual(grammar.generate(), "A")
def test_3(self):
    """Tracked symbol defined inside a function call and referenced later.

    ``esc`` echoes its argument followed by a line of two-digit hex codes.
    The later ``@id`` reference must repeat the value ``esc`` received.
    """
    def as_hex(text):
        return "".join("%02x" % ord(c) for c in text)

    source = ("root esc(id) '\\n' @id\n"
              "id 'id' /[0-9]/")
    grammar = Grammar(source, esc=lambda x: "%s\n%s" % (x, as_hex(x)))
    first, hex_line, reference = grammar.generate().splitlines()
    self.assertRegex(first, r"^id[0-9]$")
    self.assertEqual(as_hex(first), hex_line)
    self.assertEqual(first, reference)
def test_1(self):
    """Tracked symbol referenced inside a function argument.

    ``esc`` puts a backslash before every single quote, so the second
    output line must be the first one wrapped in escaped quotes.
    """
    source = ("root id '\\n' esc(\"'\" @id \"'\")\n"
              "id 'id' /[0-9]/")
    grammar = Grammar(source, esc=lambda x: re.sub(r"'", "\\'", x))
    for _ in range(100):
        first, second = grammar.generate().splitlines()
        self.assertRegex(first, r"^id[0-9]$")
        self.assertEqual(second, "\\'%s\\'" % first)
def test_builtin_id(self):
    """Exercise the built-in ``id`` grammar function.

    Two consecutive generations must both return ``"0 1 2"``, and calling
    ``id`` with an argument must fail at generation time.
    """
    counter_grammar = Grammar("root id() ' ' id() ' ' id()")
    for _ in range(2):
        self.assertEqual(counter_grammar.generate(), "0 1 2")

    with_argument = Grammar("root id('')")
    with self.assertRaisesRegex(GenerationError,
                                r'^TypeError: id\(\) takes 0 arguments \(1 given\)'):
        with_argument.generate()
def test_2(self):
    """Tracked symbol passed as the second argument of a function.

    ``esc`` returns only its first argument, so the second output line
    must be the literal ``not``.
    """
    source = ("root id '\\n' esc('not', @id)\n"
              "id 'id' /[0-9]/")
    grammar = Grammar(source, esc=lambda x, y: x)
    lines = grammar.generate().splitlines()
    definition, usage = lines
    self.assertRegex(definition, r"^id[0-9]$")
    self.assertEqual(usage, "not")
def test_nested(self):
    """Check that two grammar files importing each other can be loaded.

    ``a.gmr`` imports ``b.gmr``, which imports ``a.gmr`` again. Generating
    from ``a.gmr`` must return ``"AA"``.
    """
    files = (
        ('a.gmr', ['b import("b.gmr")\n',
                   'root a b.a\n',
                   'a "A"']),
        ('b.gmr', ['x import("a.gmr")\n',
                   'a @x.a']),
    )
    for path, pieces in files:
        with open(path, 'w') as handle:
            handle.write("".join(pieces))
    with open('a.gmr') as handle:
        grammar = Grammar(handle)
        self.assertEqual(grammar.generate(), "AA")
def test_0(self):
    """Tracked symbol used as the argument of nested function calls.

    The output is the generated letter, then ``"ab"`` (the prefixes added
    by ``a`` and ``b``), then the same letter again.
    """
    source = ("root id a(b(@id))\n"
              "id /[a-z]/\n")
    grammar = Grammar(source,
                      a=lambda x: "a" + x,
                      b=lambda x: "b" + x)
    for _ in range(100):
        produced = grammar.generate()
        self.assertEqual(produced[0], produced[-1])
        self.assertEqual(produced[1:-1], "ab")
def test_7(self):
    """Check that weights inside a nested choice are ignored.

    Regression test: this has gone wrong before. The share of ``'a'``
    among 10000 repeats must be within ``DELTA`` of the remaining share.
    """
    source = ("root a {10000}\n"
              "b .9 'b'\n"
              "a .1 'a'\n"
              " .1 b")
    produced = Grammar(source).generate()
    share_a = sum(1 for ch in produced if ch == 'a') / 10000.0
    share_b = 1.0 - share_a
    self.assertAlmostEqual(share_a, share_b, delta=DELTA)
def test_3(self):
    """Tracked symbol defined inside a function call and referenced later.

    ``esc`` echoes its argument followed by a line of two-digit hex codes.
    The later ``@id`` reference must repeat the value ``esc`` received.
    """
    def hex_codes(text):
        return "".join("%02x" % ord(c) for c in text)

    grammar = Grammar(
        "root esc(id) '\\n' @id\n"
        "id 'id' /[0-9]/",
        esc=lambda x: "%s\n%s" % (x, hex_codes(x)))
    definition, encoded, usage = grammar.generate().splitlines()
    self.assertRegex(definition, r"^id[0-9]$")
    self.assertEqual(hex_codes(definition), encoded)
    self.assertEqual(definition, usage)
def test_1(self):
    """Tracked symbol referenced inside a function argument.

    ``esc`` puts a backslash before every single quote, so the second
    output line must be the first one wrapped in escaped quotes.
    """
    grammar = Grammar(
        "root id '\\n' esc(\"'\" @id \"'\")\n"
        "id 'id' /[0-9]/",
        esc=lambda x: re.sub(r"'", "\\'", x))
    for _ in range(100):
        definition, usage = grammar.generate().splitlines()
        self.assertRegex(definition, r"^id[0-9]$")
        wanted = "\\'%s\\'" % definition
        self.assertEqual(usage, wanted)
def test_0(self):
    """Tracked symbol used as the argument of nested function calls.

    The output is the generated letter, then ``"ab"`` (the prefixes added
    by ``a`` and ``b``), then the same letter again.
    """
    grammar = Grammar(
        "root id a(b(@id))\n"
        "id /[a-z]/\n",
        a=lambda x: "a" + x,
        b=lambda x: "b" + x)
    for _ in range(100):
        text = grammar.generate()
        self.assertEqual(text[0], text[-1])
        self.assertEqual(text[1:-1], "ab")
def test_import_file_containing_eval(self):
    """Check that ``eval`` works in a grammar reached through two imports.

    The top-level grammar imports ``a.gmr``, which imports ``b.gmr``, where
    ``X`` is defined as ``eval("Z")``.
    """
    files = (
        ('a.gmr', ('IB import("b.gmr")\n',
                   'B IB.X\n')),
        ('b.gmr', ('X eval("Z")\n',
                   'Z "z"\n')),
    )
    for path, chunks in files:
        with open(path, 'w') as handle:
            for chunk in chunks:
                handle.write(chunk)
    grammar = Grammar('A import("a.gmr")\n'
                      'root A.B\n')
    self.assertEqual(grammar.generate(), 'z')
def balanced_choice(self, grammar, values, iters=2000):
    """Assert that ``grammar`` produces each of ``values`` equally often.

    A ``Grammar`` is built from ``grammar`` and generated ``iters`` times.
    Outputs are tallied per value; an output not in ``values`` raises
    ``KeyError``. Each observed share must be within ``DELTA`` of
    ``1 / len(values)``.
    """
    counts = dict.fromkeys(values, 0)
    gmr = Grammar(grammar)
    for _ in range(iters):
        outcome = gmr.generate()
        counts[outcome] += 1
    log.debug("balanced_choice(%s) -> %s", values, counts)
    for count in counts.values():
        share = float(count) / iters
        self.assertAlmostEqual(share, 1.0 / len(values), delta=DELTA)
def test_0(self):
    """Check that a basic backreference repeats the captured text.

    Each grammar produces one captured character followed by ``@1``, so
    both characters of the output must be equal.
    """
    sources = (
        "root (/[0-9]/) @1",
        "root (/[0-9]/|/[a-z]/) @1",
    )
    for source in sources:
        grammar = Grammar(source)
        for _ in range(100):
            captured, repeated = grammar.generate()
            self.assertEqual(captured, repeated)
def test_1(self):
    """Check that choice weights give the expected output proportions.

    Equal weights are checked with ``balanced_choice``. For each unequal
    weighting, the observed share of every outcome over ``iters``
    generations must be within ``DELTA`` of its expected share.
    """
    iters = 10000
    self.balanced_choice("root 1 '1'\n"
                         " 1 '2'\n"
                         " 1 '3'", ['1', '2', '3'])

    # (grammar source, expected shares of outcomes 1, 2 and 3)
    weighted = (
        ("root .5 '1'\n"
         " 1 '2'\n"
         " .5 '3'", (0.25, 0.5, 0.25)),
        ("root .3 '1'\n"
         " .1 '2'\n"
         " .1 '3'", (0.6, 0.2, 0.2)),
        ("root .25 '1'\n"
         " .25 '2'\n"
         " 1 '3'", (1.0 / 6, 1.0 / 6, 2.0 / 3)),
    )
    for source, shares in weighted:
        grammar = Grammar(source)
        hits = {1: 0, 2: 0, 3: 0}
        for _ in range(iters):
            hits[int(grammar.generate())] += 1
        for outcome, share in zip((1, 2, 3), shares):
            self.assertAlmostEqual(float(hits[outcome]) / iters, share, delta=DELTA)
def test_3(self):
    """Check that backreferences on different lines stay independent.

    ``root`` and ``y`` each capture their own digit and refer to it with
    ``@1``. Each pair must match, and the two pairs must not be equal in
    every one of the 100 runs.
    """
    source = ("root (/[0-9]/) y @1\n"
              "y (/[0-9]/) @1")
    grammar = Grammar(source)
    coincidences = 0
    for _ in range(100):
        outer_first, inner_first, inner_second, outer_second = grammar.generate()
        self.assertEqual(outer_first, outer_second)
        self.assertEqual(inner_first, inner_second)
        if outer_first == inner_first:
            coincidences += 1
    self.assertLess(coincidences, 100)
def test_0(self):
    """Check a simple ``{1,10}`` repeat of a one-character literal.

    Every output must consist only of ``"A"`` with a length from 1 to 10,
    and all ten lengths must occur within 2000 generations.
    """
    grammar = Grammar('root "A"{1,10}')
    seen_lengths = set()
    for _ in range(2000):
        text = grammar.generate()
        size = len(text)
        self.assertEqual(len(set(text)), 1)
        self.assertEqual(text[0], "A")
        self.assertIn(size, range(1, 11))
        seen_lengths.add(size)
    self.assertEqual(len(seen_lengths), 10)
def test_9(self):
    """Smoke-test Choice generation under a small ``limit``.

    There are no output assertions; the test passes if 100 generations
    complete without raising.
    """
    # this will fail intermittently if self.total is used instead of total
    # in ChoiceSymbol.choice()
    # XXX: why intermittently??
    source = ("root ('x' t 'x'){10}\n"
              "t + u\n"
              " 1 'x'\n"
              "u 1 'x'\n"
              " 1 'x'\n")
    grammar = Grammar(source, limit=4)
    for _ in range(100):
        grammar.generate()
def test_2(self):
    """Check that backreferences work inside function arguments.

    The four output characters must pair up as first/last and
    second/third, and the two pairs must not be equal in every one of
    the 100 runs.
    """
    grammar = Grammar("root (/[0-9]/) rndint((/[0-9]/), @2) @2 @1")
    coincidences = 0
    for _ in range(100):
        outer_first, inner_first, inner_second, outer_second = grammar.generate()
        self.assertEqual(outer_first, outer_second)
        self.assertEqual(inner_first, inner_second)
        if outer_first == inner_first:
            coincidences += 1
    self.assertLess(coincidences, 100)
def test_1(self):
    """Check a ``{0,10}`` repeat of an implicit concatenation.

    The output is split on commas. Every piece must be ``"AB"``, the piece
    count must be from 1 to 11, and all eleven counts must occur within
    2000 generations.
    """
    grammar = Grammar('root ("A" "B" ","){ 0 , 10 } "AB"')
    seen_counts = set()
    for _ in range(2000):
        pieces = grammar.generate().split(",")
        count = len(pieces)
        self.assertEqual(len(set(pieces)), 1)
        self.assertEqual(pieces[0], "AB")
        self.assertIn(count, range(1, 12))
        seen_counts.add(count)
    self.assertEqual(len(seen_counts), 11)
def test_builtin_rndint(self):
    """Exercise the built-in ``rndint`` grammar function.

    Over 10000 generations of ``rndint(1,10)``, each integer from 1 to 10
    must occur with a share within ``DELTA`` of 0.1. A reversed range must
    fail at generation time.
    """
    iters = 10000
    grammar = Grammar("root rndint(1,10)")
    tally = dict.fromkeys(range(1, 11), 0)
    for _ in range(iters):
        tally[int(grammar.generate())] += 1
    for hits in tally.values():
        self.assertAlmostEqual(float(hits)/iters, 0.1, delta=DELTA)

    reversed_range = Grammar('root rndint(2,1)')
    with self.assertRaisesRegex(GenerationError, r'^ValueError'):
        reversed_range.generate()
def test_import_file_containing_eval(self):
    """Check that ``eval`` works in a grammar reached through two imports.

    The top-level grammar imports ``a.gmr``, which imports ``b.gmr``, where
    ``X`` is defined as ``eval("Z")``.
    """
    grammar_files = {
        'a.gmr': ('IB import("b.gmr")\n', 'B IB.X\n'),
        'b.gmr': ('X eval("Z")\n', 'Z "z"\n'),
    }
    for path in ('a.gmr', 'b.gmr'):
        with open(path, 'w') as out:
            for chunk in grammar_files[path]:
                out.write(chunk)
    top_level = Grammar(
        'A import("a.gmr")\n'
        'root A.B\n')
    produced = top_level.generate()
    self.assertEqual(produced, 'z')
def test_5(self):
    """Check that ``<*>`` uses every choice pulled in through ``+``.

    ``a`` includes ``b`` and ``b`` includes ``c``, so the sorted output
    must contain each of ``a`` to ``e`` exactly once.
    """
    source = ("root a<*>\n"
              "a 1 'a'\n"
              " + b\n"
              "b 1 'b'\n"
              " 1 'c'\n"
              " + c\n"
              "c 1 'd'\n"
              " 1 'e'")
    grammar = Grammar(source)
    produced = grammar.generate()
    ordered = "".join(sorted(produced))
    self.assertEqual(ordered, "abcde")
def test_builtin_rndint(self):
    """Exercise the built-in ``rndint`` grammar function.

    Over 10000 generations of ``rndint(1,10)``, each integer from 1 to 10
    must occur with a share within ``DELTA`` of 0.1. A reversed range must
    fail at generation time.
    """
    rounds = 10000
    gmr = Grammar("root rndint(1,10)")
    counts = dict.fromkeys(range(1, 11), 0)
    for _ in range(rounds):
        drawn = int(gmr.generate())
        counts[drawn] += 1
    for count in counts.values():
        self.assertAlmostEqual(float(count) / rounds, 0.1, delta=DELTA)

    bad_range = Grammar('root rndint(2,1)')
    with self.assertRaisesRegex(GenerationError, r'^ValueError'):
        bad_range.generate()
def test_incomplete_sym_defn(self):
    """Check that a symbol name with no definition raises ParseError.

    In every case the error message must point at line 2.
    """
    incomplete = (
        "root a\n"
        "a",
        "root a\n"
        "a ",
        # just being mean here
        "root a\n"
        "a\r\t",
    )
    for source in incomplete:
        with self.assertRaisesRegex(
                ParseError, r'^Failed to parse definition.*\(line 2\)'):
            Grammar(source)
def test_5(self):
    """Check that a symbol is tracked even when its text is not output.

    ``esc`` records its argument and returns an empty string. The whole
    output therefore comes from ``@id`` and must equal the recorded value.
    """
    # One-element list so the nested function can rebind the stored value.
    captured = [0]

    def esc(x):
        captured[0] = x
        return ""

    source = ("root esc(id) @id\n"
              "id 'id' /[0-9]/")
    grammar = Grammar(source, esc=esc)
    for _ in range(100):
        produced = grammar.generate()
        self.assertRegex(produced, r"^id[0-9]$")
        self.assertEqual(captured[0], produced)
def test_5(self):
    """Check ``+`` when text is appended to the included choice symbol.

    The three possible outputs (``c``, ``aX``, ``bX``) must each occur
    with a share within ``DELTA`` of one third.
    """
    source = ("root a\n"
              "a + (b 'X')\n"
              " 1 'c'\n"
              "b 1 'a'\n"
              " 1 'b'")
    iters = 10000
    grammar = Grammar(source)
    tally = dict.fromkeys(("c", "aX", "bX"), 0)
    for _ in range(iters):
        tally[grammar.generate()] += 1
    for hits in tally.values():
        self.assertAlmostEqual(float(hits) / iters, 1.0 / 3, delta=DELTA)
def test_9(self):
    """Smoke-test Choice generation under a small ``limit``.

    There are no output assertions; the test passes if 100 generations
    complete without raising.
    """
    # this will fail intermittently if self.total is used instead of total
    # in ChoiceSymbol.choice()
    # XXX: why intermittently??
    limited = Grammar(
        "root ('x' t 'x'){10}\n"
        "t + u\n"
        " 1 'x'\n"
        "u 1 'x'\n"
        " 1 'x'\n",
        limit=4)
    for _ in range(100):
        limited.generate()
def test_import_name_integrity(self):
    """Check that an import name reused in another file is not overwritten.

    ``a.gmr`` binds ``X`` to ``b.gmr``, while the top-level grammar binds
    ``X`` to ``c.gmr``. Both references must resolve to their own file.
    """
    files = (
        ('a.gmr', ('X import("b.gmr")\n', 'B X.B\n')),
        ('b.gmr', ('B "B"\n',)),
        ('c.gmr', ('C "C"\n',)),
    )
    for path, chunks in files:
        with open(path, 'w') as handle:
            for chunk in chunks:
                handle.write(chunk)
    grammar = Grammar('A import("a.gmr")\n'
                      'X import("c.gmr")\n'
                      'root A.B X.C\n')
    self.assertEqual(grammar.generate(), "BC")
def test_5(self):
    """Check ``+`` when text is appended to the included choice symbol.

    The three possible outputs (``c``, ``aX``, ``bX``) must each occur
    with a share within ``DELTA`` of one third.
    """
    rounds = 10000
    gmr = Grammar("root a\n"
                  "a + (b 'X')\n"
                  " 1 'c'\n"
                  "b 1 'a'\n"
                  " 1 'b'")
    counts = dict.fromkeys(("c", "aX", "bX"), 0)
    for _ in range(rounds):
        outcome = gmr.generate()
        counts[outcome] += 1
    for count in counts.values():
        self.assertAlmostEqual(float(count)/rounds, 1.0/3, delta=DELTA)
def test_import_name_integrity(self):
    """Check that an import name reused in another file is not overwritten.

    ``a.gmr`` binds ``X`` to ``b.gmr``, while the top-level grammar binds
    ``X`` to ``c.gmr``. Both references must resolve to their own file.
    """
    contents = {
        'a.gmr': ('X import("b.gmr")\n', 'B X.B\n'),
        'b.gmr': ('B "B"\n',),
        'c.gmr': ('C "C"\n',),
    }
    for path in ('a.gmr', 'b.gmr', 'c.gmr'):
        with open(path, 'w') as out:
            for chunk in contents[path]:
                out.write(chunk)
    top_level = Grammar(
        'A import("a.gmr")\n'
        'X import("c.gmr")\n'
        'root A.B X.C\n')
    produced = top_level.generate()
    self.assertEqual(produced, "BC")