def test_sxpr_roundtrip(self):
    """Parsing an S-expression and serializing the resulting tree again
    must yield the very same (flattened) S-expression."""
    original = ''.join([
        '(BelegText (Anker "interdico_1") (BelegLemma "inter.|ticente") (TEXT ", (") ',
        '(Anker "interdico_2") (BelegLemma "inter.|titente") (L " ") (Zusatz "var. l.") ',
        '(TEXT ") Deo."))',
    ])
    roundtripped = flatten_sxpr(parse_sxpr(original).as_sxpr())
    assert roundtripped == original
def test_as_etree(self):
    """Check the conversion between node-trees and ElementTree elements
    (`as_etree()` / `Node.from_etree()`) in both directions."""
    import xml.etree.ElementTree as ET
    # import lxml.etree as ET

    # 1. simple tree with an attribute: tree -> etree -> tree
    source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
    expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
    element = parse_sxpr(source_sxpr).as_etree()
    serialized = ET.tostring(element, encoding="unicode")
    assert serialized == expected_xml, serialized
    assert Node.from_etree(element).as_sxpr() == source_sxpr

    # 2. mixed content (text next to elements) plus an XML-comment
    element = ET.XML(
        '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
    mixed_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
    mixed_tree = Node.from_etree(element)
    assert mixed_tree.as_sxpr() == mixed_sxpr
    # a further round through the serialized XML must not change anything
    reparsed = ET.XML(ET.tostring(mixed_tree.as_etree(), encoding="unicode"))
    assert Node.from_etree(reparsed).as_sxpr() == mixed_sxpr

    # 3. empty tags collected while parsing XML are passed on to as_etree()
    empty_tags = set()
    xml_tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                         out_empty_tags=empty_tags)
    element = xml_tree.as_etree(empty_tags=empty_tags)
    raw_xml = ET.tostring(element).replace(b' /', b'/')
    assert raw_xml == b'<a><b>1<c>2<d/>3</c></b>4</a>'
    restored = Node.from_etree(element)
    assert flatten_sxpr(restored.as_sxpr()) == \
        '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
def test_mock_syntax_tree(self):
    """Check that `parse_sxpr()` copes with unquoted content as well as
    with different kinds of quotation marks."""

    def flat(sxpr):
        """Parse `sxpr` and return the flattened serialization."""
        return flatten_sxpr(parse_sxpr(sxpr).as_sxpr())

    def flat_unquoted(sxpr):
        """Like `flat()`, but with all double quotes removed before
        flattening."""
        return flatten_sxpr(parse_sxpr(sxpr).as_sxpr().replace('"', ''))

    plain = '(a (b c) (d e) (f (g h)))'
    assert flat_unquoted(plain) == plain

    # test different quotation marks
    mixed_quotes = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
    stripped = '(a (b c k l) (d e) (f (g h)))'
    assert flat_unquoted(mixed_quotes) == stripped

    clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
    assert flat(clean) == clean

    assert flat(stripped) == '(a (b "c k l") (d "e") (f (g "h")))'
def test_parse_s_expression(self):
    """Check `parse_sxpr()` with well-formed input and make sure that it
    raises a ValueError on malformed input."""
    flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
    assert flat == '(a (b "c"))', flat

    flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
    assert flat == '(a "i" "j" "k")', flat

    # input that is not enclosed in brackets is not a tree
    try:
        parse_sxpr('a b c')
    except ValueError:
        pass
    else:
        assert False, "parse_sxpr() should raise a ValueError " \
            "if argument is not a tree!"

    # one closing bracket too many
    try:
        parse_sxpr('(a (b c)))')
    except ValueError:
        pass
    else:
        assert False, "parse_sxpr() should raise a ValueError for too many matching brackets."
def test_apply_if(self):
    """Check that `apply_if()` runs a tuple of transformations on the
    nodes for which the given condition holds."""
    tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)

    # rename every "B"-node to "X" and mark it with an attribute
    rename = change_tag_name('X')
    mark = add_attributes({'renamed': 'True'})
    conditional = apply_if((rename, mark), is_one_of('B'))
    traverse(tree, {'B': [conditional]})

    expected = '(A (X `(renamed "True") "1") (C "1") (X `(renamed "True") "2"))'
    assert flatten_sxpr(tree.as_sxpr()) == expected
def test_plaintext_handling(self):
    """Check how text between XML-elements is represented after parsing
    with `parse_xml()` and when serializing with `as_xml()`."""
    # text next to child elements ends up in ":Text"-nodes
    mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
    mixed_sxpr = flatten_sxpr(mixed.as_sxpr())
    assert mixed_sxpr == '(a (:Text "alpha ") (b "beta") (:Text " gamma"))'

    # whitespace-only text between elements
    padded = parse_xml(' <a> <b>beta</b> </a> ')
    expected_flat = ('<a><ANONYMOUS_Text__> </ANONYMOUS_Text__><b>beta</b>'
                     '<ANONYMOUS_Text__> </ANONYMOUS_Text__></a>')
    assert flatten_xml(padded.as_xml()) == expected_flat
    inline_xml = padded.as_xml(inline_tags={'a'}, string_tags={':Text'})
    assert inline_xml == '<a> <b>beta</b> </a>'

    # text that contains line breaks
    multiline = parse_xml(' <a>\n <b>beta</b>\n</a> ')
    assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'
def test_parse_s_expression_w_attributes(self):
    """Check that attributes written as `(name "value") in an
    S-expression are parsed and serialized again."""
    short = '(A `(attr "1") (B "X"))'
    roundtripped = flatten_sxpr(parse_sxpr(short).as_sxpr())
    assert roundtripped == '(A `(attr "1") (B "X"))'

    # larger example: the attribute belongs to the root node
    nested = ('(BedeutungsPosition `(unterbedeutungstiefe "0") '
              '(Bedeutung (Beleg (Quellenangabe '
              '(Quelle (Autor "LIUTPR.") (L " ") (Werk "leg.")) '
              '(L " ") '
              '(BelegStelle (Stellenangabe (Stelle "21")) (L " ") '
              '(BelegText (TEXT "...")))))))')
    tree = parse_sxpr(nested)
    assert str(tree) == "LIUTPR. leg. 21 ..."
    assert tree.attr['unterbedeutungstiefe'] == '0'
def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT', verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    For every parser named in the test unit, first all "match"-tests and
    then all "fail"-tests are run. The results are stored in the test unit
    itself under the keys '__cst__', '__ast__', '__err__' and '__msg__'.

    Args:
        test_unit: Either the file name of a test unit (which is then read
            with `unit_from_file()`) or a mapping of the form
            parser name -> test type -> test name -> test code.
        parser_factory: Callable without arguments that returns the
            parser. The parser is called as `parser(test_code, parser_name)`.
        transformer_factory: Callable without arguments that returns the
            transformation. The transformation is called with the tree as
            its only argument; its return value is ignored.
        report: Name of the directory to which the test report
            ("<unit name>.md") is written. If empty or None, no report
            is written.
        verbose: If True, progress messages are collected and printed at
            the end of the run.

    Returns:
        The list of error messages (strings) of all failed tests. The list
        is empty if all tests succeeded.

    Raises:
        AssertionError: if the test unit is malformed (missing parser name,
            results already present, unknown test types, cst- or ast-tests
            without a corresponding match-test) or if the transformation
            raised an AssertionError.
        SyntaxError: if the expected tree of a cst- or ast-test cannot
            be parsed.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        """Return key `k` without asterisks. Keys that do not have a
        `replace`-method (i.e. non-string keys) are returned unchanged."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        """Return the entry for `key` (or, failing that, for `key` without
        asterisks) from `tests[category]`, or the empty string if there is
        no such entry."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
        return value

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers
        is a Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.

        As a side effect, the first zombie node that is found (searching
        in reverse order) is renamed to TEST_ARTIFACT if the errors are
        considered an artifact.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code for e in raw_errors} <= {
                PARSER_LOOKAHEAD_FAILURE_ONLY,
                AUTORETRIEVED_SYMBOL_NOT_CLEARED,
                PARSER_LOOKAHEAD_MATCH_ONLY}
            or (len(raw_errors) == 1
                and (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                     # case 2: mandatory lookahead failure at end of text
                     or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change its tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True, reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        track_history = get_config_value('history_tracking')
        try:
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            # an unknown parser name is reported by the tests below
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write(' Match-Tests for parser "' + parser_name + '"')

        # cst- and ast-tests are matched to match-tests by their names
        # without asterisks, so the names reported as missing must be
        # determined on the cleaned keys as well
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        clean_match_tests = {clean_key(k) for k in match_tests}
        if 'ast' in tests:
            missing = {k for k in tests['ast'] if clean_key(k) not in clean_match_tests}
            if missing:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(missing), parser_name))
        if 'cst' in tests:
            missing = {k for k in tests['cst'] if clean_key(k) not in clean_match_tests}
            if missing:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' % str(missing))

        # run match tests

        for test_name, test_code in tests.get('match', {}).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [e for e in cst.errors_sorted
                          if e.code not in POSSIBLE_ARTIFACTS]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.: %s\n\n%s\n\n'
                    % (test_name, parser_name, md_codeblock(test_code),
                       '\n'.join(str(m) for m in errors)))
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    # add the name of the test to the message and re-raise
                    e.args = ('Test %s of parser %s failed, because:\n%s'
                              % (test_name, parser_name, e.args[0]),)
                    raise
                tests.setdefault('__ast__', {})[test_name] = ast
                # only errors that were added by the transformation
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if ast_errors and is_error(max(e.code for e in ast_errors)):
                    if errata:
                        errata[-1] = errata[-1].rstrip('\n')
                    ast_errors.append('\n')
                    errata.append('\t' + '\n\t'.join(
                        str(msg).replace('\n', '\n\t\t') for msg in ast_errors))

            if verbose:
                infostr = ' match-test "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s'
                        % (test_name, parser_name, str(e))) from e
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = ' cst-test "' + str(test_name) + '" ... '
                        write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s'
                        % (test_name, parser_name, str(e))) from e
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
                            % (test_name, parser_name,
                               '\n\t'.join(test_code.split('\n')),
                               compare_str, ast_str))
                    if verbose:
                        infostr = ' ast-test "' + str(test_name) + '" ... '
                        write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write(' Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', {}).items():
            errflag = len(errata)
            # asterisks are removed from the name of the log file, just
            # like for the match tests
            clean_test_name = str(test_name).replace('*', '')
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, clean_test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = ' fail-test "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            # exist_ok avoids a failure if another process has created the
            # directory in the meantime
            os.makedirs(report, exist_ok=True)
            with open(os.path.join(report, unit_name + '.md'), 'w', encoding='utf8') as f:
                f.write(test_report)

    print('\n'.join(output))
    return errata
def test_compact_sexpr(self):
    """`flatten_sxpr()` must turn a multi-line S-expression into a
    single-line S-expression."""
    multiline = "(a\n (b\n c\n )\n)\n"
    flattened = flatten_sxpr(multiline)
    assert flattened == "(a (b c))"
def test_flatten_sxpr(self):
    """Flatten the serialization of a parsed tree, one leaf of which
    consists of a single blank only."""
    serialized = parse_sxpr('(a (b " ") (d (e f) (h i)))').as_sxpr()
    expected = '(a (b " ") (d (e "f") (h "i")))'
    assert flatten_sxpr(serialized) == expected