Beispiel #1
0
 def test_sxpr_roundtrip(self):
     """A flat S-expression must survive parsing and re-serialization
     unchanged (after flattening the serialized form)."""
     original = (
         '(BelegText (Anker "interdico_1") (BelegLemma "inter.|ticente") (TEXT ", (") '
         '(Anker "interdico_2") (BelegLemma "inter.|titente") (L " ") (Zusatz "var. l.") '
         '(TEXT ") Deo."))')
     serialized = parse_sxpr(original).as_sxpr()
     # as_sxpr() output is passed through flatten_sxpr() before comparing
     assert flatten_sxpr(serialized) == original
Beispiel #2
0
 def test_as_etree(self):
     """Check the conversion between node trees and ElementTree objects in
     both directions: for a plain tree with an attribute, for mixed
     content and for empty tags."""
     import xml.etree.ElementTree as ET
     # import lxml.etree as ET

     # 1. plain tree with an attribute: sxpr -> etree -> XML, and back
     source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
     expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
     element = parse_sxpr(source_sxpr).as_etree()
     serialized = ET.tostring(element, encoding="unicode")
     assert serialized == expected_xml, serialized
     assert Node.from_etree(element).as_sxpr() == source_sxpr

     # 2. mixed content; the XML comment is not part of the expected tree
     mixed = ET.XML(
         '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
     expected_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
     node = Node.from_etree(mixed)
     assert node.as_sxpr() == expected_sxpr
     # one more round trip through serialized XML must yield the same tree
     reparsed = ET.XML(ET.tostring(node.as_etree(), encoding="unicode"))
     assert Node.from_etree(reparsed).as_sxpr() == expected_sxpr

     # 3. empty tags collected while parsing are handed on to as_etree()
     empty_tags = set()
     tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                      out_empty_tags=empty_tags)
     etree = tree.as_etree(empty_tags=empty_tags)
     xml_bytes = ET.tostring(etree).replace(b' /', b'/')
     assert xml_bytes == b'<a><b>1<c>2<d/>3</c></b>4</a>'
     restored = Node.from_etree(etree)
     assert flatten_sxpr(restored.as_sxpr()) == \
            '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
Beispiel #3
0
    def test_mock_syntax_tree(self):
        """Trees built from S-expressions with unquoted, single-, double-
        and triple-quoted leaf content must serialize as expected."""

        def roundtrip(sxpr, strip_quotes=False):
            # parse, serialize, optionally drop all double quotes, flatten
            serialized = parse_sxpr(sxpr).as_sxpr()
            if strip_quotes:
                serialized = serialized.replace('"', '')
            return flatten_sxpr(serialized)

        unquoted = '(a (b c) (d e) (f (g h)))'
        assert roundtrip(unquoted, strip_quotes=True) == unquoted

        # test different quotation marks
        mixed_quotes = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
        without_quotes = '(a (b c k l) (d e) (f (g h)))'
        assert roundtrip(mixed_quotes, strip_quotes=True) == without_quotes

        double_quoted = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
        assert roundtrip(double_quoted) == double_quoted

        # unquoted content separated by blanks ends up in one single string
        assert roundtrip(without_quotes) == '(a (b "c k l") (d "e") (f (g "h")))'
Beispiel #4
0
 def test_parse_s_expression(self):
     """parse_sxpr() must accept well-formed S-expressions and raise a
     ValueError for input that is not a tree or has surplus brackets."""
     flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
     assert flat == '(a (b "c"))', flat

     # content spread over several lines
     flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
     assert flat == '(a "i" "j" "k")', flat

     # input without enclosing brackets
     try:
         parse_sxpr('a b c')
     except ValueError:
         pass
     else:
         assert False, "parse_sxpr() should raise a ValueError " \
                       "if argument is not a tree!"

     # one closing bracket too many
     try:
         parse_sxpr('(a (b c)))')
     except ValueError:
         pass
     else:
         assert False, "parse_sxpr() should raise a ValueError for too many matching brackets."
Beispiel #5
0
 def test_apply_if(self):
     """apply_if() must run both transformations (rename to 'X' and add the
     attribute) on the 'B' nodes and leave the 'C' node unchanged."""
     tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
     rename_and_mark = (change_tag_name('X'),
                        add_attributes({'renamed': 'True'}))
     conditional = apply_if(rename_and_mark, is_one_of('B'))
     traverse(tree, {'B': [conditional]})
     flat = flatten_sxpr(tree.as_sxpr())
     assert flat == '(A (X `(renamed "True") "1") (C "1") (X `(renamed "True") "2"))'
Beispiel #6
0
 def test_plaintext_handling(self):
     """Check how plain text between XML tags is represented in the parsed
     tree and how it is written out again by as_xml()."""
     # text next to child elements becomes ':Text' nodes
     mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
     flat_sxpr = flatten_sxpr(mixed.as_sxpr())
     assert flat_sxpr == '(a (:Text "alpha ") (b "beta") (:Text " gamma"))'

     # blanks inside <a> show up as text elements in the default XML output
     padded = parse_xml(' <a>  <b>beta</b>  </a> ')
     flat_xml = flatten_xml(padded.as_xml())
     assert flat_xml == \
            '<a><ANONYMOUS_Text__>  </ANONYMOUS_Text__><b>beta</b>' \
            '<ANONYMOUS_Text__>  </ANONYMOUS_Text__></a>'
     inline_xml = padded.as_xml(inline_tags={'a'}, string_tags={':Text'})
     assert inline_xml == '<a>  <b>beta</b>  </a>'

     # whitespace that contains line breaks is not reproduced here
     multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
     assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'
Beispiel #7
0
 def test_parse_s_expression_w_attributes(self):
     """Attributes written as `(name "value") must be parsed, preserved on
     serialization and be accessible through the node's `attr` mapping."""
     attributed = '(A `(attr "1") (B "X"))'
     flat = flatten_sxpr(parse_sxpr(attributed).as_sxpr())
     assert flat == '(A `(attr "1") (B "X"))'

     nested = """(BedeutungsPosition `(unterbedeutungstiefe "0")
              (Bedeutung
                (Beleg
                  (Quellenangabe (Quelle (Autor "LIUTPR.") (L " ") (Werk "leg.")) (L " ")
                    (BelegStelle (Stellenangabe (Stelle "21")) (L " ")
                      (BelegText (TEXT "...")))))))"""
     root = parse_sxpr(nested)
     # the string value of the tree is compared with the expected text
     assert str(root) == "LIUTPR. leg. 21 ..."
     assert root.attr['unterbedeutungstiefe'] == '0'
Beispiel #8
0
def grammar_unit(test_unit,
                 parser_factory,
                 transformer_factory,
                 report='REPORT',
                 verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    For every parser named in the test unit, first the match-tests (plus
    the cst- and ast-tests attached to them) and then the fail-tests are
    run. Results are stored in the test unit itself under the keys
    '__cst__', '__ast__', '__err__' and '__msg__'.

    :param test_unit: either the file name of a test unit (a string, which
        is loaded with `unit_from_file()`) or an already loaded test unit,
        i.e. a mapping of parser names to dictionaries that map test
        stages (e.g. 'match', 'fail', 'ast', 'cst') to dictionaries of
        test names and test code.
    :param parser_factory: a callable without arguments that returns the
        parser object to be tested.
    :param transformer_factory: a callable without arguments that returns
        the transformation that is applied to the concrete syntax tree.
    :param report: the directory into which the test report
        (`<unit_name>.md`) is written. A falsy value suppresses the report.
    :param verbose: if True, progress information is collected while the
        tests run and printed in one piece at the end.
    :returns: the list of error messages of all failed tests; the list is
        empty if all tests passed.
    :raises AssertionError: if the test unit is malformed (missing parser
        name, unknown test stages, left-over results of an earlier run or
        ast-/cst-tests without corresponding match-tests) or if the
        transformation raises an AssertionError.
    :raises SyntaxError: if the expected tree of a cst- or ast-test cannot
        be parsed.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        """Returns key `k` with all asterisks removed. Keys that do not have
        a `replace()` method (i.e. non-string keys) are returned unchanged."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        """Returns the entry `key` of the given test category, falling back
        on the key without asterisks. Returns the empty string if neither
        the category nor the key exists."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
        return value

    def lacking_match_tests(test_keys, match_keys):
        """Returns those keys from `test_keys` that have no counterpart in
        `match_keys`, when asterisks in the keys are ignored."""
        clean_match_keys = {clean_key(k) for k in match_keys}
        return {k for k in test_keys if clean_key(k) not in clean_match_keys}

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code
             for e in raw_errors} <= {
                 PARSER_LOOKAHEAD_FAILURE_ONLY,
                 AUTORETRIEVED_SYMBOL_NOT_CLEARED, PARSER_LOOKAHEAD_MATCH_ONLY
             } or (len(raw_errors) == 1 and
                   (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                    #  case 2:  mandatory lookahead failure at end of text
                    or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change its tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True,
                    reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                # only the first zombie node found in reverse order is relabeled
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        track_history = get_config_value('history_tracking')
        try:
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            # the lookup of the parser failed; the very same error is
            # reported further below when the tests for this parser are run
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        if 'ast' in tests:
            # report exactly those tests that fail the (asterisk-insensitive) check
            orphaned = lacking_match_tests(tests['ast'].keys(), match_tests)
            if orphaned:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(orphaned), parser_name))
        if 'cst' in tests:
            orphaned = lacking_match_tests(tests['cst'].keys(), match_tests)
            if orphaned:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' %
                    str(orphaned))

        # run match tests

        for test_name, test_code in tests.get('match', dict()).items():
            # number of errors before this test; used to detect new failures
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [
                    e for e in cst.errors_sorted
                    if e.code not in POSSIBLE_ARTIFACTS
                ]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.:  %s\n\n%s\n\n' %
                    (test_name, parser_name, md_codeblock(test_code),
                     '\n'.join(str(m) for m in errors)))
            if "ast" in tests or report:
                # transform a copy, so that the cst remains available
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    # an AssertionError without arguments must not be
                    # masked by an IndexError
                    reason = e.args[0] if e.args else ''
                    e.args = ('Test %s of parser %s failed, because:\n%s' %
                              (test_name, parser_name, reason), )
                    raise
                tests.setdefault('__ast__', {})[test_name] = ast
                # only errors that were added by the transformation
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if is_error(
                        max(e.code for e in ast_errors) if ast_errors else 0):
                    if ast_errors:
                        if errata: errata[-1] = errata[-1].rstrip('\n')
                        ast_errors.append('\n')
                        errata.append('\t' + '\n\t'.join(
                            str(msg).replace('\n', '\n\t\t')
                            for msg in ast_errors))

            if verbose:
                # test names are not necessarily strings, hence str()
                infostr = '    match-test "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e)))
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = '      cst-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e)))
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(
                            ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                            % (test_name, parser_name, '\n\t'.join(
                                test_code.split('\n')), compare_str, ast_str))
                    if verbose:
                        infostr = '      ast-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', dict()).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            # a fail-test passes only if a real error (no lookahead artifact) occurred
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = '    fail-test  "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            # exist_ok=True: an already existing report directory is not an error
            os.makedirs(report, exist_ok=True)
            with open(os.path.join(report, unit_name + '.md'),
                      'w',
                      encoding='utf8') as f:
                f.write(test_report)
                f.flush()

    print('\n'.join(output))
    return errata
Beispiel #9
0
 def test_compact_sexpr(self):
     """flatten_sxpr() must collapse a multi-line, indented S-expression
     into a single line."""
     indented = "(a\n    (b\n        c\n    )\n)\n"
     flat = flatten_sxpr(indented)
     assert flat == "(a (b c))"
Beispiel #10
0
 def test_flatten_sxpr(self):
     """Flattening a serialized tree must keep the blanks inside quoted
     leaf content (here: the two blanks of node 'b')."""
     serialized = parse_sxpr('(a (b "  ") (d (e f) (h i)))').as_sxpr()
     assert flatten_sxpr(serialized) == '(a (b "  ") (d (e "f") (h "i")))'