Esempio n. 1
0
def process_tree(tp: TreeProcessor,
                 tree: RootNode) -> Tuple[RootNode, List[Error]]:
    """Run the tree-processor `tp` on `tree` unless a fatal error is flagged.

    The handling of Python exceptions raised by `tp` depends on the
    error state of the tree:

    - If `tree.error_flag` signals a fatal error, `tp` is not called at all.
    - If it signals a (non-fatal) error, exceptions raised by `tp` are
      caught and recorded on the tree as a `TREE_PROCESSING_CRASH` error,
      together with the filtered stack trace. The rationale is that a tree
      produced by a faulty earlier stage might not have the structure that
      later stages expect, so a crash is not necessarily a programming
      error. (Stages should still be written with errors of earlier
      stages in mind.)
    - If no error is flagged, exceptions are assumed to be programming
      mistakes and propagate to the caller.

    Although the root-node of the processed tree is returned, tree
    processing should generally be assumed to change the tree in place.
    If the input tree shall be preserved, make a deep copy of it before
    calling process_tree.

    :param tp: the tree-processor to apply; must be a `TreeProcessor`.
    :param tree: the root-node of the tree to be processed.
    :returns: a tuple of the (possibly replaced) root-node and the value
        of its `errors_sorted` attribute.
    """
    assert isinstance(tp, TreeProcessor)
    if not is_fatal(tree.error_flag):
        if is_error(tree.error_flag):
            # assume Python crashes are merely a consequence of earlier
            # errors, so let's catch them
            try:
                tree = tp(tree)
            except Exception as e:
                # attach the error to the node that was being processed, if
                # the processor kept track of it, otherwise to the root
                node = tp.context[-1] if tp.context else tree
                st = traceback.format_list(
                    traceback.extract_tb(e.__traceback__))
                trace = ''.join(filter_stacktrace(st))
                tree.new_error(
                    node,
                    "Tree-processing failed, most likely, due to errors earlier "
                    "in the processing pipeline. Crash Message: %s: %s\n%s" %
                    (e.__class__.__name__, str(e), trace),
                    TREE_PROCESSING_CRASH)
        else:
            # assume Python crashes are programming mistakes, so let
            # the exceptions through
            tree = tp(tree)
        assert isinstance(tree, RootNode)

    return tree, tree.errors_sorted
Esempio n. 2
0
 def test_error_handling(self):
     """Check that errors attached to a root node are reported in sorted
     order and that reading them does not clear the error flag."""
     tree = parse_sxpr('(A (B D) (C E))')
     tree.with_pos(0)
     root = RootNode()
     # Register the errors in reverse order of their nodes in the tree
     # (child C first, then child B) ...
     root.new_error(tree.children[1], "error C")
     root.new_error(tree.children[0], "error B")
     root.swallow(tree)
     assert root.error_flag
     errors = root.errors_sorted
     # ... reading the sorted errors must leave the error flag set
     assert root.error_flag
     # assert errors == root.errors(True)
     # assert not root.error_flag and not root.errors()
     error_str = "\n".join(str(e) for e in errors)
     # The previous check compared find("A") with find("B"). Neither message
     # contains an "A", so that comparison presumably always saw -1 on the
     # left-hand side and could not fail. Compare the two messages that were
     # actually registered instead: B precedes C in the tree.
     pos_b = error_str.find("error B")
     pos_c = error_str.find("error C")
     assert 0 <= pos_b < pos_c
Esempio n. 3
0
def grammar_unit(test_unit,
                 parser_factory,
                 transformer_factory,
                 report='REPORT',
                 verbose=False):
    """Run the unit tests for a grammar-parser and its AST-transformation.

    For every parser named in the test unit, the "match" tests are run
    first (optionally followed by a comparison with the expected "cst"
    and "ast" results) and then the "fail" tests. Intermediate results
    are stored in the per-parser test dictionaries under the keys
    '__cst__', '__ast__', '__err__' and '__msg__'.

    :param test_unit: either the file name of a test unit (a string, which
        is loaded with `unit_from_file`) or an already loaded test unit,
        i.e. a mapping of parser names to dictionaries of tests.
    :param parser_factory: callable returning the parser (grammar) object.
    :param transformer_factory: callable returning the AST-transformation
        function.
    :param report: name of the directory into which a markdown test
        report is written. A falsy value suppresses the report. Note that
        a truthy value also causes the AST to be generated for all match
        tests, even if the unit contains no "ast" tests.
    :param verbose: if True, progress messages are collected and printed
        at the end.
    :returns: a list of error messages (strings), one or more for each
        failed test. The list is empty if all tests succeeded.
    :raises AssertionError: if the test unit is malformed or the
        AST-transformation fails with an assertion.
    :raises SyntaxError: if an expected "cst" or "ast" tree cannot be
        parsed.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        """Return key `k` without asterisks. Keys that have no `replace`
        method (e.g. numbers) are returned unchanged."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        """Return the test entry `key` of the given category, looking it
        up under its asterisk-free name as a fallback. Return the empty
        string if there is no such entry."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
        return value

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code
             for e in raw_errors} <= {
                 PARSER_LOOKAHEAD_FAILURE_ONLY,
                 AUTORETRIEVED_SYMBOL_NOT_CLEARED, PARSER_LOOKAHEAD_MATCH_ONLY
             } or (len(raw_errors) == 1 and
                   (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                    #  case 2:  mandatory lookahead failure at end of text
                    or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change its tag_name to indicate that it is an artifact!
            # Only the first parent yielded by the reversed selection is
            # treated (see the `break` below).
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True,
                    reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        track_history = get_config_value('history_tracking')
        try:
            # parsers with lookaheads always get their history tracked
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            # deliberately ignored here: an AttributeError raised when
            # running the tests with this parser is handled further below
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        clean_match_tests = {clean_key(k) for k in match_tests}
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not {clean_key(k) for k in ast_tests} <= clean_match_tests:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(ast_tests - match_tests), parser_name))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not {clean_key(k) for k in cst_tests} <= clean_match_tests:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' %
                    str(cst_tests - match_tests))

        # run match tests

        for test_name, test_code in tests.get('match', {}).items():
            # number of errata before this test; any growth means failure
            errflag = len(errata)
            # test names need not be strings, so convert before concatenating
            test_label = str(test_name)
            clean_test_name = test_label.replace('*', '')
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            tests.setdefault('__cst__', {})[test_name] = cst
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [
                    e for e in cst.errors_sorted
                    if e.code not in POSSIBLE_ARTIFACTS
                ]  # type: List[Error]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.:  %s\n\n%s\n\n' %
                    (test_name, parser_name, md_codeblock(test_code),
                     '\n'.join(str(m) for m in errors)))
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    # an assertion without a message has empty args
                    reason = e.args[0] if e.args else ''
                    e.args = ('Test %s of parser %s failed, because:\n%s' %
                              (test_name, parser_name, reason), )
                    raise
                tests.setdefault('__ast__', {})[test_name] = ast
                # only report errors that were added by the transformation
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if ast_errors and is_error(max(e.code for e in ast_errors)):
                    if errata:
                        errata[-1] = errata[-1].rstrip('\n')
                    ast_errors.append('\n')
                    errata.append('\t' + '\n\t'.join(
                        str(msg).replace('\n', '\n\t\t')
                        for msg in ast_errors))

            if verbose:
                infostr = '    match-test "' + test_label + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = '      cst-test "' + test_label + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(
                            ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                            % (test_name, parser_name, '\n\t'.join(
                                test_code.split('\n')), compare_str, ast_str))
                    if verbose:
                        infostr = '      ast-test "' + test_label + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', {}).items():
            errflag = len(errata)
            test_label = str(test_name)
            clean_test_name = test_label.replace('*', '')
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            # a fail test fails, if there was no error or if the only
            # errors were lookahead-artifacts of the test environment
            if not is_error(cst.error_flag) or lookahead_artifact(cst):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        # same asterisk-free file name scheme as for the
                        # match-test logs
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, clean_test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = '    fail-test  "' + test_label + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            try:
                os.mkdir(report)  # is a process-Lock needed, here?
            except FileExistsError:
                pass
            with open(os.path.join(report, unit_name + '.md'),
                      'w',
                      encoding='utf8') as f:
                f.write(test_report)

    print('\n'.join(output))
    return errata