Example 1
def instantiate_executor(allow_parallel: bool,
                         preferred_executor: Type[concurrent.futures.Executor],
                         *args, **kwargs) -> concurrent.futures.Executor:
    """Instantiates an Executor of a particular type, if the value of the
    configuration variable 'debug_parallel_execution' allows to do so.
    Otherwise a surrogate executor will be returned.
    If 'allow_parallel` is False, a SinglethredExecutor will be instantiated,
    regardless of the preferred_executor and any configuration values.
    """
    if allow_parallel:
        mode = get_config_value('debug_parallel_execution')  # type: str
        if mode == "commandline":
            options = [arg for arg in sys.argv
                       if arg[:2] == '--']  # type: List[str]
            if '--singlethread' in options: mode = 'singlethread'
            elif '--multithreading' in options: mode = 'multithreading'
            else: mode = 'multiprocessing'
        if mode == "singlethread":
            return SingleThreadExecutor()
        elif mode == "multithreading":
            if issubclass(preferred_executor,
                          concurrent.futures.ProcessPoolExecutor):
                return concurrent.futures.ThreadPoolExecutor(*args, **kwargs)
        else:
            assert mode == "multiprocessing", \
                'Config variable "debug_parallel_execution" as illegal value "%s"' % mode
        return preferred_executor(*args, **kwargs)
    return SingleThreadExecutor()
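
A minimal usage sketch; it assumes `instantiate_executor` and `SingleThreadExecutor` are available in the current namespace, and `square` is a hypothetical worker function:

import concurrent.futures

def square(x):
    return x * x

# Work is dispatched through whichever executor the configuration permits;
# with allow_parallel=False this always degrades to a SingleThreadExecutor.
with instantiate_executor(True, concurrent.futures.ProcessPoolExecutor) as pool:
    futures = [pool.submit(square, n) for n in range(4)]
    concurrent.futures.wait(futures)
    print([f.result() for f in futures])  # [0, 1, 4, 9]
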
Example 2
def grammar_provider(ebnf_src: str, branding="DSL", additional_code: str = '') -> ParserFactoryFunc:
    """
    Compiles an EBNF-grammar and returns a grammar-parser provider
    function for that grammar.

    Args:
        ebnf_src (str):  Either the file name of an EBNF grammar or
            the EBNF-grammar itself as a string.
        branding (str or bool):  Branding name for the compiler
            suite source code.
        additional_code: Python code added to the generated source. This typically
            contains the source code of semantic actions referred to in the
            generated source, e.g. filter-functions or resume-point-search-functions.

    Returns:
        A provider function for a grammar object for texts in the
        language defined by ``ebnf_src``.
    """
    grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
                             get_ebnf_transformer(), get_ebnf_compiler(branding, ebnf_src))
    log_name = get_config_value('compiled_EBNF_log')
    if log_name:
        if is_logging():
            append_log(log_name, grammar_src)
        else:
            print(grammar_src)
    imports = DHPARSER_IMPORTS.format(dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
    grammar_factory = compile_python_object('\n'.join([imports, additional_code, grammar_src]),
                                            r'get_(?:\w+_)?grammar$')
    if callable(grammar_factory):
        grammar_factory.python_src__ = grammar_src
        return grammar_factory
    raise ValueError('Could not compile grammar provider!')
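
A usage sketch with a hypothetical toy grammar (the exact EBNF dialect is assumed; `grammar_provider` is the function above):

arithmetic_ebnf = '''
    expression = term { ("+" | "-") term }
    term       = /[0-9]+/~
    '''
get_grammar = grammar_provider(arithmetic_ebnf, branding="Arithmetic")
parser = get_grammar()      # fresh grammar object for this thread
tree = parser('1 + 2 - 3')  # concrete syntax tree of the expression
print(tree.as_sxpr())
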
Example 3
def reset(self):
    self.source = ''  # type: str
    self.tree = ROOTNODE_PLACEHOLDER  # type: RootNode
    self.context = []  # type: TreeContext
    self._None_check = True  # type: bool
    self._dirty_flag = False
    self._debug = get_config_value('debug_compiler')  # type: bool
    self._debug_already_compiled = set()  # type: Set[Node]
    self.finalizers = []  # type: List[Tuple[Callable, Tuple]]
Example 4
def is_logging(thread_local_query: bool = True) -> bool:
    """-> True, if logging is turned on."""
    if thread_local_query:
        return bool(get_config_value('log_dir'))
    else:
        access_presets()
        result = bool(get_preset_value('log_dir'))
        finalize_presets()
        return result
Example 5
@contextmanager  # from contextlib import contextmanager
def local_log_dir(path: str = './LOGS'):
    """Context manager for temporarily switching to a different log-directory."""
    assert path, "Pathname cannot be empty"
    saved_log_dir = get_config_value('log_dir')
    log_dir(path)
    try:
        yield
    finally:
        set_config_value('log_dir', saved_log_dir)
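
A usage sketch, reusing `append_log` from the `grammar_provider` example above (assumes logging is turned on, i.e. `is_logging()` holds):

with local_log_dir('./TEST_LOGS'):
    append_log('trace.log', 'temporary entry')  # written under ./TEST_LOGS
# the previous 'log_dir' configuration value is restored on exit
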
Example 6
def log_dir(path: str = "") -> str:
    """Creates a directory for log files (if it does not exist) and
    returns its path.

    WARNING: Any files in the log dir will eventually be overwritten.
    Do not use a directory that serves any purpose other than logging.

    ATTENTION: The log-dir is stored thread-locally, which means the log-dir
    as well as the information whether logging is turned on or off will not
    automatically be transferred to any subprocesses. This needs to be done
    explicitly. (See `testing.grammar_suite()` for an example of how this
    can be done.)

    Parameters:
        path:   The directory path. If empty, the configured value will be
            used: `configuration.get_config_value('log_dir')`.

    Returns:
        str - name of the logging directory or '' if logging is turned off.
    """
    dirname = path if path else get_config_value('log_dir')
    if not dirname:
        return ''
    dirname = os.path.normpath(dirname)
    # `try ... except` rather than `if os.path.exists(...)` for thread-safety
    try:
        os.mkdir(dirname)
        info_file_name = os.path.join(dirname, 'info.txt')
        if not os.path.exists(info_file_name):
            with open(info_file_name, 'w', encoding="utf-8") as f:
                f.write(
                    "This directory has been created by DHParser to store log files from\n"
                    "parsing. ANY FILE IN THIS DIRECTORY CAN BE OVERWRITTEN! Therefore,\n"
                    "do not place any files here and do not bother editing files in this\n"
                    "directory as any changes will get lost.\n")
    except FileExistsError:
        if not os.path.isdir(dirname):
            raise IOError('"' + dirname + '" cannot be used as log directory, '
                          'because it is not a directory!')
    set_config_value('log_dir', dirname)
    return dirname
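
A short on/off sketch using the configuration value directly (this mirrors `suspend_logging` further below):

created = log_dir('LOGS')        # creates ./LOGS (plus info.txt) if necessary
print(created or 'logging is turned off')
set_config_value('log_dir', '')  # turn logging off again
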
Example 7
def test_compact_representation(self):
    tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
    compact = tree.as_sxpr(compact=True, flatten_threshold=0)
    assert compact == '(A\n  (B\n    (C "D")\n    (E "F"))\n  (G "H"))'
    tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    compact = tree.as_sxpr(compact=True, flatten_threshold=0)
    assert compact == '(A\n  (B\n    (C\n      "D"\n      "X")\n    (E "F"))' \
        '\n  (G\n    " H "\n    " Y "))'
    tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
    C = tree['B']['C']
    C.attr['attr'] = 'val'
    threshold = get_config_value('flatten_sxpr_threshold')
    set_config_value('flatten_sxpr_threshold', 20)
    compact = tree.serialize('indented')
    # assert compact == 'A\n  B\n    C `(attr "val")\n      "D"\n    E\n      "F"\n  G\n    "H"'
    assert compact == 'A\n  B\n    C `(attr "val") "D"\n    E "F"\n  G "H"', compact
    tree = parse_xml('<note><priority level="high" /><remark></remark></note>')
    assert tree.serialize(how='indented') == 'note\n  priority `(level "high")\n  remark'
    set_config_value('flatten_sxpr_threshold', threshold)
Example 8
def recompile_grammar(grammar_src, force):
    grammar_tests_dir = os.path.join(scriptpath, 'test_grammar')
    testing.create_test_templates(grammar_src, grammar_tests_dir)
    DHParser.log.start_logging('LOGS')
    # recompiles Grammar only if it has changed
    saved_syntax_variant = get_config_value('syntax_variant')
    set_config_value('syntax_variant', 'heuristic')
    if not dsl.recompile_grammar(
            grammar_src,
            force=force,
            notify=lambda: print('recompiling ' + grammar_src)):
        print('\nErrors while recompiling "%s":' % grammar_src +
              '\n--------------------------------------\n\n')
        if is_filename(grammar_src):
            err_name = grammar_src.replace('.', '_') + '_ERRORS.txt'
        else:
            err_name = 'EBNF_ebnf_ERRORS.txt'
        with open(err_name, encoding='utf-8') as f:
            print(f.read())
        sys.exit(1)
    set_config_value('syntax_variant', saved_syntax_variant)
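
Invocation sketch (the grammar file name is hypothetical):

if __name__ == '__main__':
    recompile_grammar('arithmetic.ebnf', force=False)
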
Example 9
def run_path(path):
    """Runs all unit tests in `path`"""
    if os.path.isdir(path):
        sys.path.append(path)
        files = os.listdir(path)
        results = []
        with instantiate_executor(
                get_config_value('test_parallelization') and len(files) > 1,
                concurrent.futures.ProcessPoolExecutor) as pool:
            for f in files:
                results.append(pool.submit(run_file, f))
                # run_file(f)  # for testing!
            concurrent.futures.wait(results)
            for r in results:
                try:
                    _ = r.result()
                except AssertionError as failure:
                    print(failure)

    else:
        path, fname = os.path.split(path)
        sys.path.append(path)
        run_file(fname)
    sys.path.pop()
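
Invocation sketch:

if __name__ == '__main__':
    # run a whole directory of test modules, or a single test file
    run_path(sys.argv[1] if len(sys.argv) > 1 else 'tests')
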
Example 10
def lookahead_artifact(syntax_tree: Node):
    """
    Returns True if the error merely occurred because the parser
    stopped in front of a sequence that was captured by a lookahead
    operator, or if a mandatory lookahead failed at the end of data.
    This is required for testing of parsers that put a lookahead
    operator at the end. See test_testing.TestLookahead.
    """
    if not get_config_value('test_suppress_lookahead_failures'):
        return False
    raw_errors = cast(RootNode, syntax_tree).errors_sorted
    is_artifact = (
        {e.code for e in raw_errors} <= {
            PARSER_LOOKAHEAD_FAILURE_ONLY,
            AUTORETRIEVED_SYMBOL_NOT_CLEARED,
            PARSER_LOOKAHEAD_MATCH_ONLY}
        or (len(raw_errors) == 1
            and (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                 # case 2: mandatory lookahead failure at end of text
                 or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
    if is_artifact:
        # don't remove the zombie node with the error message at the end,
        # but change its tag_name to indicate that it is an artifact!
        for parent in syntax_tree.select_if(
                lambda node: any(child.tag_name == ZOMBIE_TAG
                                 for child in node.children),
                include_root=True, reverse=True):
            zombie = parent.pick_child(ZOMBIE_TAG)
            zombie.tag_name = TEST_ARTIFACT
            zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                            'tree structure may not be the same as in a non-testing ' \
                            'environment, when a testing artifact has occurred!'
            # parent.result = tuple(c for c in parent.children if c.tag_name != ZOMBIE_TAG)
            break
    return is_artifact
Example 11
def grammar_suite(directory,
                  parser_factory,
                  transformer_factory,
                  fn_patterns=('*test*', ),
                  ignore_unknown_filetypes=False,
                  report='REPORT',
                  verbose=True):
    """
    Runs all grammar unit tests in a directory. A file is considered a test
    unit if it has the word "test" in its name.
    """
    assert isinstance(report, str)

    if not isinstance(fn_patterns, collections.abc.Iterable):
        fn_patterns = [fn_patterns]
    all_errors = collections.OrderedDict()
    if verbose:
        print("\nScanning test-directory: " + directory)
    save_cwd = os.getcwd()
    os.chdir(directory)
    if is_logging():
        clear_logs()

    tests = [
        fn for fn in sorted(os.listdir('.')) if any(
            fnmatch.fnmatch(fn, pattern) for pattern in fn_patterns)
    ]

    # TODO: fix "handle is closed" error in pypy3 when exiting the interpreter!
    with instantiate_executor(
            get_config_value('test_parallelization') and len(tests) > 1,
            concurrent.futures.ProcessPoolExecutor) as pool:
        results = []
        for filename in tests:
            parameters = filename, parser_factory, transformer_factory, report, verbose
            results.append(pool.submit(grammar_unit, *parameters))
        done, not_done = concurrent.futures.wait(results)
        assert not not_done, str(not_done)
        for filename, err_future in zip(tests, results):
            try:
                errata = err_future.result()
                if errata:
                    all_errors[filename] = errata
            except ValueError as e:
                if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
                    raise e
            except AssertionError as e:
                e.args = ('When processing "%s":\n%s' %
                          (filename, e.args[0]) if e.args else '', )
                raise e
    os.chdir(save_cwd)
    error_report = []
    err_N = 0
    if all_errors:
        for filename in all_errors:
            error_report.append('\n\nErrors found by unit test "%s":\n' %
                                filename)
            err_N += len(all_errors[filename])
            for error in all_errors[filename]:
                error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        # if verbose:
        #     print("\nFAILURE! %i error%s found!\n" % (err_N, 's' if err_N > 1 else ''))
        return ('Test suite "%s" revealed %s error%s:\n\n' %
                (directory, err_N, 's' if err_N > 1 else '') +
                '\n'.join(error_report))
    if verbose:
        print("\nSUCCESS! All tests passed :-)\n")
    return ''
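
A call sketch; the two factory names are hypothetical stand-ins for functions such as those produced by `grammar_provider` above:

errors = grammar_suite('./test_grammar',
                       parser_factory=get_grammar,           # hypothetical
                       transformer_factory=get_transformer)  # hypothetical
if errors:
    print(errors)
    sys.exit(1)
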
Example 12
def grammar_unit(test_unit,
                 parser_factory,
                 transformer_factory,
                 report='REPORT',
                 verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
            # raise AssertionError('%s-test %s for parser %s missing !?'
            #                      % (category, test_name, parser_name))
        return value

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))
        # lookahead_found = False
        #
        # def find_lookahead(p: Parser):
        #     nonlocal lookahead_found
        #     if not lookahead_found:
        #         lookahead_found = isinstance(p, Lookahead)
        #
        # parser[parser_name].apply(find_lookahead)
        # return lookahead_found

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True if the error merely occurred because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator, or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code
             for e in raw_errors} <= {
                 PARSER_LOOKAHEAD_FAILURE_ONLY,
                 AUTORETRIEVED_SYMBOL_NOT_CLEARED, PARSER_LOOKAHEAD_MATCH_ONLY
             } or (len(raw_errors) == 1 and
                   (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                    #  case 2:  mandatory lookahead failure at end of text
                    or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove the zombie node with the error message at the end,
            # but change its tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True,
                    reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                # parent.result = tuple(c for c in parent.children if c.tag_name != ZOMBIE_TAG)
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        # if not get_config_value('test_parallelization'):
        #     print('  Testing parser: ' + parser_name)

        track_history = get_config_value('history_tracking')
        try:
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not {clean_key(k)
                    for k in ast_tests} <= {clean_key(k)
                                            for k in match_tests}:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(ast_tests - match_tests), parser_name))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not {clean_key(k)
                    for k in cst_tests} <= {clean_key(k)
                                            for k in match_tests}:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' %
                    str(cst_tests - match_tests))

        # run match tests

        for test_name, test_code in tests.get('match', dict()).items():
            # if not get_config_value('test_parallelization'):
            #     print('    Test: ' + str(test_name))

            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [
                    e for e in cst.errors_sorted
                    if e.code not in POSSIBLE_ARTIFACTS
                ]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.:  %s\n\n%s\n\n' %
                    (test_name, parser_name, md_codeblock(test_code),
                     '\n'.join(str(m).replace('\n', '\n') for m in errors)))
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    e.args = ('Test %s of parser %s failed, because:\n%s' %
                              (test_name, parser_name, e.args[0]), )
                    raise e
                tests.setdefault('__ast__', {})[test_name] = ast
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if is_error(
                        max(e.code for e in ast_errors) if ast_errors else 0):
                    if ast_errors:
                        if errata: errata[-1] = errata[-1].rstrip('\n')
                        ast_errors.append('\n')
                        errata.append('\t' + '\n\t'.join(
                            str(msg).replace('\n', '\n\t\t')
                            for msg in ast_errors))

            if verbose:
                infostr = '    match-test "' + test_name + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e)))
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = '      cst-test "' + test_name + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e)))
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(
                            ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        # differ = difflib.Differ()
                        # difference = ''.join(differ.compare([compare_str + '\n'], [ast_str + '\n']))
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                            % (test_name, parser_name, '\n\t'.join(
                                test_code.split('\n')), compare_str, ast_str))
                    if verbose:
                        infostr = '      ast-test "' + test_name + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', dict()).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = '    fail-test  "' + test_name + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            try:
                os.mkdir(report)  # is a process-Lock needed, here?
            except FileExistsError:
                pass
            with open(os.path.join(report, unit_name + '.md'),
                      'w',
                      encoding='utf8') as f:
                f.write(test_report)
                f.flush()

    print('\n'.join(output))
    return errata
Example 13
def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: GrammarCallable,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node (CST) -> Node (abstract ST (AST))
                   compiler: CompilerCallable,  # Node (AST), Source -> Any
                   # out_source_data: list = NOPE,  # Tuple[str, SourceMapFunc]
                   *, preserve_AST: bool = False) \
        -> Tuple[Optional[Any], List[Error], Optional[Node]]:
    """Compiles a source in four stages:

    1. Pre-Processing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The later stages (AST-transformation and compilation) will only be
    invoked if no fatal errors occurred in any of the earlier stages of
    the processing pipeline.

    :param source: The input text for compilation or the name of a
            file containing the input text.
    :param preprocessor:  text -> text. A preprocessor function
            or None, if no preprocessor is needed.
    :param parser:  A parsing function or grammar class
    :param transformer:  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
    :param compiler: A compiler function or compiler class
            instance
    :param preserve_AST: Preserves the AST-tree.

    :returns: The result of the compilation as a 3-tuple
        (result, errors, abstract syntax tree). In detail:

        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. The root-node of the abstract syntax tree if ``preserve_AST`` is
            True or ``None`` otherwise.
    """
    ast = None  # type: Optional[Node]
    original_text = load_if_file(source)  # type: str
    source_name = source if is_filename(source) else 'source'
    compiler.source = original_text
    log_file_name = logfile_basename(
        source, compiler) if is_logging() else ''  # type: str
    if not hasattr(parser,
                   'free_char_parsefunc__') or parser.history_tracking__:
        # log only for custom parser/transformer/compilers
        log_syntax_trees = get_config_value('log_syntax_trees')
    else:
        log_syntax_trees = set()

    # preprocessing

    errors = []
    if preprocessor is None:
        source_text = original_text  # type: str
        source_mapping = gen_neutral_srcmap_func(source_text, source_name)
        # lambda i: SourceLocation(source_name, 0, i)    # type: SourceMapFunc
    else:
        _, source_text, source_mapping, errors = preprocessor(
            original_text, source_name)

    if has_errors(errors, FATAL):
        return None, errors, None

    # parsing

    syntax_tree = parser(source_text,
                         source_mapping=source_mapping)  # type: RootNode
    for e in errors:
        syntax_tree.add_error(None, e)
    syntax_tree.source = original_text
    syntax_tree.source_mapping = source_mapping
    if 'cst' in log_syntax_trees:
        log_ST(syntax_tree, log_file_name + '.cst')
    if parser.history_tracking__:
        log_parsing_history(parser, log_file_name)

    # assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
    #     str(syntax_tree) # Only valid if neither tokens nor whitespace are dropped early

    result = None
    if not is_fatal(syntax_tree.error_flag):

        # AST-transformation

        if is_error(syntax_tree.error_flag):
            # catch Python exception, because if an error has occurred
            # earlier, the syntax tree might not look like expected,
            # which could (fatally) break AST transformations.
            try:
                transformer(syntax_tree)
            except Exception as e:
                syntax_tree.new_error(
                    syntax_tree,
                    "AST-Transformation failed due to earlier parser errors. "
                    "Crash Message: %s: %s" % (e.__class__.__name__, str(e)),
                    AST_TRANSFORM_CRASH)
        else:
            transformer(syntax_tree)

        if 'ast' in log_syntax_trees:
            log_ST(syntax_tree, log_file_name + '.ast')

        if not is_fatal(syntax_tree.error_flag):
            if preserve_AST:
                ast = copy.deepcopy(syntax_tree)

            # Compilation

            if is_error(syntax_tree.error_flag):
                # assume Python crashes are merely a consequence of earlier
                # errors, so let's catch them
                try:
                    result = compiler(syntax_tree)
                except Exception as e:
                    # raise e
                    node = syntax_tree  # type: Node
                    if isinstance(compiler, Compiler) and compiler.context:
                        node = compiler.context[-1]
                    st = traceback.format_list(
                        traceback.extract_tb(e.__traceback__))
                    trace = ''.join(filter_stacktrace(st))
                    syntax_tree.new_error(
                        node,
                        "Compilation failed, most likely, due to errors earlier "
                        "in the processing pipeline. Crash Message: %s: %s\n%s"
                        % (e.__class__.__name__, str(e), trace),
                        COMPILER_CRASH)
            else:
                # assume Python crashes are programming mistakes, so let
                # the exceptions through
                result = compiler(syntax_tree)

    messages = syntax_tree.errors_sorted  # type: List[Error]
    # Obsolete, because RootNode adjusts error locations whenever an error is added:
    # adjust_error_locations(messages, original_text, source_mapping)
    return result, messages, ast
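
A pipeline sketch, reusing the EBNF helpers that appear in the `grammar_provider` example above (exact factory signatures are assumed):

result, messages, ast = compile_source(
    'expression = term { ("+" | "-") term }',  # toy EBNF source
    preprocessor=None,
    parser=get_ebnf_grammar(),
    transformer=get_ebnf_transformer(),
    compiler=get_ebnf_compiler(),
    preserve_AST=True)
if any(is_error(m.code) for m in messages):
    print('\n'.join(str(m) for m in messages))
else:
    print(result)  # e.g. the generated parser source
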
Example 14
def suspend_logging() -> str:
    """Suspends logging in the current thread. Returns the log-dir
    for resuming logging later."""
    save = get_config_value('log_dir')
    set_config_value('log_dir', '')
    return save
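
Sketch of the matching resume step, restoring the saved value by hand just as `local_log_dir` above does:

saved = suspend_logging()
try:
    ...  # work that should not be logged
finally:
    set_config_value('log_dir', saved)  # resume logging
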
Example 15
def log_parsing_history(grammar,
                        log_file_name: str = '',
                        html: bool = True) -> bool:
    """
    Writes a log of the parsing history of the most recently parsed document, if
    logging is turned on. Returns True, if that was the case and writing the
    history was successful.

    Parameters:
        grammar (Grammar):  The Grammar object from which the parsing history
            shall be logged.
        log_file_name (str):  The (base-)name of the log file to be written.
            If no name is given (default), then the class name of the grammar
            object will be used.
        html (bool):  If true (default), the log will be output as an HTML
            table, otherwise as plain text. (Browsers might take a few
            seconds or minutes to display the table for long histories.)
    """
    def write_log(history: List[str], log_name: str) -> None:
        htm = '.html' if html else ''
        path = os.path.join(log_dir(), log_name + "_parser.log" + htm)
        if os.path.exists(path):
            os.remove(path)
            # print('WARNING: Log-file "%s" already existed and was deleted.' % path)
        if history:
            with open(path, "w", encoding="utf-8") as f:
                if html:
                    f.write(HistoryRecord.HTML_LEAD_IN + '\n')
                    f.writelines(history)
                    f.write('\n</table>\n' + HistoryRecord.HTML_LEAD_OUT)
                else:
                    f.write("\n".join(history))

    def append_line(log: List[str], line: str) -> None:
        """Appends a line to a list of HTML table rows. Starts a new
        table every 100 rows to allow browser to speed up rendering.
        Does this really work...?"""
        log.append(line)
        if html and len(log) % 50 == 0:
            log.append('\n'.join(['</table>\n<table>',
                                  HistoryRecord.COLGROUP]))

    if not is_logging():
        return False

    if not log_file_name:
        name = grammar.__class__.__name__
        log_file_name = name[:-7] if name.lower().endswith('grammar') else name
    elif log_file_name.lower().endswith('.log'):
        log_file_name = log_file_name[:-4]

    history = ['<h1>Parsing history of "%s"</h1>' % log_file_name]  # type: List[str]
    LOG_SIZE_THRESHOLD = get_config_value('log_size_threshold')
    if len(grammar.history__) > LOG_SIZE_THRESHOLD:
        warning = (
            'Sorry, man, %iK history records is just too many! '
            'Only looking at the last %iK records.' %
            (len(grammar.history__) // 1000, LOG_SIZE_THRESHOLD // 1000))
        html_warning = '<p><strong>' + warning + '</strong></p>'
        history.append(html_warning)

    lead_in = '\n'.join(
        ['<table>', HistoryRecord.COLGROUP, HistoryRecord.HEADINGS])
    history.append(lead_in)

    for record in grammar.history__[-LOG_SIZE_THRESHOLD:]:
        line = record.as_html_tr() if html else (str(record) + '\n')
        append_line(history, line)

    write_log(history, log_file_name)
    return True
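
Usage sketch (assumes logging is turned on and that the grammar object has recorded a parsing history, e.g. with history tracking enabled as in `grammar_unit` above):

grammar = get_grammar()  # hypothetical grammar factory
grammar('1 + 2')         # parse something so that a history is recorded
if log_parsing_history(grammar, 'my_document'):
    print('history written to', log_dir())
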
Example 16
            if interpreter not in found:
                print('Interpreter ' + arguments[1] + ' not found.')
                sys.exit(1)
            else:
                interpreters.append(interpreter)
    else:
        interpreters = found

    cwd = os.getcwd()
    os.chdir(os.path.join(scriptdir, '..'))

    timestamp = time.time()

    run_doctests('toolkit')

    with instantiate_executor(get_config_value('test_parallelization'),
                              concurrent.futures.ProcessPoolExecutor) as pool:
        results = []

        # doctests
        for filename in os.listdir('DHParser'):
            if filename.endswith('.py') and filename not in \
                    ("foreign_typing.py", "shadow_cython.py", "versionnumber.py",
                     "__init__.py"):
                results.append(pool.submit(run_doctests, filename[:-3]))

        # unit tests
        for interpreter in interpreters:
            if run_cmd([interpreter.strip(), '--version']):
                for filename in os.listdir('tests'):
                    if filename.endswith('.py') and (