Example #1
File: dsl.py Project: jecki/DHParser
def load_compiler_suite(compiler_suite: str) -> \
        Tuple[PreprocessorFactoryFunc, ParserFactoryFunc,
              TransformerFactoryFunc, CompilerFactoryFunc]:
    """
    Extracts a compiler suite from file or string `compiler_suite`
    and returns it as a tuple (preprocessor, parser, ast, compiler).

    Returns:
        4-tuple (preprocessor function, parser class,
                 ast transformer function, compiler class)
    """
    global RX_SECTION_MARKER
    assert isinstance(compiler_suite, str)
    source = load_if_file(compiler_suite)
    dhpath = relative_path(os.path.dirname('.'), DHPARSER_PARENTDIR)
    imports = DHPARSER_IMPORTS.format(dhparser_parentdir=dhpath)
    if is_python_code(compiler_suite):
        sections = split_source(compiler_suite, source)
        _, imports, preprocessor_py, parser_py, ast_py, compiler_py, _ = sections
        # TODO: Compile in one step and pick parts from namespace later ?
        preprocessor = compile_python_object(imports + preprocessor_py,
                                             r'get_(?:\w+_)?preprocessor$')
        parser = compile_python_object(imports + parser_py, r'get_(?:\w+_)?grammar$')
        ast = compile_python_object(imports + ast_py, r'get_(?:\w+_)?transformer$')
    else:
        # Assume source is an ebnf grammar.
        # Is there really any reasonable application case for this?
        lg_dir = suspend_logging()
        compiler_py, messages, _ = compile_source(source, None, get_ebnf_grammar(),
                                                  get_ebnf_transformer(),
                                                  get_ebnf_compiler(compiler_suite, source))
        resume_logging(lg_dir)
        if has_errors(messages):
            raise DefinitionError(only_errors(messages), source)
        preprocessor = get_ebnf_preprocessor
        parser = get_ebnf_grammar
        ast = get_ebnf_transformer
    compiler = compile_python_object(imports + compiler_py, r'get_(?:\w+_)?compiler$')
    if callable(preprocessor) and callable(parser) and callable(ast) and callable(compiler):
        return preprocessor, parser, ast, compiler
    raise ValueError('Could not generate compiler suite from source code!')
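A minimal usage sketch (the file name "my_dsl.py" is a hypothetical placeholder for a compiler script generated by DHParser):

# Hedged sketch: 'my_dsl.py' is hypothetical; load_compiler_suite returns
# four factory callables, one per compilation stage.
preprocessor_factory, parser_factory, transformer_factory, compiler_factory = \
    load_compiler_suite('my_dsl.py')
grammar = parser_factory()           # Grammar instance for parsing
transform = transformer_factory()    # AST-transformation function
compiler = compiler_factory()        # Compiler instance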
Example #2
File: dsl.py Project: jecki/DHParser
def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
    """
    Returns a grammar object and the source code of the grammar from
    the given `grammar_representation`, which can be a file name,
    EBNF code, Python code, a Grammar-derived grammar class, or an
    instance of such a class (i.e. a grammar object already).
    """
    if isinstance(grammar_representation, str):
        # read grammar
        grammar_src = load_if_file(grammar_representation)
        if is_python_code(grammar_src):
            parser_py = grammar_src  # type: str
            messages = []            # type: List[Error]
        else:
            lg_dir = suspend_logging()
            result, messages, _ = compile_source(
                grammar_src, None,
                get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
            parser_py = cast(str, result)
            resume_logging(lg_dir)
        if has_errors(messages):
            raise DefinitionError(only_errors(messages), grammar_src)
        imports = DHPARSER_IMPORTS.format(
            dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
        grammar_class = compile_python_object(imports + parser_py, r'\w+Grammar$')
        if inspect.isclass(grammar_class) and issubclass(grammar_class, Grammar):
            parser_root = grammar_class()
        else:
            raise ValueError('Could not compile a Grammar class!')
    else:
        # assume that grammar_representation is a Grammar object or a Grammar class
        grammar_src = ''
        if isinstance(grammar_representation, Grammar):
            parser_root = grammar_representation
        else:
            # assume ``grammar_representation`` is a grammar class and get the root object
            parser_root = grammar_representation()
    return parser_root, grammar_src
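A short sketch of calling grammar_instance with EBNF code directly; the one-line grammar is made up for illustration:

# Hedged sketch: the grammar below is a made-up one-liner.
parser_root, grammar_src = grammar_instance('document = /[a-z]+/~\n')
# parser_root is a Grammar object ready for parsing;
# grammar_src holds the EBNF source it was compiled from.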
Example #3
File: dsl.py Project: jecki/DHParser
def compileDSL(text_or_file: str,
               preprocessor: Optional[PreprocessorFunc],
               dsl_grammar: Union[str, Grammar],
               ast_transformation: TransformationFunc,
               compiler: Compiler) -> Any:
    """
    Compiles a text in a domain specific language (DSL) with an
    EBNF-specified grammar. Returns the compiled text or raises a
    compilation error.

    Raises:
        CompilationError if any errors occurred during compilation
    """
    assert isinstance(text_or_file, str)
    assert isinstance(compiler, Compiler)

    parser, grammar_src = grammar_instance(dsl_grammar)
    result, messages, AST = compile_source(text_or_file, preprocessor, parser,
                                           ast_transformation, compiler)
    if has_errors(messages):
        src = load_if_file(text_or_file)
        raise CompilationError(only_errors(messages), src, grammar_src, AST, result)
    return result
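A hedged call sketch; all "my_*" names are hypothetical placeholders for a concrete DSL's grammar, transformer, and compiler:

# Hedged sketch: 'my_grammar_ebnf', 'my_transformer' and 'my_compiler'
# are hypothetical placeholders, not names from DHParser.
try:
    result = compileDSL('2 + 3 * 4',       # the DSL text to compile
                        None,              # no preprocessor needed
                        my_grammar_ebnf,   # EBNF source, file name or Grammar
                        my_transformer,    # AST-transformation function
                        my_compiler)       # Compiler instance
except CompilationError as e:
    print(e)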
Example #4
    def test_load_if_file(self):
        # an error should be raised if file expected but not found
        error_raised = False
        try:
            load_if_file('this_is_code_and_not_a_file')
        except FileNotFoundError:
            error_raised = True
        assert error_raised

        # multiline text will never be mistaken for a file
        assert load_if_file('this_is_code_and_not_a_file\n')

        # neither will text that does not look like a file name
        s = "this is code * and not a file"
        assert s == load_if_file(s)

        # not a file and not mistaken for a file
        assert self.code1 == load_if_file(self.code1)

        # not a file and not mistaken for a file either
        assert self.code2 == load_if_file(self.code2)

        # file correctly loaded
        assert self.code2 == load_if_file(self.filename)
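The test above pins down load_if_file's heuristic: a single-line string that could plausibly be a file name is treated as one (and must exist), while multi-line text or text containing characters unlikely in file names is returned verbatim. A rough sketch of such a heuristic, for illustration only (not DHParser's actual implementation):

import re

def load_if_file_sketch(text_or_file: str) -> str:
    # Illustrative sketch only; DHParser's real load_if_file() differs.
    # Multi-line text or text with characters that rarely occur in
    # file names is returned unchanged.
    if '\n' in text_or_file or re.search(r'[*?"<>|\s]', text_or_file):
        return text_or_file
    # Otherwise treat the string as a file name; a missing file
    # raises FileNotFoundError, just as the test expects.
    with open(text_or_file, 'r', encoding='utf-8') as f:
        return f.read()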
Example #5
File: testing.py Project: jecki/DHParser
def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
    r"""
    Extracts all defined symbols from an EBNF-grammar. This can be used to
    prepare grammar-tests. The symbols are grouped by the sections to which
    they belong and returned as an ordered dictionary of lists of strings,
    the keys of which are the section names.
    In order to define a section in the ebnf-source, add a comment-line
    starting with "#:", followed by the section name. It is recommended
    to use valid file names as section names. Example:

        #: components

        expression = term  { EXPR_OP~ term}
        term       = factor  { TERM_OP~ factor}
        factor     = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
        group      = "(" expression ")"


        #: leaf_expressions

        EXPR_OP    = /\+/ | /-/
        TERM_OP    = /\*/ | /\//
        SIGN       = /-/

        NUMBER     = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
        VARIABLE   = /[A-Za-z]/~

    If no sections have been defined in the comments, there will be only
    one group with the empty string as a key.

    :param ebnf_text_or_file: Either an ebnf-grammar or the file-name
        of an ebnf-grammar
    :return: Ordered dictionary mapping the section names of the
        grammar to lists of symbols that appear under that section.
    """
    def trim_section_name(name: str) -> str:
        return re.sub(r'[^\w-]', '_', name.replace('#:', '').strip())

    ebnf = load_if_file(ebnf_text_or_file)
    deflist = RX_DEFINITION_OR_SECTION.findall(ebnf)
    if not deflist:
        if ebnf_text_or_file.find('\n') < 0 and ebnf_text_or_file.endswith(
                '.ebnf'):
            deflist = ['#: ' + os.path.splitext(ebnf_text_or_file)[0]]
        else:
            deflist = ['#: ALL']
    symbols = collections.OrderedDict()  # type: SymbolsDictType
    if deflist[0][:2] != '#:':
        curr_section = ''
        symbols[curr_section] = []
    for df in deflist:
        if df[:2] == '#:':
            curr_section = trim_section_name(df)
            if curr_section in symbols:
                raise AssertionError('Section name must not be repeated: ' +
                                     curr_section)
            symbols[curr_section] = []
        else:
            symbols[curr_section].append(
                df)  # no worry, curr_section is always defined
    return symbols
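A hedged usage sketch of extract_symbols, reusing a shortened version of the grammar from the docstring above:

ebnf = r'''
#: components

expression = term  { EXPR_OP~ term}
term       = factor  { TERM_OP~ factor}

#: leaf_expressions

EXPR_OP    = /\+/ | /-/
'''
symbols = extract_symbols(ebnf)
# Expected shape (assuming the definition regex matches as documented):
# OrderedDict([('components', ['expression', 'term']),
#              ('leaf_expressions', ['EXPR_OP'])])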
Example #6
File: compile.py Project: jecki/DHParser
def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: GrammarCallable,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node (CST) -> Node (abstract ST (AST))
                   compiler: CompilerCallable,  # Node (AST), Source -> Any
                   # out_source_data: list = NOPE,  # Tuple[str, SourceMapFunc]
                   *, preserve_AST: bool = False) \
        -> Tuple[Optional[Any], List[Error], Optional[Node]]:
    """Compiles a source in four stages:

    1. Pre-Processing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The later stages (AST-transformation and compilation) will only be
    invoked if no fatal errors occurred in any of the earlier stages of
    the processing pipeline.

    :param source: The input text for compilation or the name of a
            file containing the input text.
    :param preprocessor:  text -> text. A preprocessor function
            or None, if no preprocessor is needed.
    :param parser:  A parsing function or grammar class
    :param transformer:  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
    :param compiler: A compiler function or compiler class
            instance
    :param preserve_AST: If True, the abstract syntax tree is preserved
            and returned as the third element of the result tuple.

    :returns: The result of the compilation as a 3-tuple
        (result, errors, abstract syntax tree). In detail:

        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. The root-node of the abstract syntax tree if ``preserve_AST`` is
            True or ``None`` otherwise.
    """
    ast = None  # type: Optional[Node]
    original_text = load_if_file(source)  # type: str
    source_name = source if is_filename(source) else 'source'
    compiler.source = original_text
    log_file_name = logfile_basename(
        source, compiler) if is_logging() else ''  # type: str
    if not hasattr(parser,
                   'free_char_parsefunc__') or parser.history_tracking__:
        # log only for custom parser/transformer/compilers
        log_syntax_trees = get_config_value('log_syntax_trees')
    else:
        log_syntax_trees = set()

    # preprocessing

    errors = []
    if preprocessor is None:
        source_text = original_text  # type: str
        source_mapping = gen_neutral_srcmap_func(source_text, source_name)
        # lambda i: SourceLocation(source_name, 0, i)    # type: SourceMapFunc
    else:
        _, source_text, source_mapping, errors = preprocessor(
            original_text, source_name)

    if has_errors(errors, FATAL):
        return None, errors, None

    # parsing

    syntax_tree = parser(source_text,
                         source_mapping=source_mapping)  # type: RootNode
    for e in errors:
        syntax_tree.add_error(None, e)
    syntax_tree.source = original_text
    syntax_tree.source_mapping = source_mapping
    if 'cst' in log_syntax_trees:
        log_ST(syntax_tree, log_file_name + '.cst')
    if parser.history_tracking__:
        log_parsing_history(parser, log_file_name)

    # assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
    #     str(syntax_tree)  # Only valid if neither tokens nor whitespace are dropped early

    result = None
    if not is_fatal(syntax_tree.error_flag):

        # AST-transformation

        if is_error(syntax_tree.error_flag):
            # catch Python exception, because if an error has occurred
            # earlier, the syntax tree might not look like expected,
            # which could (fatally) break AST transformations.
            try:
                transformer(syntax_tree)
            except Exception as e:
                syntax_tree.new_error(
                    syntax_tree,
                    "AST-Transformation failed due to earlier parser errors. "
                    "Crash Message: %s: %s" % (e.__class__.__name__, str(e)),
                    AST_TRANSFORM_CRASH)
        else:
            transformer(syntax_tree)

        if 'ast' in log_syntax_trees:
            log_ST(syntax_tree, log_file_name + '.ast')

        if not is_fatal(syntax_tree.error_flag):
            if preserve_AST:
                ast = copy.deepcopy(syntax_tree)

            # Compilation

            if is_error(syntax_tree.error_flag):
                # assume Python crashes are merely a consequence of earlier
                # errors, so let's catch them
                try:
                    result = compiler(syntax_tree)
                except Exception as e:
                    # raise e
                    node = syntax_tree  # type: Node
                    if isinstance(compiler, Compiler) and compiler.context:
                        node = compiler.context[-1]
                    st = traceback.format_list(
                        traceback.extract_tb(e.__traceback__))
                    trace = ''.join(filter_stacktrace(st))
                    syntax_tree.new_error(
                        node,
                        "Compilation failed, most likely, due to errors earlier "
                        "in the processing pipeline. Crash Message: %s: %s\n%s"
                        % (e.__class__.__name__, str(e), trace),
                        COMPILER_CRASH)
            else:
                # assume Python crashes are programming mistakes, so let
                # the exceptions through
                result = compiler(syntax_tree)

    messages = syntax_tree.errors_sorted  # type: List[Error]
    # Obsolete, because RootNode adjusts error locations whenever an error is added:
    # adjust_error_locations(messages, original_text, source_mapping)
    return result, messages, ast
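A hedged sketch that wires compile_source up with DHParser's own EBNF pipeline, the same combination load_compiler_suite uses above; ebnf_source is assumed to hold EBNF text or the name of an .ebnf file:

# Hedged sketch: 'ebnf_source' is an assumption (EBNF text or file name).
result, messages, ast = compile_source(
    ebnf_source,
    None,                    # no preprocessor
    get_ebnf_grammar(),      # parser (Grammar instance)
    get_ebnf_transformer(),  # AST-transformation
    get_ebnf_compiler(),     # compiler
    preserve_AST=True)       # keep the AST as the third return value
if has_errors(messages):
    for m in messages:
        print(m)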