Example #1
    def __init__(self, options_dict):
        o = dict(options_dict)

        options = {}
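        # Merge the user-supplied options over the declared defaults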
        for name, default in self._defaults.items():
            if name in o:
                value = o.pop(name)
                if (isinstance(default, bool)
                        and name not in ('cache', 'use_bytes', 'propagate_positions')):
                    value = bool(value)
            else:
                value = default

            options[name] = value

        if isinstance(options['start'], STRING_TYPE):
            options['start'] = [options['start']]

        self.__dict__['options'] = options

        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))

        if self.parser == 'earley' and self.transformer:
            raise ConfigurationError(
                'Cannot specify an embedded transformer when using the Earley algorithm. '
                'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)'
            )

        if o:
            raise ConfigurationError("Unknown options: %s" % o.keys())
Example #2
    def __init__(self, grammar, **options):
        self.options = LarkOptions(options)

        # Set regex or re module
        use_regex = self.options.regex
        if use_regex:
            if regex:
                re_module = regex
            else:
                raise ImportError(
                    '`regex` module must be installed if calling `Lark(regex=True)`.'
                )
        else:
            re_module = re

        # Some, but not all file-like objects have a 'name' attribute
        if self.options.source_path is None:
            try:
                self.source_path = grammar.name
            except AttributeError:
                self.source_path = '<string>'
        else:
            self.source_path = self.options.source_path

        # Drain file-like objects to get their contents
        try:
            read = grammar.read
        except AttributeError:
            pass
        else:
            grammar = read()

        cache_fn = None
        cache_md5 = None
        if isinstance(grammar, STRING_TYPE):
            self.source_grammar = grammar
            if self.options.use_bytes:
                if not isascii(grammar):
                    raise ConfigurationError(
                        "Grammar must be ascii only, when use_bytes=True")
                if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
                    raise ConfigurationError(
                        "`use_bytes=True` may have issues on python2. "
                        "Use `use_bytes='force'` to use it at your own risk.")

            if self.options.cache:
                if self.options.parser != 'lalr':
                    raise ConfigurationError(
                        "cache only works with parser='lalr' for now")

                unhashable = ('transformer', 'postlex', 'lexer_callbacks',
                              'edit_terminals')
                options_str = ''.join(k + str(v) for k, v in options.items()
                                      if k not in unhashable)
                from . import __version__
                s = grammar + options_str + __version__ + str(sys.version_info[:2])
                cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest()

                if isinstance(self.options.cache, STRING_TYPE):
                    cache_fn = self.options.cache
                else:
                    if self.options.cache is not True:
                        raise ConfigurationError(
                            "cache argument must be bool or str")
                    # Python2.7 doesn't support * syntax in tuples
                    cache_fn = (tempfile.gettempdir() +
                                '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2]))

                if FS.exists(cache_fn):
                    logger.debug('Loading grammar from cache: %s', cache_fn)
                    # Remove options that aren't relevant for loading from cache
                    for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
                        del options[name]
                    with FS.open(cache_fn, 'rb') as f:
                        old_options = self.options
                        try:
                            file_md5 = f.readline().rstrip(b'\n')
                            cached_used_files = pickle.load(f)
                            if (file_md5 == cache_md5.encode('utf8')
                                    and verify_used_files(cached_used_files)):
                                cached_parser_data = pickle.load(f)
                                self._load(cached_parser_data, **options)
                                return
                        except Exception:  # We should probably narrow down which errors we catch here.
                            logger.exception(
                                "Failed to load Lark from cache: %r. We will try to carry on."
                                % cache_fn)

                            # In theory, the Lark instance might have been messed up by the call to `_load`.
                            # In practice, the only relevant thing that might have been overridden should be `options`.
                            self.options = old_options

            # Parse the grammar file and compose the grammars
            self.grammar, used_files = load_grammar(
                grammar, self.source_path, self.options.import_paths,
                self.options.keep_all_tokens)
        else:
            assert isinstance(grammar, Grammar)
            self.grammar = grammar

        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                self.options.lexer = 'contextual'
            elif self.options.parser == 'earley':
                if self.options.postlex is not None:
                    logger.info(
                        "postlex can't be used with the dynamic lexer, so we use standard instead. "
                        "Consider using lalr with contextual instead of earley"
                    )
                    self.options.lexer = 'standard'
                else:
                    self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
        if isinstance(lexer, type):
            # XXX Is this really important? Maybe just ensure interface compliance
            assert issubclass(lexer, Lexer)
        else:
            assert_config(lexer, ('standard', 'contextual', 'dynamic', 'dynamic_complete'))
            if self.options.postlex is not None and 'dynamic' in lexer:
                raise ConfigurationError(
                    "Can't use postlex with a dynamic lexer. Use standard or contextual instead"
                )

        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            assert_config(
                self.options.parser, ('earley', 'cyk'),
                "%r doesn't support disambiguation. Use one of these parsers instead: %s"
            )

        if self.options.priority == 'auto':
            self.options.priority = 'normal'

        if self.options.priority not in _VALID_PRIORITY_OPTIONS:
            raise ConfigurationError(
                "invalid priority option: %r. Must be one of %r" %
                (self.options.priority, _VALID_PRIORITY_OPTIONS))
        assert self.options.ambiguity not in (
            'resolve__antiscore_sum',
        ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
        if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
            raise ConfigurationError(
                "invalid ambiguity option: %r. Must be one of %r" %
                (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))

        if self.options.postlex is not None:
            terminals_to_keep = set(self.options.postlex.always_accept)
        else:
            terminals_to_keep = set()

        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(
            self.options.start, terminals_to_keep)

        if self.options.edit_terminals:
            for t in self.terminals:
                self.options.edit_terminals(t)

        self._terminals_dict = {t.name: t for t in self.terminals}

        # If the user asked to invert the priorities, negate them all here.
        # This replaces the old 'resolve__antiscore_sum' option.
        if self.options.priority == 'invert':
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = -rule.options.priority
        # Else, if the user asked to disable priorities, strip them from the
        # rules. This allows the Earley parsers to skip an extra forest walk
        # for improved performance, if you don't need them (or didn't specify any).
        elif self.options.priority is None:
            for rule in self.rules:
                if rule.options.priority is not None:
                    rule.options.priority = None

        # TODO Deprecate lexer_callbacks?
        self.lexer_conf = LexerConf(self.terminals,
                                    re_module,
                                    self.ignore_tokens,
                                    self.options.postlex,
                                    self.options.lexer_callbacks,
                                    self.options.g_regex_flags,
                                    use_bytes=self.options.use_bytes)

        if self.options.parser:
            self.parser = self._build_parser()
        elif lexer:
            self.lexer = self._build_lexer()

        if cache_fn:
            logger.debug('Saving grammar to cache: %s', cache_fn)
            with FS.open(cache_fn, 'wb') as f:
                f.write(cache_md5.encode('utf8') + b'\n')
                pickle.dump(used_files, f)
                self.save(f)
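
Example #2 is the constructor that ties everything together: it selects the `re`/`regex` module, reads the grammar source, optionally loads or saves an LALR cache keyed on an MD5 of the grammar, options and version, resolves the `auto` defaults for lexer, ambiguity and priority, compiles the EBNF grammar into BNF, and builds the parser. From the caller's side only the public keyword arguments matter; a small usage sketch follows (the grammar and the choice of `cache=True` are just for illustration):

from lark import Lark

grammar = r"""
    start: NAME "=" NUMBER
    NAME: /[a-z]+/
    NUMBER: /\d+/
    %import common.WS
    %ignore WS
"""

# parser='lalr' selects the contextual lexer by default and is required for cache=True.
parser = Lark(grammar, parser='lalr', cache=True)
tree = parser.parse("answer = 42")
print(tree.pretty())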
Example #3
    def __setattr__(self, name, value):
        assert_config(name, self.options.keys(),
                      "%r isn't a valid option. Expected one of: %s")
        self.options[name] = value
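
Example #3 redirects attribute assignment on `LarkOptions` into the underlying `options` dict and rejects unknown names, which is what allows Example #2 to write `self.options.lexer = 'contextual'`. Below is a standalone sketch of the same pattern using only the standard library; `AttrOptions` and its option set are made up for the example.

# Illustrative sketch of attribute-style access over a validated options dict
# (not Lark's actual class; the names are invented for this example).
class AttrOptions:
    _valid = {'lexer', 'parser', 'ambiguity'}

    def __init__(self, **options):
        # Bypass __setattr__ when creating the backing dict itself.
        self.__dict__['options'] = dict.fromkeys(self._valid)
        for name, value in options.items():
            setattr(self, name, value)

    def __getattr__(self, name):
        try:
            return self.options[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        if name not in self._valid:
            raise ValueError("%r isn't a valid option. Expected one of: %s"
                             % (name, sorted(self._valid)))
        self.options[name] = value

opts = AttrOptions(parser='lalr')
opts.lexer = 'contextual'       # accepted and stored in opts.options
# opts.grammar = '...'          # would raise ValueError: not a valid option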