Python SourceData Examples, libxyz.parser.SourceData Python Examples

Example #1

0

Show file

File: regexp.py Project: me-vlad/xyzcmd

    def parse(self, source):
        """
        Parse config line by line

        Every line read from the source is stripped; blank lines are
        skipped. Each remaining line is tried against the regular
        expressions found when iterating self.cbpool, and for the first
        expression that matches, the object stored under it in self.cbpool
        is called with the match result. Remaining expressions are not
        tried for that line.

        @param source: Parsing source, passed as is to SourceData

        @raise ParseError: If a callback raises XYZValueError, or if a
                           non-blank line is matched by no expression
        """

        _source = SourceData(source, bytes=False)

        # Lines are numbered from 1; blank lines are counted as well so that
        # reported numbers correspond to the lines of the source
        for _lineno, _line in enumerate(_source, 1):
            _line = _line.strip()

            # Empty line
            if not _line:
                continue

            for _regexp in self.cbpool:
                _res = _regexp.search(_line)

                if _res is None:
                    continue

                try:
                    self.cbpool[_regexp](_res)
                except XYZValueError as e:
                    raise ParseError(_(u"%s: parse error on line %d: %s")
                                     % (_source.desc(), _lineno, e))

                # First matching expression handles the line
                break
            else:
                # Loop ran to completion: nothing matched this line
                raise ParseError(_(u"Unmatched line %d: %s") %
                                 (_lineno, _line))

Example #2

0

Show file

File: lexer.py Project: me-vlad/xyzcmd

    def __init__(self, source, tokens, comment=u"#", macro=u"&"):
        """
        Prepare lexer state for scanning the given source

        @param source: Parsing source. If file object is passed, it must be
                       closed by caller function after parsing completes.
        @type source: string, file-like object or SourceData object

        @param tokens: List of tokens
        @type tokens: sequence

        @param comment: Comment char
        @param macro: Macros char
        """

        # Anything that is not a SourceData yet gets wrapped into one
        if not isinstance(source, SourceData):
            source = SourceData(source)

        self.sdata = source
        self.tokens = tokens
        self.comment = comment
        self.macro = macro

        # Special characters
        self._escapechar = u"\\"
        self._xqchar = u"'"

        # Multiline quote bookkeeping: number of quote chars that make up
        # the delimiter and how many of them were seen in a row so far
        self._xqcount = 3
        self._xqtotal = 0
        self._skip_next = 0

        # Token being assembled
        self._idt = []

        # Scanner flags
        self._in_comment = False
        self._in_xquote = False
        self._in_quote = False
        self._escaped = False
        # Should be set to True when parsing id can use escaped characters
        self._can_escape = False
        # Should be set to True when done parsing
        self._done = False

Example #3

0

Show file

File: lexer.py Project: me-vlad/xyzcmd

class Lexer(object):
    """
    Lexical analyzer

    Lexer rules:
    -----------
    * Blank chars are usually ignored. Except from in quotes.
    * Quote can be one-line: "quoted value", or multiline:
      '''quoted value1,
         quoted value2,
      '''
    * New-line char ends commented line if any.
    * Values can be provided as simple literals or quoted ones.
    * If value contains spaces or any other non-alphanumeric values it is better
      to quote it or escape it using escapechar.
    * Variable can take list of values, separated by comma
    * Escaping can only be used in rval position.

    Macros:
    ------
    Macros are special internal variables that get expanded upon parsing.
    Macro definition is similar to variable definition, but macro char
    (default '&') is prepended to var name:
    &macro = value
    var = &macro
    """

    # Token types returned by lexer() as the first tuple element
    TOKEN_IDT = 0
    TOKEN_MACRO = 1

    def __init__(self, source, tokens, comment=u"#", macro=u"&"):
        """
        @param source: Parsing source. If file object is passed, it must be
                       closed by caller function after parsing completes.
        @type source: string, file-like object or SourceData object

        @param tokens: List of tokens
        @type tokens: sequence

        @param comment: Comment char
        @param macro: Macros char
        """

        if isinstance(source, SourceData):
            self.sdata = source
        else:
            self.sdata = SourceData(source)

        self.tokens = tokens
        self.comment = comment
        self.macro = macro

        self._escapechar = u"\\"
        # Char that, repeated _xqcount times, delimits a multiline quote
        self._xqchar = u"'"
        self._xqcount = 3
        # Number of consecutive _xqchar chars seen so far
        self._xqtotal = 0
        # Number of upcoming chars that must bypass multiline quote
        # detection, because they were pushed back after a false start
        self._skip_next = 0

        # Should be set to True when done parsing
        self._done = False
        # Should be set to True when parsing id can use escaped characters
        self._can_escape = False
        self._escaped = False
        self._in_quote = False
        self._in_xquote = False
        self._in_comment = False
        # Keeps next token
        self._idt = []

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def lexer(self):
        """
        Scan input for lexemes and return to parser

        @return: tuple (token_type, token_value), where token_type is
                 TOKEN_IDT or TOKEN_MACRO. None is returned when the lexer
                 was ordered to stop via done(), or when input is exhausted
                 and no token is pending.

        @raise LexerError: If new-line is met inside a one-line quote
        """

        def _token_type(tok):
            """
            Determine token type

            Token starting with macro char is a macro; the macro char
            itself is stripped from the returned value.
            """

            _type = self.TOKEN_IDT
            _tok = tok

            if tok and self.macro and tok[0] == self.macro:
                _type = self.TOKEN_MACRO
                _tok = tok[1:]

            return (_type, _tok)

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # Set when a quote was opened during this call, so that an empty
        # quoted value still produces an (empty) token
        _quoted = False

        for char in self.sdata:
            if self._done:
                self.unget(char)
                return None

            # Everything up to the end of line belongs to the comment
            if self._in_comment and char != u"\n":
                continue

            if self._skip_next == 0:
                if 0 < self._xqtotal < self._xqcount:
                    if char != self._xqchar:
                        # Put read-ahead chars back
                        _back_tk = "%s%s" %(self._xqchar * self._xqtotal, char)
                        self.unget(_back_tk)
                        # Pushed back chars must be treated as plain ones
                        # when they are read again
                        self._skip_next = len(_back_tk)
                        self._xqtotal = 0
                        continue

                if char == self._xqchar:
                    self._xqtotal += 1

                    # Assembled xquote
                    if self._xqtotal == self._xqcount:
                        if self._in_xquote:
                            # Finishing
                            self._in_xquote = False
                        else:
                            # Beginning
                            self._in_xquote = True
                            _quoted = True

                        self._xqtotal = 0

                    continue
            else:
                self._skip_next -= 1

            # Inside multiline quote every char is taken literally
            if self._in_xquote:
                self._idt.append(char)
                continue

            # Escape only when allowed, usually in values
            if self._can_escape:
                if self._escaped:
                    self._idt.append(char)
                    self._escaped = False
                    continue

                if char == self._escapechar:
                    self._escaped = True
                    continue

            if char == u"\n":
                if self._in_quote:
                    raise LexerError(_(u"Unterminated quote"))

                # New-line always ends a comment. This must not depend on
                # whether a token is pending: otherwise, when new-line is
                # not one of the tokens, input like 'x#c' would leave the
                # comment flag set and the whole next line would be skipped.
                self._in_comment = False

                _token = None

                if self._idt or _quoted:
                    _token = u"".join(self._idt)
                    self._idt = []
                    _quoted = False

                if char in self.tokens:
                    if _token is not None:
                        # Return pending token first, new-line is returned
                        # by the next call
                        self.unget(char)
                    else:
                        _token = char

                if _token is not None:
                    return _token_type(_token)
                else:
                    continue

            if char == u'"':
                if self._in_quote:
                    self._in_quote = False
                else:
                    self._in_quote = True
                    _quoted = True

                continue

            if self._in_quote:
                self._idt.append(char)
                continue

            if char in self.tokens or char.isspace():
                _token = None

                # Check if we finished assembling the token
                if self._idt or _quoted:
                    _token = u"".join(self._idt)
                    self._idt = []
                    _quoted = False
                if not char.isspace():
                    if _token is not None:
                        # Return pending token first, the token char is
                        # returned by the next call
                        self.unget(char)
                    else:
                        _token = char

                if _token is not None:
                    return _token_type(_token)
                else:
                    continue

            if char == self.comment and not self._in_xquote:
                # skip to the EOL
                self._in_comment = True
                continue

            self._idt.append(char)

        # Input exhausted: flush whatever has been assembled so far
        if self._idt:
            _token = u"".join(self._idt)
            self._idt = []
            return _token_type(_token)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def get_idt(self):
        """
        Return current state of token buffer

        @return: The internal list of chars itself, not a copy
        """

        return self._idt

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def done(self):
        """
        Order lexer to stop processing

        Next lexer() call pushes the char it reads back to the source
        and returns None.
        """

        self._done = True

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def unget(self, token):
        """
        Put read token back to input stream

        @param token: Chars to hand back to the underlying source object
        """

        self.sdata.unget(token)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def escaping_on(self):
        """
        Enable escaping
        """

        self._can_escape = True

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def escaping_off(self):
        """
        Disable escaping
        """

        self._can_escape = False