def parse(self, source):
    """
    Parse configuration source line by line.

    Each non-empty line is matched against the regexps registered in
    self.cbpool; the first matching regexp's callback is invoked with
    the match object.

    @param source: Parsing source (handed to SourceData)
    @raise ParseError: if a callback raises XYZValueError, or if a line
                       matches none of the registered regexps
    """

    _lineno = 0
    _source = SourceData(source, bytes=False)

    for _line in _source:
        _lineno += 1
        _line = _line.strip()

        # Empty line
        if not _line:
            continue

        _matched = False

        for _regexp in self.cbpool:
            _res = _regexp.search(_line)

            if _res is not None:
                _matched = True

                try:
                    self.cbpool[_regexp](_res)
                # NOTE: `except X as e` (not the Python-2-only `except X, e`)
                # keeps this valid on Python 2.6+ and Python 3
                except XYZValueError as e:
                    raise ParseError(_(u"%s: parse error on line %d: %s")
                                     % (_source.desc(), _lineno, e))
                else:
                    # First matching regexp wins; stop scanning the pool
                    break

        if not _matched:
            raise ParseError(_(u"Unmatched line %d: %s") % (_lineno, _line))
def __init__(self, source, tokens, comment=u"#", macro=u"&"):
    """
    @param source: Parsing source. If file object is passed, it must be
    closed by caller function after parsing completes.
    @type source: string, file-like object or SourceData object

    @param tokens: List of tokens
    @type tokens: sequence

    @param comment: Comment char
    @param macro: Macros char
    """

    # Wrap raw sources in SourceData, pass prepared ones through as-is
    self.sdata = source if isinstance(source, SourceData) \
                 else SourceData(source)

    self.tokens = tokens
    self.comment = comment
    self.macro = macro

    # Special characters
    self._escapechar = u"\\"
    self._xqchar = u"'"

    # Extended (multiline) quote bookkeeping: a full xquote is
    # _xqcount consecutive _xqchar chars; _xqtotal counts those seen so far
    self._xqcount = 3
    self._xqtotal = 0

    # How many already-processed (re-fed) chars to pass through untouched
    self._skip_next = 0

    # Should be set to True when done parsing
    self._done = False

    # Should be set to True when parsing id can use escaped characters
    self._can_escape = False
    self._escaped = False

    # Quoting state flags
    self._in_quote = False
    self._in_xquote = False
    self._in_comment = False

    # Keeps next token
    self._idt = []
class Lexer(object):
    """
    Lexical analyzer

    Lexer rules:
    -----------

    * Blank chars are usually ignored. Except from in quotes.

    * Quote can be one-line: "quoted value", or multiline:
      '''
      quoted value1,
      quoted value2,
      '''

    * New-line char ends commented line if any.

    * Values can be provided as simple literals or quoted ones.

    * If value contains spaces or any other non-alphanumeric values it is
      better to quote it or escape it using escapechar.

    * Variable can take list of values, separated by comma

    * Escaping can only be used in rval position.

    Macros:
    ------
    Macros are special internal variables that get expanded upon parsing.
    Macro definition is similar to variable definition, but macro char
    (default '&') is prepended to var name:

    &macro = value
    var = &macro
    """

    # Token types returned by lexer()
    TOKEN_IDT = 0    # plain identifier/literal
    TOKEN_MACRO = 1  # macro reference (macro char already stripped)

    def __init__(self, source, tokens, comment=u"#", macro=u"&"):
        """
        @param source: Parsing source. If file object is passed, it must be
        closed by caller function after parsing completes.
        @type source: string, file-like object or SourceData object

        @param tokens: List of tokens
        @type tokens: sequence

        @param comment: Comment char
        @param macro: Macros char
        """

        if isinstance(source, SourceData):
            self.sdata = source
        else:
            self.sdata = SourceData(source)

        self.tokens = tokens
        self.comment = comment
        self.macro = macro

        self._escapechar = u"\\"
        # Extended (multiline) quote: _xqcount consecutive _xqchar chars;
        # _xqtotal counts how many have been seen so far
        self._xqchar = u"'"
        self._xqcount = 3
        self._xqtotal = 0
        # Number of re-fed (ungot) chars to pass through without re-scanning
        self._skip_next = 0

        # Should be set to True when done parsing
        self._done = False

        # Should be set to True when parsing id can use escaped characters
        self._can_escape = False
        self._escaped = False

        self._in_quote = False
        self._in_xquote = False
        self._in_comment = False

        # Keeps next token
        self._idt = []

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def lexer(self):
        """
        Scan input for lexemes and return to parser

        @return: tuple (token_type, token_value)
        """

        def _token_type(tok):
            """
            Determine token type
            """

            _type = self.TOKEN_IDT
            _tok = tok

            # A leading macro char marks a macro reference; strip it
            if tok and self.macro and tok[0] == self.macro:
                _type = self.TOKEN_MACRO
                _tok = tok[1:]

            return (_type, _tok)

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # True while the token being assembled originated from a quote,
        # so an empty quoted string still yields a token
        _quoted = False

        for char in self.sdata:
            if self._done:
                # Parser ordered a stop: push char back and finish
                self.unget(char)
                return None

            # Inside a comment everything up to newline is discarded
            if self._in_comment and char != u"\n":
                continue

            if self._skip_next == 0:
                # Partially assembled xquote: 1..(_xqcount - 1) quote chars seen
                if 0 < self._xqtotal < self._xqcount:
                    if char != self._xqchar:
                        # Not an xquote after all.
                        # Put read-ahead chars back
                        _back_tk = "%s%s" % (self._xqchar * self._xqtotal,
                                             char)
                        self.unget(_back_tk)
                        # Re-fed chars must not be re-counted as quote chars
                        self._skip_next = len(_back_tk)
                        self._xqtotal = 0
                        continue

                if char == self._xqchar:
                    self._xqtotal += 1

                    # Assembled xquote
                    if self._xqtotal == self._xqcount:
                        if self._in_xquote:
                            # Finishing
                            self._in_xquote = False
                        else:
                            # Beginning
                            self._in_xquote = True
                            _quoted = True

                        self._xqtotal = 0

                    continue
            else:
                # Char was ungot above: consume it literally
                self._skip_next -= 1

            # Inside a multiline quote every char is literal
            if self._in_xquote:
                self._idt.append(char)
                continue

            # Escape only when allowed, usually in values
            if self._can_escape:
                if self._escaped:
                    # Previous char was escapechar: take this one literally
                    self._idt.append(char)
                    self._escaped = False
                    continue

                if char == self._escapechar:
                    self._escaped = True
                    continue

            if char == u"\n":
                # Single-line quotes may not span lines
                if self._in_quote:
                    raise LexerError(_(u"Unterminated quote"))

                _token = None

                if self._idt or _quoted:
                    # Newline terminates the pending token
                    _token = u"".join(self._idt)
                    self._idt = []
                    _quoted = False
                else:
                    # Nothing pending: newline just ends any comment
                    self._in_comment = False

                if char in self.tokens:
                    if _token is not None:
                        # Deliver pending token first, newline comes next
                        self.unget(char)
                    else:
                        _token = char

                if _token is not None:
                    return _token_type(_token)
                else:
                    continue

            # Single-line quote delimiter
            if char == u'"':
                if self._in_quote:
                    self._in_quote = False
                else:
                    self._in_quote = True
                    _quoted = True

                continue

            # Inside a single-line quote every char is literal
            if self._in_quote:
                self._idt.append(char)
                continue

            if char in self.tokens or char.isspace():
                _token = None

                # Check if we finished assembling the token
                if self._idt or _quoted:
                    _token = u"".join(self._idt)
                    self._idt = []
                    _quoted = False

                if not char.isspace():
                    # char is itself a token
                    if _token is not None:
                        # Deliver pending token first, re-feed this one
                        self.unget(char)
                    else:
                        _token = char

                if _token is not None:
                    return _token_type(_token)
                else:
                    continue

            if char == self.comment and not self._in_xquote:
                # skip to the EOL
                self._in_comment = True
                continue

            # Ordinary char: accumulate into the pending token
            self._idt.append(char)

        # End of input: flush any pending token
        if self._idt:
            _token = u"".join(self._idt)
            self._idt = []
            return _token_type(_token)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def get_idt(self):
        """
        Return current state of token buffer
        """

        return self._idt

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def done(self):
        """
        Order lexer to stop processing
        """

        self._done = True

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def unget(self, token):
        """
        Put read token back to input stream
        """

        self.sdata.unget(token)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def escaping_on(self):
        """
        Enable escaping
        """

        self._can_escape = True

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def escaping_off(self):
        """
        Disable escaping
        """

        self._can_escape = False