Code example #1
File: lexer.py  Project: gcarothers/lepl
def __init__(self, matcher, tokens, alphabet, discard,
             t_regexp=None, s_regexp=None):
    '''
    matcher is the head of the original matcher graph, which will be called
    with a tokenised stream.

    tokens is the set of `Token` instances that define the lexer.

    alphabet is the alphabet for which the regexps are defined.

    discard is the regular expression for spaces (which are silently
    dropped if no token can be matched).

    t_regexp and s_regexp are internally compiled state, used in cloning,
    and should not be provided by non-cloning callers.
    '''
    super(Lexer, self).__init__(TOKENS, TokenNamespace)
    if t_regexp is None:
        unique = {}
        for token in tokens:
            token.compile(alphabet)
            self._debug(fmt('Token: {0}', token))
            # deduplicating by id_ just reduces the work for the regexp compiler
            unique[token.id_] = token
        t_regexp = Compiler.multiple(
            alphabet,
            [(t.id_, t.regexp)
             for t in unique.values() if t.regexp is not None]).dfa()
    if s_regexp is None and discard is not None:
        s_regexp = Compiler.single(alphabet, discard).dfa()
    self._arg(matcher=matcher)
    self._arg(tokens=tokens)
    self._arg(alphabet=alphabet)
    self._arg(discard=discard)
    self._karg(t_regexp=t_regexp)
    self._karg(s_regexp=s_regexp)
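The constructor above merges all token regexps into one compiled automaton (t_regexp), keyed by token id, and compiles the discard pattern into a second one (s_regexp). The sketch below is a minimal, self-contained illustration of how such a pair of automata could drive tokenisation, as the docstring describes (try a token; otherwise silently drop discardable text; otherwise fail). It uses Python's re module as a stand-in for the Compiler/DFA machinery, and the names tokenise, token_patterns and discard_pattern are hypothetical, not part of LEPL.

import re

# Hypothetical stand-ins for the compiled state: one alternation that tags each
# match with a token id (mirroring Compiler.multiple(...).dfa()), and one
# pattern for material that is silently discarded (mirroring Compiler.single).
token_patterns = {1: r'[0-9]+', 2: r'[a-z]+'}   # id_ -> regexp, as in `unique`
discard_pattern = r'\s+'

t_regexp = re.compile('|'.join('(?P<t%d>%s)' % (id_, rx)
                               for (id_, rx) in token_patterns.items()))
s_regexp = re.compile(discard_pattern)

def tokenise(text):
    '''Yield (token id, lexeme) pairs; drop discardable text; fail otherwise.'''
    pos = 0
    while pos < len(text):
        match = t_regexp.match(text, pos)
        if match:
            id_ = int(match.lastgroup[1:])      # recover the token id
            yield (id_, match.group())
            pos = match.end()
            continue
        space = s_regexp.match(text, pos)
        if space:                               # silently dropped
            pos = space.end()
            continue
        raise ValueError('no token matches at offset %d' % pos)

print(list(tokenise('abc 123')))   # [(2, 'abc'), (1, '123')]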
Code example #2
def binary_parser(*regexps):
    '''
    Parse a set of binary regular expressions, returning the associated Regexp.
    '''
    return Compiler.multiple(BINARY,
                [Labelled(BINARY, label, *__compiled_binary_parser(text))
                 for (label, text) in regexps])