Example #1
    def __call__(self, tokenStream):
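        # Copy through every token whose class is not in the ignore set,
        # preserving order; this pass never reports any issues.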
        tokens = PrettyPrintingList()
        for token in tokenStream:
            if token.__class__ not in self._ignore:
                tokens.append(token)

        return (tokens, [])
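
A minimal way to exercise this filter pass, assuming stand-in token classes and a plain list in place of PrettyPrintingList (both are illustrative assumptions; only the method body appears above):

   class Whitespace: pass
   class Comment: pass
   class Name: pass

   class IgnoreFilter:
      # Illustrative host class; the real one and its _ignore setup are not shown.
      def __init__(self, ignore):
         self._ignore = ignore

      def __call__(self, tokenStream):
         tokens = []
         for token in tokenStream:
            if token.__class__ not in self._ignore:
               tokens.append(token)
         return (tokens, [])

   filterPass = IgnoreFilter(ignore=[Whitespace, Comment])
   (kept, issues) = filterPass([Name(), Whitespace(), Comment(), Name()])
   assert all(isinstance(t, Name) for t in kept) and issues == []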
Example #2
   def __call__(self, source):
      
      # This approach is more memory intensive but so much simpler to get right.
      characterStream = PrettyPrintingList()
      
      while True:
         # Be sure to get the position before reading the character.
         position = source.tell()
         char = source.read(1)

         if not char:
            break
         characterStream.append((char, position))
      
      characterStream.append((EOFSymbol, source.tell()))
      return (characterStream, [])
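
To see what this scanning pass yields, it can be fed an in-memory stream; the host class and the EOFSymbol stand-in below are assumptions for illustration, since only the method body appears above:

   import io

   EOFSymbol = object()  # stand-in for the real end-of-file sentinel

   class CharacterScanner:
      def __call__(self, source):
         characterStream = []
         while True:
            position = source.tell()
            char = source.read(1)
            if not char:
               break
            characterStream.append((char, position))
         characterStream.append((EOFSymbol, source.tell()))
         return (characterStream, [])

   (chars, issues) = CharacterScanner()(io.StringIO("ab"))
   # chars == [('a', 0), ('b', 1), (EOFSymbol, 2)] and issues == []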
Example #3
   def __call__(self, charStream):
      tokenStream = PrettyPrintingList()
      issues = []

      # This offset could be done away with by iterating over the stream instead.
      offset = 0

      while offset < len(charStream):
         (numCharsMatched, tokenType) = self._longestMatch(charStream[offset:])

         if numCharsMatched:
            if tokenType is not None:
               # XXX should this be an assert on tokenType?
               startIndex = charStream[offset][1]
               try:
                  endIndex = charStream[offset + numCharsMatched][1]
                  matched = charStream[offset : offset + numCharsMatched]
               except IndexError:
                  # If the file doesn't end with a newline.
                  endIndex = charStream[offset + numCharsMatched - 1][1]
                  matched = charStream[offset : offset + numCharsMatched - 1]

               # XXX this is kind of ugly. Only certain token types need to
               # hold on to the original character information for their
               # values. In some cases this even involves re-scanning (eg:
               # numeric literals).
               lexeme = "".join([item[0] for item in matched])
               nextTok = tokenType(startIndex, endIndex, lexeme)

               tokenStream.append(nextTok)
            offset += numCharsMatched
         else:
            # XXX Lots more smarts here in error recovery.
            tokenStream = None
            issues.append(UnexpectedSymbolError(charStream[offset][1],
                                                charStream[offset][1]))
            break

      # XXX just hack in the insertion of BOF and EOF for now.
      if tokenStream is not None:
         tokenStream.insert(0, token.BOF())
         tokenStream.append(token.EOF())
      return (tokenStream, issues)
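
The key dependency here is _longestMatch, which evidently returns a (numCharsMatched, tokenType) pair: the length of the longest prefix any rule accepts, with a tokenType of None for matches that produce no token (such as whitespace, given that offset += numCharsMatched sits outside the inner if). A regex-based sketch of that contract, with a made-up rule table, might look like:

   import re

   # Hypothetical rule table: (compiled pattern, token type), where a token
   # type of None means "match but emit nothing" (e.g. whitespace).
   RULES = [
      (re.compile(r"[ \t\n]+"), None),
      (re.compile(r"[0-9]+"), "Number"),     # stand-ins for real token classes
      (re.compile(r"[A-Za-z_]\w*"), "Name"),
   ]

   def longestMatch(charStream):
      # charStream is a list of (char, position) pairs as built by the
      # character-scanning pass; skip non-string entries such as EOFSymbol.
      text = "".join(item[0] for item in charStream if isinstance(item[0], str))
      best = (0, None)
      for (pattern, tokenType) in RULES:
         m = pattern.match(text)
         if m and m.end() > best[0]:
            best = (m.end(), tokenType)
      return best

   assert longestMatch([('i', 0), ('f', 1), (' ', 2)]) == (2, "Name")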