Esempio n. 1
0
 def tokenize(self, s):
     '''Split s into a list of consecutive tokens.

     Token boundaries are chosen from the first character of each token:
     '@' and newline are always one-character tokens; '#', blanks/tabs,
     identifier starts, '//' , '/*' and quote characters delegate to the
     corresponding scanning helper; anything else is a one-character token.
     Each token is the joined slice of s between two successive boundaries,
     so for a string s the tokens concatenate back to s.
     '''
     tokens = []
     pos, limit = 0, len(s)
     while pos < limit:
         first = s[pos]
         # Work out 'stop', the index just past the token starting at pos.
         if first in '@\n':
             # Checked first so these never get absorbed by a later rule.
             stop = pos + 1
         elif first == '#':
             # Treated as a preprocessor directive.
             stop = g.skip_to_end_of_line(s, pos)
         elif first in ' \t':
             stop = g.skip_ws(s, pos)
         elif first.isalpha() or first == '_':
             stop = g.skip_c_id(s, pos)
         elif g.match(s, pos, '//'):
             stop = g.skip_line(s, pos)
         elif g.match(s, pos, '/*'):
             stop = self.skip_block_comment(s, pos)
         elif first in "'\"":
             stop = g.skip_string(s, pos)
         else:
             # Any other character stands alone.
             stop = pos + 1
         # Every branch must consume at least one character.
         assert stop > pos
         tokens.append(''.join(s[pos:stop]))
         pos = stop
     return tokens
Esempio n. 2
0
    def skipCodeBlock (self,s,i,kind):
        '''Scan s from index i to the end of the current Python code block.

        Returns a tuple (index, ok).  ok is False only when the scan stops
        at a position strictly inside s that is not immediately preceded
        by a newline; otherwise the index is extended by
        skipToTheNextClassOrFunction and ok is True.

        self.startSigIndent must already be set (asserted below).
        kind is used only in the trace message.
        '''
        # Local debugging switches.
        trace = False ; verbose = True
        sigIndent = self.startSigIndent
        if trace: g.trace('startIndent',sigIndent)
        assert sigIndent is not None
        i = start = g.skip_ws_and_nl(s,i)
        # Net count of opening minus closing brackets seen so far.
        depth = 0
        # Start of trailing underindented blank or comment lines, if any.
        trailingStart = None
        while i < len(s):
            before = i
            ch = s[i]
            if not g.is_nl(s,i):
                if ch == '#':
                    i = g.skip_to_end_of_line(s,i)
                elif ch == '"' or ch == '\'':
                    i = g.skip_python_string(s,i)
                else:
                    # Brackets adjust the nesting depth; every character
                    # reaching this branch advances the scan by one.
                    if ch in '[{(':
                        depth += 1
                    elif ch in ']})':
                        depth -= 1
                    i += 1
            else:
                if trace and verbose: g.trace(g.get_line(s,i))
                if i > 0 and g.match(s,i-1,'\\\n'):
                    # A newline right after a backslash: step over it
                    # without consulting the indentation helper.
                    i += 1
                else:
                    i = g.skip_nl(s,i)
                    # When only whitespace precedes the next newline,
                    # skipping the newline above is progress enough.
                    if not g.is_nl(s,g.skip_ws(s,i)):
                        i,trailingStart,done = self.pythonNewlineHelper(
                            s,i,depth,sigIndent,trailingStart)
                        if done: break
            assert(before < i)

        # The actual end of the block.
        if trailingStart is not None:
            i = trailingStart
            if trace: g.trace('***backtracking to underindent range')
            if trace: g.trace(g.get_line(s,i))

        if 0 < i < len(s) and not g.match(s,i-1,'\n'):
            g.trace('Can not happen: Python block does not end in a newline.')
            g.trace(g.get_line(s,i))
            return i,False

        # Include all following material until the next 'def' or 'class'.
        i = self.skipToTheNextClassOrFunction(s,i,sigIndent)

        if (trace or self.trace) and s[start:i].strip():
            g.trace('%s returns\n' % (kind) + s[start:i])
        return i,True
Esempio n. 3
0
 def tokenize(self, s):
     '''Break s into tokens and return them as a list of strings.

     The kind of token is decided by the character at the current index:
     '@' and newline each form a token of length one, while '#',
     blank/tab runs, identifiers, '//' and '/*' comments and quoted
     strings are measured by helper scanners.  Every remaining character
     is a token of length one.  Tokens are adjacent, non-empty slices of
     s taken in order from index 0.
     '''
     out = []
     start, size = 0, len(s)
     while start < size:
         c = s[start]
         # 'finish' becomes the index one past the end of this token.
         if c in '@\n':
             # Guaranteed to be separate tokens.
             finish = start + 1
         elif c == '#':
             # Preprocessor directive.
             finish = g.skip_to_end_of_line(s, start)
         elif c in ' \t':
             finish = g.skip_ws(s, start)
         elif c.isalpha() or c == '_':
             finish = g.skip_c_id(s, start)
         elif g.match(s, start, '//'):
             finish = g.skip_line(s, start)
         elif g.match(s, start, '/*'):
             finish = self.skip_block_comment(s, start)
         elif c in "'\"":
             finish = g.skip_string(s, start)
         else:
             finish = start + 1
         # The scan must always move forward.
         assert finish > start
         out.append(''.join(s[start:finish]))
         start = finish
     return out
Esempio n. 4
0
 def skipCodeBlock(self, s, i, kind):
     '''Scan s from index i to the end of the current Python code block.

     Returns a tuple (index, ok).  ok is False only when the scan stops
     at a position strictly inside s that is not immediately preceded
     by a newline; otherwise the index is extended by
     skipToTheNextClassOrFunction and ok is True.

     self.startSigIndent must already be set (asserted below).
     kind is used only in the trace message.
     '''
     # Local debugging switches.
     trace = False
     verbose = True
     sigIndent = self.startSigIndent
     if trace: g.trace('startIndent', sigIndent)
     assert sigIndent is not None
     i = start = g.skip_ws_and_nl(s, i)
     # Net count of opening minus closing brackets seen so far.
     depth = 0
     # Start of trailing underindented blank or comment lines, if any.
     trailingStart = None
     while i < len(s):
         before = i
         ch = s[i]
         if not g.is_nl(s, i):
             if ch == '#':
                 i = g.skip_to_end_of_line(s, i)
             elif ch == '"' or ch == '\'':
                 i = g.skip_python_string(s, i)
             else:
                 # Brackets adjust the nesting depth; every character
                 # reaching this branch advances the scan by one.
                 if ch in '[{(':
                     depth += 1
                 elif ch in ']})':
                     depth -= 1
                 i += 1
         else:
             if trace and verbose: g.trace(g.get_line(s, i))
             if i > 0 and g.match(s, i - 1, '\\\n'):
                 # A newline right after a backslash: step over it
                 # without consulting the indentation helper.
                 i += 1
             else:
                 i = g.skip_nl(s, i)
                 # When only whitespace precedes the next newline,
                 # skipping the newline above is progress enough.
                 if not g.is_nl(s, g.skip_ws(s, i)):
                     i, trailingStart, done = self.pythonNewlineHelper(
                         s, i, depth, sigIndent, trailingStart)
                     if done: break
         assert (before < i)
     # The actual end of the block.
     if trailingStart is not None:
         i = trailingStart
         if trace: g.trace('***backtracking to underindent range')
         if trace: g.trace(g.get_line(s, i))
     if 0 < i < len(s) and not g.match(s, i - 1, '\n'):
         g.trace('Can not happen: Python block does not end in a newline.')
         g.trace(g.get_line(s, i))
         return i, False
     # Include all following material until the next 'def' or 'class'.
     i = self.skipToTheNextClassOrFunction(s, i, sigIndent)
     if (trace or self.trace) and s[start:i].strip():
         g.trace('%s returns\n' % (kind) + s[start:i])
     return i, True