def tokenize(self, s):
    '''
    Split s into a list of consecutive token strings.

    Each token is a slice of s, and the slices are contiguous, so joining
    the returned list reproduces a string s.  The first character of each
    token selects how far the token extends:

    - '@' or a newline: always a one-character token.
    - '#': up to the index returned by g.skip_to_end_of_line
      (treated as a preprocessor directive).
    - blank or tab: up to the index returned by g.skip_ws.
    - a letter or '_': up to the index returned by g.skip_c_id.
    - '//': up to the index returned by g.skip_line.
    - '/*': up to the index returned by self.skip_block_comment.
    - a single or double quote: up to the index returned by g.skip_string.
    - anything else: a one-character token.
    '''
    tokens = []
    pos, limit = 0, len(s)
    while pos < limit:
        first = s[pos]
        # 'end' is the index just past the token that starts at 'pos'.
        if first in '@\n':
            # Make *sure* these are separate tokens.
            end = pos + 1
        elif first == '#':
            # Preprocessor directive.
            end = g.skip_to_end_of_line(s, pos)
        elif first in ' \t':
            end = g.skip_ws(s, pos)
        elif first.isalpha() or first == '_':
            end = g.skip_c_id(s, pos)
        elif g.match(s, pos, '//'):
            end = g.skip_line(s, pos)
        elif g.match(s, pos, '/*'):
            end = self.skip_block_comment(s, pos)
        elif first in "'\"":
            end = g.skip_string(s, pos)
        else:
            end = pos + 1
        # Every branch must consume at least one character.
        assert end > pos
        tokens.append(''.join(s[pos:end]))
        pos = end
    return tokens
def skipCodeBlock (self,s,i,kind):
    '''
    Scan the Python code block in s that starts at index i.

    Return a tuple (index, ok):

    - (index, False) if the scan stops strictly inside s at a position
      that is not immediately preceded by a newline.
    - (index, True) otherwise, where index is the value returned by
      self.skipToTheNextClassOrFunction.

    self.startSigIndent must not be None.  kind is used only in the
    trace message.
    '''
    tracing = False # Set to True for debugging output.
    chatty = True
    baseIndent = self.startSigIndent
    if tracing:
        g.trace('startIndent',baseIndent)
    assert baseIndent is not None
    i = blockStart = g.skip_ws_and_nl(s,i)
    limit = len(s)
    depth = 0 # Net count of open brackets: [, { and (.
    tailStart = None # The start of trailing underindented blank or comment lines.
    while i < limit:
        before = i
        ch = s[i]
        if g.is_nl(s,i):
            if tracing and chatty:
                g.trace(g.get_line(s,i))
            if i > 0 and g.match(s,i-1,'\\\n'):
                # The newline is escaped by a backslash: just step over it.
                i += 1
            else:
                i = g.skip_nl(s,i)
                # A blank line needs nothing more: skipping the newline
                # was progress enough.
                if not g.is_nl(s,g.skip_ws(s,i)):
                    # The helper decides whether this line ends the block.
                    i,tailStart,done = self.pythonNewlineHelper(
                        s,i,depth,baseIndent,tailStart)
                    if done:
                        break
        elif ch == '#':
            i = g.skip_to_end_of_line(s,i)
        elif ch in ('"','\''):
            i = g.skip_python_string(s,i)
        elif ch in '[{(':
            i += 1
            depth += 1
        elif ch in ']})':
            i += 1
            depth -= 1
        else:
            i += 1
        # Every pass that does not break must advance i.
        assert before < i

    # The actual end of the block.
    if tailStart is not None:
        i = tailStart
        if tracing:
            g.trace('***backtracking to underindent range')
            g.trace(g.get_line(s,i))

    if 0 < i < len(s) and not g.match(s,i-1,'\n'):
        g.trace('Can not happen: Python block does not end in a newline.')
        g.trace(g.get_line(s,i))
        return i,False

    # Include all following material until the next 'def' or 'class'.
    i = self.skipToTheNextClassOrFunction(s,i,baseIndent)

    if (tracing or self.trace) and s[blockStart:i].strip():
        g.trace('%s returns\n' % (kind) + s[blockStart:i])
    return i,True
def tokenize(self, s):
    '''
    Break s into tokens and return them as a list of strings.

    The tokens are adjacent slices of s, in order.  A token is one of:

    - a single '@' or a single newline (never merged with neighbors);
    - a run starting with '#', ended by g.skip_to_end_of_line
      (a preprocessor directive);
    - a run of blanks/tabs, ended by g.skip_ws;
    - a run starting with a letter or '_', ended by g.skip_c_id;
    - a run starting with '//', ended by g.skip_line;
    - a run starting with '/*', ended by self.skip_block_comment;
    - a run starting with a quote character, ended by g.skip_string;
    - any other single character.
    '''
    out = []
    start = 0
    n = len(s)
    while start < n:
        c = s[start]
        one_char = start + 1
        if c in '@\n':
            # Make *sure* these are separate tokens.
            stop = one_char
        elif c == '#':
            # Preprocessor directive.
            stop = g.skip_to_end_of_line(s, start)
        elif c in ' \t':
            stop = g.skip_ws(s, start)
        elif c.isalpha() or c == '_':
            stop = g.skip_c_id(s, start)
        elif g.match(s, start, '//'):
            stop = g.skip_line(s, start)
        elif g.match(s, start, '/*'):
            stop = self.skip_block_comment(s, start)
        elif c in "'\"":
            stop = g.skip_string(s, start)
        else:
            stop = one_char
        # Loop invariant: stop > start and s[start:stop] is the new token.
        assert stop > start
        out.append(''.join(s[start:stop]))
        start = stop  # Advance.
    return out
def skipCodeBlock(self, s, i, kind):
    '''
    Find the end of the Python code block in s that begins at index i.

    Returns (index, ok).  ok is False only when the scan ends strictly
    inside s at a position whose preceding character is not a newline;
    in that case a diagnostic is traced.  Otherwise ok is True and index
    is the result of self.skipToTheNextClassOrFunction.

    Requires self.startSigIndent to be set (not None).  The kind argument
    appears only in the optional trace output.
    '''
    debug = False  # Flip to True to trace the scan.
    debug_lines = True
    sig_indent = self.startSigIndent
    if debug:
        g.trace('startIndent', sig_indent)
    assert sig_indent is not None
    i = first = g.skip_ws_and_nl(s, i)
    open_brackets = 0
    # The start of trailing underindented blank or comment lines.
    under_start = None
    while i < len(s):
        prev_i = i
        ch = s[i]
        if g.is_nl(s, i):
            if debug and debug_lines:
                g.trace(g.get_line(s, i))
            escaped_nl = i > 0 and g.match(s, i - 1, '\\\n')
            if escaped_nl:
                # A backslash-newline pair: move past the newline only.
                i += 1
            else:
                i = g.skip_nl(s, i)
                next_is_blank = g.is_nl(s, g.skip_ws(s, i))
                if not next_is_blank:
                    # Let the helper update the position and the
                    # underindent marker, and say whether to stop.
                    i, under_start, stop = self.pythonNewlineHelper(
                        s, i, open_brackets, sig_indent, under_start)
                    if stop:
                        break
                # Otherwise the next line is blank: progress was already made.
        elif ch == '#':
            i = g.skip_to_end_of_line(s, i)
        elif ch in ('"', '\''):
            i = g.skip_python_string(s, i)
        elif ch in '[{(':
            open_brackets += 1
            i += 1
        elif ch in ']})':
            open_brackets -= 1
            i += 1
        else:
            i += 1
        assert prev_i < i
    # The actual end of the block.
    if under_start is not None:
        i = under_start
        if debug:
            g.trace('***backtracking to underindent range')
            g.trace(g.get_line(s, i))
    ends_mid_line = 0 < i < len(s) and not g.match(s, i - 1, '\n')
    if ends_mid_line:
        g.trace('Can not happen: Python block does not end in a newline.')
        g.trace(g.get_line(s, i))
        return i, False
    # Include all following material until the next 'def' or 'class'.
    i = self.skipToTheNextClassOrFunction(s, i, sig_indent)
    if (debug or self.trace) and s[first:i].strip():
        g.trace('%s returns\n' % (kind) + s[first:i])
    return i, True