def test_tokenize_multiline_I(self):
    # A multiline string that contains a newline must be followed by an
    # end marker positioned on the next line.
    source = '''""""\n'''
    expected = [
        TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
        TokenInfo(ENDMARKER, '', (2, 0), ''),
    ]
    actual = list(tokenize.source_tokens(source))
    assert actual == expected
def _diff_tokenize(self, lines, until_line, line_offset=0):
    """Tokenize ``lines`` and yield tokens up to a point where parsing can stop.

    Every yielded ``TokenInfo`` has its line number shifted by
    ``line_offset``. The stream is cut short, with a synthetic ENDMARKER,
    in two situations:

    * the block that started with an omitted leading INDENT is fully
      dedented again, or
    * a NEWLINE at or after ``until_line`` is reached while
      ``suite_or_file_input_is_valid`` reports that the active parser's
      stack is in a valid state.

    :param lines: Iterable of source lines; consumed lazily.
    :param until_line: Line number (with ``line_offset`` already applied)
        from which a NEWLINE may terminate the token stream.
    :param line_offset: Value added to the line number of every token.
    """
    is_first_token = True
    omitted_first_indent = False
    indents = []
    line_iter = iter(lines)
    # The tokenizer pulls lines on demand; '' is returned once exhausted.
    tokens = generate_tokens(lambda: next(line_iter, ''), use_exact_op_types=True)
    stack = self._active_parser.pgen_parser.stack
    for typ, string, start_pos, prefix in tokens:
        start_pos = start_pos[0] + line_offset, start_pos[1]
        if typ == INDENT:
            indents.append(start_pos[1])
            if is_first_token:
                omitted_first_indent = True
                # We want to get rid of indents that are only here because
                # we only parse part of the file. These indents would only
                # get parsed as error leafs, which doesn't make any sense.
                is_first_token = False
                continue
        is_first_token = False

        if typ == DEDENT:
            indents.pop()
            if omitted_first_indent and not indents:
                # We are done here, only thing that can come now is an
                # endmarker or another dedented code block.
                # This token comes straight from the tokenizer, so its
                # position has not been shifted by line_offset yet.
                typ, string, start_pos, prefix = next(tokens)
                if '\n' in prefix:
                    # Keep the prefix only up to (and including) its last
                    # newline: the ENDMARKER is placed at column 0, so any
                    # partial line after that newline must be dropped.
                    # ``\Z`` is used instead of ``$`` so that a prefix which
                    # already ends in a newline is left untouched.
                    prefix = re.sub(r'(?<=\n)[^\n]+\Z', '', prefix)
                else:
                    prefix = ''
                yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                break
        elif typ == NEWLINE and start_pos[0] >= until_line:
            yield TokenInfo(typ, string, start_pos, prefix)
            # Check if the parser is actually in a valid suite state.
            if suite_or_file_input_is_valid(self._grammar, stack):
                start_pos = start_pos[0] + 1, 0
                # Close every indentation level that is still open, except
                # the omitted first one, which never produced an INDENT.
                while len(indents) > int(omitted_first_indent):
                    indents.pop()
                    yield TokenInfo(DEDENT, '', start_pos, '')

                yield TokenInfo(ENDMARKER, '', start_pos, '')
                break
            # Not a valid stopping point: the NEWLINE was already yielded
            # above, so move on to the next token.
            continue

        yield TokenInfo(typ, string, start_pos, prefix)