Example #1
def test_tokenize_multiline_II():
    # Make sure a multiline string with no newlines has the end marker on
    # the same line.
    fundef = '''""""'''
    token_list = _get_token_list(fundef)
    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
                          PythonToken(ENDMARKER,      '', (1, 4), '')]
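The _get_token_list helper is not part of the snippet; a minimal sketch of what it presumably does, assuming parso's internal tokenizer module and version helper:

# Hedged sketch of the _get_token_list test helper used above; the
# imports assume parso's internal tokenizer, not a public API.
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string

def _get_token_list(string, version=None):
    # Tokenize for the requested grammar version and materialize the
    # generator so the test can compare against a plain list.
    version_info = parse_version_string(version)
    return list(tokenize(string, version_info=version_info))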
Example #2
def test_tokenize_multiline_III():
    # Make sure a multiline string with newlines has the end marker on the
    # next line, even after several newlines.
    fundef = '''""""\n\n'''
    token_list = _get_token_list(fundef)
    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                          PythonToken(ENDMARKER,          '', (3, 0), '')]
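Both positions can also be checked through parso's public API; a quick, hedged verification sketch (assuming a reasonably recent parso release, where the module's end_pos coincides with the endmarker's start_pos):

import parso

# Without a newline the endmarker sits right after the four quotes;
# with two trailing newlines it moves to the start of line 3.
assert parso.parse('""""').end_pos == (1, 4)
assert parso.parse('""""\n\n').end_pos == (3, 0)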
Example #3
    def _diff_tokenize(self, lines, until_line, line_offset=0):
        was_newline = False
        indents = self._nodes_tree.indents
        initial_indentation_count = len(indents)

        tokens = self._tokenizer(
            lines,
            start_pos=(line_offset + 1, 0),
            indents=indents,
            is_first_token=line_offset == 0,
        )
        stack = self._active_parser.stack
        self._replace_tos_indent = None
        self._keyword_token_indents = {}
        # print('start', line_offset + 1, indents)
        for token in tokens:
            # print(token, indents)
            typ = token.type
            if typ == DEDENT:
                if len(indents) < initial_indentation_count:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    while True:
                        typ, string, start_pos, prefix = token = next(tokens)
                        if typ in (DEDENT, ERROR_DEDENT):
                            if typ == ERROR_DEDENT:
                                # We want to force an error dedent in the next
                                # parser/pass. To make this possible we just
                                # increase the location by one.
                                self._replace_tos_indent = start_pos[1] + 1
                                pass
                        else:
                            break

                    if '\n' in prefix or '\r' in prefix:
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        ENDMARKER, '',
                        start_pos,
                        prefix
                    )
                    break
            elif typ == NEWLINE and token.start_pos[0] >= until_line:
                was_newline = True
            elif was_newline:
                was_newline = False
                if len(indents) == initial_indentation_count:
                    # Check if the parser is actually in a valid suite state.
                    if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                        yield PythonToken(ENDMARKER, '', token.start_pos, '')
                        break

            if typ == NAME and token.string in ('class', 'def'):
                self._keyword_token_indents[token.start_pos] = list(indents)

            yield token
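The re.sub call above trims a trailing partial line off the prefix that gets attached to the synthesized endmarker. A standalone illustration of that regex (the helper name here is made up; plain Python, no parso needed):

import re

def trim_trailing_partial_line(prefix):
    # Drop every non-newline character at the end of the prefix so it
    # ends exactly on a line boundary, as the diff tokenizer does.
    return re.sub(r'[^\n\r]+\Z', '', prefix)

assert trim_trailing_partial_line('    # comment\n        ') == '    # comment\n'
assert trim_trailing_partial_line('\r\n    ') == '\r\n'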
Example #4
    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        tokens = self._tokenizer(lines, (1, 0))
        stack = self._active_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == PythonTokenTypes.INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here because
                    # we only parse part of the file. These indents would only
                    # get parsed as error leafs, which doesn't make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
            if typ == PythonTokenTypes.DEDENT \
                    or typ == PythonTokenTypes.ERROR_DEDENT \
                    and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix or '\r' in prefix:
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        PythonTokenTypes.ENDMARKER, '',
                        (start_pos[0] + line_offset, 0),
                        prefix
                    )
                    break
            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield PythonToken(typ, string, start_pos, prefix)
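In this older variant the closing DEDENTs and the endmarker are produced inline in the NEWLINE branch. The same step, written as a standalone generator to make the control flow easier to see (a hypothetical helper; the imports assume current parso module paths):

from parso.python.token import PythonTokenTypes
from parso.python.tokenize import PythonToken

def close_open_suites(indents, omitted_first_indent, newline_start_pos):
    # After the newline that ends the parsed range, close every still
    # open indentation level (except an omitted first indent) with a
    # DEDENT and emit the endmarker on the following line.
    start_pos = newline_start_pos[0] + 1, 0
    while len(indents) > int(omitted_first_indent):
        indents.pop()
        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')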
Example #5
def tokenize_without_endmarker(code):
    safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'
    for token in tokenize(code + safeword, (2, 7)):
        if token.string == safeword:
            return
        elif token.string.endswith(safeword):
            yield PythonToken(token.type, token.string[:-len(safeword)],
                              token.start_pos, token.prefix)
            return
        else:
            yield token
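The endswith branch above relies on the user's partially typed name and the safeword fusing into a single NAME token; slicing the safeword back off leaves only the user's text. A standalone illustration:

safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'
fused = 'du' + safeword                  # e.g. the NAME token after typing "json.du"
assert fused.endswith(safeword)
assert fused[:-len(safeword)] == 'du'    # only the user's text is kept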
Example #6
def tokenize_without_endmarker(code):
    safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'
    grammar = load_grammar()
    tokens = grammar._tokenize(code + safeword)
    for token_ in tokens:
        if token_.string == safeword:
            return
        elif token_.string.endswith(safeword):
            yield PythonToken(token_.type, token_.string[:-len(safeword)],
                              token_.start_pos, token_.prefix)
            return
        else:
            yield token_
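A hedged usage sketch for this variant, assuming load_grammar refers to parso.load_grammar (the snippet's imports are not shown): with the cursor after an incomplete attribute access, the partially typed name comes back with the safeword stripped and no endmarker is yielded.

# Hypothetical usage; exact token types and prefixes depend on the
# parso version backing load_grammar().
for token_ in tokenize_without_endmarker('import json\njson.du'):
    print(token_.type, repr(token_.string))
# The last yielded token should be a NAME whose string is 'du', and the
# generator returns before any ENDMARKER is produced.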