Code example #1
File: test_parser.py  Project: bstaint/jedi
def test_python3_octal():
    parser = ParserWithRecovery(load_grammar(), u'0o660')
    module = parser.get_parsed_node()
    if is_py3:
        assert module.children[0].children[0].type == 'number'
    else:
        assert module.children[0].type == 'error_node'
Code example #2
File: test_parser.py  Project: bstaint/jedi
def test_python2_octal():
    parser = ParserWithRecovery(load_grammar(), u'0660')
    first = parser.get_parsed_node().children[0]
    if is_py3:
        assert first.type == 'error_node'
    else:
        assert first.children[0].type == 'number'
Code example #3
File: test_parser.py  Project: ANtlord/jedi
def test_carriage_return_statements():
    source = u(dedent('''
        foo = 'ns1!'

        # this is a namespace package
    '''))
    source = source.replace('\n', '\r\n')
    stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
    assert '#' not in stmt.get_code()
Code example #4
def _evaluate_for_statement_string(evaluator, string, module):
    code = dedent("""
    def pseudo_docstring_stuff():
        # Create a pseudo function for docstring statements.
    %s
    """)
    if string is None:
        return []

    for element in re.findall('((?:\w+\.)*\w+)\.', string):
        # Try to import module part in dotted name.
        # (e.g., 'threading' in 'threading.Thread').
        string = 'import %s\n' % element + string

    # Take the default grammar here, if we load the Python 2.7 grammar here, it
    # will be impossible to use `...` (Ellipsis) as a token. Docstring types
    # don't need to conform with the current grammar.
    p = ParserWithRecovery(load_grammar(), code % indent_block(string))
    try:
        pseudo_cls = p.module.subscopes[0]
        # First pick suite, then simple_stmt (-2 for DEDENT) and then the node,
        # which is also not the last item, because there's a newline.
        stmt = pseudo_cls.children[-1].children[-2].children[-2]
    except (AttributeError, IndexError):
        return []

    # Use the module of the param.
    # TODO this module is not the module of the param in case of a function
    # call. In that case it's the module of the function call.
    # stuffed with content from a function call.
    pseudo_cls.parent = module
    return list(_execute_types_in_stmt(evaluator, stmt))
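For illustration, here is roughly what `code % indent_block(string)` produces for a docstring type such as 'threading.Thread' (a sketch only; it assumes indent_block() indents each line by four spaces):

# Sketch of the generated pseudo-source for the docstring type 'threading.Thread'.
# The regex above prepends an import for the dotted prefix before formatting.
generated = '''\
def pseudo_docstring_stuff():
    # Create a pseudo function for docstring statements.
    import threading
    threading.Thread
'''
# The last statement in the function suite ('threading.Thread') is the one the
# code above picks out as `stmt` and then evaluates.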
Code example #5
def test_get_code():
    """Use the same code that the parser also generates, to compare"""
    s = u('''"""a docstring"""
class SomeClass(object, mixin):
    def __init__(self):
        self.xy = 3.0
        """statement docstr"""
    def some_method(self):
        return 1
    def yield_method(self):
        while hasattr(self, 'xy'):
            yield True
        for x in [1, 2]:
            yield x
    def empty(self):
        pass
class Empty:
    pass
class WithDocstring:
    """class docstr"""
    pass
def method_with_docstring():
    """class docstr"""
    pass
''')
    assert ParserWithRecovery(load_grammar(), s).module.get_code() == s
Code example #6
    def _parse_part(self, source, parser_code, line_offset, nodes):
        """
        Side effect: Alters the list of nodes.
        """
        h = hash(source)
        for index, node in enumerate(nodes):
            if node.hash == h and node.source == source:
                node.reset_node()
                nodes.remove(node)
                parser_code = source
                break
        else:
            tokenizer = FastTokenizer(parser_code)
            self.number_parsers_used += 1
            p = ParserWithRecovery(self._grammar,
                                   parser_code,
                                   self.module_path,
                                   tokenizer=tokenizer)

            end = line_offset + p.module.end_pos[0]
            used_lines = self._lines[line_offset:end - 1]
            code_part_actually_used = ''.join(used_lines)

            node = ParserNode(self.module, p, code_part_actually_used)

        indent = len(parser_code) - len(parser_code.lstrip('\t '))

        self.current_node.add_node(node, line_offset, indent)
        self.current_node = node
Code example #7
File: fake.py  Project: imdone/nuclide
def _load_faked_module(module):
    module_name = module.__name__
    if module_name == '__builtin__' and not is_py3:
        module_name = 'builtins'

    try:
        return modules[module_name]
    except KeyError:
        path = os.path.dirname(os.path.abspath(__file__))
        try:
            with open(os.path.join(path, 'fake', module_name) + '.pym') as f:
                source = f.read()
        except IOError:
            modules[module_name] = None
            return
        grammar = load_grammar(version='3.4')
        module = ParserWithRecovery(grammar, unicode(source),
                                    module_name).module
        modules[module_name] = module

        if module_name == 'builtins' and not is_py3:
            # There are two implementations of `open` for either python 2/3.
            # -> Rename the python2 version (`look at fake/builtins.pym`).
            open_func = search_scope(module, 'open')
            open_func.children[1] = FakeName('open_python3')
            open_func = search_scope(module, 'open_python2')
            open_func.children[1] = FakeName('open')
        return module
Code example #8
File: test_tokenize.py  Project: zhangguosen3033/jedi
def test_end_pos_multi_line(self):
    parsed = ParserWithRecovery(load_grammar(), dedent(u('''
    def testit():
        a = """huhu
    asdfasdf""" + "h"
    ''')))
    tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
    assert tok.end_pos == (4, 11)
Code example #9
def test_user_statement_on_import():
    """github #285"""
    s = u("from datetime import (\n" "    time)")

    for pos in [(2, 1), (2, 4)]:
        p = ParserWithRecovery(load_grammar(), s)
        stmt = p.module.get_statement_for_position(pos)
        assert isinstance(stmt, pt.Import)
        assert [str(n) for n in stmt.get_defined_names()] == ['time']
Code example #10
def test_sys_path_with_modifications():
    SRC = dedent(u("""
        import os
    """))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    p.module.path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    paths = sys_path_with_modifications(Evaluator(grammar), p.module)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
Code example #11
def test_path_from_invalid_sys_path_assignment():
    SRC = dedent(u("""
        import sys
        sys.path = 'invalid'"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'invalid' not in paths
Code example #12
def main(args):
    jedi.set_debug_function(notices=args['--debug'])
    with open(args['<file>']) as f:
        code = f.read()
    grammar = load_grammar()
    parser = ParserWithRecovery(grammar, u(code))

    code = code + '\na\n'  # Add something so the diff parser needs to run.
    lines = splitlines(code, keepends=True)
    cProfile.runctx('run(parser, lines)', globals(), locals(), sort=args['-s'])
Code example #13
def test_end_pos_one_line(self):
    parsed = ParserWithRecovery(
        load_grammar(),
        dedent(
            u('''
    def testit():
        a = "huhu"
    ''')))
    tok = parsed.module.subscopes[0].statements[0].children[2]
    assert tok.end_pos == (3, 14)
Code example #14
File: diff.py  Project: jiapei100/pythonVSCode
def _try_parse_part(self, until_line):
    """
    Sets up a normal parser that uses a specialized tokenizer to only parse
    until a certain position (or a bit longer if the statement hasn't
    ended).
    """
    self._parser_count += 1
    # TODO speed up, shouldn't copy the whole list all the time.
    # memoryview?
    parsed_until_line = self._nodes_stack.parsed_until_line
    lines_after = self._parser_lines_new[parsed_until_line:]
    #print('parse_content', parsed_until_line, lines_after, until_line)
    tokenizer = self._diff_tokenize(lines_after,
                                    until_line,
                                    line_offset=parsed_until_line)
    self._active_parser = ParserWithRecovery(self._grammar,
                                             source='\n',
                                             start_parsing=False)
    return self._active_parser.parse(tokenizer=tokenizer)
Code example #15
File: diff.py  Project: jiapei100/pythonVSCode
    def __call__(self, grammar, source, module_path=None):
        pi = parser_cache.get(module_path, None)
        if pi is None or not settings.fast_parser:
            return ParserWithRecovery(grammar, source, module_path)

        parser = pi.parser
        d = DiffParser(parser)
        new_lines = splitlines(source, keepends=True)
        parser.module = parser._parsed = d.update(new_lines)
        return parser
Code example #16
    def load(buildout_script):
        try:
            with open(buildout_script, 'rb') as f:
                source = common.source_to_unicode(f.read())
        except IOError:
            debug.dbg('Error trying to read buildout_script: %s', buildout_script)
            return

        p = ParserWithRecovery(evaluator.grammar, source, buildout_script)
        save_parser(buildout_script, p)
        return p.module
Code example #17
def test_end_pos():
    s = u(
        dedent('''
                 x = ['a', 'b', 'c']
                 def func():
                     y = None
                 '''))
    parser = ParserWithRecovery(load_grammar(), s)
    scope = parser.module.subscopes[0]
    assert scope.start_pos == (3, 0)
    assert scope.end_pos == (5, 0)
Code example #18
def check(src, result):
    # Python 2 tuple params should be ignored for now.
    grammar = load_grammar('%s.%s' % sys.version_info[:2])
    m = ParserWithRecovery(grammar, u(src)).module
    if is_py3:
        assert not m.subscopes
    else:
        # We don't want b and c to be a part of the param enumeration. Just
        # ignore them, because it's not what we want to support in the
        # future.
        assert [str(param.name)
                for param in m.subscopes[0].params] == result
Code example #19
File: test_parser.py  Project: zhangguosen3033/jedi
def test_end_pos_error_correction():
    """
    Source code without an ending newline is given one, because the Python
    grammar needs it. However, it is removed again. We still want the right
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    assert func.end_pos == (2, 2)
    assert m.end_pos == (2, 2)
Code example #20
def test_sys_path_with_modifications():
    code = dedent(u("""
        import os
    """))

    path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, code, module_path=path)
    module_context = ModuleContext(Evaluator(grammar), p.module)
    paths = sys_path_with_modifications(module_context.evaluator,
                                        module_context)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
Code example #21
    def __call__(self, grammar, source, module_path=None):
        if not settings.fast_parser:
            return ParserWithRecovery(grammar, source, module_path)

        pi = parser_cache.get(module_path, None)
        if pi is None or isinstance(pi.parser, ParserWithRecovery):
            p = super(CachedFastParser, self).__call__(grammar, source,
                                                       module_path)
        else:
            p = pi.parser  # pi is a `cache.ParserCacheItem`
            p.update(source)
        return p
Code example #22
def test_append_on_non_sys_path():
    SRC = dedent(
        u("""
        class Dummy(object):
            path = []

        d = Dummy()
        d.path.append('foo')"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'foo' not in paths
Code example #23
def test_end_pos_error_correction():
    """
    Source code without an ending newline is given one, because the Python
    grammar needs it. However, it is removed again. We still want the right
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    # This is not exactly correct, but ok, because it doesn't make a difference
    # at all. We just want to make sure that the module end_pos is correct!
    assert func.end_pos == (3, 0)
    assert m.end_pos == (2, 2)
Code example #24
File: pep0484.py  Project: nikimar1/cloneofg4g2019
def _get_typing_replacement_module():
    """
    The idea is to return our jedi replacement for the PEP-0484 typing module
    as discussed at https://github.com/davidhalter/jedi/issues/663
    """
    global _typing_module
    if _typing_module is None:
        typing_path = \
            os.path.abspath(os.path.join(__file__, "../jedi_typing.py"))
        with open(typing_path) as f:
            code = _compatibility.unicode(f.read())
        p = ParserWithRecovery(load_grammar(), code)
        _typing_module = p.module
    return _typing_module
Code example #25
def test_hex_values_in_docstring():
    source = r'''
        def foo(object):
            """
             \xff
            """
            return 1
        '''

    doc = ParserWithRecovery(load_grammar(),
                             dedent(u(source))).module.subscopes[0].raw_doc
    if is_py3:
        assert doc == '\xff'
    else:
        assert doc == u('ÿ')
Code example #26
File: stdlib.py  Project: jiapei100/pythonVSCode
def collections_namedtuple(evaluator, obj, arguments):
    """
    Implementation of the namedtuple function.

    This has to be done by processing the namedtuple class template and
    evaluating the result.

    .. note:: |jedi| only supports namedtuples on Python >2.6.

    """
    # Namedtuples are not supported on Python 2.6
    if not hasattr(collections, '_class_template'):
        return set()

    # Process arguments
    # TODO here we only use one of the types, we should use all.
    name = list(_follow_param(evaluator, arguments, 0))[0].obj
    _fields = list(_follow_param(evaluator, arguments, 1))[0]
    if isinstance(_fields, compiled.CompiledObject):
        fields = _fields.obj.replace(',', ' ').split()
    elif isinstance(_fields, iterable.AbstractSequence):
        fields = [
            v.obj for lazy_context in _fields.py__iter__()
            for v in lazy_context.infer() if hasattr(v, 'obj')
        ]
    else:
        return set()

    # Build source
    source = collections._class_template.format(
        typename=name,
        field_names=fields,
        num_fields=len(fields),
        arg_list=', '.join(fields),
        repr_fmt=', '.join(
            collections._repr_template.format(name=name) for name in fields),
        field_defs='\n'.join(
            collections._field_template.format(index=index, name=name)
            for index, name in enumerate(fields)))

    # Parse source
    generated_class = ParserWithRecovery(evaluator.grammar,
                                         unicode(source)).module.subscopes[0]
    return set(
        [er.ClassContext(evaluator, generated_class, evaluator.BUILTINS)])
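The same render-a-template-then-parse-it technique can be sketched with the standard library alone; the example below uses ast instead of jedi's parser and is only illustrative:

import ast

typename = 'Point'
fields = ['x', 'y']
# Render a tiny class template as source code, then parse it back into a node.
source = 'class {0}(tuple):\n    _fields = {1!r}\n'.format(typename, tuple(fields))
class_node = ast.parse(source).body[0]
assert isinstance(class_node, ast.ClassDef) and class_node.name == 'Point'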
Code example #27
    def test_quoted_strings(self):

        string_tokens = [
            'u"test"',
            'u"""test"""',
            'U"""test"""',
            "u'''test'''",
            "U'''test'''",
        ]

        for s in string_tokens:
            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
            simple_stmt = parsed.module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
            string_tok = expr_stmt.children[2]
            assert string_tok.type == 'string'
            assert string_tok.value == s
            assert string_tok.eval() == 'test'
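What string_tok.eval() asserts here can be reproduced for the same literals with the standard library (a sketch, not jedi API):

import ast

for s in ('u"test"', 'u"""test"""', 'U"""test"""', "u'''test'''", "U'''test'''"):
    # literal_eval parses the prefixed string literal and returns its value.
    assert ast.literal_eval(s) == 'test'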
Code example #28
def test_path_from_sys_path_assignment():
    SRC = dedent(
        u("""
        #!/usr/bin/python

        import sys
        sys.path[0:0] = [
          '/usr/lib/python3.4/site-packages',
          '/home/test/.buildout/eggs/important_package.egg'
          ]

        path[0:0] = [1]

        import important_package

        if __name__ == '__main__':
            sys.exit(important_package.main())"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert 1 not in paths
    assert '/home/test/.buildout/eggs/important_package.egg' in paths
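What these sys.path tests exercise can be approximated with a small standard-library sketch; find_sys_path_additions below is a hypothetical helper, a rough analogue of what _check_module looks for, not jedi's implementation:

import ast

def find_sys_path_additions(source):
    """Collect string literals added to sys.path via slice assignment,
    e.g. ``sys.path[0:0] = [...]`` (hypothetical helper, illustration only)."""
    paths = []
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Assign):
            continue
        for target in node.targets:
            if not isinstance(target, ast.Subscript):
                continue
            attr = target.value
            if (isinstance(attr, ast.Attribute) and attr.attr == 'path'
                    and isinstance(attr.value, ast.Name) and attr.value.id == 'sys'):
                # Collect every string constant inside the assigned value.
                for element in ast.walk(node.value):
                    if isinstance(element, ast.Constant) and isinstance(element.value, str):
                        paths.append(element.value)
    return paths

# For the SRC above this yields the two egg/site-packages strings, but skips
# the unrelated ``path[0:0] = [1]`` assignment and its integer element.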
Code example #29
File: docstrings.py  Project: sleepy771/jedi
def _evaluate_for_statement_string(module_context, string):
    code = dedent(
        u("""
    def pseudo_docstring_stuff():
        # Create a pseudo function for docstring statements.
    {0}
    """))
    if string is None:
        return []

    for element in re.findall('((?:\w+\.)*\w+)\.', string):
        # Try to import module part in dotted name.
        # (e.g., 'threading' in 'threading.Thread').
        string = 'import %s\n' % element + string

    # Take the default grammar here, if we load the Python 2.7 grammar here, it
    # will be impossible to use `...` (Ellipsis) as a token. Docstring types
    # don't need to conform with the current grammar.
    p = ParserWithRecovery(load_grammar(), code.format(indent_block(string)))
    try:
        funcdef = p.module.subscopes[0]
        # First pick suite, then simple_stmt and then the node,
        # which is also not the last item, because there's a newline.
        stmt = funcdef.children[-1].children[-1].children[-2]
    except (AttributeError, IndexError):
        return []

    from jedi.evaluate.param import ValuesArguments
    from jedi.evaluate.representation import FunctionContext
    function_context = FunctionContext(module_context.evaluator,
                                       module_context, funcdef)
    func_execution_context = function_context.get_function_execution(
        ValuesArguments([]))
    # Use the module of the param.
    # TODO this module is not the module of the param in case of a function
    # call. In that case it's the module of the function call.
    # stuffed with content from a function call.
    return list(_execute_types_in_stmt(func_execution_context, stmt))
Code example #30
File: diff.py  Project: andrewmw94/configs
def _try_parse_part(self, until_line):
    """
    Sets up a normal parser that uses a specialized tokenizer to only parse
    until a certain position (or a bit longer if the statement hasn't
    ended).
    """
    self._parser_count += 1
    # TODO speed up, shouldn't copy the whole list all the time.
    # memoryview?
    parsed_until_line = self._nodes_stack.parsed_until_line
    lines_after = self._parser_lines_new[parsed_until_line:]
    #print('parse_content', parsed_until_line, lines_after, until_line)
    tokenizer = self._diff_tokenize(
        lines_after,
        until_line,
        line_offset=parsed_until_line
    )
    self._active_parser = ParserWithRecovery(
        self._grammar,
        source='\n',
        start_parsing=False
    )
    return self._active_parser.parse(tokenizer=tokenizer)
Code example #31
def get_import(self, source):
    return ParserWithRecovery(load_grammar(), source).module.imports[0]
Code example #32
def get_sub(self, source):
    return ParserWithRecovery(load_grammar(),
                              u(source)).module.subscopes[0]
Code example #34
def get_call(self, source):
    # Get the simple_stmt and then the first one.
    simple_stmt = ParserWithRecovery(load_grammar(),
                                     u(source)).module.children[0]
    return simple_stmt.children[0]
Code example #35
File: test_parser.py  Project: bstaint/jedi
def test_started_lambda_stmt():
    p = ParserWithRecovery(load_grammar(), u'lambda a, b: a i')
    assert p.get_parsed_node().children[0].type == 'error_node'
Code example #36
File: test_parser.py  Project: ANtlord/jedi
def test_newline_positions():
    endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
    assert endmarker.end_pos == (2, 0)
    new_line = endmarker.get_previous_leaf()
    assert new_line.start_pos == (1, 1)
    assert new_line.end_pos == (2, 0)
Code example #37
File: diff.py  Project: andrewmw94/configs
class DiffParser(object):
    def __init__(self, parser):
        self._parser = parser
        self._grammar = self._parser._grammar
        self._module = parser.get_root_node()

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._copied_ranges = []
        self._new_used_names = {}
        self._nodes_stack = _NodesStack(self._module)

    def update(self, lines_new):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we get
              one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.
        '''
        debug.speed('diff parser start')
        self._parser_lines_new = lines_new
        self._added_newline = False
        if lines_new[-1] != '':
            # The Python grammar needs a newline at the end of a file, but for
            # everything else we keep working with lines_new here.
            self._parser_lines_new = list(lines_new)
            self._parser_lines_new[-1] += '\n'
            self._added_newline = True

        self._reset()

        line_length = len(lines_new)
        lines_old = splitlines(self._parser.source, keepends=True)
        sm = difflib.SequenceMatcher(None, lines_old, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        debug.speed('diff parser calculated')
        debug.dbg('diff: line_lengths old: %s, new: %s' % (len(lines_old), line_length))

        if len(opcodes) == 1 and opcodes[0][0] == 'equal':
            self._copy_count = 1
            return self._module

        for operation, i1, i2, j1, j2 in opcodes:
            debug.dbg('diff %s old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length + int(self._added_newline):
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                assert operation == 'delete'

        # With this action all changes will finally be applied and we have a
        # changed module.
        self._nodes_stack.close()

        self._cleanup()
        if self._added_newline:
            self._parser.remove_last_newline()

        self._parser.source = ''.join(lines_new)

        # Good for debugging.
        if debug.debug_function:
            self._enable_debugging(lines_old, lines_new)
        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = splitlines(self._module.get_code(), keepends=True)
            diff = difflib.unified_diff(current_lines, lines_new)
            raise Exception(
                "There's an issue (%s != %s) with the diff parser. Please report:\n%s"
                % (last_pos, line_length, ''.join(diff))
            )

        debug.speed('diff parser end')
        return self._module

    def _enable_debugging(self, lines_old, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            debug.warning('parser issue:\n%s\n%s', repr(''.join(lines_old)),
                          repr(''.join(lines_new)))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        copied_nodes = [None]

        while until_line_new > self._nodes_stack.parsed_until_line:
            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_stack.parsed_until_line + 1)
            elif not copied_nodes:
                # We have copied as much as possible (but definitely not too
                # much). Therefore we just parse the rest.
                # We might not reach the end, because there's a statement
                # that is not finished.
                self._parse(until_line_new)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                copied_nodes = self._nodes_stack.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
                )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1

                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
                    to = self._nodes_stack.parsed_until_line
                    self._copied_ranges.append((from_, to))

                    debug.dbg('diff actually copy %s to %s', from_, to)

    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            # TODO use leaf.get_definition one day when that one is working
            # well.
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent
            return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _get_before_insertion_node(self):
        if self._nodes_stack.is_empty():
            return None

        line = self._nodes_stack.parsed_until_line + 1
        node = self._new_module.last_leaf()
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0 or '\n' in self._prefix
                return node
            node = parent

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        while until_line > self._nodes_stack.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = self._get_children_nodes(node)
            #self._insert_nodes(nodes)

            self._nodes_stack.add_parsed_nodes(nodes)
            debug.dbg(
                'parse part %s to %s (to %s in parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_stack.parsed_until_line,
                node.end_pos[0] - 1
            )
            _merge_used_names(
                self._new_used_names,
                node.used_names
            )

    def _get_children_nodes(self, node):
        nodes = node.children
        first_element = nodes[0]
        # TODO this looks very strange...
        if first_element.type == 'error_leaf' and \
                first_element.original_type == 'indent':
            assert False, str(nodes)

        return nodes

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only parse
        until a certain position (or a bit longer if the statement hasn't
        ended).
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_stack.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        #print('parse_content', parsed_until_line, lines_after, until_line)
        tokenizer = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = ParserWithRecovery(
            self._grammar,
            source='\n',
            start_parsing=False
        )
        return self._active_parser.parse(tokenizer=tokenizer)

    def _cleanup(self):
        """Add the used names from the old parser to the new one."""
        copied_line_numbers = set()
        for l1, l2 in self._copied_ranges:
            copied_line_numbers.update(range(l1, l2 + 1))

        new_used_names = self._new_used_names
        for key, names in self._module.used_names.items():
            for name in names:
                if name.line in copied_line_numbers:
                    new_used_names.setdefault(key, []).append(name)
        self._module.used_names = new_used_names

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        l = iter(lines)
        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
        stack = self._active_parser.pgen_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here because
                    # we only parse part of the file. These indents would only
                    # get parsed as error leafs, which doesn't make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            if typ == DEDENT:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix:
                        prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
                    else:
                        prefix = ''
                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                    break
            elif typ == NEWLINE and start_pos[0] >= until_line:
                yield TokenInfo(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield TokenInfo(DEDENT, '', start_pos, '')

                    yield TokenInfo(ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield TokenInfo(typ, string, start_pos, prefix)
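The update() method above drives everything from difflib opcodes. As a point of reference, here is a minimal standalone sketch (standard library only, unrelated to jedi internals) of what SequenceMatcher.get_opcodes() yields for two lists of lines:

import difflib

old = ['a = 1\n', 'b = 2\n', 'c = 3\n']
new = ['a = 1\n', 'b = 20\n', 'c = 3\n', 'd = 4\n']

# Each opcode is (operation, i1, i2, j1, j2): old[i1:i2] corresponds to new[j1:j2].
for opcode in difflib.SequenceMatcher(None, old, new).get_opcodes():
    print(opcode)
# ('equal',   0, 1, 0, 1)
# ('replace', 1, 2, 1, 2)
# ('equal',   2, 3, 2, 3)
# ('insert',  3, 3, 3, 4)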
Code example #38
File: test_get_code.py  Project: ANtlord/jedi
def test(source, end_pos):
    module = ParserWithRecovery(load_grammar(), u(source)).module
    assert module.get_code() == source
    assert module.end_pos == end_pos