def test_python3_octal():
    parser = ParserWithRecovery(load_grammar(), u'0o660')
    module = parser.get_parsed_node()
    if is_py3:
        assert module.children[0].children[0].type == 'number'
    else:
        assert module.children[0].type == 'error_node'
def test_python2_octal():
    parser = ParserWithRecovery(load_grammar(), u'0660')
    first = parser.get_parsed_node().children[0]
    if is_py3:
        assert first.type == 'error_node'
    else:
        assert first.children[0].type == 'number'
def test_carriage_return_statements():
    source = u(dedent('''
        foo = 'ns1!'

        # this is a namespace package
        '''))
    source = source.replace('\n', '\r\n')
    stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
    assert '#' not in stmt.get_code()
def _evaluate_for_statement_string(evaluator, string, module):
    code = dedent("""
    def pseudo_docstring_stuff():
        # Create a pseudo function for docstring statements.
    %s
    """)
    if string is None:
        return []

    for element in re.findall('((?:\w+\.)*\w+)\.', string):
        # Try to import module part in dotted name.
        # (e.g., 'threading' in 'threading.Thread').
        string = 'import %s\n' % element + string

    # Take the default grammar here, if we load the Python 2.7 grammar here, it
    # will be impossible to use `...` (Ellipsis) as a token. Docstring types
    # don't need to conform with the current grammar.
    p = ParserWithRecovery(load_grammar(), code % indent_block(string))
    try:
        pseudo_cls = p.module.subscopes[0]
        # First pick suite, then simple_stmt (-2 for DEDENT) and then the
        # node, which is also not the last item, because there's a newline.
        stmt = pseudo_cls.children[-1].children[-2].children[-2]
    except (AttributeError, IndexError):
        return []

    # Use the module of the param.
    # TODO this module is not the module of the param in case of a function
    # call. In that case it's the module of the function call.
    # stuffed with content from a function call.
    pseudo_cls.parent = module
    return list(_execute_types_in_stmt(evaluator, stmt))
def test_get_code():
    """Use the same code that the parser also generates, to compare"""
    s = u('''"""a docstring"""
class SomeClass(object, mixin):
    def __init__(self):
        self.xy = 3.0
        """statement docstr"""
    def some_method(self):
        return 1
    def yield_method(self):
        while hasattr(self, 'xy'):
            yield True
        for x in [1, 2]:
            yield x
    def empty(self):
        pass
class Empty:
    pass
class WithDocstring:
    """class docstr"""
    pass
def method_with_docstring():
    """class docstr"""
    pass
''')
    assert ParserWithRecovery(load_grammar(), s).module.get_code() == s
def _parse_part(self, source, parser_code, line_offset, nodes):
    """
    Side effect: Alters the list of nodes.
    """
    h = hash(source)
    for index, node in enumerate(nodes):
        if node.hash == h and node.source == source:
            node.reset_node()
            nodes.remove(node)
            parser_code = source
            break
    else:
        tokenizer = FastTokenizer(parser_code)
        self.number_parsers_used += 1
        p = ParserWithRecovery(self._grammar, parser_code,
                               self.module_path, tokenizer=tokenizer)

        end = line_offset + p.module.end_pos[0]
        used_lines = self._lines[line_offset:end - 1]
        code_part_actually_used = ''.join(used_lines)
        node = ParserNode(self.module, p, code_part_actually_used)

    indent = len(parser_code) - len(parser_code.lstrip('\t '))
    self.current_node.add_node(node, line_offset, indent)
    self.current_node = node
def _load_faked_module(module):
    module_name = module.__name__
    if module_name == '__builtin__' and not is_py3:
        module_name = 'builtins'

    try:
        return modules[module_name]
    except KeyError:
        path = os.path.dirname(os.path.abspath(__file__))
        try:
            with open(os.path.join(path, 'fake', module_name) + '.pym') as f:
                source = f.read()
        except IOError:
            modules[module_name] = None
            return
        grammar = load_grammar(version='3.4')
        module = ParserWithRecovery(grammar, unicode(source), module_name).module
        modules[module_name] = module

        if module_name == 'builtins' and not is_py3:
            # There are two implementations of `open` for either python 2/3.
            # -> Rename the python2 version (`look at fake/builtins.pym`).
            open_func = search_scope(module, 'open')
            open_func.children[1] = FakeName('open_python3')
            open_func = search_scope(module, 'open_python2')
            open_func.children[1] = FakeName('open')
        return module
def test_end_pos_multi_line(self):
    parsed = ParserWithRecovery(load_grammar(), dedent(u('''
    def testit():
        a = """huhu
    asdfasdf""" + "h"
    ''')))
    tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
    assert tok.end_pos == (4, 11)
def test_user_statement_on_import():
    """github #285"""
    s = u("from datetime import (\n"
          "    time)")

    for pos in [(2, 1), (2, 4)]:
        p = ParserWithRecovery(load_grammar(), s)
        stmt = p.module.get_statement_for_position(pos)
        assert isinstance(stmt, pt.Import)
        assert [str(n) for n in stmt.get_defined_names()] == ['time']
def test_sys_path_with_modifications():
    SRC = dedent(u("""
        import os
    """))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    p.module.path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    paths = sys_path_with_modifications(Evaluator(grammar), p.module)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
def test_path_from_invalid_sys_path_assignment():
    SRC = dedent(u("""
        import sys
        sys.path = 'invalid'"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'invalid' not in paths
def main(args):
    jedi.set_debug_function(notices=args['--debug'])
    with open(args['<file>']) as f:
        code = f.read()

    grammar = load_grammar()
    parser = ParserWithRecovery(grammar, u(code))
    # Add something so the diff parser needs to run.
    code = code + '\na\n'
    lines = splitlines(code, keepends=True)
    cProfile.runctx('run(parser, lines)', globals(), locals(), sort=args['-s'])
def test_end_pos_one_line(self):
    parsed = ParserWithRecovery(
        load_grammar(),
        dedent(u('''
    def testit():
        a = "huhu"
    ''')))
    tok = parsed.module.subscopes[0].statements[0].children[2]
    assert tok.end_pos == (3, 14)
def _try_parse_part(self, until_line):
    """
    Sets up a normal parser that uses a specialized tokenizer to only parse
    until a certain position (or a bit longer if the statement hasn't ended).
    """
    self._parser_count += 1
    # TODO speed up, shouldn't copy the whole list all the time.
    # memoryview?
    parsed_until_line = self._nodes_stack.parsed_until_line
    lines_after = self._parser_lines_new[parsed_until_line:]
    #print('parse_content', parsed_until_line, lines_after, until_line)
    tokenizer = self._diff_tokenize(lines_after, until_line,
                                    line_offset=parsed_until_line)
    self._active_parser = ParserWithRecovery(self._grammar, source='\n',
                                             start_parsing=False)
    return self._active_parser.parse(tokenizer=tokenizer)
def __call__(self, grammar, source, module_path=None):
    pi = parser_cache.get(module_path, None)
    if pi is None or not settings.fast_parser:
        return ParserWithRecovery(grammar, source, module_path)

    parser = pi.parser
    d = DiffParser(parser)
    new_lines = splitlines(source, keepends=True)
    parser.module = parser._parsed = d.update(new_lines)
    return parser
def load(buildout_script):
    try:
        with open(buildout_script, 'rb') as f:
            source = common.source_to_unicode(f.read())
    except IOError:
        debug.dbg('Error trying to read buildout_script: %s', buildout_script)
        return

    p = ParserWithRecovery(evaluator.grammar, source, buildout_script)
    save_parser(buildout_script, p)
    return p.module
def test_end_pos():
    s = u(dedent('''
    x = ['a', 'b', 'c']
    def func():
        y = None
    '''))
    parser = ParserWithRecovery(load_grammar(), s)
    scope = parser.module.subscopes[0]
    assert scope.start_pos == (3, 0)
    assert scope.end_pos == (5, 0)
def check(src, result):
    # Python 2 tuple params should be ignored for now.
    grammar = load_grammar('%s.%s' % sys.version_info[:2])
    m = ParserWithRecovery(grammar, u(src)).module
    if is_py3:
        assert not m.subscopes
    else:
        # We don't want b and c to be a part of the param enumeration. Just
        # ignore them, because it's not what we want to support in the
        # future.
        assert [str(param.name) for param in m.subscopes[0].params] == result
def test_end_pos_error_correction():
    """
    Source code without an ending newline is given one, because the Python
    grammar needs it. However, it is removed again. We still want the right
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    assert func.end_pos == (2, 2)
    assert m.end_pos == (2, 2)
def test_sys_path_with_modifications():
    code = dedent(u("""
        import os
    """))

    path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, code, module_path=path)
    module_context = ModuleContext(Evaluator(grammar), p.module)
    paths = sys_path_with_modifications(module_context.evaluator, module_context)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
def __call__(self, grammar, source, module_path=None):
    if not settings.fast_parser:
        return ParserWithRecovery(grammar, source, module_path)

    pi = parser_cache.get(module_path, None)
    if pi is None or isinstance(pi.parser, ParserWithRecovery):
        p = super(CachedFastParser, self).__call__(grammar, source, module_path)
    else:
        p = pi.parser  # pi is a `cache.ParserCacheItem`
        p.update(source)
    return p
def test_append_on_non_sys_path():
    SRC = dedent(u("""
        class Dummy(object):
            path = []

        d = Dummy()
        d.path.append('foo')"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'foo' not in paths
def test_end_pos_error_correction():
    """
    Source code without an ending newline is given one, because the Python
    grammar needs it. However, it is removed again. We still want the right
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    # This is not exactly correct, but ok, because it doesn't make a
    # difference at all. We just want to make sure that the module end_pos
    # is correct!
    assert func.end_pos == (3, 0)
    assert m.end_pos == (2, 2)
def _get_typing_replacement_module():
    """
    The idea is to return our jedi replacement for the PEP-0484 typing module
    as discussed at https://github.com/davidhalter/jedi/issues/663
    """
    global _typing_module
    if _typing_module is None:
        typing_path = \
            os.path.abspath(os.path.join(__file__, "../jedi_typing.py"))
        with open(typing_path) as f:
            code = _compatibility.unicode(f.read())
        p = ParserWithRecovery(load_grammar(), code)
        _typing_module = p.module
    return _typing_module
def test_hex_values_in_docstring():
    source = r'''
        def foo(object):
            """
             \xff
            """
            return 1
        '''

    doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
    if is_py3:
        assert doc == '\xff'
    else:
        assert doc == u('ÿ')
def collections_namedtuple(evaluator, obj, arguments):
    """
    Implementation of the namedtuple function.

    This has to be done by processing the namedtuple class template and
    evaluating the result.

    .. note:: |jedi| only supports namedtuples on Python >2.6.
    """
    # Namedtuples are not supported on Python 2.6
    if not hasattr(collections, '_class_template'):
        return set()

    # Process arguments
    # TODO here we only use one of the types, we should use all.
    name = list(_follow_param(evaluator, arguments, 0))[0].obj
    _fields = list(_follow_param(evaluator, arguments, 1))[0]
    if isinstance(_fields, compiled.CompiledObject):
        fields = _fields.obj.replace(',', ' ').split()
    elif isinstance(_fields, iterable.AbstractSequence):
        fields = [
            v.obj
            for lazy_context in _fields.py__iter__()
            for v in lazy_context.infer()
            if hasattr(v, 'obj')
        ]
    else:
        return set()

    # Build source
    source = collections._class_template.format(
        typename=name,
        field_names=fields,
        num_fields=len(fields),
        arg_list=', '.join(fields),
        repr_fmt=', '.join(collections._repr_template.format(name=name)
                           for name in fields),
        field_defs='\n'.join(collections._field_template.format(index=index, name=name)
                             for index, name in enumerate(fields))
    )

    # Parse source
    generated_class = ParserWithRecovery(evaluator.grammar, unicode(source)).module.subscopes[0]
    return set([er.ClassContext(evaluator, generated_class, evaluator.BUILTINS)])
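# Illustration (an assumption, not part of the original source): for a call
# like `collections.namedtuple('Point', 'x y')`, `name` above is 'Point' and
# `fields` is ['x', 'y']. The filled-in `collections._class_template` then
# starts roughly like this, and `subscopes[0]` picks out this generated class:
#
#     class Point(tuple):
#         'Point(x, y)'
#         __slots__ = ()
#         _fields = ['x', 'y']
#         def __new__(_cls, x, y):
#             return _tuple.__new__(_cls, (x, y))
#         ...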
def test_quoted_strings(self):
    string_tokens = [
        'u"test"',
        'u"""test"""',
        'U"""test"""',
        "u'''test'''",
        "U'''test'''",
    ]

    for s in string_tokens:
        parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
        simple_stmt = parsed.module.children[0]
        expr_stmt = simple_stmt.children[0]
        assert len(expr_stmt.children) == 3
        string_tok = expr_stmt.children[2]
        assert string_tok.type == 'string'
        assert string_tok.value == s
        assert string_tok.eval() == 'test'
def test_path_from_sys_path_assignment():
    SRC = dedent(u("""
        #!/usr/bin/python

        import sys
        sys.path[0:0] = [
            '/usr/lib/python3.4/site-packages',
            '/home/test/.buildout/eggs/important_package.egg'
        ]

        path[0:0] = [1]

        import important_package

        if __name__ == '__main__':
            sys.exit(important_package.main())"""))
    grammar = load_grammar()
    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert 1 not in paths
    assert '/home/test/.buildout/eggs/important_package.egg' in paths
def _evaluate_for_statement_string(module_context, string):
    code = dedent(u("""
    def pseudo_docstring_stuff():
        # Create a pseudo function for docstring statements.
    {0}
    """))
    if string is None:
        return []

    for element in re.findall('((?:\w+\.)*\w+)\.', string):
        # Try to import module part in dotted name.
        # (e.g., 'threading' in 'threading.Thread').
        string = 'import %s\n' % element + string

    # Take the default grammar here, if we load the Python 2.7 grammar here, it
    # will be impossible to use `...` (Ellipsis) as a token. Docstring types
    # don't need to conform with the current grammar.
    p = ParserWithRecovery(load_grammar(), code.format(indent_block(string)))
    try:
        funcdef = p.module.subscopes[0]
        # First pick suite, then simple_stmt and then the node,
        # which is also not the last item, because there's a newline.
        stmt = funcdef.children[-1].children[-1].children[-2]
    except (AttributeError, IndexError):
        return []

    from jedi.evaluate.param import ValuesArguments
    from jedi.evaluate.representation import FunctionContext
    function_context = FunctionContext(
        module_context.evaluator,
        module_context,
        funcdef
    )

    func_execution_context = function_context.get_function_execution(
        ValuesArguments([])
    )
    # Use the module of the param.
    # TODO this module is not the module of the param in case of a function
    # call. In that case it's the module of the function call.
    # stuffed with content from a function call.
    return list(_execute_types_in_stmt(func_execution_context, stmt))
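# Illustration (an assumption, not part of the original source): for the
# docstring type string 'threading.Thread', the import-prepending loop and
# the template above hand this code to ParserWithRecovery:
#
#     def pseudo_docstring_stuff():
#         # Create a pseudo function for docstring statements.
#         import threading
#         threading.Thread
#
# `funcdef.children[-1]` is the suite of the pseudo function, and `stmt` ends
# up being the `threading.Thread` expression whose inferred types are then
# executed.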
def _try_parse_part(self, until_line):
    """
    Sets up a normal parser that uses a specialized tokenizer to only parse
    until a certain position (or a bit longer if the statement hasn't ended).
    """
    self._parser_count += 1
    # TODO speed up, shouldn't copy the whole list all the time.
    # memoryview?
    parsed_until_line = self._nodes_stack.parsed_until_line
    lines_after = self._parser_lines_new[parsed_until_line:]
    #print('parse_content', parsed_until_line, lines_after, until_line)
    tokenizer = self._diff_tokenize(
        lines_after,
        until_line,
        line_offset=parsed_until_line
    )
    self._active_parser = ParserWithRecovery(
        self._grammar,
        source='\n',
        start_parsing=False
    )
    return self._active_parser.parse(tokenizer=tokenizer)
def get_import(self, source):
    return ParserWithRecovery(load_grammar(), source).module.imports[0]
def get_sub(self, source):
    return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]
def test_started_lambda_stmt():
    p = ParserWithRecovery(load_grammar(), u'lambda a, b: a i')
    assert p.get_parsed_node().children[0].type == 'error_node'
def get_call(self, source):
    # Get the simple_stmt and then the first one.
    simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
    return simple_stmt.children[0]
def test_newline_positions():
    endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
    assert endmarker.end_pos == (2, 0)
    new_line = endmarker.get_previous_leaf()
    assert new_line.start_pos == (1, 1)
    assert new_line.end_pos == (2, 0)
class DiffParser(object):
    def __init__(self, parser):
        self._parser = parser
        self._grammar = self._parser._grammar
        self._module = parser.get_root_node()

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._copied_ranges = []
        self._new_used_names = {}
        self._nodes_stack = _NodesStack(self._module)

    def update(self, lines_new):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we
              get one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.
        '''
        debug.speed('diff parser start')
        self._parser_lines_new = lines_new
        self._added_newline = False
        if lines_new[-1] != '':
            # The Python grammar needs a newline at the end of a file, but
            # for everything else we keep working with lines_new here.
            self._parser_lines_new = list(lines_new)
            self._parser_lines_new[-1] += '\n'
            self._added_newline = True

        self._reset()

        line_length = len(lines_new)
        lines_old = splitlines(self._parser.source, keepends=True)
        sm = difflib.SequenceMatcher(None, lines_old, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        debug.speed('diff parser calculated')
        debug.dbg('diff: line_lengths old: %s, new: %s'
                  % (len(lines_old), line_length))

        if len(opcodes) == 1 and opcodes[0][0] == 'equal':
            self._copy_count = 1
            return self._module

        for operation, i1, i2, j1, j2 in opcodes:
            debug.dbg('diff %s old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length + int(self._added_newline):
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                assert operation == 'delete'

        # With this action all change will finally be applied and we have a
        # changed module.
        self._nodes_stack.close()
        self._cleanup()
        if self._added_newline:
            self._parser.remove_last_newline()

        self._parser.source = ''.join(lines_new)

        # Good for debugging.
        if debug.debug_function:
            self._enable_debugging(lines_old, lines_new)

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = splitlines(self._module.get_code(), keepends=True)
            diff = difflib.unified_diff(current_lines, lines_new)
            raise Exception(
                "There's an issue (%s != %s) with the diff parser. "
                "Please report:\n%s"
                % (last_pos, line_length, ''.join(diff))
            )

        debug.speed('diff parser end')
        return self._module

    def _enable_debugging(self, lines_old, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            debug.warning('parser issue:\n%s\n%s', repr(''.join(lines_old)),
                          repr(''.join(lines_new)))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        copied_nodes = [None]
        while until_line_new > self._nodes_stack.parsed_until_line:
            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within
                # parentheses).
                self._parse(self._nodes_stack.parsed_until_line + 1)
            elif not copied_nodes:
                # We have copied as much as possible (but definitely not too
                # much). Therefore we just parse the rest.
                # We might not reach the end, because there's a statement
                # that is not finished.
                self._parse(until_line_new)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                copied_nodes = self._nodes_stack.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
                )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1
                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
                    to = self._nodes_stack.parsed_until_line
                    self._copied_ranges.append((from_, to))

                    debug.dbg('diff actually copy %s to %s', from_, to)

    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0),
                                                  include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            # TODO use leaf.get_definition one day when that one is working
            # well.
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent
            return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _get_before_insertion_node(self):
        if self._nodes_stack.is_empty():
            return None

        line = self._nodes_stack.parsed_until_line + 1
        node = self._new_module.last_leaf()
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0 or '\n' in self._prefix
                return node
            node = parent

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until
        a valid state is reached.
        """
        while until_line > self._nodes_stack.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = self._get_children_nodes(node)
            #self._insert_nodes(nodes)
            self._nodes_stack.add_parsed_nodes(nodes)
            debug.dbg(
                'parse part %s to %s (to %s in parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_stack.parsed_until_line,
                node.end_pos[0] - 1
            )
            _merge_used_names(
                self._new_used_names,
                node.used_names
            )

    def _get_children_nodes(self, node):
        nodes = node.children
        first_element = nodes[0]
        # TODO this looks very strange...
        if first_element.type == 'error_leaf' and \
                first_element.original_type == 'indent':
            assert False, str(nodes)

        return nodes

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only
        parse until a certain position (or a bit longer if the statement
        hasn't ended).
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_stack.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        #print('parse_content', parsed_until_line, lines_after, until_line)
        tokenizer = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = ParserWithRecovery(
            self._grammar,
            source='\n',
            start_parsing=False
        )
        return self._active_parser.parse(tokenizer=tokenizer)

    def _cleanup(self):
        """Add the used names from the old parser to the new one."""
        copied_line_numbers = set()
        for l1, l2 in self._copied_ranges:
            copied_line_numbers.update(range(l1, l2 + 1))

        new_used_names = self._new_used_names
        for key, names in self._module.used_names.items():
            for name in names:
                if name.line in copied_line_numbers:
                    new_used_names.setdefault(key, []).append(name)
        self._module.used_names = new_used_names

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        l = iter(lines)
        tokens = generate_tokens(lambda: next(l, ''), use_exact_op_types=True)
        stack = self._active_parser.pgen_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here
                    # because we only parse part of the file. These indents
                    # would only get parsed as error leafs, which doesn't
                    # make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            if typ == DEDENT:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix:
                        prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
                    else:
                        prefix = ''
                    yield TokenInfo(ENDMARKER, '',
                                    (start_pos[0] + line_offset, 0), prefix)
                    break
            elif typ == NEWLINE and start_pos[0] >= until_line:
                yield TokenInfo(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield TokenInfo(DEDENT, '', start_pos, '')

                    yield TokenInfo(ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield TokenInfo(typ, string, start_pos, prefix)
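# Worked example (illustration, not part of the original source): the opcodes
# that drive DiffParser.update() come straight from difflib.SequenceMatcher.
# For a one-line edit the matcher yields 'equal' blocks around a single
# 'replace' block, so only the changed region needs to be re-parsed; the
# helper name below is hypothetical.
def _demo_diff_opcodes():
    import difflib
    old = ['a = 1\n', 'b = 2\n', 'c = 3\n']
    new = ['a = 1\n', 'b = 20\n', 'c = 3\n']
    opcodes = difflib.SequenceMatcher(None, old, new).get_opcodes()
    assert opcodes == [('equal', 0, 1, 0, 1),
                       ('replace', 1, 2, 1, 2),
                       ('equal', 2, 3, 2, 3)]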
def test(source, end_pos):
    module = ParserWithRecovery(load_grammar(), u(source)).module
    assert module.get_code() == source
    assert module.end_pos == end_pos