def test_str_lineno_trailing_1():
    """
    A multiline string whose closing line carries trailing code
    (C{.split()}) is one statement starting at the opening quotes, and the
    string literal is reported at that same start position.
    """
    source = dedent(r'''
        one
        """
        three
        four
        """.split()
        six
    ''').lstrip()
    block = PythonBlock(source, startpos=(101, 1))
    assert block.statements == (
        PythonStatement('one\n', startpos=(101, 1)),
        PythonStatement('"""\nthree\nfour\n""".split()\n', startpos=(102, 1)),
        PythonStatement('six\n', startpos=(106, 1)),
    )
    found = [(lit.s, lit.startpos) for lit in block.string_literals()]
    assert found == [("\nthree\nfour\n", FilePos(102, 1))]
def test_FilePos_from_None_1():
    """C{FilePos(None)} yields line 1, column 1."""
    default_pos = FilePos(None)
    assert default_pos.lineno == 1
    assert default_pos.colno == 1
def test_FilePos_from_tuple_intint_1():
    """A single C{(lineno, colno)} tuple argument populates both fields."""
    coords = (66, 77)
    from_tuple = FilePos(coords)
    assert from_tuple.lineno == 66
    assert from_tuple.colno == 77
def test_FilePos_from_FilePos_1():
    """Constructing a FilePos from a FilePos returns the very same object."""
    original = FilePos(55, 66)
    rewrapped = FilePos(original)
    assert rewrapped is original
def test_FilePos_from_intint_1():
    """Two positional ints become C{lineno} and C{colno}, in that order."""
    two_arg_pos = FilePos(55, 66)
    assert two_arg_pos.lineno == 55
    assert two_arg_pos.colno == 66
def test_FilePos_bad_other_1():
    """An arbitrary object is rejected with C{TypeError}."""
    bogus = object()
    with pytest.raises(TypeError):
        FilePos(bogus)
def test_FileText_endpos_offset_1():
    """
    Two newline-terminated lines starting at (101,55) end at column 1 of
    line 103.
    """
    two_lines = FileText("foo\nbar\n", startpos=(101, 55))
    assert two_lines.endpos == FilePos(103, 1)
def _split_code_lines(ast_nodes, text):
    """
    Split the given C{ast_nodes} and corresponding C{text} by code/noncode
    statement.

    Yield tuples of (nodes, subtext).  C{nodes} is a list of C{ast.AST} nodes,
    length 0 or 1; C{subtext} is a L{FileText} sliced from C{text}.

    C{nodes} is C{[node]} for code lines and C{[]} for non-code lines
    (comments and blanks).  If C{ast_nodes} is empty, a single
    C{([], text)} tuple is yielded.

    Every node must already carry a C{startpos} attribute; a node that also
    carries an C{endpos} attribute is treated as a multi-line string whose end
    is already known.

    @type ast_nodes:
      sequence of C{ast.AST} nodes
    @type text:
      L{FileText}
    """
    if not ast_nodes:
        yield ([], text)
        return
    assert text.startpos <= ast_nodes[0].startpos
    assert ast_nodes[-1].startpos < text.endpos
    if text.startpos != ast_nodes[0].startpos:
        # Starting noncode lines.
        yield ([], text[text.startpos:ast_nodes[0].startpos])
    # Sentinel "node" positioned at the end of the text so that the last real
    # node also has a successor to pair with in the loop below.
    end_sentinel = _DummyAst_Node()
    end_sentinel.startpos = text.endpos
    for node, next_node in zip(ast_nodes, ast_nodes[1:] + [end_sentinel]):
        startpos = node.startpos
        next_startpos = next_node.startpos
        assert startpos < next_startpos
        # We have the start position of this node.  Figure out the end
        # position, excluding noncode lines (standalone comments and blank
        # lines).
        if hasattr(node, 'endpos'):
            # We have an endpos for the node because this was a multi-line
            # string.  Start with the node endpos.
            endpos = node.endpos
            assert startpos < endpos <= next_startpos
            # endpos points to the character *after* the ending quote, so we
            # know that this is never at the beginning of the line.
            assert endpos.colno != 1
            # Advance past whitespace and an inline comment, if any.  Do NOT
            # advance past other code that could be on the same line, nor past
            # blank lines and comments on subsequent lines.
            line = text[endpos:min(text.endpos, FilePos(endpos.lineno + 1, 1))]
            if _is_comment_or_blank(line):
                # NOTE(review): if the closing line is the last line of
                # C{text} and has no trailing newline, this position appears
                # to lie past text.endpos, which the assertion after this
                # if/else would reject -- confirm whether callers guarantee a
                # trailing newline.
                endpos = FilePos(endpos.lineno + 1, 1)
        else:
            endpos = next_startpos
            assert endpos <= text.endpos
            # We don't have an endpos yet; what we do have is the next node's
            # startpos (or the position at the end of the text).  Start there
            # and work backward.
            if endpos.colno != 1:
                if endpos == text.endpos:
                    # There could be a comment on the last line and no
                    # trailing newline.
                    # TODO: do this in a more principled way.
                    if _is_comment_or_blank(text[endpos.lineno]):
                        assert startpos.lineno < endpos.lineno
                        if not text[endpos.lineno - 1].endswith("\\"):
                            endpos = FilePos(endpos.lineno, 1)
                else:
                    # We're not at end of file, yet the next node starts in
                    # the middle of the line.  This should only happen if
                    # we're not looking at a comment.  [The first character in
                    # the line could still be "#" if we're inside a multiline
                    # string that's the last child of the parent node.
                    # Therefore we don't assert 'not
                    # _is_comment_or_blank(...)'.]
                    pass
            if endpos.colno == 1:
                # Walk backward over trailing comment/blank lines so they are
                # not attributed to this node.  A comment/blank line is kept
                # with the node when the line before it ends in a backslash
                # and is not itself a comment/blank line.
                while (endpos.lineno - 1 > startpos.lineno and
                       _is_comment_or_blank(text[endpos.lineno - 1]) and
                       (not text[endpos.lineno - 2].endswith("\\") or
                        _is_comment_or_blank(text[endpos.lineno - 2]))):
                    endpos = FilePos(endpos.lineno - 1, 1)
        assert startpos < endpos <= next_startpos
        yield ([node], text[startpos:endpos])
        if endpos != next_startpos:
            # Whatever lies between this node's end and the next node's start
            # is non-code.
            yield ([], text[endpos:next_startpos])
def test_FileText_endpos_1():
    """Two newline-terminated lines with default startpos end at (3,1)."""
    two_lines = FileText("foo\nbar\n")
    assert two_lines.endpos == FilePos(3, 1)
def test_FileText_endpos_trailing_partial_line_1():
    """Without a final newline, C{endpos} sits just past the last character."""
    unterminated = FileText("foo\nbar")
    assert unterminated.endpos == FilePos(2, 4)
def test_FilePos_bad_type_1():
    """String and float coordinates are each rejected with C{TypeError}."""
    for bad_args in (("5", "6"), (5.0, 6.0)):
        with pytest.raises(TypeError):
            FilePos(*bad_args)
def test_FilePos_bad_too_many_args_1():
    """
    Three coordinates raise C{TypeError}, whether given positionally or as a
    single tuple.
    """
    for bad_args in ((5, 6, 7), ((5, 6, 7),)):
        with pytest.raises(TypeError):
            FilePos(*bad_args)
def test_FilePos_bad_too_few_args_1():
    """
    A lone coordinate raises C{TypeError}, whether given as an int or as a
    1-tuple.
    """
    for bad_args in ((5,), ((5,),)):
        with pytest.raises(TypeError):
            FilePos(*bad_args)
def test_str_lineno_concatenated_1():
    """
    Check statement boundaries and string-literal start positions for
    adjacent (implicitly concatenated) string literals, including multiline
    ones, in a block whose first line is numbered 101.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this source.  If both live in the same module, the later definition
    # replaces this one and this test never runs -- confirm and rename or
    # remove one of them.
    code = '''
        "A" "a"
        "B" 'b'
        'C' 'c'
        'D' "d"
        """E
        e""" 'E'
        x = """F""" 'f'
        \'G\'\'\'\'g

        \'\'\' "G"
        x = """H
        h
        H""" 'h' \'\'\'H


        h\'\'\'
        "I" 'i'.split()
        "J" """j
        J""" 'j'.split()
        'K'
        'L'
        r"M" u'm'
        "N" ''"" "n" """"""\'\'\'\'\'\'\'N\'\'\'
        """
        O
        """
        """
        P
        """
        "Q" "q"""
        "R" "r""" + "S""""s""S""""s""""
        S"""
'''
    if PY2:
        # ur"" is not valid syntax in Python 3
        code += """\
        Ur'''
        t'''
"""
        # Implicit string concatenation of non-bytes and bytes literals is not
        # valid syntax in Python 3
        code += '''\
        r"U" u'u' b"""U"""
'''
    block = PythonBlock(dedent(code).lstrip(), startpos=(101,1))
    # Each expected statement is the exact source text of one statement plus
    # the position of its first character.
    expected_statements = (
        PythonStatement('''"A" "a"\n''' , startpos=(101,1)),
        PythonStatement('''"B" 'b'\n''' , startpos=(102,1)),
        PythonStatement(''''C' 'c'\n''' , startpos=(103,1)),
        PythonStatement(''''D' "d"\n''' , startpos=(104,1)),
        PythonStatement('''"""E\ne""" 'E'\n''' , startpos=(105,1)),
        PythonStatement('''x = """F""" 'f'\n''' , startpos=(107,1)),
        PythonStatement("""'G''''g\n\n''' "G"\n""" , startpos=(108,1)),
        PythonStatement('''x = """H\nh\nH""" 'h' \'\'\'H\n\n\nh\'\'\'\n''', startpos=(111,1)),
        PythonStatement('''"I" 'i'.split()\n''' , startpos=(117,1)),
        PythonStatement('''"J" """j\nJ""" 'j'.split()\n''' , startpos=(118,1)),
        PythonStatement("""'K'\n""" , startpos=(120,1)),
        PythonStatement("""'L'\n""" , startpos=(121,1)),
        PythonStatement('''r"M" u\'m\'\n''' , startpos=(122,1)),
        PythonStatement('''"N" ''"" "n" """"""\'\'\'\'\'\'\'N\'\'\'\n''' , startpos=(123,1)),
        PythonStatement('''"""\nO\n"""\n''' , startpos=(124,1)),
        PythonStatement('''"""\nP\n"""\n''' , startpos=(127,1)),
        PythonStatement('''"Q" "q"""\n''' , startpos=(130,1)),
        PythonStatement('''"R" "r""" + "S""""s""S""""s""""\nS"""\n''' , startpos=(131,1)),
    )
    if PY2:
        expected_statements += (
            PythonStatement("""Ur'''\nt'''\n""" , startpos=(133,1)),
            PythonStatement('''r"U" u'u' b"""U"""\n''' , startpos=(135,1)),
        )
    assert block.statements == expected_statements
    # Each literal is compared as (concatenated string value, start position).
    literals = [(f.s, f.startpos) for f in block.string_literals()]
    expected_literals = [
        ("Aa", FilePos(101,1)),
        ("Bb", FilePos(102,1)),
        ("Cc", FilePos(103,1)),
        ("Dd", FilePos(104,1)),
        ("E\neE", FilePos(105,1)),
        ("Ff", FilePos(107,5)),
        ("Gg\n\nG", FilePos(108,1)),
        ("H\nh\nHhH\n\n\nh", FilePos(111,5)),
        ("Ii", FilePos(117,1)),
        ("Jj\nJj", FilePos(118,1)),
        ("K", FilePos(120,1)),
        ("L", FilePos(121,1)),
        ("Mm", FilePos(122,1)),
        ("NnN", FilePos(123,1)),
        ("\nO\n", FilePos(124,1)),
        ("\nP\n", FilePos(127,1)),
        ("Qq", FilePos(130,1)),
        ("Rr", FilePos(131,1)),
        ('Ss""Ss\nS', FilePos(131,13)),
    ]
    if PY2:
        expected_literals += [
            ('\nt', FilePos(133, 1)),
            ("UuU", FilePos(135,1)),
        ]
    assert literals == expected_literals
def test_FileText_empty_1():
    """
    Empty text has one empty line, an empty joined string, and an C{endpos}
    equal to its C{startpos}.
    """
    empty = FileText("", startpos=(5, 5))
    assert empty.lines == ("",)
    assert empty.joined == ""
    assert empty.startpos == empty.endpos == FilePos(5, 5)
def _annotate_ast_startpos(ast_node, parent_ast_node, minpos, text, flags):
    """
    Annotate C{ast_node}.  Set C{ast_node.startpos} to the starting position
    of the node within C{text}.  Child nodes are annotated first
    (recursively).  For a multiline string literal node, C{ast_node.endpos}
    is set as well.

    For "typical" nodes, i.e. those other than multiline strings, this is
    simply FilePos(ast_node.lineno, ast_node.col_offset+1), but taking
    C{text.startpos} into account.

    For multiline string nodes, this function works by trying to parse
    candidate (start quote, end quote) subranges of C{text} until finding the
    range that is syntactically valid and whose value matches the node's
    string value (C{ast_node.s}).  Candidate start lines run from
    C{minpos.lineno} through the line indicated by C{ast_node.lineno};
    candidate end lines run from that line to the end of C{text}.

    This function is unfortunately necessary because of a flaw in the output
    produced by the Python built-in parser.  For some crazy reason, the
    C{ast_node.lineno} attribute represents something different for multiline
    string literals versus all other statements.  For multiline string literal
    nodes and statements that are just a string expression (or more generally,
    nodes where the first descendant leaf node is a multiline string literal),
    the compiler attaches the ending line number as the value of the C{lineno}
    attribute.  For all other AST nodes, the compiler attaches the
    starting line number as the value of the C{lineno} attribute.  This means
    e.g. the statement "'''foo\\nbar'''" has a lineno value of 2, but the
    statement "x='''foo\\nbar'''" has a lineno value of 1.

    @type ast_node:
      C{ast.AST}
    @type parent_ast_node:
      C{ast.AST}
    @param parent_ast_node:
      The node whose child C{ast_node} is.  Only consulted for the special
      handling of C{ast.With} nodes.
    @type minpos:
      L{FilePos}
    @param minpos:
      Earliest position to check, in the number space of C{text}.
    @type text:
      L{FileText}
    @param text:
      Source text that was used to parse the AST, whose C{startpos} should be
      used in interpreting C{ast_node.lineno} (which always starts at 1 for
      the subset that was parsed).
    @type flags:
      C{CompilerFlags}
    @param flags:
      Compiler flags to use when re-compiling code.
    @return:
      C{True} if this node is a multiline string literal or the first child is
      such a node (recursively); C{False} otherwise.
    @raise ValueError:
      Could not find the starting line number.
    @raise AssertionError:
      Child nodes were visited out of source order, or no quote character was
      found on the line where the string supposedly ends.
    """
    # First, traverse child nodes.  If the first child node (recursively) is a
    # multiline string, then we need to transfer its information to this node.
    # Walk all nodes/fields of the AST.  We implement this as a custom
    # depth-first search instead of using ast.walk() or ast.NodeVisitor
    # so that we can easily keep track of the preceding node's lineno.
    child_minpos = minpos
    is_first_child = True
    leftstr_node = None
    for child_node in _iter_child_nodes_in_order(ast_node):
        leftstr = _annotate_ast_startpos(child_node, ast_node, child_minpos,
                                         text, flags)
        if is_first_child and leftstr:
            leftstr_node = child_node
        if hasattr(child_node, 'lineno'):
            if child_node.startpos < child_minpos:
                # NOTE(review): the leading indentation inside the message
                # fragments below looks collapsed to single spaces -- confirm
                # against the intended layout.
                raise AssertionError(
                    "Got out-of-order AST node(s):\n"
                    " parent minpos=%s\n" % minpos +
                    " node: %s\n" % ast.dump(ast_node) +
                    " fields: %s\n" % (" ".join(ast_node._fields)) +
                    " children:\n" +
                    ''.join(
                        " %s %9s: %s\n" % (
                            ("==>" if cn is child_node else " "),
                            getattr(cn, 'startpos', ""),
                            ast.dump(cn))
                        for cn in _iter_child_nodes_in_order(ast_node)) +
                    "\n"
                    "This indicates a bug in pyflyby._\n"
                    "\n"
                    "pyflyby developer: Check if there's a bug or missing ast node handler in "
                    "pyflyby._parse._iter_child_nodes_in_order() - "
                    "probably the handler for ast.%s." % type(ast_node).__name__)
            # The next sibling cannot start before this child does.
            child_minpos = child_node.startpos
        is_first_child = False
    # If the node has no lineno at all, then skip it.  This should only happen
    # for nodes we don't care about, e.g. C{ast.Module} or C{ast.alias}.
    if not hasattr(ast_node, 'lineno'):
        return False
    # If col_offset is set then the lineno should be correct also.
    if ast_node.col_offset >= 0:
        # Not a multiline string literal.  (I.e., it could be a non-string or
        # a single-line string.)
        # Easy.
        delta = (ast_node.lineno - 1, ast_node.col_offset)
        startpos = text.startpos + delta
        # Special case for 'with' statements.  Consider the code:
        #    with X: pass
        #    ^0   ^5
        # In python2.6, col_offset is 0.
        # In python2.7, col_offset is 5.
        # This is because python2.7 allows for multiple clauses:
        #    with X, Y: pass
        # Since 'Y's col_offset isn't the beginning of the line, the authors
        # of Python presumably changed 'X's col_offset to also not be the
        # beginning of the line.  If they had made the With ast node support
        # multiple clauses, they wouldn't have needed to do that, but then
        # that would introduce an API change in the AST.  So it's
        # understandable that they did that.
        # Since we use startpos for breaking lines, we need to set startpos to
        # the beginning of the line.
        if (isinstance(ast_node, ast.With) and
            not isinstance(parent_ast_node, ast.With) and
            sys.version_info >= (2, 7)):
            assert ast_node.col_offset >= 5
            if startpos.lineno == text.startpos.lineno:
                linestart = text.startpos.colno
            else:
                linestart = 1
            # Text from the start of the line up to the reported position; it
            # is expected to end with the 'with' keyword plus whitespace.
            line = text[(startpos.lineno, linestart):startpos]
            m = re.search(r"\bwith\s+$", str(line))
            assert m
            lk = len(m.group()) # length of 'with ' including spaces
            startpos = FilePos(startpos.lineno, startpos.colno - lk)
            assert str(text[startpos:(startpos + (0, 4))]) == "with"
        ast_node.startpos = startpos
        return False
    assert ast_node.col_offset == -1
    if leftstr_node:
        # This is an ast node where the leftmost deepest leaf is a
        # multiline string.  The bug that multiline strings have broken
        # lineno/col_offset infects ancestors up the tree.
        #
        # If the leftmost leaf is a multi-line string, then C{lineno}
        # contains the ending line number, and col_offset is -1:
        #   >>> ast.parse("""'''foo\nbar'''+blah""").body[0].lineno
        #   2
        # But if the leftmost leaf is not a multi-line string, then
        # C{lineno} contains the starting line number:
        #   >>> ast.parse("""'''foobar'''+blah""").body[0].lineno
        #   1
        #   >>> ast.parse("""blah+'''foo\nbar'''+blah""").body[0].lineno
        #   1
        #
        # To fix that, we copy start_lineno and start_colno from the Str
        # node once we've corrected the values.
        assert not isinstance(ast_node, ast.Str)
        assert leftstr_node.lineno == ast_node.lineno
        assert leftstr_node.col_offset == -1
        ast_node.startpos = leftstr_node.startpos
        return True
    # It should now be the case that we are looking at a multi-line string
    # literal.
    if not isinstance(ast_node, ast.Str):
        raise ValueError(
            "got a non-string col_offset=-1: %s" % (ast.dump(ast_node)))
    # The C{lineno} attribute gives the ending line number of the multiline
    # string ... unless it's multiple multiline strings that are concatenated
    # by adjacency, in which case it's merely the end of the first one of
    # them.  At least we know that the start lineno is definitely not later
    # than the C{lineno} attribute.
    first_end_lineno = text.startpos.lineno + ast_node.lineno - 1
    # Compute possible start positions.
    # The starting line number of this string could be anywhere between the
    # end of the previous expression and C{first_end_lineno}.
    startpos_candidates = []
    assert minpos.lineno <= first_end_lineno
    for start_lineno in range(minpos.lineno, first_end_lineno + 1):
        start_line = text[start_lineno]
        # Column number of the first character of this line: only the first
        # line of C{text} can begin at a column other than 1.
        start_line_colno = (text.startpos.colno
                            if start_lineno == text.startpos.lineno else 1)
        # Each candidate is (quote character, position of the first character
        # of the optional string prefix + quote).
        startpos_candidates.extend([
            (m.group()[-1], FilePos(start_lineno, m.start() + start_line_colno))
            for m in re.finditer("[bBrRuU]*[\"\']", start_line)
        ])
    target_str = ast_node.s
    # Loop over possible end_linenos.  The first one we've identified is the
    # by far most likely one, but in theory it could be anywhere later in the
    # file.  This could be because of a dastardly concatenated string like
    # this:
    #   """                    # L1
    #   two                    # L2
    #   """   """              # L3
    #   four                   # L4
    #   five                   # L5
    #   six                    # L6
    #   """                    # L7
    # There are two substrings on L1:L3 and L3:L7.  The parser gives us a
    # single concatenated string, but sets lineno to 3 instead of 7.  We don't
    # have much to go on to figure out that the real end_lineno is 7.  If we
    # don't find the string ending on L3, then search forward looking for the
    # real end of the string.  Yuck!
    for end_lineno in range(first_end_lineno, text.endpos.lineno + 1):
        # Compute possible end positions.  We're given the line we're ending
        # on, but not the column position.  Note that the ending line could
        # contain more than just the string we're looking for -- including
        # possibly other strings or comments.
        end_line = text[end_lineno]
        end_line_startcol = (
            text.startpos.colno if end_lineno == text.startpos.lineno else 1)
        # Each candidate is (quote character, position just *after* that
        # quote character) -- hence the extra "+ 1".
        endpos_candidates = [
            (m.group(), FilePos(end_lineno, m.start() + end_line_startcol + 1))
            for m in re.finditer("[\"\']", end_line)]
        if not endpos_candidates:
            # We found no endpos_candidates.  This should not happen for
            # first_end_lineno because there should be _some_ string that ends
            # there.
            if end_lineno == first_end_lineno:
                raise AssertionError(
                    "No quote char found on line with supposed string")
            continue
        # Filter and sort the possible startpos candidates given this endpos
        # candidate.  It's possible for the starting quotechar and ending
        # quotechar to be different in case of adjacent string concatenation,
        # e.g.  "foo"'''bar'''.  That said, it's an unlikely case, so
        # deprioritize checking them.
        likely_candidates = []
        unlikely_candidates = []
        for end_quotechar, endpos in reversed(endpos_candidates):
            for start_quotechar, startpos in startpos_candidates:
                if not startpos < endpos:
                    continue
                if start_quotechar == end_quotechar:
                    candidate_list = likely_candidates
                else:
                    candidate_list = unlikely_candidates
                candidate_list.append((startpos, endpos))
        # Loop over sorted candidates.
        matched_prefix = set()
        for (startpos, endpos) in likely_candidates + unlikely_candidates:
            # Try to parse the given range and see if it matches the target
            # string literal.
            subtext = text[startpos:endpos]
            candidate_str = _test_parse_string_literal(subtext, flags)
            if candidate_str is None:
                continue
            elif target_str == candidate_str:
                # Success!
                ast_node.startpos = startpos
                ast_node.endpos = endpos
                # This node is a multiline string; and, it's a leaf, so by
                # definition it is the leftmost node.
                return True # all done
            elif target_str.startswith(candidate_str):
                matched_prefix.add(startpos)
        # We didn't find a string given the current end_lineno candidate.
        # Only continue checking the startpos candidates that so far produced
        # prefixes of the string we're looking for.
        if not matched_prefix:
            break
        startpos_candidates = [
            (sq, sp)
            for (sq, sp) in startpos_candidates
            if sp in matched_prefix
        ]
    raise ValueError(
        "Couldn't find exact position of %s"
        % (ast.dump(ast_node)))
def test_FileText_one_full_line_offset_1():
    """
    One newline-terminated line starting at (101,55) ends at column 1 of
    line 102.
    """
    one_line = FileText("foo\n", startpos=(101, 55))
    assert one_line.endpos == FilePos(102, 1)
def test_PythonStatement_startpos_1():
    """
    The C{startpos} given to a PythonStatement is reported by the statement,
    by its block, and by the block's text.
    """
    statement = PythonStatement('foo()', startpos=(20, 30))
    assert statement.startpos == FilePos(20, 30)
    block = statement.block
    assert block.startpos == FilePos(20, 30)
    assert block.text.startpos == FilePos(20, 30)
def test_FileText_one_partial_line_offset_1():
    """
    A three-character unterminated line starting at column 55 ends at
    column 58 of the same line.
    """
    fragment = FileText("foo", startpos=(101, 55))
    assert fragment.endpos == FilePos(101, 58)
def test_str_lineno_concatenated_1():
    """
    Check statement boundaries and string-literal start positions for
    adjacent (implicitly concatenated) string literals, including multiline
    ones, in a block whose first line is numbered 101.
    """
    # NOTE(review): a function with this exact name is also defined earlier in
    # this source.  If both live in the same module, this later definition
    # replaces the earlier one.  The earlier variant guards its C{Ur'''...'''}
    # and str+bytes concatenation inputs behind C{PY2}; this one feeds them
    # unconditionally -- confirm which variant is meant to survive.
    block = PythonBlock(dedent('''
        "A" "a"
        "B" 'b'
        'C' 'c'
        'D' "d"
        """E
        e""" 'E'
        x = """F""" 'f'
        \'G\'\'\'\'g

        \'\'\' "G"
        x = """H
        h
        H""" 'h' \'\'\'H


        h\'\'\'
        "I" 'i'.split()
        "J" """j
        J""" 'j'.split()
        'K'
        'L'
        r"M" u'm' b"""M""" Ur\'\'\'
        m\'\'\'
        "N" ''"" "n" """"""\'\'\'\'\'\'\'N\'\'\'
        """
        O
        """
        """
        P
        """
        "Q" "q"""
        "R" "r""" + "S""""s""S""""s""""
        S"""
    ''').lstrip(), startpos=(101, 1))
    # Each expected statement is the exact source text of one statement plus
    # the position of its first character.
    expected_statements = (
        PythonStatement('''"A" "a"\n''', startpos=(101, 1)),
        PythonStatement('''"B" 'b'\n''', startpos=(102, 1)),
        PythonStatement(''''C' 'c'\n''', startpos=(103, 1)),
        PythonStatement(''''D' "d"\n''', startpos=(104, 1)),
        PythonStatement('''"""E\ne""" 'E'\n''', startpos=(105, 1)),
        PythonStatement('''x = """F""" 'f'\n''', startpos=(107, 1)),
        PythonStatement("""'G''''g\n\n''' "G"\n""", startpos=(108, 1)),
        PythonStatement('''x = """H\nh\nH""" 'h' \'\'\'H\n\n\nh\'\'\'\n''', startpos=(111, 1)),
        PythonStatement('''"I" 'i'.split()\n''', startpos=(117, 1)),
        PythonStatement('''"J" """j\nJ""" 'j'.split()\n''', startpos=(118, 1)),
        PythonStatement("""'K'\n""", startpos=(120, 1)),
        PythonStatement("""'L'\n""", startpos=(121, 1)),
        PythonStatement('''r"M" u'm' b"""M""" Ur\'\'\'\nm\'\'\'\n''', startpos=(122, 1)),
        PythonStatement('''"N" ''"" "n" """"""\'\'\'\'\'\'\'N\'\'\'\n''', startpos=(124, 1)),
        PythonStatement('''"""\nO\n"""\n''', startpos=(125, 1)),
        PythonStatement('''"""\nP\n"""\n''', startpos=(128, 1)),
        PythonStatement('''"Q" "q"""\n''', startpos=(131, 1)),
        PythonStatement('''"R" "r""" + "S""""s""S""""s""""\nS"""\n''', startpos=(132, 1)),
    )
    assert block.statements == expected_statements
    # Each literal is compared as (concatenated string value, start position).
    literals = [(f.s, f.startpos) for f in block.string_literals()]
    expected_literals = [
        ("Aa", FilePos(101, 1)),
        ("Bb", FilePos(102, 1)),
        ("Cc", FilePos(103, 1)),
        ("Dd", FilePos(104, 1)),
        ("E\neE", FilePos(105, 1)),
        ("Ff", FilePos(107, 5)),
        ("Gg\n\nG", FilePos(108, 1)),
        ("H\nh\nHhH\n\n\nh", FilePos(111, 5)),
        ("Ii", FilePos(117, 1)),
        ("Jj\nJj", FilePos(118, 1)),
        ("K", FilePos(120, 1)),
        ("L", FilePos(121, 1)),
        ("MmM\nm", FilePos(122, 1)),
        ("NnN", FilePos(124, 1)),
        ("\nO\n", FilePos(125, 1)),
        ("\nP\n", FilePos(128, 1)),
        ("Qq", FilePos(131, 1)),
        ("Rr", FilePos(132, 1)),
        ('Ss""Ss\nS', FilePos(132, 13)),
    ]
    assert literals == expected_literals
def test_FilePos_from_empty_1():
    """C{FilePos()} with no arguments yields line 1, column 1."""
    default_pos = FilePos()
    assert default_pos.lineno == 1
    assert default_pos.colno == 1