def _raw_parse(self): """Parse the source to find the interesting facts about its lines. A handful of member fields are updated. """ if self.exclude: self.excluded = self.lines_matching(self.exclude) indent = 0 exclude_indent = 0 excluding = False prev_toktype = token.INDENT first_line = None empty = True tokgen = tokenize.generate_tokens(StringIO(self.text).readline) for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: if self.show_tokens: print '%10s %5s %-20r %r' % (tokenize.tok_name.get( toktype, toktype), nice_pair( (slineno, elineno)), ttext, ltext) if toktype == token.INDENT: indent += 1 elif toktype == token.DEDENT: indent -= 1 elif toktype == token.NAME and ttext == 'class': self.classdefs.add(slineno) elif toktype == token.OP and ttext == ':': if not excluding and elineno in self.excluded: exclude_indent = indent excluding = True elif toktype == token.STRING and prev_toktype == token.INDENT: self.docstrings.update(range(slineno, elineno + 1)) elif toktype == token.NEWLINE: if first_line is not None and elineno != first_line: rng = (first_line, elineno) for l in range(first_line, elineno + 1): self.multiline[l] = rng first_line = None if ttext.strip() and toktype != tokenize.COMMENT: empty = False if first_line is None: first_line = slineno if excluding and indent <= exclude_indent: excluding = False if excluding: self.excluded.add(elineno) prev_toktype = toktype if not empty: self.statement_starts.update(self.byte_parser._find_statements())
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions)."""
    main_line = last = node.lineno
    if node.decorator_list:
        if env.PYBEHAVIOR.trace_decorated_def:
            last = None
        for dec_node in node.decorator_list:
            dec_start = self.line_for_node(dec_node)
            if last is not None and dec_start != last:
                self.add_arc(last, dec_start)
            last = dec_start
        if env.PYBEHAVIOR.trace_decorated_def:
            self.add_arc(last, main_line)
            last = main_line
    # The definition line may have been missed, but we should have it
    # in `self.statements`.  For some constructs, `line_for_node` is
    # not what we'd think of as the first line in the statement, so map
    # it to the first one.
    if node.body:
        body_start = self.line_for_node(node.body[0])
        body_start = self.multiline.get(body_start, body_start)
        for lineno in range(last + 1, body_start):
            if lineno in self.statements:
                self.add_arc(last, lineno)
                last = lineno
    # The body is handled in collect_arcs.
    return {ArcStart(last)}
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions)."""
    main_line = last = node.lineno
    if node.decorator_list:
        if env.PYBEHAVIOR.trace_decorated_def:
            last = None
        for dec_node in node.decorator_list:
            dec_start = self.line_for_node(dec_node)
            if last is not None and dec_start != last:
                self.add_arc(last, dec_start)
            last = dec_start
        if env.PYBEHAVIOR.trace_decorated_def:
            self.add_arc(last, main_line)
            last = main_line
    # The definition line may have been missed, but we should have it
    # in `self.statements`.  For some constructs, `line_for_node` is
    # not what we'd think of as the first line in the statement, so map
    # it to the first one.
    if node.body:
        body_start = self.line_for_node(node.body[0])
        body_start = self.multiline.get(body_start, body_start)
        for lineno in range(last+1, body_start):
            if lineno in self.statements:
                self.add_arc(last, lineno)
                last = lineno
    # The body is handled in collect_arcs.
    return set([ArcStart(last)])
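# Illustration only (not part of coverage.py): the `_handle_decorated` variants
# above walk `node.decorator_list` and draw arcs from each decorator line to the
# next, ending at the `def`/`class` line.  A minimal standalone sketch of the
# same walk using the standard-library `ast` module; `decorator_arc_pairs` is a
# hypothetical helper name, and the example result assumes Python 3.8+, where
# `FunctionDef.lineno` is the `def` line rather than the first decorator line.
import ast

def decorator_arc_pairs(source):
    """Return (from_line, to_line) pairs linking decorator lines to the def line."""
    tree = ast.parse(source)
    pairs = []
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.ClassDef)) and node.decorator_list:
            lines = [dec.lineno for dec in node.decorator_list] + [node.lineno]
            pairs.extend(zip(lines, lines[1:]))
    return pairs

# Example: decorator_arc_pairs("@a\n@b\ndef f():\n    pass\n") -> [(1, 2), (2, 3)]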
def _raw_parse(self):
    if self.exclude:
        self.excluded = self.lines_matching(self.exclude)

    indent = 0
    exclude_indent = 0
    excluding = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True

    tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:
            print '%10s %5s %-20r %r' % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
                )
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME and ttext == 'class':
            self.classdefs.add(slineno)
        elif toktype == token.OP and ttext == ':':
            if not excluding and elineno in self.excluded:
                exclude_indent = indent
                excluding = True
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            self.docstrings.update(range(slineno, elineno + 1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                rng = (first_line, elineno)
                for l in range(first_line, elineno + 1):
                    self.multiline[l] = rng
            first_line = None

        if ttext.strip() and toktype != tokenize.COMMENT:
            empty = False
            if first_line is None:
                first_line = slineno
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.excluded.add(elineno)

        prev_toktype = toktype

    if not empty:
        self.statement_starts.update(self.byte_parser._find_statements())
def abs_file_dict(d):
    """Return a dict like d, but with keys modified by `abs_file`."""
    # The call to litems() ensures that the GIL protects the dictionary
    # iterator against concurrent modifications by tracers running
    # in other threads.  We try three times in case of concurrent
    # access, hoping to get a clean copy.
    runtime_err = None
    for _ in range(3):
        try:
            items = litems(d)
        except RuntimeError as ex:
            runtime_err = ex
        else:
            break
    else:
        raise runtime_err       # pylint: disable=raising-bad-type

    return dict((abs_file(k), v) for k, v in items)
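# Illustration only: the retry loop in `abs_file_dict` is a general pattern for
# snapshotting a dict that another thread may mutate mid-iteration.  A minimal
# standalone sketch, assuming a shared dict and a three-attempt limit mirroring
# the function above (`snapshot_items` is a hypothetical name):
def snapshot_items(shared, attempts=3):
    """Return a list of (key, value) pairs, retrying if the dict changes underfoot."""
    last_err = None
    for _ in range(attempts):
        try:
            # Raises RuntimeError if the dict is resized during iteration.
            return list(shared.items())
        except RuntimeError as exc:
            last_err = exc
    raise last_err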
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions)."""
    last = self.line_for_node(node)
    if node.decorator_list:
        for dec_node in node.decorator_list:
            dec_start = self.line_for_node(dec_node)
            if dec_start != last:
                self.add_arc(last, dec_start)
                last = dec_start
    # The definition line may have been missed, but we should have it
    # in `self.statements`.  For some constructs, `line_for_node` is
    # not what we'd think of as the first line in the statement, so map
    # it to the first one.
    body_start = self.line_for_node(node.body[0])
    body_start = self.multiline.get(body_start, body_start)
    for lineno in range(last + 1, body_start):
        if lineno in self.statements:
            self.add_arc(last, lineno)
            last = lineno
    # The body is handled in collect_arcs.
    return set([ArcStart(last, cause=None)])
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions)."""
    last = self.line_for_node(node)
    if node.decorator_list:
        for dec_node in node.decorator_list:
            dec_start = self.line_for_node(dec_node)
            if dec_start != last:
                self.arcs.add((last, dec_start))
                last = dec_start
    # The definition line may have been missed, but we should have it
    # in `self.statements`.  For some constructs, `line_for_node` is
    # not what we'd think of as the first line in the statement, so map
    # it to the first one.
    body_start = self.line_for_node(node.body[0])
    body_start = self.multiline.get(body_start, body_start)
    for lineno in range(last+1, body_start):
        if lineno in self.statements:
            self.arcs.add((last, lineno))
            last = lineno
    # The body is handled in collect_arcs.
    return set([last])
def _raw_parse(self):
    """Parse the source to find the interesting facts about its lines.

    A handful of attributes are updated.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.raw_excluded = self.lines_matching(self.exclude)

    # Tokenize, to find excluded suites, to find docstrings, and to find
    # multi-line statements.
    indent = 0
    exclude_indent = 0
    excluding = False
    excluding_decorators = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True
    first_on_line = True

    tokgen = generate_tokens(self.text)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:                # pragma: debugging
            print("%10s %5s %-20r %r" % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME:
            if ttext == 'class':
                # Class definitions look like branches in the bytecode, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.raw_classdefs.add(slineno)
        elif toktype == token.OP:
            if ttext == ':':
                should_exclude = (elineno in self.raw_excluded) or excluding_decorators
                if not excluding and should_exclude:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    self.raw_excluded.add(elineno)
                    exclude_indent = indent
                    excluding = True
                    excluding_decorators = False
            elif ttext == '@' and first_on_line:
                # A decorator.
                if elineno in self.raw_excluded:
                    excluding_decorators = True
                if excluding_decorators:
                    self.raw_excluded.add(elineno)
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            # Strings that are first on an indented line are docstrings.
            # (a trick from trace.py in the stdlib.) This works for
            # 99.9999% of cases.  For the rest (!) see:
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            self.raw_docstrings.update(range(slineno, elineno + 1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                # We're at the end of a line, and we've ended on a
                # different line than the first line of the statement,
                # so record a multi-line range.
                for l in range(first_line, elineno + 1):
                    self._multiline[l] = first_line
            first_line = None
            first_on_line = True

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A non-whitespace token.
            empty = False
            if first_line is None:
                # The token is not whitespace, and is the first in a
                # statement.
                first_line = slineno
                # Check whether to end an excluded suite.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.raw_excluded.add(elineno)
            first_on_line = False

        prev_toktype = toktype

    # Find the starts of the executable statements.
    if not empty:
        self.raw_statements.update(self.byte_parser._find_statements())

    # The first line of modules can lie and say 1 always, even if the first
    # line of code is later.  If so, map 1 to the actual first line of the
    # module.
    if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
        self._multiline[1] = min(self.raw_statements)
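# Illustration only: `_raw_parse` keys off the standard `tokenize` token types
# (INDENT/DEDENT, NAME, OP, STRING, NEWLINE).  A minimal standalone sketch that
# prints the same fields the `show_tokens` branch prints, for an arbitrary
# source string (`dump_tokens` is a hypothetical helper, not part of coverage.py):
import io
import tokenize

def dump_tokens(source):
    """Print each token's type name, start/end lines, text, and source line."""
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        toktype, ttext, (slineno, _), (elineno, _), ltext = tok
        print("%10s %s-%s %-20r %r" % (
            tokenize.tok_name.get(toktype, toktype), slineno, elineno, ttext, ltext
        ))

# Example: dump_tokens("def f():\n    '''doc'''\n    return (1 +\n            2)\n")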
def _split_into_chunks(self):
    """Split the code object into a list of `Chunk` objects.

    Each chunk is only entered at its first instruction, though there can
    be many exits from a chunk.

    Returns a list of `Chunk` objects.

    """
    # The list of chunks so far, and the one we're working on.
    chunks = []
    chunk = None

    # A dict mapping byte offsets of line starts to the line numbers.
    bytes_lines_map = dict(self._bytes_lines())

    # The block stack: loops and try blocks get pushed here for the
    # implicit jumps that can occur.
    # Each entry is a tuple: (block type, destination)
    block_stack = []

    # Some op codes are followed by branches that should be ignored.  This
    # is a count of how many ignores are left.
    ignore_branch = 0

    # We have to handle the last two bytecodes specially.
    ult = penult = None

    # Get a set of all of the jump-to points.
    jump_to = set()
    bytecodes = list(ByteCodes(self.code.co_code))
    for bc in bytecodes:
        if bc.jump_to >= 0:
            jump_to.add(bc.jump_to)

    chunk_lineno = 0

    # Walk the byte codes building chunks.
    for bc in bytecodes:
        # Maybe have to start a new chunk
        start_new_chunk = False
        first_chunk = False
        if bc.offset in bytes_lines_map:
            # Start a new chunk for each source line number.
            start_new_chunk = True
            chunk_lineno = bytes_lines_map[bc.offset]
            first_chunk = True
        elif bc.offset in jump_to:
            # To make chunks have a single entrance, we have to make a new
            # chunk when we get to a place some bytecode jumps to.
            start_new_chunk = True
        elif bc.op in OPS_CHUNK_BEGIN:
            # Jumps deserve their own unnumbered chunk.  This fixes
            # problems with jumps to jumps getting confused.
            start_new_chunk = True

        if not chunk or start_new_chunk:
            if chunk:
                chunk.exits.add(bc.offset)
            chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
            chunks.append(chunk)

        # Look at the opcode
        if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
            if ignore_branch:
                # Someone earlier wanted us to ignore this branch.
                ignore_branch -= 1
            else:
                # The opcode has a jump, it's an exit for this chunk.
                chunk.exits.add(bc.jump_to)

        if bc.op in OPS_CODE_END:
            # The opcode can exit the code object.
            chunk.exits.add(-self.code.co_firstlineno)
        if bc.op in OPS_PUSH_BLOCK:
            # The opcode adds a block to the block_stack.
            block_stack.append((bc.op, bc.jump_to))
        if bc.op in OPS_POP_BLOCK:
            # The opcode pops a block from the block stack.
            block_stack.pop()
        if bc.op in OPS_CHUNK_END:
            # This opcode forces the end of the chunk.
            if bc.op == OP_BREAK_LOOP:
                # A break is implicit: jump where the top of the
                # block_stack points.
                chunk.exits.add(block_stack[-1][1])
            chunk = None
        if bc.op == OP_END_FINALLY:
            # For the finally clause we need to find the closest exception
            # block, and use its jump target as an exit.
            for block in reversed(block_stack):
                if block[0] in OPS_EXCEPT_BLOCKS:
                    chunk.exits.add(block[1])
                    break
        if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
            # This is an except clause.  We want to overlook the next
            # branch, so that except's don't count as branches.
            ignore_branch += 1

        penult = ult
        ult = bc

    if chunks:
        # The last two bytecodes could be a dummy "return None" that
        # shouldn't be counted as real code.  Every Python code object seems
        # to end with a return, and a "return None" is inserted if there
        # isn't an explicit return in the source.
        if ult and penult:
            if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                if self.code.co_consts[penult.arg] is None:
                    # This is "return None", but is it dummy?  A real line
                    # would be a last chunk all by itself.
                    if chunks[-1].byte != penult.offset:
                        ex = -self.code.co_firstlineno
                        # Split the last chunk
                        last_chunk = chunks[-1]
                        last_chunk.exits.remove(ex)
                        last_chunk.exits.add(penult.offset)
                        chunk = Chunk(
                            penult.offset, last_chunk.line, False
                        )
                        chunk.exits.add(ex)
                        chunks.append(chunk)

        # Give all the chunks a length.
        chunks[-1].length = bc.next_offset - chunks[-1].byte   # pylint: disable=W0631,C0301
        for i in range(len(chunks)-1):
            chunks[i].length = chunks[i+1].byte - chunks[i].byte

    #self.validate_chunks(chunks)
    return chunks
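# Illustration only: `_split_into_chunks` starts a new chunk at every byte
# offset that begins a source line.  The offset-to-line mapping it gets from
# `_bytes_lines()` can be approximated with the standard `dis` module; a
# minimal sketch (`line_start_offsets` is a hypothetical helper name, and the
# exact offsets in the example vary by Python version):
import dis

def line_start_offsets(code):
    """Return {byte_offset: line_number} for offsets that start a new source line."""
    return {offset: lineno for offset, lineno in dis.findlinestarts(code)}

# Example:
#   def sample():
#       x = 1
#       return x
#   line_start_offsets(sample.__code__)  ->  something like {0: 2, 4: 3}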
def _raw_parse(self):
    """Parse the source to find the interesting facts about its lines.

    A handful of member fields are updated.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.excluded = self.lines_matching(self.exclude)

    # Tokenize, to find excluded suites, to find docstrings, and to find
    # multi-line statements.
    indent = 0
    exclude_indent = 0
    excluding = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True

    tokgen = generate_tokens(self.text)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:                # pragma: not covered
            print("%10s %5s %-20r %r" % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME and ttext == 'class':
            # Class definitions look like branches in the byte code, so
            # we need to exclude them.  The simplest way is to note the
            # lines with the 'class' keyword.
            self.classdefs.add(slineno)
        elif toktype == token.OP and ttext == ':':
            if not excluding and elineno in self.excluded:
                # Start excluding a suite.  We trigger off of the colon
                # token so that the #pragma comment will be recognized on
                # the same line as the colon.
                exclude_indent = indent
                excluding = True
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            # Strings that are first on an indented line are docstrings.
            # (a trick from trace.py in the stdlib.) This works for
            # 99.9999% of cases.  For the rest (!) see:
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            self.docstrings.update(range(slineno, elineno+1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                # We're at the end of a line, and we've ended on a
                # different line than the first line of the statement,
                # so record a multi-line range.
                for l in range(first_line, elineno+1):
                    self.multiline[l] = first_line
            first_line = None

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A non-whitespace token.
            empty = False
            if first_line is None:
                # The token is not whitespace, and is the first in a
                # statement.
                first_line = slineno
                # Check whether to end an excluded suite.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.excluded.add(elineno)

        prev_toktype = toktype

    # Find the starts of the executable statements.
    if not empty:
        self.statement_starts.update(self.byte_parser._find_statements())
def _split_into_chunks(self):
    chunks = []
    chunk = None
    bytes_lines_map = dict(self._bytes_lines())
    block_stack = []
    ignore_branch = 0
    ult = penult = None

    jump_to = set()
    for bc in ByteCodes(self.code.co_code):
        if bc.jump_to >= 0:
            jump_to.add(bc.jump_to)

    chunk_lineno = 0

    for bc in ByteCodes(self.code.co_code):
        start_new_chunk = False
        first_chunk = False
        if bc.offset in bytes_lines_map:
            start_new_chunk = True
            chunk_lineno = bytes_lines_map[bc.offset]
            first_chunk = True
        elif bc.offset in jump_to:
            start_new_chunk = True
        elif bc.op in OPS_CHUNK_BEGIN:
            start_new_chunk = True

        if not chunk or start_new_chunk:
            if chunk:
                chunk.exits.add(bc.offset)
            chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
            chunks.append(chunk)

        if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
            if ignore_branch:
                ignore_branch -= 1
            else:
                chunk.exits.add(bc.jump_to)

        if bc.op in OPS_CODE_END:
            chunk.exits.add(-self.code.co_firstlineno)
        if bc.op in OPS_PUSH_BLOCK:
            block_stack.append((bc.op, bc.jump_to))
        if bc.op in OPS_POP_BLOCK:
            block_stack.pop()
        if bc.op in OPS_CHUNK_END:
            if bc.op == OP_BREAK_LOOP:
                chunk.exits.add(block_stack[-1][1])
            chunk = None
        if bc.op == OP_END_FINALLY:
            for block in reversed(block_stack):
                if block[0] in OPS_EXCEPT_BLOCKS:
                    chunk.exits.add(block[1])
                    break
        if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
            ignore_branch += 1

        penult = ult
        ult = bc

    if chunks:
        if ult and penult:
            if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                if self.code.co_consts[penult.arg] is None:
                    if chunks[-1].byte != penult.offset:
                        ex = -self.code.co_firstlineno
                        last_chunk = chunks[-1]
                        last_chunk.exits.remove(ex)
                        last_chunk.exits.add(penult.offset)
                        chunk = Chunk(penult.offset, last_chunk.line, False)
                        chunk.exits.add(ex)
                        chunks.append(chunk)

        chunks[-1].length = bc.next_offset - chunks[-1].byte
        for i in range(len(chunks) - 1):
            chunks[i].length = chunks[i + 1].byte - chunks[i].byte

    return chunks