class ModuleAnalyzer(object):
    """Lazily tokenize and parse one module's source code.

    An instance wraps a file-like object holding the source of a module.
    The token list, the parse tree, the attribute documentation and the
    definition locations are computed on first request and then stored on
    the instance (``tokens``, ``parsetree``, ``attr_docs``/``tagorder`` and
    ``tags`` respectively).
    """

    # cache for analyzer objects -- caches both by module and file name;
    # keys are ('file', filename) and ('module', modname) tuples, and a
    # 'module' entry may hold the PycodeError raised while loading it
    cache = {}

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        """Create an analyzer for source code given as a string.

        A ``bytes`` *string* is wrapped in a ``BytesIO`` and decoded by the
        constructor; anything else is wrapped in a ``StringIO`` and treated
        as already decoded.  The result is not cached.
        """
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
        """Return the (cached) analyzer for the source file *filename*.

        Raises PycodeError if the file cannot be opened.  Errors raised by
        the constructor propagate unchanged.
        """
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        try:
            obj = cls(fileobj, modname, filename)
        except Exception:
            # the constructor reads and decodes the file; do not leak the
            # open handle when that fails
            fileobj.close()
            raise
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        """Return the (cached) analyzer for the module named *modname*.

        A PycodeError raised while locating or opening the source is cached
        as well, and raised again on every later call for the same module.
        """
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            srctype, source = get_module_source(modname)
            if srctype == 'string':
                # *source* is the source code itself
                obj = cls.for_string(source, modname)
            else:
                # *source* is the name of the source file
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        """Set up the analyzer and read the complete source code.

        :param source: file-like object yielding the source; it must
            support ``tell()``, ``seek()``, ``read()`` and ``readline()``
        :param modname: name of the module
        :param srcname: name of the source file
        :param decoded: if false, *source* yields bytes: the encoding is
            detected, and *source* is wrapped in a ``TextIOWrapper``; if
            true, *source* already yields text and ``encoding`` is None
        """
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source

        # cache the source code as well; the stream is rewound to where it
        # started after each read so that tokenize() sees the whole source
        pos = self.source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)

        # will be filled by tokenize()
        self.tokens = None
        # will be filled by parse()
        self.parsetree = None
        # will be filled by find_attr_docs()
        self.attr_docs = None
        self.tagorder = None
        # will be filled by find_tags()
        self.tags = None

    def tokenize(self):
        """Generate tokens from the source.

        Does nothing if the tokens were generated before.  Raises
        PycodeError if tokenizing fails; the source is only closed after a
        successful run.
        """
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()

    def parse(self):
        """Parse the generated source tokens.

        Tokenizes first if necessary.  Raises PycodeError if tokenizing or
        parsing fails.
        """
        if self.parsetree is not None:
            return
        self.tokenize()
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)

    def find_attr_docs(self, scope=''):
        """Find class and module-level attributes and their documentation.

        Returns what the AttrDocVisitor collected.  The result is stored, so
        *scope* only has an effect on the first call.
        """
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected

    def find_tags(self):
        """Find class, function and method definitions and their location.

        Returns a dict mapping the dotted name of each definition to a
        ``(kind, startline, endline)`` tuple, where *kind* is ``'def'`` or
        ``'class'``, *startline* is the line of that keyword and *endline*
        is the line of the token that ends the definition, not counting
        empty or comment-only lines directly in front of that token.  The
        result is stored and returned again by later calls.
        """
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []
        stack = []
        indent = 0
        defline = False
        expect_indent = False
        # number of consecutive empty or comment-only lines seen last
        emptylines = 0

        def tokeniter(ignore=(token.COMMENT,)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup
        tokeniter = tokeniter()
        for toktype, tok, spos, epos, line in tokeniter:
            # NL tokens (empty or comment-only lines) may sit between the
            # definition line and its suite; they decide nothing here
            if expect_indent and toktype != token.NL:
                if toktype != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline,
                                        endline - emptylines)
                expect_indent = False
            if tok in ('def', 'class'):
                # the token after the keyword is the name being defined
                name = next(tokeniter)[1]
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, spos[0], indent))
                defline = True
            elif toktype == token.INDENT:
                expect_indent = False
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline,
                                        endline - emptylines)
            elif toktype == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
                # a logical line with code ends any run of empty lines
                emptylines = 0
            elif toktype == token.NL:
                # count up if line is empty or comment only
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    emptylines += 1
                else:
                    emptylines = 0
        self.tags = result
        return result
class ModuleAnalyzer(object):
    """Lazily tokenize and parse one module's source code.

    An instance wraps a file-like object holding the source of a module.
    The token list, the parse tree, the attribute documentation and the
    definition locations are computed on first request and then stored on
    the instance (``tokens``, ``parsetree``, ``attr_docs``/``tagorder`` and
    ``tags`` respectively).
    """

    # cache for analyzer objects -- caches both by module and file name;
    # keys are ('file', filename) and ('module', modname) tuples, and a
    # 'module' entry may hold the PycodeError raised while loading it
    cache = {}

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        """Create an analyzer for source code given as a string.

        A ``bytes`` *string* is wrapped in a ``BytesIO`` and decoded by the
        constructor; anything else is wrapped in a ``StringIO`` and treated
        as already decoded.  The result is not cached.
        """
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
        """Return the (cached) analyzer for the source file *filename*.

        Raises PycodeError if the file cannot be opened.  Errors raised by
        the constructor propagate unchanged.
        """
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        try:
            obj = cls(fileobj, modname, filename)
        except Exception:
            # the constructor reads and decodes the file; do not leak the
            # open handle when that fails
            fileobj.close()
            raise
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        """Return the (cached) analyzer for the module named *modname*.

        A PycodeError raised while locating or opening the source is cached
        as well, and raised again on every later call for the same module.
        """
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            srctype, source = get_module_source(modname)
            if srctype == 'string':
                # *source* is the source code itself
                obj = cls.for_string(source, modname)
            else:
                # *source* is the name of the source file
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        """Set up the analyzer and read the complete source code.

        :param source: file-like object yielding the source; it must
            support ``tell()``, ``seek()``, ``read()`` and ``readline()``
        :param modname: name of the module
        :param srcname: name of the source file
        :param decoded: if false, *source* yields bytes: the encoding is
            detected, and *source* is wrapped in a ``TextIOWrapper``; if
            true, *source* already yields text and ``encoding`` is None
        """
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source

        # cache the source code as well; the stream is rewound to where it
        # started after each read so that tokenize() sees the whole source
        pos = self.source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)

        # will be filled by tokenize()
        self.tokens = None
        # will be filled by parse()
        self.parsetree = None
        # will be filled by find_attr_docs()
        self.attr_docs = None
        self.tagorder = None
        # will be filled by find_tags()
        self.tags = None

    def tokenize(self):
        """Generate tokens from the source.

        Does nothing if the tokens were generated before.  Raises
        PycodeError if tokenizing fails; the source is only closed after a
        successful run.
        """
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()

    def parse(self):
        """Parse the generated source tokens.

        Tokenizes first if necessary.  Raises PycodeError if tokenizing or
        parsing fails.
        """
        if self.parsetree is not None:
            return
        self.tokenize()
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)

    def find_attr_docs(self, scope=''):
        """Find class and module-level attributes and their documentation.

        Returns what the AttrDocVisitor collected.  The result is stored, so
        *scope* only has an effect on the first call.
        """
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected

    def find_tags(self):
        """Find class, function and method definitions and their location.

        Returns a dict mapping the dotted name of each definition to a
        ``(kind, startline, endline)`` tuple, where *kind* is ``'def'`` or
        ``'class'``, *startline* is the line of that keyword and *endline*
        is the line of the token that ends the definition, not counting
        the lines matched by ``emptyline_re`` directly in front of that
        token.  The result is stored and returned again by later calls.
        """
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []
        stack = []
        indent = 0
        defline = False
        expect_indent = False
        # number of consecutive lines matched by emptyline_re seen last
        emptylines = 0

        def tokeniter(ignore=(token.COMMENT,)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup
        tokeniter = tokeniter()
        for toktype, tok, spos, epos, line in tokeniter:
            # NL tokens are no longer filtered out, so they can show up
            # between the definition line and the INDENT of its suite;
            # they must not be mistaken for a missing suite
            if expect_indent and toktype != token.NL:
                if toktype != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline,
                                        endline - emptylines)
                expect_indent = False
            if tok in ('def', 'class'):
                # the token after the keyword is the name being defined
                name = next(tokeniter)[1]
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, spos[0], indent))
                defline = True
            elif toktype == token.INDENT:
                expect_indent = False
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline,
                                        endline - emptylines)
            elif toktype == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
                # a logical line with code ends any run of empty lines
                emptylines = 0
            elif toktype == token.NL:
                # count up if line is empty or comment only
                if emptyline_re.match(line):
                    emptylines += 1
                else:
                    emptylines = 0
        self.tags = result
        return result