Example #1
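(Note: both examples are excerpts, not self-contained programs. They assume BytesIO, StringIO, and TextIOWrapper from io, the token/tokenize modules, and helpers defined elsewhere in the surrounding module: PycodeError, get_module_source, detect_encoding, pydriver, parse, AttrDocVisitor, number2name, and, in Example #2, emptyline_re.)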
class ModuleAnalyzer(object):
    # cache for analyzer objects -- caches both by module and file name
    cache = {}

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
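        """Return an analyzer for source held in memory (bytes or str)."""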
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
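        """Return an analyzer for a source file, cached by file name."""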
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        obj = cls(fileobj, modname, filename)
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
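        """Return an analyzer for an importable module, caching both
        successes and PycodeError failures under the module name."""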
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            type, source = get_module_source(modname)
            if type == 'string':
                obj = cls.for_string(source, modname)
            else:
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source

        # cache the source code as well
        pos = self.source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)

        # will be filled by tokenize()
        self.tokens = None
        # will be filled by parse()
        self.parsetree = None
        # will be filled by find_attr_docs()
        self.attr_docs = None
        self.tagorder = None
        # will be filled by find_tags()
        self.tags = None

    def tokenize(self):
        """Generate tokens from the source."""
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()

    def parse(self):
        """Parse the generated source tokens."""
        if self.parsetree is not None:
            return
        self.tokenize()
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)

    def find_attr_docs(self, scope=''):
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected

    def find_tags(self):
        """Find class, function and method definitions and their location."""
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}            # fullname -> (kind, start line, end line)
        namespace = []         # name components of the enclosing def/class scopes
        stack = []             # open blocks: (kind, fullname, start line, indent)
        indent = 0             # current indentation level
        defline = False        # current logical line contains a def/class
        expect_indent = False  # a def/class header just ended; INDENT should follow

        def tokeniter(ignore=(token.COMMENT, token.NL)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup

        tokeniter = tokeniter()
        for type, tok, spos, epos, line in tokeniter:
            if expect_indent:
                if type != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline)
                expect_indent = False
            if tok in ('def', 'class'):
                name = next(tokeniter)[1]
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, spos[0], indent))
                defline = True
            elif type == token.INDENT:
                expect_indent = False
                indent += 1
            elif type == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline)
            elif type == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
        self.tags = result
        return result
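A minimal usage sketch of find_tags (the source string and module name are illustrative; the for_string path needs no file or import machinery, and the token module must provide COMMENT and NL, which the stdlib token module does since Python 3.7):

analyzer = ModuleAnalyzer.for_string('class Foo:\n    pass\n', 'demo')
tags = analyzer.find_tags()
# find_tags maps dotted names to (kind, start line, end line); the DEDENT
# closing Foo's suite arrives on line 3, so:
# tags == {'Foo': ('class', 1, 3)}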
Example #2
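Example #2 is the same analyzer with one change, in find_tags: NL tokens are no longer filtered out of the stream, and a counter of trailing blank or comment-only lines (emptylines) is subtracted from each recorded end line, so a definition's reported span ends at its last real line instead of swallowing the blank lines before the next DEDENT.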
class ModuleAnalyzer(object):
    # cache for analyzer objects -- caches both by module and file name
    cache = {}

    @classmethod
    def for_string(cls, string, modname, srcname='<string>'):
        if isinstance(string, bytes):
            return cls(BytesIO(string), modname, srcname)
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename, modname):
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            fileobj = open(filename, 'rb')
        except Exception as err:
            raise PycodeError('error opening %r' % filename, err)
        obj = cls(fileobj, modname, filename)
        cls.cache['file', filename] = obj
        return obj

    @classmethod
    def for_module(cls, modname):
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            type, source = get_module_source(modname)
            if type == 'string':
                obj = cls.for_string(source, modname)
            else:
                obj = cls.for_file(source, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source, modname, srcname, decoded=False):
        # name of the module
        self.modname = modname
        # name of the source file
        self.srcname = srcname
        # file-like object yielding source lines
        self.source = source

        # cache the source code as well
        pos = self.source.tell()
        if not decoded:
            self.encoding = detect_encoding(self.source.readline)
            self.source.seek(pos)
            self.code = self.source.read().decode(self.encoding)
            self.source.seek(pos)
            self.source = TextIOWrapper(self.source, self.encoding)
        else:
            self.encoding = None
            self.code = self.source.read()
            self.source.seek(pos)

        # will be filled by tokenize()
        self.tokens = None
        # will be filled by parse()
        self.parsetree = None
        # will be filled by find_attr_docs()
        self.attr_docs = None
        self.tagorder = None
        # will be filled by find_tags()
        self.tags = None

    def tokenize(self):
        """Generate tokens from the source."""
        if self.tokens is not None:
            return
        try:
            self.tokens = list(tokenize.generate_tokens(self.source.readline))
        except tokenize.TokenError as err:
            raise PycodeError('tokenizing failed', err)
        self.source.close()

    def parse(self):
        """Parse the generated source tokens."""
        if self.parsetree is not None:
            return
        self.tokenize()
        try:
            self.parsetree = pydriver.parse_tokens(self.tokens)
        except parse.ParseError as err:
            raise PycodeError('parsing failed', err)

    def find_attr_docs(self, scope=''):
        """Find class and module-level attributes and their documentation."""
        if self.attr_docs is not None:
            return self.attr_docs
        self.parse()
        attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
        attr_visitor.visit(self.parsetree)
        self.attr_docs = attr_visitor.collected
        self.tagorder = attr_visitor.tagorder
        # now that we found everything we could in the tree, throw it away
        # (it takes quite a bit of memory for large modules)
        self.parsetree = None
        return attr_visitor.collected

    def find_tags(self):
        """Find class, function and method definitions and their location."""
        if self.tags is not None:
            return self.tags
        self.tokenize()
        result = {}
        namespace = []
        stack = []
        indent = 0
        defline = False
        expect_indent = False
        emptylines = 0         # length of the current run of blank/comment-only lines

        def tokeniter(ignore=(token.COMMENT,)):
            for tokentup in self.tokens:
                if tokentup[0] not in ignore:
                    yield tokentup

        tokeniter = tokeniter()
        for type, tok, spos, epos, line in tokeniter:
            # blank or comment-only lines produce NL tokens, which may come
            # between a def/class header's NEWLINE and the INDENT of its
            # suite; since NL is no longer filtered out here, it must not
            # trigger the one-line-definition branch
            if expect_indent and type != token.NL:
                if type != token.INDENT:
                    # no suite -- one-line definition
                    assert stack
                    dtype, fullname, startline, _ = stack.pop()
                    endline = epos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
                expect_indent = False
            if tok in ('def', 'class'):
                name = next(tokeniter)[1]
                namespace.append(name)
                fullname = '.'.join(namespace)
                stack.append((tok, fullname, spos[0], indent))
                defline = True
            elif type == token.INDENT:
                expect_indent = False
                indent += 1
            elif type == token.DEDENT:
                indent -= 1
                # if the stacklevel is the same as it was before the last
                # def/class block, this dedent closes that block
                if stack and indent == stack[-1][3]:
                    dtype, fullname, startline, _ = stack.pop()
                    endline = spos[0]
                    namespace.pop()
                    result[fullname] = (dtype, startline, endline - emptylines)
            elif type == token.NEWLINE:
                # if this line contained a definition, expect an INDENT
                # to start the suite; if there is no such INDENT
                # it's a one-line definition
                if defline:
                    defline = False
                    expect_indent = True
                emptylines = 0
            elif type == token.NL:
                # count up if line is empty or comment only
                if emptyline_re.match(line):
                    emptylines += 1
                else:
                    emptylines = 0
        self.tags = result
        return result
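emptyline_re is not shown in the excerpt; a plausible definition, stated here as an assumption, matches lines holding only whitespace and at most a comment:

import re

# Assumed helper (not part of the excerpt): blank or comment-only line.
emptyline_re = re.compile(r'^\s*(#.*)?$')

For example, a class whose body ends on line 2 and is followed by two blank lines gets its closing DEDENT on line 5 with emptylines == 2, so the stored end line is 3 rather than 5, and the blank lines are excluded from the tag's span.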