Exemple #1
0
class Genxref(object):
    """Populate the cross-reference database for one project.

    The project tree is walked three times, always starting at the same
    path: ``init_files`` registers a ``File`` row per file, ``symbols``
    records ``Symbol``/``Definitions`` rows from the tags of each file, and
    ``symref`` records a ``Ref`` row for every identifier that resolves to a
    known symbol.  All rows go through the session created in ``main``.
    """

    def __init__(self, project_name, project_path, start_path='/'):
        """Store the project settings; no database work happens here.

        Args:
            project_name: passed to ``create_session``, to every parser
                constructor and to ``symbolcache.get_symid``.
            project_path: passed to ``Files`` and to every parser constructor.
            start_path: path at which each of the three traversals starts.
        """
        self.files = Files(project_path)
        self.filestype = {}
        self.project_name = project_name
        self.project_path = project_path
        self.commit_cnt = 0
        # Number of added rows between two intermediate commits.
        self.MAX_COMMIT = 1000

        self.start_path = start_path

        # symbol id -> filetype of the file whose definition was seen last
        self.sym_filetype = {}

        # statistics filled in by the passes and returned by main()
        self.track_info = {}

    def main(self):
        """Run the three passes and return the collected statistics.

        Returns:
            dict: ``self.track_info`` with the keys ``file_count``,
            ``line_count``, ``total_symbol``, ``total_ref``, ``t1`` (whole
            seconds spent in ``symbols``) and ``t2`` (whole seconds spent in
            ``symref``).
        """
        self.session = create_session(self.project_name)

        self.symid = 1  # Symbol.next_symid()

        from simpleparse import parses

        # filetype -> parser instance bound to this project
        self.parses = {
            filetype: parser_cls(self.project_name, self.project_path)
            for filetype, parser_cls in parses.items()
        }

        self.pathname_to_obj = {}

        self.init_files(self.start_path)

        t0 = time.time()
        # ctags symbols
        self.symbols(self.start_path)
        t1 = time.time()

        # symbol references
        self.symref(self.start_path)
        t2 = time.time()
        self.track_info['t1'] = int(t1 - t0)
        self.track_info['t2'] = int(t2 - t1)
        return self.track_info

    def _iter_files(self, pathname):
        """Yield every non-directory path reachable from *pathname*.

        The walk is breadth first: entries of a directory are queued as
        ``dirs + files`` (in the order ``self.files.getdir`` returns them)
        and visited first-in, first-out.  If *pathname* itself is not a
        directory it is the only path yielded.
        """
        from collections import deque

        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current):
                dirs, files = self.files.getdir(current)
                pending.extend(
                    os.path.join(current, name) for name in dirs + files)
            else:
                yield current

    def _commit_if_due(self, count):
        """Commit when *count* is a multiple of ``MAX_COMMIT``.

        Returns:
            bool: True if a commit was issued, False otherwise.
        """
        if count % self.MAX_COMMIT != 0:
            return False
        self.session.commit()
        return True

    def init_files(self, pathname):
        """Add a ``File`` row for every file below *pathname*.

        Each row gets its ``filetype`` and ``linecount`` from ``self.files``
        and is remembered in ``self.pathname_to_obj``.  Everything is
        committed once at the end; ``file_count`` and ``line_count`` are
        stored in ``self.track_info``.
        """
        file_count = 0
        line_count = 0
        for path in self._iter_files(pathname):
            f = File(path)
            cnt = self.files.getlinecount(path)
            f.filetype = self.files.gettype(path)
            f.linecount = cnt
            self.session.add(f)
            file_count += 1
            line_count += cnt
            self.pathname_to_obj[path] = f
        self.session.commit()

        self.track_info['file_count'] = file_count
        self.track_info['line_count'] = line_count

    def symbols(self, pathname):
        """Record the symbol definitions of every parsable, unindexed file.

        A file is processed when its filetype has a parser in
        ``self.parses`` and ``has_indexed()`` is false.  Each distinct symbol
        name gets one ``Symbol`` row (ids are handed out from
        ``self.symid``); every tag gets a ``Definitions`` row.  The number of
        definitions is stored as ``track_info['total_symbol']``.
        """
        total_commit = 0
        # symbol name -> symbol id assigned during this pass
        exist_syms = {}
        for path in self._iter_files(pathname):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue

            logger.info('find tags: %s' % path)
            tags = find_tags(self.files.toreal(path), o.filetype)
            for sym, line, lang_typeid in tags:
                sym_id = exist_syms.get(sym)
                if sym_id is None:
                    # First time this name is seen: allocate a new id.
                    sym_id = self.symid
                    exist_syms[sym] = sym_id
                    self.symid += 1
                    self.session.add(Symbol(sym, sym_id))

                defin = Definitions(sym_id, o.fileid, line, lang_typeid)
                # Later definitions of the same symbol overwrite this entry.
                self.sym_filetype[sym_id] = o.filetype
                self.session.add(defin)

                total_commit += 1
                self._commit_if_due(total_commit)

            o.set_indexed()
            self.session.add(o)
            logger.info('find %s tags: %s' % (len(tags), path))

        self.session.commit()
        self.track_info['total_symbol'] = total_commit
        logger.info('finish tags, total = %s' % total_commit)

    def symref(self, pathname):
        """Record a ``Ref`` row for every identifier that names a known symbol.

        A file is processed when its filetype has a parser in
        ``self.parses`` and ``has_refered()`` is false.  Identifiers come
        from the parser's ``get_idents``; symbol ids are looked up through
        ``symbolcache.get_symid``.  For non-asm files a reference is kept
        only if ``self.files.is_same_filetype`` accepts the file's type
        against the type recorded for the symbol in ``self.sym_filetype``.
        """
        from dbcache import symbolcache

        # NOTE(review): this counter is incremented once per Ref *and* once
        # per processed file, so 'total_ref' is not a pure reference count.
        # Kept as in the original; confirm whether consumers rely on it.
        total_commit = 0
        for path in self._iter_files(pathname):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue

            with open(self.files.toreal(path),
                      encoding="utf8",
                      errors='ignore') as _fp:
                _buf = _fp.read()

            is_asm = o.filetype == 'asm'
            for word, line in self.parses[o.filetype].get_idents(_buf):
                # Assembly calling a C function: the leading underscore is
                # dropped before the lookup.  startswith() also copes with
                # an empty identifier, where word[0] would raise.
                if is_asm and word.startswith('_'):
                    lookup = word[1:]
                else:
                    lookup = word

                _symid = symbolcache.get_symid(self.project_name, lookup)
                if _symid is None:
                    continue

                # The filetype compatibility check is skipped for asm files.
                if not is_asm and not self.files.is_same_filetype(
                        o.filetype, self.sym_filetype.get(_symid)):
                    continue

                self.session.add(Ref(_symid, o.fileid, line))
                total_commit += 1
                if self._commit_if_due(total_commit):
                    print(total_commit)

            o.set_refered()
            self.session.add(o)
            total_commit += 1
            if self._commit_if_due(total_commit):
                print(total_commit)

        self.session.commit()
        self.track_info['total_ref'] = total_commit
        print()
Exemple #2
0
class Genxref(object):
    """Populate the cross-reference database for one version of a tree.

    ``main`` resolves the tree id, registers the language types, adds a
    ``File`` row per file, then records symbol definitions (``symbols``) and
    symbol references (``symref``).  Rows are written through ``db.session``.
    """

    def __init__(self, config, tree):
        """Store the settings and fetch the first free symbol id.

        Args:
            config: mapping passed to every parser constructor; ``gensearch``
                reads its ``swishdirbase``, ``swishbin`` and ``swishconf``
                keys.
            tree: mapping passed to ``Files`` and to every parser
                constructor; its ``name`` and ``version`` keys are read.
        """
        self.files = Files(tree)
        self.filestype = {}
        self.tree = tree
        self.commit_cnt = 0
        # Number of counted items between two intermediate commits.
        self.MAX_COMMIT = 1000
        self.config = config
        self.symid = Symbol.next_symid()

    def main(self, version):
        """Run every indexing step for *version*, starting at ``'/'``."""
        self.init_tree()
        self.init_lang()
        self.pathname_to_obj = {}

        self.init_files('/', version)

        # Build the swish index (currently disabled):
        # self.gensearch(version)
        # ctags symbols
        self.symbols('/', version)
        # symbol references
        self.symref('/', version)

    def init_tree(self):
        """Resolve ``self.treeid`` from the tree's name and version.

        The id is looked up in ``treecache`` first; on a miss it is obtained
        from ``Tree.query.get_treeid`` and the cache is reloaded.

        Raises:
            AssertionError: if no tree id can be obtained.
        """
        # The original read the bare name ``tree`` here, which is not
        # defined in this method; the instance attribute is what is meant.
        name = self.tree['name']
        version = self.tree['version']
        self.treeid = treecache.get_treeid(name, version)
        if self.treeid is None:
            self.treeid = Tree.query.get_treeid(name, version)
            # Explicit raise so the check survives ``python -O``.
            if self.treeid is None:
                raise AssertionError(
                    'no tree id for %r %r' % (name, version))
            treecache.load()

    @staticmethod
    def _require_langtype(lang, desc):
        """Call ``LangType.query.get_or_create`` and fail if it returns None.

        The call has to happen unconditionally, so it must not live inside
        an ``assert`` statement (those are removed under ``python -O``).
        """
        if LangType.query.get_or_create(lang, desc) is None:
            raise AssertionError(
                'LangType.query.get_or_create(%r, %r) returned None'
                % (lang, desc))

    def init_lang(self):
        """Instantiate every parser and register its language types.

        For each entry of ``parses`` a parser instance is stored in
        ``self.parses`` and a ``LangType`` is requested for the empty
        description and for every value of the parser's ``typemap``.
        Finally ``langcache`` is reloaded.

        Raises:
            AssertionError: if ``get_or_create`` returns None.
        """
        self.parses = {}
        for k, v in parses.items():
            self.parses[k] = v(self.config, self.tree)

            self._require_langtype(k, '')
            for desc in v.typemap.values():
                self._require_langtype(k, desc)
        print(self.parses)
        langcache.load()

    def _iter_files(self, pathname, version):
        """Yield every non-directory path reachable from *pathname*.

        The walk is breadth first: entries of a directory are queued as
        ``dirs + files`` (in the order ``self.files.getdir`` returns them)
        and visited first-in, first-out.  *version* is forwarded unchanged
        to every ``self.files`` call.
        """
        from collections import deque

        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current, version):
                dirs, files = self.files.getdir(current, version)
                pending.extend(
                    os.path.join(current, name) for name in dirs + files)
            else:
                yield current

    def init_files(self, pathname, version):
        """Add a ``File`` row for every file below *pathname*.

        Each row is remembered in ``self.pathname_to_obj``; everything is
        committed once at the end and ``filecache`` is reloaded for the tree.
        """
        for path in self._iter_files(pathname, version):
            f = File(self.treeid, path)
            f.filetype = self.files.gettype(path, version)
            db.session.add(f)
            self.pathname_to_obj[path] = f
        db.session.commit()
        filecache.load(self.treeid)

    def feedswish(self, pathname, version, swish):
        """Recursively write the files below *pathname* to ``swish.stdin``.

        Each non-empty file is written as a ``Path-Name`` /
        ``Content-Length`` / ``Document-Type`` header block followed by the
        file content.  A file is skipped when its real path is present in
        ``self.filestype`` with a type that has no parser.
        """
        if self.files.isdir(pathname, version):
            dirs, files = self.files.getdir(pathname, version)
            for i in dirs + files:
                self.feedswish(os.path.join(pathname, i), version, swish)
            return

        _realfile = self.files.toreal(pathname, version)
        if (_realfile in self.filestype
                and self.filestype[_realfile] not in self.parses):
            return

        # filelist.write('%s\n' % pathname)
        if self.files.getsize(pathname, version) > 0:
            fp = self.files.getfp(pathname, version)
            try:
                content = fp.read()
                swish_input = [
                    "Path-Name: %s\n" % pathname,
                    "Content-Length:  %s\n" % len(content),
                    "Document-Type: TXT\n",
                    "\n",
                    content]

                swish.stdin.write(''.join(swish_input))
            finally:
                # Release the handle even if reading or writing fails.
                fp.close()

    def gensearch(self, version):
        """Run the swish indexer for *version*, feeding it via stdin.

        The index file is ``<tree name>.<version>.index`` inside
        ``config['swishdirbase']``.
        """
        index_file = "%s.%s.index" % (self.tree['name'], version)
        index_file = os.path.join(self.config['swishdirbase'], index_file)
        # NOTE(review): the command is a shell string built from config and
        # tree values; kept as is because config['swishbin'] may carry extra
        # arguments.  Confirm these values are trusted.
        cmd = '%s -S prog -i stdin -v 1 -c %s -f %s' % (
            self.config['swishbin'],
            self.config['swishconf'],
            index_file)
        swish = subprocess.Popen(cmd, stdin=subprocess.PIPE, shell=True)
        self.feedswish('.', version, swish)
        # Closes stdin and waits for the indexer to finish.
        swish.communicate()

    def symbols(self, pathname, version):
        """Record the ctags definitions of every parsable, unindexed file.

        A file is processed when its filetype has a parser in
        ``self.parses`` and ``has_indexed()`` is false.  Every tag gets a new
        ``Symbol`` row and a ``Definitions`` row sharing the next id from
        ``self.symid``.  The session is committed every ``MAX_COMMIT``
        processed files and once at the end, after which ``symbolcache`` is
        reloaded for the tree.
        """
        total_commit = 0
        for path in self._iter_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue

            typemap = self.parses[o.filetype].typemap
            tags = ctags(self.files.toreal(path, version), o.filetype)
            for sym, line, lang_type, _ext in tags:
                lang_typeid = langcache.get_typeid(
                    o.filetype, typemap[lang_type])
                symbol_obj = Symbol(self.treeid, sym, self.symid)
                defin = Definitions(self.symid, o.fileid, line, lang_typeid)
                db.session.add(symbol_obj)
                db.session.add(defin)
                self.symid += 1

            o.set_indexed()
            db.session.add(o)
            total_commit += 1
            if total_commit % self.MAX_COMMIT == 0:
                print(total_commit)
                db.session.commit()

        db.session.commit()
        # Two blank lines, as the two bare ``print`` statements produced.
        print('')
        print('')
        symbolcache.load(self.treeid)

    def symref(self, pathname, version):
        """Record a ``Ref`` row for every identifier that names a known symbol.

        A file is processed when its filetype has a parser in
        ``self.parses`` and ``has_refered()`` is false.  Identifiers come
        from the parser's ``get_idents``; those for which
        ``symbolcache.get_symid`` returns None are skipped.  The counter
        (one per ``Ref`` plus one per processed file) triggers a commit and
        a progress print at every multiple of ``MAX_COMMIT``.
        """
        total_commit = 0
        for path in self._iter_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue

            with open(self.files.toreal(path, version)) as _fp:
                _buf = _fp.read()

            for word, line in self.parses[o.filetype].get_idents(_buf):
                _symid = symbolcache.get_symid(self.treeid, word)
                if _symid is None:
                    continue
                db.session.add(Ref(_symid, o.fileid, line))
                total_commit += 1
                if total_commit % self.MAX_COMMIT == 0:
                    db.session.commit()
                    print(total_commit)

            o.set_refered()
            db.session.add(o)
            total_commit += 1
            if total_commit % self.MAX_COMMIT == 0:
                db.session.commit()
                print(total_commit)

        db.session.commit()
        print('')