class SimpleParse(object):
    """Split source text into typed fragments and render them as HTML.

    This base class does not define ``spec``, ``reserved`` or ``identdef``;
    it reads them from ``self`` and therefore expects a subclass to supply:

    * ``spec``     -- sequence of dicts with ``'open'`` (regex source),
                      ``'close'`` (literal terminator string) and ``'type'``.
    * ``reserved`` -- container of reserved words (used with ``in``).
    * ``identdef`` -- object with a ``split(text)`` method (presumably a
                      compiled regex -- confirm against the subclasses).

    Subclasses must also override ``_parse_code`` and ``_parse_include``.
    """

    def __init__(self, config, tree):
        """Store ``config``/``tree`` and compile the fragment-opening regex.

        ``tree`` must provide the ``'name'`` and ``'version'`` keys.
        """
        self.config = config
        self.tree = tree
        self.files = Files(tree)
        self.treeid = Tree.query.get_treeid(tree['name'], tree['version'])
        self.filename = ''
        self.release_id = ''
        self.buf = []
        self.pos = 0
        self.start = 0
        self.end = 0
        self.maxchar = 0
        self.frags = []
        # One capturing group per spec entry, so the index of the group that
        # matched identifies which spec entry opened the fragment.
        self.open_re = re.compile(
            "|".join(['(%s)' % i['open'] for i in self.spec]), re.M)

    def parse_file(self, filename, release_id):
        """Read ``filename`` at ``release_id`` and run :meth:`parse` on it."""
        fp = self.files.getfp(filename, release_id)
        try:
            buf = fp.read()
        finally:
            # Release the handle even when the read fails.
            fp.close()
        self.parse(buf, filename, release_id)

    def get_line_html(self, line, width=4):
        """Return the anchor markup for line number ``line``.

        ``width`` is accepted for interface compatibility but is not used:
        the number is always zero-padded to at least four digits, which keeps
        the anchor names stable regardless of the file length.
        """
        label = '%04d' % line
        return '''<a class='fline' name="%s">%s</a> ''' % (label, label)

    def _multilinetwist(self, frag, css):
        """Wrap ``frag`` in ``<span class=css>``, one span per source line.

        For ``'string'`` and ``'comment'`` fragments the text is HTML-escaped
        first (``&``, ``<`` and ``>``) so source text cannot inject markup.
        Empty spans produced by the per-line split are removed.
        """
        if css == 'string' or css == 'comment':
            # Function-scope import: the file's import block is not visible
            # from here, so the dependency is kept local to this method.
            from xml.sax.saxutils import escape
            frag = escape(frag)
        opening = '<span class="%s">' % css
        wrapped = '%s%s</span>' % (opening, frag)
        # Close and reopen the span around every newline so each rendered
        # line is self-contained.
        wrapped = wrapped.replace("\n", '</span>\n' + opening)
        return wrapped.replace(opening + '</span>', '')

    def get_include_link(self, word, path):
        """Return a link to ``path`` in this tree's source view, labelled ``word``."""
        return '''<a class='include' href="/lxr/source/%s%s">%s</a>''' % (
            self.tree['name'], path, word)

    def get_ident_link(self, ident):
        """Return a link to the identifier page for ``ident`` in this tree."""
        return '''<a class='fid' href="/lxr/ident/%s?_i=%s">%s</a>''' % (
            self.tree['name'], ident, ident)

    def get_reserved_link(self, word):
        """Return ``word`` wrapped in a ``reserved`` span if it is reserved."""
        if self.is_reserved(word):
            return '<span class="reserved">%s</span>' % word
        return word

    def is_ident(self, word):
        """Return True when the symbol cache has an id for ``word`` in this tree."""
        return symbolcache.get_symid(self.treeid, word) is not None

    def is_reserved(self, word):
        """Return True when ``word`` is in ``self.reserved``."""
        return word in self.reserved

    def get_idents(self, buf):
        """Return ``(token, line_no)`` pairs for non-reserved tokens in ``buf``.

        Lines are numbered from 1.  Tokens come from ``self.identdef.split``;
        empty tokens and reserved words are skipped.
        """
        found = []
        for line_no, text in enumerate(buf.split('\n'), 1):
            if not text:
                continue
            for token in self.identdef.split(text):
                if token and not self.is_reserved(token):
                    found.append((token, line_no))
        return found

    def _parse_code(self, frag):
        """Render a code fragment as HTML; must be overridden by subclasses."""
        raise NotImplementedError("Not Impl")

    def _is_package(self, word):
        """Return True when ``word`` looks like a package/module name.

        The keywords ``from`` and ``import`` are rejected, as is the empty
        string; otherwise the first character must be ``.`` or an ASCII
        letter.
        """
        if not word or word in ('from', 'import'):
            return False
        first = word[0]
        return first == '.' or first in string.ascii_letters

    def _matched_spec(self, open_match):
        """Return the spec entry whose alternative produced ``open_match``."""
        for index, group in enumerate(open_match.groups()):
            # ``is not None`` rather than truthiness: a group that matched
            # the empty string still identifies its alternative.
            if group is not None:
                return self.spec[index]
        raise IndexError("no spec entry matched")

    def parse(self, buf, filename='', release_id=''):
        """Split ``buf`` into ``(fragtype, text)`` pairs stored in ``self.frags``.

        Text between recognised fragments is stored with type ``'code'``.
        A fragment runs from its opening match to the first occurrence of the
        spec's literal ``'close'`` string; for quote terminators, occurrences
        that ``find_escape_char`` reports as escaped are skipped.  An
        unterminated fragment extends to the end of ``buf``.

        ``filename``/``release_id`` are recorded only when both are given.
        The concatenation of all fragments always equals ``buf``.
        """
        if filename and release_id:
            self.filename = filename
            self.release_id = release_id
        self.buf = buf
        self.pos = 0
        self.start = 0
        self.end = 0
        self.maxchar = len(buf)
        self.frags = []

        while self.pos < self.maxchar:
            open_match = self.open_re.search(self.buf, self.pos, self.maxchar)
            if not open_match:
                # Nothing left to open: the remainder is plain code.
                self.frags.append(('code', self.buf[self.pos:]))
                self.pos = self.maxchar
                break

            left, right = open_match.start(), open_match.end()
            if self.pos < left:
                self.frags.append(('code', self.buf[self.pos:left]))

            spec = self._matched_spec(open_match)
            fragtype = spec['type']
            closer = spec['close']

            close_left = self.buf.find(closer, right, self.maxchar)
            if close_left < 0:
                # No terminator (e.g. last line without a newline): the
                # fragment takes everything up to the end of the buffer.
                self.frags.append((fragtype, self.buf[left:]))
                break
            close_right = close_left + len(closer)

            if closer == '"' or closer == "'":
                # Skip quote characters that are escaped inside the literal.
                while find_escape_char(self.buf, close_left - 1, right - 1):
                    close_left = self.buf.find(
                        closer, close_right, self.maxchar)
                    if close_left < 0:
                        # Only escaped quotes remain: consume to the end.
                        close_right = self.maxchar
                        print('ERROR.')
                        break
                    close_right = close_left + len(closer)

            self.frags.append((fragtype, self.buf[left:close_right]))
            self.pos = close_right

        # Internal invariant: fragmenting must not lose or duplicate text.
        _result = ''.join([i[1] for i in self.frags])
        assert _result == buf

    def _parse_include(self, frag):
        """Render an include fragment as HTML; must be overridden by subclasses."""
        raise NotImplementedError("Not Impl")

    def out(self):
        """Render ``self.frags`` as a line-numbered ``<pre>`` HTML block.

        ``comment`` and ``string`` fragments go through
        :meth:`_multilinetwist`, ``include`` fragments through
        :meth:`_parse_include`, and everything else through
        :meth:`_parse_code`.  Trailing empty lines are dropped.  Every line
        is preceded by its line anchor, and one further anchor (for line
        ``count + 1``) is emitted after the last line; that trailing anchor
        is existing behaviour and is kept so the rendered output is stable.
        """
        head = '<pre class="filecontent">'
        tail = '</pre>'

        pieces = []
        for fragtype, frag in self.frags:
            if fragtype in ('comment', 'string'):
                pieces.append(self._multilinetwist(frag, fragtype))
            elif fragtype == 'include':
                pieces.append(self._parse_include(frag))
            else:
                pieces.append(self._parse_code(frag))

        # Decode byte strings only; text that is already unicode is passed
        # through instead of being pushed through an implicit ASCII encode.
        pieces = [p.decode("utf8") if isinstance(p, bytes) else p
                  for p in pieces]

        tt = ''.join(pieces).split("\n")
        while tt and tt[-1] == '':
            tt.pop()

        linewidth = max(len(str(len(tt))), 4)
        htmls = [head]
        for line_no, text in enumerate(tt, 1):
            htmls.append(self.get_line_html(line_no, linewidth))
            htmls.append(text)
            htmls.append('\n')
        htmls.append(self.get_line_html(len(tt) + 1, linewidth))
        htmls.append(tail)
        return ''.join(htmls)
class Genxref(object):
    """Build the cross-reference database for one version of a source tree.

    The work is done in phases driven by :meth:`main`: register the tree and
    language types, record every file, store the symbols reported by
    ``ctags``, then store references to those symbols.
    """

    def __init__(self, config, tree):
        """Keep ``config``/``tree`` and set up per-run bookkeeping.

        ``tree`` must provide the ``'name'`` and ``'version'`` keys.
        """
        self.files = Files(tree)
        self.filestype = {}
        self.tree = tree
        self.commit_cnt = 0
        # Number of pending records after which the session is committed.
        self.MAX_COMMIT = 1000
        self.config = config
        self.symid = Symbol.next_symid()
        # pathname -> File object; created here so the phase methods can be
        # called on their own, and reset by main() at the start of a run.
        self.pathname_to_obj = {}

    def main(self, version):
        """Run every indexing phase for ``version``, starting at ``'/'``."""
        self.init_tree()
        self.init_lang()
        self.pathname_to_obj = {}
        self.init_files('/', version)
        # Swish search indexing is currently disabled; call
        # self.gensearch(version) here to enable it.
        # Symbols reported by ctags.
        self.symbols('/', version)
        # References to those symbols.
        self.symref('/', version)

    def init_tree(self):
        """Resolve ``self.treeid`` from the cache, falling back to the database."""
        name = self.tree['name']
        tree_version = self.tree['version']
        self.treeid = treecache.get_treeid(name, tree_version)
        if self.treeid is None:
            self.treeid = Tree.query.get_treeid(name, tree_version)
        assert self.treeid is not None
        treecache.load()

    def init_lang(self):
        """Instantiate every registered parser and register its language types."""
        self.parses = {}
        for k, v in parses.items():
            self.parses[k] = v(self.config, self.tree)
            # The call is kept outside the assert so it still runs when
            # assertions are disabled (python -O).
            created = LangType.query.get_or_create(k, '')
            assert created is not None
            for desc in v.typemap.values():
                created = LangType.query.get_or_create(k, desc)
                assert created is not None
        print(self.parses)
        langcache.load()

    def _walk_files(self, pathname, version):
        """Yield every non-directory path under ``pathname``, breadth-first."""
        # Function-scope import: the file's import block is not visible from
        # here, so the dependency is kept local to this helper.
        from collections import deque
        pending = deque([pathname])
        while pending:
            current = pending.popleft()
            if self.files.isdir(current, version):
                dirs, files = self.files.getdir(current, version)
                for name in dirs + files:
                    pending.append(os.path.join(current, name))
            else:
                yield current

    def _count_and_commit(self, count):
        """Return ``count + 1``, committing and reporting every MAX_COMMIT records."""
        count += 1
        if count % self.MAX_COMMIT == 0:
            db.session.commit()
            print(count)
        return count

    def init_files(self, pathname, version):
        """Create a ``File`` row for each file under ``pathname`` and remember it."""
        for path in self._walk_files(pathname, version):
            f = File(self.treeid, path)
            f.filetype = self.files.gettype(path, version)
            db.session.add(f)
            self.pathname_to_obj[path] = f
        db.session.commit()
        filecache.load(self.treeid)

    def feedswish(self, pathname, version, swish):
        """Write every non-empty file under ``pathname`` to ``swish.stdin``.

        Directories are walked recursively.  A file whose real path appears
        in ``self.filestype`` with a type that has no parser is skipped.
        Each document is preceded by ``Path-Name``, ``Content-Length`` and
        ``Document-Type`` header lines and a blank line.
        """
        if self.files.isdir(pathname, version):
            dirs, files = self.files.getdir(pathname, version)
            for i in dirs + files:
                self.feedswish(os.path.join(pathname, i), version, swish)
            return

        _realfile = self.files.toreal(pathname, version)
        if _realfile in self.filestype:
            if self.filestype[_realfile] not in self.parses:
                return
        if self.files.getsize(pathname, version) > 0:
            fp = self.files.getfp(pathname, version)
            try:
                content = fp.read()
            finally:
                # Release the handle even when the read fails.
                fp.close()
            swish_input = [
                "Path-Name: %s\n" % pathname,
                "Content-Length: %s\n" % len(content),
                "Document-Type: TXT\n",
                "\n",
                content]
            swish.stdin.write(''.join(swish_input))

    def gensearch(self, version):
        """Run the swish indexer for ``version``, feeding it files via stdin."""
        index_file = "%s.%s.index" % (self.tree['name'], version)
        index_file = os.path.join(self.config['swishdirbase'], index_file)
        # NOTE(review): the command is a shell string built from config
        # values, the tree name and the version; confirm none of these can
        # come from untrusted input before relying on shell=True.
        cmd = '%s -S prog -i stdin -v 1 -c %s -f %s' % (
            self.config['swishbin'], self.config['swishconf'], index_file)
        swish = subprocess.Popen(cmd, stdin=subprocess.PIPE, shell=True)
        self.feedswish('.', version, swish)
        # Closes stdin and waits for the indexer to exit.
        swish.communicate()

    def symbols(self, pathname, version):
        """Store the ctags symbols of every parseable, not-yet-indexed file.

        Requires :meth:`init_files` to have populated ``pathname_to_obj``.
        Files are counted one by one and the session is committed every
        ``MAX_COMMIT`` files and once more at the end.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_indexed():
                continue
            tags = ctags(self.files.toreal(path, version), o.filetype)
            typemap = self.parses[o.filetype].typemap
            for sym, line, lang_type, _ext in tags:
                lang_typeid = langcache.get_typeid(
                    o.filetype, typemap[lang_type])
                symbol_obj = Symbol(self.treeid, sym, self.symid)
                defin = Definitions(self.symid, o.fileid, line, lang_typeid)
                db.session.add(symbol_obj)
                db.session.add(defin)
                self.symid += 1
            o.set_indexed()
            db.session.add(o)
            total_commit = self._count_and_commit(total_commit)
        db.session.commit()
        print('')
        print('')
        symbolcache.load(self.treeid)

    def symref(self, pathname, version):
        """Store a ``Ref`` for every known symbol used in not-yet-referenced files.

        Requires :meth:`init_files` to have populated ``pathname_to_obj``.
        Tokens without an id in the symbol cache are ignored.  Both
        references and processed files are counted, and the session is
        committed every ``MAX_COMMIT`` records and once more at the end.
        """
        total_commit = 0
        for path in self._walk_files(pathname, version):
            o = self.pathname_to_obj[path]
            if o.filetype not in self.parses or o.has_refered():
                continue
            with open(self.files.toreal(path, version)) as _fp:
                _buf = _fp.read()
            for word, line in self.parses[o.filetype].get_idents(_buf):
                _symid = symbolcache.get_symid(self.treeid, word)
                if _symid is None:
                    continue
                db.session.add(Ref(_symid, o.fileid, line))
                total_commit = self._count_and_commit(total_commit)
            o.set_refered()
            db.session.add(o)
            total_commit = self._count_and_commit(total_commit)
        db.session.commit()
        print('')