def list_files(self, realpath, path_parts, page):
    """Send one page of a directory's document list as XML.

    realpath   -- handed to MindIndex.update_entry_index together with
                  DATA_DIR before the lookup (presumably refreshes the
                  entry index for that directory; confirm in MindIndex).
    path_parts -- sequence of path components; its tuple form is the key
                  into self.server.dir2doclist.
    page       -- page selector forwarded to the doclist's get_page().

    Writes a 200 response with text/xml headers and returns the XML body
    wrapped in a StringIO.  When the lookup raises KeyError an empty
    listing is produced (total 0, pages 0, page index 1).
    """
    dir_key = tuple(path_parts)
    MindIndex.update_entry_index(self.server.dir2doclist, DATA_DIR, realpath)

    # Fetch the requested page of the directory's doclist.
    try:
        listing = self.server.dir2doclist[dir_key]
        total, pages, ipage, entries = listing.get_page(page)
    except KeyError:
        total, pages, ipage, entries = 0, 0, 1, []

    # Collect the XML fragments and join them once at the end.
    chunks = [
        '<?xml version="1.0" encoding="GBK"?>',
        '<entrylist>',
        '<page><total>%d</total><pages>%d</pages><ipage>%d</ipage></page>'
        % (total, pages, ipage),
    ]
    for entry in entries:
        chunks.append('<file>')
        chunks.append('<rlt_path>%s</rlt_path>' % cgi.escape(entry.rlt_path()))
        chunks.append('<url_path>%s</url_path>' % cgi.escape(entry.url_path()))
        chunks.append('<create>%s</create>' % entry.ctime_str())
        chunks.append('</file>')
    chunks.append('</entrylist>')
    body = ''.join(chunks)

    self.send_response(200)
    self.send_header('Content-type', 'text/xml')
    self.send_header('Content-Length', str(len(body)))
    self.end_headers()
    return StringIO(body)
def list_search(self, path_parts, page, query):
    """Run a word search and send one page of the results as XML.

    path_parts -- accepted but not used by this method.
    page       -- page selector forwarded to the result's get_page().
    query      -- byte string; decoded as UTF-8, falling back to GBK when
                  UTF-8 decoding fails.

    Writes a 200 response with text/xml headers and returns the XML body
    wrapped in a StringIO.
    """
    print >> sys.stderr, '\n-\n-\n-\nDEBUG: list_search: query = %s\n-\n-\n-\n' % repr(query)

    # Decode the raw query: UTF-8 first, GBK as the fallback.
    try:
        query_text = query.decode('utf8')
    except UnicodeDecodeError:
        query_text = query.decode('gbk')

    server = self.server
    words = server.ws.get_dict_words(query_text)
    hits = MindIndex.get_search_result(
        server.doc_profile, server.word_docset, server.word_idf, words)
    # TODO: search result cache
    total, pages, ipage, entries = hits.get_page(page)

    # Collect the XML fragments and join them once at the end.
    chunks = [
        '<?xml version="1.0" encoding="GBK"?>',
        '<entrylist>',
        '<page><total>%d</total><pages>%d</pages><ipage>%d</ipage></page>'
        % (total, pages, ipage),
    ]
    for entry in entries:
        chunks.append('<file>')
        chunks.append('<rlt_path>%s</rlt_path>' % cgi.escape(entry.rlt_path()))
        chunks.append('<path>%s</path>' % cgi.escape(entry.url_path()))
        chunks.append('<create>%s</create>' % entry.ctime_str())
        chunks.append('<score>%f</score>' % entry.score)
        chunks.append('</file>')
    chunks.append('</entrylist>')
    body = ''.join(chunks)

    self.send_response(200)
    self.send_header('Content-type', 'text/xml')
    self.send_header('Content-Length', str(len(body)))
    self.end_headers()
    return StringIO(body)
def __init__(self, server_address, RequestHandlerClass):
    """Initialise the server, load page templates and build the indexes.

    server_address      -- forwarded unchanged to ThreadingTCPServer.__init__.
    RequestHandlerClass -- forwarded unchanged to ThreadingTCPServer.__init__.

    Side effects: reads index_1.html and index_2.html from MAIN_DIR, builds
    the entry index over DATA_DIR synchronously, prints a timestamped
    progress line, and starts a thread running self.build_word_index.
    Errors from open()/read() propagate to the caller.
    """
    ThreadingTCPServer.__init__(self, server_address, RequestHandlerClass)
    self.server_name = 'MindWiki'
    self.server_port = SERVER_PORT

    def read_template(name):
        # Close the handle explicitly rather than leaving it to garbage
        # collection; try/finally keeps this valid on any Python 2.x.
        handle = open(os.path.join(MAIN_DIR, name), 'r')
        try:
            return handle.read()
        finally:
            handle.close()

    self.page_head = read_template('index_1.html')
    self.page_tail = read_template('index_2.html')
    self.file_cache = {}  # path -> (modify, head-content)

    # Entry index
    self.dir2doclist = {}
    MindIndex.build_entry_index(self.dir2doclist, DATA_DIR)
    print("[%s] built entries' index." % time.strftime(TIME_FMT))

    # Word index
    self.ws = MindIndex.Word.WordSeg()
    self.doc_profile = []  # profile item: (entry_tuple, ctime, mtime, words)
    self.word_docset = {}  # word -> doc_id_set
    self.word_idf = {}

    # The word index is built on a separate thread instead of inline, so
    # the constructor returns without waiting for it.  The daemon flag is
    # not set here, so the thread inherits it from the creating thread.
    builder = threading.Thread(target=self.build_word_index, args=())
    builder.start()