Exemple #1
0
    def list_files(self, realpath, path_parts, page) :
        """Answer a directory-listing request with one page of entries as XML.

        The entry index is refreshed for ``realpath`` first, then the doc list
        stored under ``tuple(path_parts)`` is asked for page ``page``.  When a
        ``KeyError`` is raised during that lookup an empty listing is reported
        (0 entries, 0 pages, page index 1).

        Sends the 200 status line and the headers, then returns the XML body
        wrapped in a ``StringIO``; the body itself is not written here.
        """
        dir_key = tuple(path_parts)

        # Bring the directory index up to date before reading from it.
        MindIndex.update_entry_index(self.server.dir2doclist, DATA_DIR, realpath)

        try :
            total, pages, ipage, entries = self.server.dir2doclist[dir_key].get_page(page)
        except KeyError :
            total, pages, ipage, entries = 0, 0, 1, []

        # Collect the fragments and join once at the end.
        chunks = [
            '<?xml version="1.0" encoding="GBK"?>',
            '<entrylist>',
            '<page><total>%d</total><pages>%d</pages><ipage>%d</ipage></page>' % (total, pages, ipage),
        ]
        for entry in entries :
            chunks.append('<file>')
            chunks.append('<rlt_path>%s</rlt_path>' % cgi.escape(entry.rlt_path()))
            chunks.append('<url_path>%s</url_path>' % cgi.escape(entry.url_path()))
            chunks.append('<create>%s</create>' % entry.ctime_str())
            chunks.append('</file>')
        chunks.append('</entrylist>')
        body = ''.join(chunks)

        self.send_response(200)
        self.send_header('Content-type', 'text/xml')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        return StringIO(body)
Exemple #2
0
    def list_search(self, path_parts, page, query) :
        """Run a word search for ``query`` and answer with one page of hits as XML.

        ``query`` is expected to be a byte string; it is decoded as UTF-8
        first and as GBK when that fails.  ``path_parts`` is accepted but not
        used by this method.

        Sends the 200 status line and the headers, then returns the XML body
        wrapped in a ``StringIO``; the body itself is not written here.
        """
        # Debug trace of the raw query on stderr.
        print >> sys.stderr, '\n-\n-\n-\nDEBUG: list_search: query = %s\n-\n-\n-\n' % repr(query)
        try :
            uquery = query.decode('utf8')
        except UnicodeDecodeError :
            # 'replace' stops bytes that are valid in neither encoding from
            # raising a second, unhandled UnicodeDecodeError.  Input that
            # decodes cleanly as GBK gives the same result as before.
            uquery = query.decode('gbk', 'replace')
        srch_words = self.server.ws.get_dict_words(uquery)
        result = MindIndex.get_search_result(self.server.doc_profile, self.server.word_docset, self.server.word_idf, srch_words)
        # TODO: search result cache
        doc_num, page_num, page_idx, doc_list = result.get_page(page)

        # Collect the fragments and join once instead of repeated "+=".
        parts = [
            '<?xml version="1.0" encoding="GBK"?>',
            '<entrylist>',
            '<page><total>%d</total><pages>%d</pages><ipage>%d</ipage></page>' % (doc_num, page_num, page_idx),
        ]
        for doc in doc_list :
            parts.append('<file>')
            parts.append('<rlt_path>%s</rlt_path>' % cgi.escape(doc.rlt_path()))
            parts.append('<path>%s</path>' % cgi.escape(doc.url_path()))
            parts.append('<create>%s</create>' % doc.ctime_str())
            parts.append('<score>%f</score>' % doc.score)
            parts.append('</file>')
        parts.append('</entrylist>')
        data = ''.join(parts)

        self.send_response(200)
        self.send_header('Content-type', 'text/xml')
        self.send_header('Content-Length', str(len(data)))
        self.end_headers()
        return StringIO(data)
Exemple #3
0
    def __init__(self, server_address, RequestHandlerClass) :
        ThreadingTCPServer.__init__(self, server_address, RequestHandlerClass)
        self.server_name = 'MindWiki'
        self.server_port = SERVER_PORT

        self.page_head = open(os.path.join(MAIN_DIR, 'index_1.html'), 'r').read()
        self.page_tail = open(os.path.join(MAIN_DIR, 'index_2.html'), 'r').read()

        self.file_cache = {}    # path -> (modify, head-content)

        # Entry Index
        self.dir2doclist = {}
        MindIndex.build_entry_index(self.dir2doclist, DATA_DIR)
        print "[%s] built entries' index." % time.strftime(TIME_FMT)

        # WORD INDEX
        self.ws = MindIndex.Word.WordSeg()
        self.doc_profile = []   # profile item: (entry_tuple, ctime, mtime, words)
        self.word_docset = {}   # word->doc_id_set
        self.word_idf = {}
        #self.build_word_index()
        t = threading.Thread(target=self.build_word_index, args=())
        #t.setDaemon(True)
        t.start()