Ejemplo n.º 1
0
def make_query(keywords):
    """Build a xapian query from a space-separated keyword string.

    Every space-separated token contributes one sub-query and all
    sub-queries are AND-ed together:

    * a token wrapped in double quotes (``"foo"``) is looked up verbatim,
      without segmentation;
    * any other token is passed through ``seg_txt_2_dict``; the sub-query is
      ``token OR (seg1 AND seg2 ...)``, i.e. either the literal token or all
      of its segments (other than the token itself) must match.

    :keywords: search string; ``unicode`` input is encoded to UTF-8 first
    :returns: a ``xapian.Query``
    """
    # isinstance (rather than an exact type comparison) also covers unicode
    # subclasses, which would otherwise be processed un-encoded.
    if isinstance(keywords, unicode):
        keywords = keywords.encode('utf-8', 'ignore')

    and_query_list = []
    for keyword in keywords.split(' '):
        is_quoted = (len(keyword) > 2 and keyword.startswith('"')
                     and keyword.endswith('"'))
        if is_quoted:
            # Strip the surrounding quotes and match the phrase as one term.
            and_query_list.append(xapian.Query(keyword[1:-1], 1))
            continue

        segments = [
            xapian.Query(word, 1)
            for word in seg_txt_2_dict(keyword)
            if word != keyword
        ]
        literal = xapian.Query(keyword, 1)
        if not segments:
            query = literal
        elif len(segments) == 1:
            query = xapian.Query(xapian.Query.OP_OR, [literal, segments[0]])
        else:
            all_segments = xapian.Query(xapian.Query.OP_AND, segments)
            query = xapian.Query(xapian.Query.OP_OR, [literal, all_segments])
        and_query_list.append(query)

    if len(and_query_list) > 1:
        return xapian.Query(xapian.Query.OP_AND, and_query_list)
    # str.split(' ') always yields at least one token, so index 0 exists.
    return and_query_list[0]
Ejemplo n.º 2
0
    def update_index(self, id, text=None, values=None, data=None):
        """Update an already indexed document in place.

        Only the parts that are passed (and truthy) are replaced; everything
        else on the stored document is left untouched.

        :id: id of the document to replace
        :text: new text; when given, all existing terms are cleared and the
            terms produced by ``seg_txt_2_dict(text)`` are added
        :values: mapping of value key -> value; when given, all existing
            values are cleared before the new ones are added
        :data: new document data
        :returns: True on success, False if the document could not be
            fetched or written back
        """
        # "except Exception" instead of a bare except, so KeyboardInterrupt
        # and SystemExit are not swallowed and turned into a False return.
        try:
            doc = self.get_document(id)
        except Exception:
            return False

        if text:
            doc.clear_terms()
            for word in seg_txt_2_dict(text):
                doc.add_term(word)

        if values:
            doc.clear_values()
            for key, value in values.iteritems():
                doc.add_value(key, value)

        if data:
            doc.set_data(data)

        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True
Ejemplo n.º 3
0
    def index(self, id, text, values=None, data=''):
        """Index a document into xapian under the given id.

        :id: document id, passed to ``replace_document``
        :text: search content (UTF-8); split into terms by
            ``seg_txt_2_dict``
        :values: optional mapping of value key -> value added to the
            document; ``None`` means no values
        :data: optional document data, only stored when truthy
        :returns: True if the document was written, False otherwise
        """
        doc = xapian.Document()
        for word in seg_txt_2_dict(text):
            doc.add_term(word)

        # Values are added so results can be sorted; the key apparently has
        # to be numeric.
        for key, value in (values or {}).iteritems():
            doc.add_value(key, value)

        if data:
            doc.set_data(data)

        # "except Exception" instead of a bare except, so KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True
Ejemplo n.º 4
0
def search(keywords, offset=0, limit=35, enquire=SEARCH_ENQUIRE):
    """Return the ids of documents matching every term of *keywords*.

    *keywords* is segmented with ``seg_txt_2_dict``; each resulting
    ``(word, value)`` pair becomes a ``xapian.Query(word, value)`` and the
    term queries are combined with OP_AND (a single term is used as is).

    :keywords: text to search for
    :offset: first argument passed to ``enquire.get_mset``
    :limit: second argument passed to ``enquire.get_mset``
    :enquire: enquire object to run the query on
    :returns: list of docids ordered by descending ``rank``
    """
    query_list = [
        xapian.Query(word, value)
        for word, value in seg_txt_2_dict(keywords).iteritems()
    ]
    if len(query_list) != 1:
        query = xapian.Query(xapian.Query.OP_AND, query_list)
    else:
        query = query_list[0]

    enquire.set_query(query)
    matches = enquire.get_mset(offset, limit, None)

    rank_by_docid = {}
    for m in matches:
        rank_by_docid[m.docid] = m.rank

    # NOTE(review): sorting ascending and then reversing yields the highest
    # rank number first; presumably rank 0 is the best hit, which would put
    # the best match last -- confirm this ordering is intended.
    ids = sorted(rank_by_docid, key=rank_by_docid.get)
    ids.reverse()
    return ids
Ejemplo n.º 5
0
    def search(self, keywords, start_offset=0, end_offset=None):
        """Run an OR search over the segmented terms of *keywords*.

        :keywords: text to search for; ``unicode`` is encoded to UTF-8
        :start_offset: first argument passed to ``_get_enquire_mset``
        :end_offset: second argument passed to ``_get_enquire_mset``; when
            falsy it is replaced by ``doccount - start_offset``
        :returns: dict with ``count`` (from ``_get_hit_count``) and
            ``object_list`` (the JSON-decoded data of every match)
        """
        if isinstance(keywords, unicode):
            keywords = keywords.encode('utf8')

        term_queries = [
            xapian.Query(term, weight)
            for term, weight in seg_txt_2_dict(keywords).iteritems()
        ]
        if len(term_queries) == 1:
            query = term_queries[0]
        else:
            query = xapian.Query(xapian.Query.OP_OR, term_queries)

        self.SEARCH_ENQUIRE.set_query(query)
        total_docs = self.SEARCH_DB.get_doccount()
        if not end_offset:
            end_offset = total_docs - start_offset

        object_list = []
        for hit in self._get_enquire_mset(start_offset, end_offset):
            raw = self._get_document_data(hit.document)
            object_list.append(simplejson.loads(raw, encoding='utf8'))

        return {'count': self._get_hit_count(), 'object_list': object_list}
Ejemplo n.º 6
0
def index_txt(id, txt):
    """Index *txt* into SEARCH_DB under the unique term ``":<id>"``.

    The text is segmented with ``seg_txt_2_dict``; every ``(word, value)``
    pair is added as a term with ``value`` as second argument of
    ``add_term``. The document is then stored with ``replace_document``
    keyed by the ``":<id>"`` term.

    :id: identifier of the text
    :txt: text to index
    """
    doc = xapian.Document()
    for word, value in seg_txt_2_dict(txt).iteritems():
        doc.add_term(word, value)

    # Add the key term and store the document once, after all terms have
    # been collected. Doing this inside the loop rewrote the document once
    # per term and stored nothing at all when the text produced no terms.
    key = ":%s" % id
    doc.add_term(key)
    SEARCH_DB.replace_document(key, doc)
Ejemplo n.º 7
0
    def __getTop(self, content, num):
        """Return the highest-valued segmented words of *content*.

        *content* is converted with ``str()``, re-encoded from GBK to UTF-8,
        segmented with ``seg_txt_2_dict`` and sorted by value, descending.
        Keys of the result are encoded back to GBK.

        NOTE(review): the slice keeps ``num + 1`` entries, not ``num`` --
        possibly an off-by-one; confirm against callers.

        :content: text to segment
        :num: slice bound for the number of entries kept (see note)
        :returns: dict of word -> value; empty when *content* is empty
        """
        content = str(content)
        if not content:
            wlog.warning("Can not splite empty String.")
            return {}

        counts = seg_txt_2_dict(content.decode("gbk").encode("utf-8"))
        ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

        top = {}
        for term, weight in ranked[:num + 1]:
            top[term.decode("utf-8").encode("gbk")] = weight
        return top
Ejemplo n.º 8
0
def index_txt(tid, txt):
    """Index *txt* into SEARCH_DB under the unique term ``":<tid>"``.

    The text is segmented with ``seg_txt_2_dict``; every non-empty word is
    added as a term with its value as second argument of ``add_term``. The
    document is stored with ``replace_document`` keyed by the ``":<tid>"``
    term.

    :tid: identifier of the text
    :txt: text to index
    """
    doc = xapian.Document()
    for word, value in seg_txt_2_dict(txt).iteritems():
        if word:
            doc.add_term(word, value)

    key = ":%s" % str(tid)
    doc.add_term(key)
    SEARCH_DB.replace_document(key, doc)
Ejemplo n.º 9
0
def index(msg):
    """Add *msg* to MASTER_DB, searchable by the words of its title.

    The raw message is stored as the document data. The message is parsed
    as JSON, its ``"title"`` entry is encoded to UTF-8 and segmented with
    ``seg_txt_2_dict``, and every ``(word, value)`` pair is added as a term.

    :msg: JSON text; presumably an object with a ``"title"`` key -- verify
        against the caller
    """
    document = xapian.Document()
    document.set_data(msg)

    title = json.loads(msg).get("title")
    term_weights = seg_txt_2_dict(title.encode("utf-8"))
    for term, weight in term_weights.iteritems():
        document.add_term(term, weight)

    MASTER_DB.add_document(document)
Ejemplo n.º 10
0
def search(keywords,offset=0,limit=35,enquire=SEARCH_ENQUIRE):
    query_list = []
    for word,value in seg_txt_2_dict(keywords).iteritems():
        print word
        query = xapian.Query(word,value)
        query_list.append(query)

    if len(query_list) != 1:
        query = xapian.Query(xapian.Query.OP_AND,query_list)

    else:
        query = query_list[0]

    enquire.set_query(query)
    matches = enquire.get_mset(offset,limit,None)
    return matches
Ejemplo n.º 11
0
    def _index_text(self, doc, termgenerator):
        """Index the contents of the file named ``text`` into *doc*.

        The language is guessed from the first 100 KiB of the file. Chinese
        text is segmented with ``seg_txt_2_dict`` and added to *doc* term by
        term; anything else is handed to ``termgenerator.index_text``.

        If the file cannot be read the error is logged and nothing is
        indexed.

        :doc: document receiving the terms of Chinese text
        :termgenerator: term generator used for non-Chinese text
        """
        try:
            # Context manager so the handle is closed even if read() fails.
            with open('text') as text_file:
                text = text_file.read()
        except IOError as err:
            logger.error(str(err))
            return

        lang = guess_language(text[:1024*100])
        logger.debug('lanuage is %s' % lang)

        if lang == 'chinese':
            for word, value in seg_txt_2_dict(text).iteritems():
                if word:
                    doc.add_term(word, value)
        else:
            termgenerator.index_text(text)
Ejemplo n.º 12
0
    def _index_text(self, doc, termgenerator):
        """Read the file ``text`` and index its contents.

        ``guess_language`` is run on the first 100 KiB. When it reports
        ``'chinese'`` the text is segmented with ``seg_txt_2_dict`` and each
        non-empty word is added to *doc* with its value; otherwise the whole
        text goes through ``termgenerator.index_text``.

        An unreadable file is logged as an error and the method returns
        without indexing anything.

        :doc: document that receives the segmented Chinese terms
        :termgenerator: term generator used for every other language
        """
        try:
            # "with" closes the file deterministically; the bare
            # open(...).read() left the handle open until garbage collection.
            with open('text') as handle:
                text = handle.read()
        except IOError as err:
            logger.error(str(err))
            return

        lang = guess_language(text[:1024 * 100])
        logger.debug('lanuage is %s' % lang)

        if lang != 'chinese':
            termgenerator.index_text(text)
            return

        for word, value in seg_txt_2_dict(text).iteritems():
            if word:
                doc.add_term(word, value)
Ejemplo n.º 13
0
def make_query(keywords):
    """Turn a space-separated keyword string into a single xapian query.

    Tokens are AND-ed together. A token enclosed in double quotes is used
    verbatim (quotes stripped). Any other token is segmented with
    ``seg_txt_2_dict`` and becomes ``token OR (all segments AND-ed)``; the
    token alone is used when segmentation yields nothing but the token.

    :keywords: search string; a ``unicode`` object is encoded to UTF-8
    :returns: a ``xapian.Query``
    """
    if type(keywords) is unicode:
        keywords = keywords.encode('utf-8', 'ignore')

    parts = []
    for token in keywords.split(' '):
        quoted = (len(token) > 2
                  and token.startswith('"')
                  and token.endswith('"'))
        if quoted:
            parts.append(xapian.Query(token[1:-1], 1))
            continue

        pieces = [
            xapian.Query(piece, 1)
            for piece, _weight in seg_txt_2_dict(token).iteritems()
            if piece != token
        ]
        exact = xapian.Query(token, 1)
        if len(pieces) > 1:
            every_piece = xapian.Query(xapian.Query.OP_AND, pieces)
            combined = xapian.Query(xapian.Query.OP_OR, [exact, every_piece])
        elif pieces:
            combined = xapian.Query(xapian.Query.OP_OR, [exact, pieces[0]])
        else:
            combined = exact
        parts.append(combined)

    if len(parts) > 1:
        return xapian.Query(xapian.Query.OP_AND, parts)
    return parts[0]
Ejemplo n.º 14
0
    def search(self, keywords, offset=0, limit=10):
        """Search xapian for documents containing every term of *keywords*.

        :keywords: search keywords (``unicode`` or UTF-8 encoded ``str``)
        :offset: index of the first match to return (first argument of
            ``get_mset``)
        :limit: second argument of ``get_mset``; per the xapian API this is
            the maximum number of matches, not an end position
        :returns: the matches object returned by ``enquire.get_mset``
        """
        # Only encode real unicode input: calling .encode() on a Python 2
        # byte string first decodes it as ASCII and fails on non-ASCII bytes.
        if isinstance(keywords, unicode):
            keywords = keywords.encode('utf-8')

        query_list = [xapian.Query(word) for word in seg_txt_2_dict(keywords)]
        if len(query_list) != 1:
            query = xapian.Query(xapian.Query.OP_AND, query_list)
        else:
            query = query_list[0]

        self.enquire.set_query(query)
        # NOTE(review): 10000 is the third get_mset argument, presumably
        # xapian's "check at least" hint -- confirm against the xapian docs.
        return self.enquire.get_mset(offset, limit, 10000)
Ejemplo n.º 15
0
    def search_software(self, keyword):
        """search interface"""
        
#*****************************************************************************
        try:
        
            from mmseg.search import seg_txt_search,seg_txt_2_dict
            query_string = str(keyword)
            enquire = xapian.Enquire(self.db.xapiandb)
            
            query_list = []
            for word, value in seg_txt_2_dict(query_string).iteritems():
                query = xapian.Query(word, value)
#               print word,value
                query_list.append(query)
            if len(query_list) != 1:
                query = xapian.Query(xapian.Query.OP_AND, query_list)
            else:
                query = query_list[0]
#            print "*** Useing Chinese Segmentation method MMSEG to segment the input keywords ***"

#*********************************************************************************
        except:   

            Info = """
*********************************************************
There is no Chinese Segmentation method MMSEG in
your system.For better useing of ubuntu-kylin-software-center,
please install chinese Segmentation method MMSEG .
*********************************************************
"""
            print Info
            query_string = self.db.get_query_list_from_search_entry(str(keyword))
            enquire = xapian.Enquire(self.db.xapiandb)
            query = query_string[1]

#            enquire = xapian.Enquire(self.db.xapiandb)
#            qp = xapian.QueryParser()
#            qp.set_database(self.db.xapiandb)

#            query = qp.parse_query(str(keyword))
#            print "Parsed query is: %s"% str(query)

        enquire.set_query(query)
        matches = enquire.get_mset(0, len(self.db))
#        print "res len=",len(self.db),len(matches)
        pkgnamelist = []
        for m in matches:
            doc = m.document
#            print m.docid
#            print '************************************'
            pkgname = doc.get_value(XapianValues.PKGNAME)

            if not pkgname:
                pkgname = doc.get_data()

            if pkgname:
                #check weather exist in the list
                try:
                    index = pkgnamelist.index(pkgname)
                #not exist will raise ValueError
                except ValueError:
                    pkgnamelist.append(pkgname)
        
                    
#        print pkgnamelist        
        return pkgnamelist
    def update_xapiandb(self, kwargs):
        """Bring the client xapian database up to date.

        Two modes, selected by ``kwargs["pkgname"]``:

        * empty string -- fetch the apps newer than the last recorded update
          time via ``self.premoter.newerapp_for_xapianupdate`` and re-index
          each of them (documents whose data equals the app name are
          replaced; an app with no match at all is added), then store the
          new update time in value slot 2 of the version document;
        * anything else -- index the single package ``kwargs["pkgname"]``
          using the description of the .deb at ``kwargs["path"]``, unless a
          document whose data equals the package name already exists.

        :kwargs: dict with the key ``"pkgname"`` and, in single-package
            mode, ``"path"``
        """

        def as_text(raw):
            # Document data/values may come back as bytes; normalise to text
            # so they can be compared with str values.
            if isinstance(raw, bytes):
                return raw.decode(encoding='utf-8')
            return raw

        def add_segmented_terms(doc, keywords):
            # Best effort: MMSEG is optional, so a failure only skips the
            # extra segmented terms. The import lives here so that both the
            # "replace" and the "add" path get it; the add path previously
            # relied on an import that might never have run.
            try:
                from mmseg.search import seg_txt_2_dict
                for word in seg_txt_2_dict(keywords):
                    if word != "none":
                        doc.add_term(word, 10)
            except Exception:
                if (Globals.DEBUG_SWITCH):
                    print("----No mmseg model---")

        database = xapian.WritableDatabase(XAPIAN_DB_PATH, xapian.DB_OPEN)
        DB = xapian.Database(XAPIAN_DB_PATH)
        enquire = xapian.Enquire(database)
        indexer = xapian.TermGenerator()

        if "" == kwargs["pkgname"]:
            modified_num = 0
            add_num = 0
            updated = False

            # Pre-set so a database without a version document no longer
            # raises NameError below.
            docid_for_xapiandb_version = None
            doc_for_xapiandb_version = None
            the_latest_update_time = None

            enquire.set_query(xapian.Query("the_#ukxapiandb#_version"))
            for match in enquire.get_mset(0, 1):
                docid_for_xapiandb_version = match.document.get_docid()
                doc_for_xapiandb_version = match.document
                doc_data = as_text(doc_for_xapiandb_version.get_data())
                if ("XAPIANDB_VERSION" == doc_data):
                    # valueslot:2 xapiandb update time
                    the_latest_update_time = as_text(
                        doc_for_xapiandb_version.get_value(2))

            if the_latest_update_time is None:
                the_latest_update_time = time.strftime(
                    '%Y-%m-%dT%H:%M:%S', time.localtime())
                if (Globals.DEBUG_SWITCH):
                    print(
                        "Failed to get the latest update time from client xapiandb,use default time.localtime()"
                    )

            reslist = self.premoter.newerapp_for_xapianupdate(
                the_latest_update_time)

            for app in reslist:
                app_name = str(app["app_name"])
                display_name_cn = str(app["display_name_cn"])
                keywords_for_search = str(app["keywords_for_search"])
                if keywords_for_search != "None":
                    keywords = display_name_cn + ";" + keywords_for_search + ";" + app_name
                else:
                    keywords = display_name_cn + ";" + app_name

                enquire.set_query(xapian.Query(app_name))
                matches = enquire.get_mset(0, DB.get_doccount())
                if matches.size() != 0:
                    for match in matches:
                        # Only the document(s) stored for exactly this app
                        # are re-indexed.
                        if as_text(match.document.get_data()) != app_name:
                            continue
                        docid = match.docid
                        doc = match.document
                        doc.clear_terms()
                        indexer.set_document(doc)
                        doc.add_term(app_name, 10)
                        indexer.index_text(keywords, 10)
                        add_segmented_terms(doc, keywords)

                        database.replace_document(docid, doc)
                        updated = True
                        modified_num = modified_num + 1
                else:
                    doc = xapian.Document()
                    doc.set_data(app_name)
                    doc.add_term(app_name, 10)
                    indexer.set_document(doc)
                    indexer.index_text(keywords, 10)
                    add_segmented_terms(doc, keywords)

                    database.add_document(doc)
                    add_num = add_num + 1
                    if (Globals.DEBUG_SWITCH):
                        print("App:", doc.get_data(), "  ", "terms:", end=' ')
                        for itr in doc.termlist():
                            print(itr.term, end=' ')
                        print("  ")
                    updated = True

            try:
                if updated:
                    now = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime())
                    # If no version document was found this raises and is
                    # reported by the handler below.
                    doc_for_xapiandb_version.add_value(2, now)
                    database.replace_document(docid_for_xapiandb_version,
                                              doc_for_xapiandb_version)
                    database.commit()
                    if (Globals.DEBUG_SWITCH):
                        print(
                            "Xapiandb has updated . %d app modified, %d app add.  Tatal: %d app updated"
                            % (modified_num, add_num, len(reslist)))
            except Exception:
                if (Globals.DEBUG_SWITCH):
                    print(
                        "The xapian database (/home/ice_bird/.cache/uksc/xapiandb) is crashed,please remove it and install a new one!"
                    )
            if (Globals.DEBUG_SWITCH):
                print("update uksc xapiandb over")

        else:
            pkgname = kwargs["pkgname"]
            enquire.set_query(xapian.Query(pkgname))
            for match in enquire.get_mset(0, DB.get_doccount()):
                doc_data = match.document.get_data()
                # Compare against the raw and the decoded data: the raw
                # value may be bytes, which never equals a str package name.
                if pkgname in (doc_data, as_text(doc_data)):
                    return

            doc = xapian.Document()
            doc.set_data(pkgname)
            doc.add_term(pkgname, 10)
            if (Globals.DEBUG_SWITCH):
                print("debfile path:", kwargs["path"])

            deb = DebFile(kwargs["path"])
            terms = pkgname
            try:
                terms = terms + " " + deb.description
            except Exception:
                # Best effort: index the package name alone.
                if (Globals.DEBUG_SWITCH):
                    print("Failed to get app description")
            indexer.set_document(doc)
            indexer.index_text(terms)
            database.add_document(doc)
            database.commit()
            if (Globals.DEBUG_SWITCH):
                print("update xapiandb over: ",
                      pkgname,
                      "terms:",
                      end=' ')
                for itr in doc.termlist():
                    print(itr.term, end=' ')
                print(" ")
Ejemplo n.º 17
0
 def _add_hanzi(self, doc, data):
     """Add the segmented words of *data* to *doc* as terms.

     Does nothing when *data* is falsy. Otherwise every ``(word, value)``
     pair returned by ``seg_txt_2_dict(data)`` is passed to
     ``doc.add_term``.
     """
     if data:
         for term, weight in seg_txt_2_dict(data).iteritems():
             doc.add_term(term, weight)