Example #1
0
def saveAppToDB(appinfo):
    """Store one app record in "app_table" unless it is already there.

    A record counts as already stored when ``MongoUtil.isExist`` reports a
    document with the same category and app name. Whether or not an insert
    happens, the category and name are printed (concatenated) at the end.

    Args:
        appinfo: object exposing the attributes ``cata``, ``name``, ``url``,
            ``descripe`` and ``apk``.
    """
    category = appinfo.cata
    app_name = appinfo.name

    # Key spellings ("catagory", "descripe") are part of the stored schema
    # and must stay exactly as they are.
    record = {
        "catagory": category,
        "appname": app_name,
        "url": appinfo.url,
        "descripe": appinfo.descripe,
        "apk": appinfo.apk,
    }

    # Stamp the record with today's local date, formatted as YYYY-MM-DD.
    now = time.localtime(time.time())
    record["date"] = time.strftime('%Y-%m-%d', now)

    identity = {"catagory": category, "appname": app_name}
    already_stored = MongoUtil.isExist("app_table", identity)
    if not already_stored:
        MongoUtil.insert("app_table", record)

    print(category + app_name)
Example #2
0
def deliveryWords(appinfo,filename):
    """Segment an app's text file into words and record where each occurs.

    Steps, in order:
      1. Print the app name and read every (stripped) line of ``filename``.
      2. Look the app up in "app_table" by category and name; if it is
         missing, print a message asking to store it first and return.
      3. If the collection named after the app's category already holds a
         document for this app id, print a "no need to repeat" message and
         return.
      4. For every line, replace control characters with spaces, segment the
         line with ``jieba.cut`` and, for each word that passes the filter,
         make sure it exists in "word_table" and insert an
         ``{appid, wordid, location}`` document into the category
         collection. ``location`` is the 1-based line number.

    Args:
        appinfo: object exposing the attributes ``cata`` and ``name``.
        filename: path of the text file to segment.

    Returns:
        None.
    """
    print(appinfo.name)

    # Read inside a context manager so the file handle is always closed,
    # even if reading fails part-way through.
    with open(filename) as text_file:
        contents = [line.strip() for line in text_file]

    result = MongoUtil.find_one("app_table", {"catagory":appinfo.cata, "appname":appinfo.name})
    if result is None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 未存入数据库中,请先存储")
        return
    appid = result['_id']

    # The per-category collection already has entries for this app: skip.
    result = MongoUtil.find_one(appinfo.cata, {"appid":appid})
    if result is not None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 已经分词存入数据库,不必重复")
        return

    # Compiled once, outside the loop. Matches the C0 control characters
    # other than tab, line feed and carriage return.
    control_chars = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]')

    for line_num, line in enumerate(contents, start=1):
        # Pause between lines; presumably throttles database load -- confirm.
        time.sleep(0.1)

        # Blank out stray control characters before segmentation.
        line = control_chars.sub(' ', line)

        # cut_all=False: jieba's "full mode" is NOT enabled here.
        for word in jieba.cut(line,cut_all=False):
            # Skip stop words, punctuation, bare whitespace and pure digits.
            if (word in stopWords or word in punctuations
                    or word == '\n' or word == ' ' or word.isdigit()):
                continue

            # The same dict object is used for the existence check, the
            # insert and the lookup, exactly as before, in case insert()
            # mutates the document it is given.
            post_word = {"word": word}
            if not MongoUtil.isExist("word_table", post_word):
                MongoUtil.insert("word_table", post_word)

            result = MongoUtil.find_one("word_table", post_word)

            wordid = result['_id']
            if wordid is None:
                # Diagnostic output for a word that came back without an id.
                print(post_word)

            post_location = {
                "appid": appid,
                "wordid": wordid,
                "location": line_num,
            }
            MongoUtil.insert(appinfo.cata, post_location)
def delivery_words(appid,content):
    """Segment ``content`` into words and queue one entry per kept word.

    Control characters in ``content`` are replaced by spaces, the text is
    segmented with ``jieba.cut`` and every word that passes the filter is
    ensured to exist in "word_table". For each such word an
    ``{"appid", "wordid"}`` dict is appended to ``posts``.

    NOTE: ``posts`` is not defined in this function; it is expected to be
    provided by the enclosing module.

    Args:
        appid: identifier stored under the "appid" key of each entry.
        content: text to segment.
    """
    # Blank out the C0 control characters other than tab, LF and CR.
    cleaned = re.sub('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]', ' ', content)

    # cut_all=False: jieba's "full mode" is NOT enabled here.
    for token in jieba.cut(cleaned, cut_all=False):
        # Skip stop words, punctuation, bare whitespace and pure digits.
        rejected = (
            token in stopWords
            or token in punctuations
            or token == '\n'
            or token == ' '
            or token.isdigit()
        )
        if rejected:
            continue

        # One dict object serves the existence check, the insert and the
        # lookup, mirroring how the query document has always been reused.
        word_doc = {"word": token}
        if not MongoUtil.isExist("word_table", word_doc):
            MongoUtil.insert("word_table", word_doc)

        word_id = MongoUtil.find_one("word_table", word_doc)['_id']
        # Equality (not identity) comparison kept so semantics are unchanged.
        if word_id == None:
            print(word_doc)

        posts.append({"appid": appid, "wordid": word_id})