예제 #1
0
def saveCommentEmotionData(model,best_words,app):
    """Classify the stored comments of one app and save the tallies.

    Args:
        model: Classifier handed through to ``predict``.
        best_words: Feature vocabulary handed through to ``predict``.
        app: App document; its ``_id``, ``appname`` and ``catagory``
            fields are read.

    Nothing is written when an ``emotion_comment`` record already exists
    for the app.  Otherwise the word rows of the app are grouped by their
    ``location`` value, every group is classified as one comment, and the
    totals are passed to ``savetoDB``.
    """
    # Fixed one-second pause before any database access.
    time.sleep(1)
    appid = app["_id"]
    appname = app["appname"]
    cataname = app["catagory"]

    already_saved = MongoUtil.isExist("emotion_comment", {"appid": appid})
    if already_saved:
        print(appname + "已经存在了")
        print()
        return

    # Word rows live in a collection named after the app's category.
    rows = MongoUtil.find(cataname, {"appid": appid})
    print(cataname, appname)

    # location -> list of words found at that location.
    words_by_location = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        word_doc = MongoUtil.find_one("word_table", {"_id": word_id})
        word = word_doc["word"]
        words_by_location.setdefault(location, []).append(word)

    pos_count = 0
    neg_count = 0
    for comment_words in words_by_location.values():
        pred = predict(model, comment_words, best_words)
        emotion = judgeCommentEmotion(pred.prob('pos'), pred.prob('neg'))
        # 1 is tallied as positive, 2 as negative; anything else is ignored.
        if emotion == 1:
            pos_count += 1
        if emotion == 2:
            neg_count += 1

    savetoDB(appid, len(words_by_location), pos_count, neg_count)
예제 #2
0
def saveAppToDB(appinfo):
    """Insert an app record into ``app_table`` unless it is already there.

    Args:
        appinfo: Object exposing ``cata``, ``name``, ``url``, ``descripe``
            and ``apk`` attributes.

    An app counts as already stored when a document with the same
    ``catagory`` and ``appname`` exists.  The category and name are
    printed in either case.
    """
    # "installnum" is currently not stored.
    post = {
        "catagory": appinfo.cata,
        "appname": appinfo.name,
        "url": appinfo.url,
        "descripe": appinfo.descripe,
        "apk": appinfo.apk,
        # Local calendar date of the insertion attempt.
        "date": time.strftime('%Y-%m-%d', time.localtime()),
    }

    lookup = {"catagory": appinfo.cata, "appname": appinfo.name}
    already_stored = MongoUtil.isExist("app_table", lookup)
    if not already_stored:
        MongoUtil.insert("app_table", post)

    print(appinfo.cata + appinfo.name)
예제 #3
0
def saveAllComentEmotionData():
    """Run comment-emotion analysis for the apps of every known category.

    The category names are read from the JSON file at
    ``const.WANDOUJIA_CATA_JSON_FILE``.  For each category the apps are
    fetched from ``app_table`` and handed to ``saveCommentEmotionData``
    one by one, with a running counter printed before each app.
    """
    model, best_words = load_model()

    # Use a context manager so the file handle is closed deterministically.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cataname in catas:
        apps = MongoUtil.find("app_table", {"catagory": cataname})
        for code, app in enumerate(apps, start=1):
            print(code, end=" ")
            # The id and name must come from the current app document; the
            # previous code referenced names never defined in this function.
            if MongoUtil.isExist("emotion_comment", {"appid": app["_id"]}):
                print(app["appname"] + "已经存在了")
                # NOTE(review): this abandons the rest of the category as
                # soon as one already-processed app is met.  Confirm that
                # `continue` was not intended.
                break
            saveCommentEmotionData(model, best_words, app)
예제 #4
0
def deliveryWords(appinfo,filename):
    """Segment a text file into words and store each word's line number.

    Args:
        appinfo: Object exposing ``cata`` (category) and ``name``.
        filename: Path of the text file to segment, one entry per line.

    The app must already exist in ``app_table``; otherwise a message is
    printed and nothing is stored.  Nothing is stored either when the
    per-category collection (named after ``appinfo.cata``) already holds
    rows for this app.  Each kept word is inserted into ``word_table`` if
    missing, and a row ``{appid, wordid, location}`` is inserted into the
    category collection, where ``location`` is the 1-based line number.
    """
    print(appinfo.name)
    # Read everything up front and close the file immediately.
    with open(filename) as source:
        contents = [line.strip() for line in source]

    result = MongoUtil.find_one("app_table", {"catagory":appinfo.cata, "appname":appinfo.name})
    if result is None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 未存入数据库中,请先存储")
        return
    appid = result['_id']

    result = MongoUtil.find_one(appinfo.cata, {"appid":appid})
    if result is not None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 已经分词存入数据库,不必重复")
        return

    # Matches ASCII control characters other than tab, LF and CR.
    # Compiled once instead of once per line.
    control_chars = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]')

    for line_num, line in enumerate(contents, start=1):
        # Fixed pause per line, kept from the original implementation.
        time.sleep(0.1)
        # Replace stray control characters with spaces.
        line = control_chars.sub(' ', line)
        # cut_all=False selects jieba's accurate mode, not full mode.
        for word in jieba.cut(line,cut_all=False):
            if (word in stopWords or word in punctuations
                    or word == '\n' or word == ' ' or word.isdigit()):
                continue

            post_word = {"word": word}
            if not MongoUtil.isExist("word_table", post_word):
                MongoUtil.insert("word_table", post_word)

            # Look the word up again to obtain its stored id.
            result = MongoUtil.find_one("word_table", post_word)
            wordid = result['_id']
            if wordid is None:
                print(post_word)

            post_location = {
                "appid": appid,
                "wordid": wordid,
                "location": line_num,
            }
            MongoUtil.insert(appinfo.cata, post_location)
예제 #5
0
def saveRecommendApps(date):
    """Collect recommendation records for ``date`` and upsert them in bulk.

    Args:
        date: Date value stored with each record; it is concatenated with
            strings when printing, so it is expected to be a ``str``.

    Apps for which ``getRecommendInfo`` returns ``None`` are skipped, as
    are apps that already have a ``recommend_table`` entry for ``date``.
    """
    # Materialise the query result before doing any per-app work.
    all_apps = list(MongoUtil.find("app_table", {}))

    pending = []
    for app in all_apps:
        info = getRecommendInfo(app, date)
        if info is not None:
            key = {"appid": app["_id"], "date": date}
            if MongoUtil.isExist("recommend_table", key):
                print(date + " " + app["appname"] + " 已经存在")
            else:
                print(app["appname"])
                pending.append(info)

    MongoUtil.upsert_mary("recommend_table", pending)
예제 #6
0
def delivery_words(appid,content):
    """Segment ``content`` and queue one ``{appid, wordid}`` row per word.

    Args:
        appid: Identifier stored in every queued row.
        content: Text to segment.

    Stop words, punctuation, newlines, single spaces and all-digit tokens
    are dropped.  Each remaining word is inserted into ``word_table`` if
    missing, and a row is appended to the module-level ``posts`` list.
    """
    # Replace ASCII control characters (except tab, LF, CR) with spaces.
    cleaned = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]').sub(' ', content)

    # Segment with cut_all=False.
    for token in jieba.cut(cleaned, cut_all=False):
        if (token in stopWords or token in punctuations
                or token == '\n' or token == ' ' or token.isdigit()):
            continue

        word_query = {"word": token}
        if not MongoUtil.isExist("word_table", word_query):
            MongoUtil.insert("word_table", word_query)

        # Look the word up again to obtain its stored id.
        stored = MongoUtil.find_one("word_table", word_query)
        wordid = stored['_id']
        if wordid == None:
            print(word_query)

        posts.append({"appid": appid, "wordid": wordid})
예제 #7
0
def scan_cata_app(cata):
    """Segment and store the description of every app in one category.

    Args:
        cata: Category name used to query ``app_table`` and passed to
            ``read_descripe``.

    Apps that already have rows in ``app_detail_descripe`` are skipped.
    For the others the description is read, segmented by
    ``delivery_words`` into the module-level ``posts`` list, and the
    queued rows are upserted in bulk.
    """
    posts.clear()
    # Materialise the query result before doing any per-app work.
    apps = list(MongoUtil.find("app_table", {"catagory": cata}))

    for code, app in enumerate(apps, start=1):
        # Start every app with an empty queue.
        posts.clear()

        print(code, end=" ")
        print(app["appname"])

        already_done = MongoUtil.isExist("app_detail_descripe", {"appid": app["_id"]})
        if already_done:
            continue

        content = read_descripe(cata, app["appname"])
        if content is not None:
            delivery_words(app["_id"], content)

        print(len(posts))
        print()
        if posts:
            MongoUtil.upsert_mary("app_detail_descripe", posts)
예제 #8
0
def saveAppCapacityToDB(appid, date, capacity):
    """Save a capacity record unless one exists for this app and date.

    Args:
        appid: App identifier stored in the record.
        date: Date stored in the record.
        capacity: Value stored under the ``capacity`` key.
    """
    record = {"appid": appid, "date": date}
    if MongoUtil.isExist("capacity_table", record):
        return

    # The lookup document is reused as the saved document.
    record["capacity"] = capacity
    MongoUtil.save("capacity_table", record)