예제 #1
0
def getDowloadCapacity(appname, cataname=""):
    """Collect the recorded capacity values of one app, keyed by date.

    The app is looked up in ``app_table`` by ``appname`` (and additionally by
    ``catagory`` when ``cataname`` is non-empty); its ``_id`` is then used to
    read the matching rows of ``capacity_table``.

    Args:
        appname: Value matched against the ``appname`` field.
        cataname: Value matched against the ``catagory`` field. The empty
            string (default) disables the category filter.

    Returns:
        A dict mapping each row's ``date`` to its ``capacity``, or ``None``
        (after printing a message) when no app matches.
    """
    if cataname == "":
        app = MongoUtil.find_one("app_table", {"appname": appname})
    else:
        app = MongoUtil.find_one("app_table", {
            "catagory": cataname,
            "appname": appname
        })

    # Identity test is the correct way to detect a missing document.
    if app is None:
        print(cataname + appname + "不存在")
        return

    app_capacity = {}
    for item in MongoUtil.find("capacity_table", {"appid": app["_id"]}):
        capacity = item["capacity"]
        # Only text that is not purely digits is converted.
        # NOTE(review): digit-only values are stored unchanged (still str),
        # so the dict can mix str and int values -- confirm callers expect it.
        if not capacity.isdigit():
            capacity = install2num(capacity)
        app_capacity[item["date"]] = capacity

    return app_capacity
예제 #2
0
def saveCommentEmotionData(model,best_words,app):
    """Classify every comment of one app and store the positive/negative totals.

    Sleeps for one second, then returns early (after printing a message) when
    ``emotion_comment`` already holds a record for this app. Otherwise the
    rows of the app's category collection are grouped by ``location`` into
    word lists, each list is classified with ``predict`` and
    ``judgeCommentEmotion``, and the totals are handed to ``savetoDB``.

    Args:
        model: Passed through to ``predict``.
        best_words: Passed through to ``predict``.
        app: Mapping providing ``_id``, ``appname`` and ``catagory``.
    """
    time.sleep(1)
    appid = app["_id"]
    appname = app["appname"]
    cataname = app["catagory"]

    already_saved = MongoUtil.isExist("emotion_comment", {"appid": appid})
    if already_saved:
        print(appname+"已经存在了")
        print()
        return

    rows = MongoUtil.find(cataname, {"appid": appid})
    print(cataname,appname)

    # Group the words of each comment under that comment's location key.
    words_by_location = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        word = MongoUtil.find_one("word_table", {"_id": word_id})["word"]
        words_by_location.setdefault(location, []).append(word)

    pos_count = 0
    neg_count = 0
    for comment_words in words_by_location.values():
        pred = predict(model, comment_words, best_words)
        emotion = judgeCommentEmotion(pred.prob('pos'), pred.prob('neg'))
        # 1 is counted as positive, 2 as negative; anything else is ignored.
        if emotion == 1:
            pos_count += 1
        elif emotion == 2:
            neg_count += 1

    savetoDB(appid, len(words_by_location), pos_count, neg_count)
예제 #3
0
def scanAppInfo(appname,catagory=""):
    """Print the stored record of every matching app, plus its description file.

    Apps are read from ``app_table`` by ``appname`` (and by ``catagory`` when
    one is given). For each match the record is printed; if a JSON file exists
    under ``../file/apps_detail_descripe/<catagory>/<appname>.json`` its
    content is printed as well.

    Args:
        appname: Value matched against the ``appname`` field.
        catagory: Value matched against the ``catagory`` field. The empty
            string (default) disables the category filter.
    """
    if catagory == "":
        apps = MongoUtil.find("app_table", {"appname": appname})
    else:
        apps = MongoUtil.find("app_table", {"catagory": catagory, "appname": appname})

    for appinfo in apps:
        print("基本信息: ")
        print(appinfo)
        # The path is built from the category stored on the record itself.
        catagory = appinfo["catagory"]
        detail_path = "../file/apps_detail_descripe/"+catagory+"/"+appinfo["appname"]+".json"

        if os.path.exists(detail_path):
            # Context manager closes the handle; the original leaked one file
            # object per matching app.
            with open(detail_path) as detail_file:
                print("应用描述:")
                print(detail_file.read())
        print()
예제 #4
0
def showData(cataname):
    """Print collection totals and per-category comment coverage.

    Prints the sizes of ``app_table`` and ``word_table``, the number of apps
    in the given category, the size of the category's own collection, and how
    many apps of that category do / do not appear in it (by distinct
    ``appid``).

    Args:
        cataname: Category name; also used as the name of the collection
            that is counted.
    """
    total_apps = MongoUtil.count("app_table")
    print("总app数量:" + str(total_apps))
    total_words = MongoUtil.count("word_table")
    print("word数量:" + str(total_words))

    cata_apps = MongoUtil.find("app_table", {"catagory":cataname}).count()
    print(cataname+"的 app数量: "+str(cata_apps))

    # NOTE(review): the name is stripped only from here on; the app_table
    # query above used the unstripped value -- confirm this is intended.
    cataname = cataname.strip()
    location_rows = MongoUtil.count(cataname)
    print(cataname +"的 location 数量:" + str(location_rows))

    apps_with_comments = len(MongoUtil.distinct_count(cataname, "appid"))
    print("已获取评论的 app数量:"+str(apps_with_comments))
    print("未获取评论的 app数量:"+str(cata_apps-apps_with_comments))
예제 #5
0
def scanCatagorys():
    """Print a numbered list of all categories and the total number of apps.

    Category names are loaded from the JSON file named by
    ``const.WANDOUJIA_CATA_JSON_FILE``. Each name is stripped, printed with a
    1-based index, and passed to ``scanCatagoryInfo``. Finally the number of
    entries in ``app_table`` is printed.
    """
    # Context manager closes the file; json.load(open(...)) leaked the handle.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    print("所有目录信息:")
    for code, cataname in enumerate(catas, start=1):
        cataname = cataname.strip()
        # end=" " keeps the output of scanCatagoryInfo on the same line.
        print(str(code)+". "+cataname,end=" ")
        scanCatagoryInfo(cataname)
    print()
    count = MongoUtil.find("app_table",{}).count()
    print("总数:" + str(count))
예제 #6
0
 def frequencyscore(self):
     """Count word occurrences across the rows stored for ``self.app``.

     Reads the rows of the collection named by ``self.app["catagory"]`` whose
     ``appid`` equals ``self.app["_id"]`` and resolves each row's ``wordid``
     through ``word_table``.

     Returns:
         A tuple ``(counts, total)``: a dict mapping each word to the number
         of rows that referenced it, and the total number of rows read.
     """
     counts = {}
     total = 0
     rows = MongoUtil.find(self.app["catagory"], {"appid": self.app["_id"]})
     for row in rows:
         entry = MongoUtil.find_one("word_table", {"_id": row["wordid"]})
         word = entry["word"]
         counts[word] = counts.get(word, 0) + 1
         total += 1
     return counts, total
예제 #7
0
def saveAllComentEmotionData():
    """Run ``saveCommentEmotionData`` over the apps of every category.

    The model is loaded once with ``load_model``; category names come from
    the JSON file named by ``const.WANDOUJIA_CATA_JSON_FILE``. For each
    category the apps are read from ``app_table`` and processed in order,
    printing a running 1-based index.
    """
    model, best_words = load_model()
    # Context manager closes the file; json.load(open(...)) leaked the handle.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cataname in catas:
        apps = MongoUtil.find("app_table", {"catagory": cataname})
        for code, app in enumerate(apps, start=1):
            print(code, end=" ")
            # The original referenced the undefined names `appid` and
            # `appname` here, which raises NameError; read them from the
            # current record instead.
            if MongoUtil.isExist("emotion_comment", {"appid": app["_id"]}):
                print(app["appname"] + "已经存在了")
                # NOTE(review): `break` abandons the rest of this category as
                # soon as one already-processed app is met -- confirm that
                # `continue` was not intended.
                break
            saveCommentEmotionData(model, best_words, app)
예제 #8
0
def saveRecommendApps(date):
    """Build recommendation records for all apps on *date* and store them.

    Every entry of ``app_table`` is passed to ``getRecommendInfo``. Apps for
    which that returns ``None``, or which already have a ``recommend_table``
    record for this date, are skipped; the rest are collected and written
    with a single ``MongoUtil.upsert_mary`` call.

    Args:
        date: Passed to ``getRecommendInfo`` and used as the ``date`` field
            in the existence check; concatenated with text when printing.
    """
    # The query result is read completely before any other query is issued.
    all_apps = list(MongoUtil.find("app_table", {}))

    pending = []
    for app in all_apps:
        info = getRecommendInfo(app, date)
        if info is None:
            continue

        lookup = {
            "appid": app["_id"],
            "date": date
        }
        if MongoUtil.isExist("recommend_table", lookup):
            print(date + " " + app["appname"] + " 已经存在")
        else:
            print(app["appname"])
            pending.append(info)

    MongoUtil.upsert_mary("recommend_table", pending)
예제 #9
0
def get_app_each_comment(appname,cataname =""):
    """Return the words of each comment of one app, grouped by location.

    The app is looked up in ``app_table`` by ``appname`` (and by ``catagory``
    when ``cataname`` is non-empty) and printed. Its rows in the collection
    named after its category are then grouped by ``location``.

    Args:
        appname: Value matched against the ``appname`` field.
        cataname: Value matched against the ``catagory`` field. The empty
            string (default) disables the category filter.

    Returns:
        A dict mapping each ``location`` to the list of words found there,
        or ``None`` when no app matches.
    """
    if cataname == "":
        query = {"appname":appname}
    else:
        query = {"catagory":cataname, "appname":appname}
    app = MongoUtil.find_one("app_table", query)
    print(app)
    if app is None:
        return

    app_id = app["_id"]
    app_cata = app["catagory"]
    rows = MongoUtil.find(app_cata, {"appid":app_id})

    grouped = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        # Each row stores only a word id; resolve it through word_table.
        word = MongoUtil.find_one("word_table", {"_id":word_id})["word"]
        grouped.setdefault(location, []).append(word)
    return grouped
예제 #10
0
def scan_cata_app(cata):
    """Process the description of every app in a category and store the result.

    For each ``app_table`` entry of the category that has no
    ``app_detail_descripe`` record yet, the description is read with
    ``read_descripe`` and, when present, passed to ``delivery_words``.
    Whatever is in the shared ``posts`` container afterwards is written with
    ``MongoUtil.upsert_mary``.

    Args:
        cata: Category name used for the query and for ``read_descripe``.
    """
    posts.clear()
    # The query result is read completely before the per-app work starts.
    apps = list(MongoUtil.find("app_table", {"catagory":cata}))

    for code, app in enumerate(apps, start=1):
        # Start each app with an empty shared buffer.
        posts.clear()

        print(code, end=" ")
        print(app["appname"])

        if MongoUtil.isExist("app_detail_descripe", {"appid":app["_id"]}):
            continue

        content = read_descripe(cata, app["appname"])
        if content is not None:
            # presumably fills `posts` -- verify against delivery_words
            delivery_words(app["_id"], content)

        print(len(posts))
        print()
        if len(posts) > 0:
            MongoUtil.upsert_mary("app_detail_descripe", posts)
예제 #11
0
def getCapacityCount(date):
    """Return ``.count()`` of the ``capacity_table`` query for the given date."""
    matching = MongoUtil.find("capacity_table", {"date": date})
    return matching.count()
예제 #12
0
#{'appid': ObjectId('58648f1282939b10b3d46b88'), 'wilson_lower_score': 0.3208923096194997, 'comment_count': 499, 'neg_count': 254, 'applause_rate': 0.31956521739130433, 'pos_count': 147, 'wilson_top_score': 0.4148067884968993}


# Convert install-count text into a number.
def install2num(install):
    """Parse install-count text such as ``"2.3亿"`` or ``"1.5万"`` into an int.

    The first decimal number found in the text is taken as the base value. It
    is multiplied by 100000000 when the text contains ``亿`` and by 10000
    when it contains ``万`` (both apply if both characters are present). Any
    remaining fraction is truncated.

    Args:
        install: Text containing a number and optional unit characters.

    Returns:
        The install count as an ``int``; ``0`` when the text contains no
        number or the number is zero.
    """
    from decimal import Decimal

    numbers = re.findall(r"\d+\.?\d*", install)
    # Text without any digits used to raise IndexError; treat it as zero.
    if not numbers:
        return 0

    # Decimal keeps the scaling exact: with float, 2.3 * 100000000 evaluates
    # to 229999999.99999997 and int() then truncates it to 229999999.
    result = Decimal(numbers[0])
    if result == 0:
        return 0
    if '亿' in install:
        result *= 100000000
    if '万' in install:
        result *= 10000
    return int(result)

# Read every row of capacity_table and build, for each one, a copy that also
# carries the numeric form of its capacity text.
allApps = MongoUtil.find("capacity_table",{})

datas = []  # enriched rows, in iteration order
code = 0  # running count of rows processed
for appinfo in allApps:
    code += 1
    _id = appinfo["_id"]
    appid = appinfo["appid"]
    date = appinfo["date"]
    capacity = appinfo["capacity"]
    # install2num runs a regex over this value, so it is expected to be text.
    capacity_num = install2num(capacity)

    # Keep the original capacity and store the parsed value next to it.
    data = {"_id":_id,"appid":appid,"date":date,"capacity":capacity, "capacity_num":capacity_num}
    print(data)
    datas.append(data)
예제 #13
0
def scanCatagoryInfo(catagory):
    """Print how many ``app_table`` entries have the given category.

    Args:
        catagory: Value matched against the ``catagory`` field.
    """
    matches = MongoUtil.find("app_table", {"catagory": catagory})
    print("app数量: " + str(matches.count()))
예제 #14
0
            return

        if incre[1] <= 0:
            post["wilson_lower_rate"] = -WilsonScoreUtil.confidence_2(
                -incre[1], capacity)
        else:
            post["wilson_lower_rate"] = WilsonScoreUtil.confidence_2(
                incre[1], capacity)
        # print(post)
        posts.append(post)
    MongoUtil.upsert_mary("capacity_rate_table", posts)


if __name__ == '__main__':
    # Category names come from the JSON file named by
    # const.WANDOUJIA_CATA_JSON_FILE. The context manager closes the file;
    # json.load(open(...)) leaked the handle.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cata in catas:
        # Start every category with empty shared buffers.
        posts.clear()
        is_not_exist.clear()
        # Skip-list for categories to leave out; currently empty.
        if cata in []:
            continue

        print("目录:" + cata)
        appinfo_list = MongoUtil.find("app_table", {"catagory": cata})
        for appinfo in appinfo_list:
            getChainRateStore(appinfo)

        print(len(posts))
        # Close (and thereby flush) the pickle file explicitly instead of
        # relying on garbage collection of an anonymous handle.
        # NOTE(review): the file is rewritten for every category and
        # is_not_exist is cleared at the top of the loop, so it ends up
        # holding only the last category's entries -- confirm this is wanted.
        with open('../file/not_exist/not_exist_appid', 'wb') as not_exist_file:
            pickle.dump(is_not_exist, not_exist_file)