def getDowloadCapacity(appname, cataname=""):
    """Return the recorded capacities of one app, keyed by record date.

    The app is looked up in ``app_table`` by ``appname`` and, when
    ``cataname`` is non-empty, by ``catagory`` as well.  Each row of
    ``capacity_table`` whose ``appid`` matches contributes one entry.

    Returns:
        A dict mapping ``date`` to ``capacity``.  A capacity made only of
        digits is stored unchanged; any other value is first converted with
        ``install2num``.  When no app matches, a message is printed and
        ``None`` is returned.
    """
    query = {"appname": appname}
    if cataname != "":
        query = {"catagory": cataname, "appname": appname}

    found = MongoUtil.find_one("app_table", query)
    if found is None:
        print(cataname + appname + "不存在")
        return None

    by_date = {}
    for record in MongoUtil.find("capacity_table", {"appid": found["_id"]}):
        day = record["date"]
        amount = record["capacity"]
        if not amount.isdigit():
            # Non-numeric text is normalised by the helper.
            amount = install2num(amount)
        by_date[day] = amount
    return by_date
def saveCommentEmotionData(model,best_words,app):
    """Classify every comment of ``app`` and store the emotion tallies.

    Does nothing (beyond printing a notice) when ``emotion_comment`` already
    holds a record for the app.  Otherwise the rows of the collection named
    by the app's ``catagory`` are grouped by ``location``, each group's word
    list is passed to ``predict``, and the number of groups judged ``1`` and
    ``2`` by ``judgeCommentEmotion`` is handed to ``savetoDB`` together with
    the total number of groups.

    Args:
        model: Passed through to ``predict``.
        best_words: Passed through to ``predict``.
        app: Mapping providing ``_id``, ``appname`` and ``catagory``.
    """
    time.sleep(1)
    appid = app["_id"]
    appname = app["appname"]
    cataname = app["catagory"]

    if MongoUtil.isExist("emotion_comment",{"appid":appid}):
        print(appname+"已经存在了")
        print()
        return

    rows = MongoUtil.find(cataname,{"appid":appid})
    print(cataname,appname)

    # Rebuild each comment as the list of words sharing one location.
    words_by_location = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        word = MongoUtil.find_one("word_table",{"_id":word_id})["word"]
        words_by_location.setdefault(location,[]).append(word)

    positives = 0
    negatives = 0
    for comment_words in words_by_location.values():
        pred = predict(model,comment_words,best_words)
        verdict = judgeCommentEmotion(pred.prob('pos'),pred.prob('neg'))
        if verdict == 1:
            positives += 1
        elif verdict == 2:
            negatives += 1

    savetoDB(appid,len(words_by_location),positives,negatives)
def scanAppInfo(appname,catagory=""):
    """Print the stored record and description file of every matching app.

    Apps are read from ``app_table`` by ``appname`` and, when ``catagory``
    is non-empty, by ``catagory`` as well.  For each match the record is
    printed, followed by the contents of
    ``../file/apps_detail_descripe/<catagory>/<appname>.json`` when that
    file exists.

    Args:
        appname: Value matched against the ``appname`` field.
        catagory: Optional value matched against the ``catagory`` field.
    """
    if catagory == "":
        apps = MongoUtil.find("app_table", {"appname":appname})
    else:
        apps = MongoUtil.find("app_table", {"catagory":catagory, "appname":appname})

    for appinfo in apps:
        print("基本信息: ")
        print(appinfo)
        # Use the category stored on the record, not the query argument.
        app_catagory = appinfo["catagory"]
        detail_path = "../file/apps_detail_descripe/"+app_catagory+"/"+appinfo["appname"]+".json"
        if os.path.exists(detail_path):
            # Context manager so the handle is closed for every app scanned.
            with open(detail_path) as detail_file:
                print("应用描述:")
                print(detail_file.read())
        print()
def showData(cataname):
    """Print collection sizes and comment coverage for one category.

    Reports the sizes of ``app_table`` and ``word_table``, the number of
    apps in ``cataname``, the size of the collection named after the
    category, and how many apps do / do not have rows in that collection.

    Note that ``cataname`` is stripped only after the ``app_table`` query
    and its printout; the later collection lookups use the stripped name.
    """
    total_apps = MongoUtil.count("app_table")
    print("总app数量:" + str(total_apps))
    total_words = MongoUtil.count("word_table")
    print("word数量:" + str(total_words))

    apps_in_cata = MongoUtil.find("app_table", {"catagory":cataname}).count()
    print(cataname+"的 app数量: "+str(apps_in_cata))

    cataname = cataname.strip()
    location_total = MongoUtil.count(cataname)
    print(cataname +"的 location 数量:" + str(location_total))

    # Number of distinct app ids present in the category collection.
    apps_with_comments = len(MongoUtil.distinct_count(cataname, "appid"))
    print("已获取评论的 app数量:"+str(apps_with_comments))
    print("未获取评论的 app数量:"+str(apps_in_cata-apps_with_comments))
def scanCatagorys():
    """Print a numbered list of all categories and the total app count.

    Category names are loaded from the JSON file at
    ``const.WANDOUJIA_CATA_JSON_FILE``.  Each name is stripped, printed
    with a 1-based index and followed by the output of
    ``scanCatagoryInfo``.  The size of ``app_table`` is printed last.
    """
    # Context manager so the JSON file is closed once parsed.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    print("所有目录信息:")
    for code, cataname in enumerate(catas, start=1):
        cataname = cataname.strip()
        print(str(code)+". "+cataname,end=" ")
        scanCatagoryInfo(cataname)
    print()

    count = MongoUtil.find("app_table",{}).count()
    print("总数:" + str(count))
def frequencyscore(self):
    """Count word occurrences among this app's rows.

    Rows are read from the collection named by ``self.app["catagory"]``
    where ``appid`` equals ``self.app["_id"]``; each row's ``wordid`` is
    resolved to its text through ``word_table``.

    Returns:
        A ``(counts, total)`` tuple: ``counts`` maps each word to the number
        of rows referring to it, ``total`` is the number of rows processed.
    """
    rows = MongoUtil.find(self.app["catagory"], {"appid": self.app["_id"]})
    counts = {}
    total = 0
    for row in rows:
        entry = MongoUtil.find_one("word_table", {"_id": row["wordid"]})
        token = entry["word"]
        counts[token] = counts.get(token, 0) + 1
        total += 1
    return counts, total
def saveAllComentEmotionData():
    """Run ``saveCommentEmotionData`` over the apps of every category.

    Category names are loaded from the JSON file at
    ``const.WANDOUJIA_CATA_JSON_FILE``; the model and word list come from
    ``load_model``.  Within a category, apps are processed in cursor order
    and a running 1-based index is printed before each one.
    """
    model,best_words = load_model()
    # Context manager so the JSON file is closed once parsed.
    with open(const.WANDOUJIA_CATA_JSON_FILE) as cata_file:
        catas = json.load(cata_file)

    for cataname in catas:
        apps = MongoUtil.find("app_table",{"catagory":cataname})
        for code, app in enumerate(apps, start=1):
            print(code,end=" ")
            # Bind these from the current record; they were previously
            # referenced without ever being assigned in this function.
            appid = app["_id"]
            appname = app["appname"]
            if MongoUtil.isExist("emotion_comment",{"appid":appid}):
                print(appname+"已经存在了")
                # NOTE(review): this stops the whole category at the first
                # app that already has a record; confirm that `continue`
                # (skip only this app) is not what was intended.
                break
            saveCommentEmotionData(model,best_words,app)
def saveRecommendApps(date):
    """Build recommendation records for ``date`` and upsert them in bulk.

    Every app in ``app_table`` is passed to ``getRecommendInfo``.  Apps for
    which that returns ``None`` are skipped, as are apps that already have a
    ``recommend_table`` record for the same ``date``.  The remaining records
    are written with a single ``MongoUtil.upsert_mary`` call.

    Args:
        date: Value stored in / matched against the ``date`` field; it is
            also concatenated into a printed message.
    """
    # Read the whole cursor up front before issuing further queries.
    all_apps = list(MongoUtil.find("app_table", {}))

    pending = []
    for app in all_apps:
        info = getRecommendInfo(app, date)
        if info is None:
            continue
        already_saved = MongoUtil.isExist("recommend_table", {
            "appid": app["_id"],
            "date": date
        })
        if already_saved:
            print(date + " " + app["appname"] + " 已经存在")
            continue
        print(app["appname"])
        pending.append(info)

    MongoUtil.upsert_mary("recommend_table", pending)
def get_app_each_comment(appname,cataname =""):
    """Return the words of each comment of an app, grouped by location.

    The app is looked up in ``app_table`` by ``appname`` and, when
    ``cataname`` is non-empty, by ``catagory`` as well; the lookup result is
    printed.  Rows of the collection named by the app's ``catagory`` are
    then grouped by ``location``, with each ``wordid`` resolved through
    ``word_table``.

    Returns:
        A dict mapping ``location`` to the list of words found there, or
        ``None`` when no app matches.
    """
    query = {"appname":appname}
    if cataname != "":
        query = {"catagory":cataname, "appname":appname}
    app = MongoUtil.find_one("app_table", query)
    print(app)
    if app is None:
        return None

    rows = MongoUtil.find(app["catagory"],{"appid":app["_id"]})
    grouped = {}
    for row in rows:
        word_id = row["wordid"]
        location = row["location"]
        word = MongoUtil.find_one("word_table",{"_id":word_id})["word"]
        grouped.setdefault(location,[]).append(word)
    return grouped
def scan_cata_app(cata):
    """Index the description of every app in category ``cata``.

    Apps are read from ``app_table``.  For each one its 1-based index and
    name are printed; apps that already have rows in
    ``app_detail_descripe`` are skipped.  Otherwise the description returned
    by ``read_descripe`` (when not ``None``) is passed to
    ``delivery_words``, and whatever the module-level ``posts`` then holds
    is upserted into ``app_detail_descripe``.

    ``posts`` is cleared at the start of every iteration, so each app is
    written as its own batch.  Presumably ``delivery_words`` is what fills
    ``posts`` -- confirm against its definition.
    """
    posts.clear()
    # Read the whole cursor up front before issuing further queries.
    apps = list(MongoUtil.find("app_table",{"catagory":cata}))

    for code, app in enumerate(apps, start=1):
        posts.clear()
        print(code,end=" ")
        print(app["appname"])

        if MongoUtil.isExist("app_detail_descripe",{"appid":app["_id"]}):
            continue

        content = read_descripe(cata,app["appname"])
        if content is not None:
            delivery_words(app["_id"],content)

        print(len(posts))
        print()
        if len(posts) > 0:
            MongoUtil.upsert_mary("app_detail_descripe",posts)
def getCapacityCount(date):
    """Return how many ``capacity_table`` rows have the given ``date``."""
    matching = MongoUtil.find("capacity_table", {"date": date})
    return matching.count()
#{'appid': ObjectId('58648f1282939b10b3d46b88'), 'wilson_lower_score': 0.3208923096194997, 'comment_count': 499, 'neg_count': 254, 'applause_rate': 0.31956521739130433, 'pos_count': 147, 'wilson_top_score': 0.4148067884968993}

# Convert install-count text into a number.
def install2num(install):
    """Convert an install-count string such as ``"1.2万"`` to an integer.

    The first decimal number found in ``install`` is scaled by 100000000
    when the text contains ``亿`` and by 10000 when it contains ``万`` (both
    apply if both characters are present).  Any fractional remainder after
    scaling is truncated.

    Args:
        install: Text containing a number and optional unit characters.

    Returns:
        The install count as an ``int``; ``0`` when the text contains no
        digits or the number is zero.
    """
    # Decimal keeps the scaling exact.  With binary floats a value such as
    # "1.15亿" can land just below the true product and then lose a whole
    # unit when truncated.
    from decimal import Decimal

    matches = re.findall(r"\d+\.?\d*", install)
    if not matches:
        # No number in the text: report zero instead of raising IndexError.
        return 0

    result = Decimal(matches[0])
    if result == 0:
        return 0
    if '亿' in install:
        result *= 100000000
    if '万' in install:
        result *= 10000
    return int(result)


# Re-read every capacity record and attach a numeric ``capacity_num``.
allApps = MongoUtil.find("capacity_table",{})
datas = []
code = 0
for appinfo in allApps:
    code += 1
    _id = appinfo["_id"]
    appid = appinfo["appid"]
    date = appinfo["date"]
    capacity = appinfo["capacity"]
    capacity_num = install2num(capacity)
    data = {"_id":_id,"appid":appid,"date":date,"capacity":capacity,
            "capacity_num":capacity_num}
    print(data)
    datas.append(data)
def scanCatagoryInfo(catagory):
    """Print how many ``app_table`` rows belong to ``catagory``."""
    matching = MongoUtil.find("app_table",{"catagory":catagory})
    print("app数量: "+str(matching.count()))
return if incre[1] <= 0: post["wilson_lower_rate"] = -WilsonScoreUtil.confidence_2( -incre[1], capacity) else: post["wilson_lower_rate"] = WilsonScoreUtil.confidence_2( incre[1], capacity) # print(post) posts.append(post) MongoUtil.upsert_mary("capacity_rate_table", posts) if __name__ == '__main__': #聊天社交 catas = json.load(open(const.WANDOUJIA_CATA_JSON_FILE)) for cata in catas: # cata = "生活服务" posts.clear() is_not_exist.clear() if cata in []: continue print("目录:" + cata) appinfo_list = MongoUtil.find("app_table", {"catagory": cata}) for appinfo in appinfo_list: getChainRateStore(appinfo) print(len(posts)) pickle.dump(is_not_exist, open('../file/not_exist/not_exist_appid', 'wb'))