def saveAppToDB(appinfo):
    """Insert an app's metadata into "app_table" unless it is already stored.

    An app is looked up by the pair (appinfo.cata, appinfo.name) through
    MongoUtil.isExist; when a matching record is reported, nothing is
    written and nothing is printed.

    NOTE(review): the source formatting was flattened, so the final print is
    assumed to belong to the insert branch -- confirm against the original.
    """
    # The key spellings ("catagory", "descripe") are the persisted field
    # names and must stay exactly as they are.
    # The install count (appinfo.installnum) is intentionally not stored.
    record = {
        "catagory": appinfo.cata,
        "appname": appinfo.name,
        "url": appinfo.url,
        "descripe": appinfo.descripe,
        "apk": appinfo.apk,
        # With no time tuple, strftime formats the current local time.
        "date": time.strftime('%Y-%m-%d'),
    }

    if MongoUtil.isExist("app_table", {"catagory": appinfo.cata, "appname": appinfo.name}):
        return

    MongoUtil.insert("app_table", record)
    print(appinfo.cata + appinfo.name)
def deliveryWords(appinfo,filename):
    """Segment a text file into words and record where each word occurs.

    The app is looked up in "app_table" by (appinfo.cata, appinfo.name).
    Processing stops with a printed message when the app has not been stored
    yet, or when a record with its id already exists under the name
    appinfo.cata (i.e. the app was segmented before).

    Otherwise every line of the file is cleaned of control characters and
    segmented with jieba. Each kept word is ensured to exist in "word_table",
    and a record {"appid", "wordid", "location"} is inserted under the name
    appinfo.cata, where "location" is the 1-based line number.

    Args:
        appinfo: object exposing at least the ``cata`` and ``name`` attributes.
        filename: path of the text file to segment; opened with the platform
            default encoding.

    Returns:
        None.
    """
    print(appinfo.name)

    # Read the file inside a context manager so the handle is always closed.
    with open(filename) as source:
        contents = [line.strip() for line in source]

    app_record = MongoUtil.find_one("app_table", {"catagory": appinfo.cata, "appname": appinfo.name})
    if app_record is None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 未存入数据库中,请先存储")
        return
    appid = app_record['_id']

    if MongoUtil.find_one(appinfo.cata, {"appid": appid}) is not None:
        print("\""+appinfo.cata+" "+appinfo.name+"\" 已经分词存入数据库,不必重复")
        return

    # Compile once instead of once per line: matches control characters
    # other than tab, newline and carriage return.
    control_chars = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]')

    for line_num, line in enumerate(contents, start=1):
        # Fixed pause per line, kept from the original.
        # NOTE(review): presumably throttles database load -- confirm.
        time.sleep(0.1)

        # Replace garbled control characters with spaces.
        line = control_chars.sub(' ', line)

        # cut_all=False selects jieba's accurate mode (not full mode).
        for word in jieba.cut(line, cut_all=False):
            # Skip stop words, punctuation, bare whitespace tokens and numbers.
            if (word in stopWords or word in punctuations
                    or word == '\n' or word == ' ' or word.isdigit()):
                continue

            post_word = {"word": word}
            if not MongoUtil.isExist("word_table", post_word):
                MongoUtil.insert("word_table", post_word)

            wordid = MongoUtil.find_one("word_table", post_word)['_id']
            if wordid is None:
                # Log the word that has no id; the location is still recorded.
                print(post_word)

            post_location = {
                "appid": appid,
                "wordid": wordid,
                "location": line_num,
            }
            MongoUtil.insert(appinfo.cata, post_location)
def delivery_words(appid,content):
    """Segment ``content`` and queue one {"appid", "wordid"} record per kept word.

    Control characters are replaced with spaces, the text is segmented with
    jieba, and every word that is not a stop word, punctuation, a bare
    newline/space token or a digit string is ensured to exist in
    "word_table". A record pairing ``appid`` with the word's ``_id`` is then
    appended to ``posts``.

    ``posts`` is not defined in this function; it is expected to be a list
    available in the enclosing (module) scope.

    Args:
        appid: identifier stored under the "appid" key of each record.
        content: text to segment.

    Returns:
        None.
    """
    # Replace garbled control characters (everything below 0x20 except tab,
    # newline and carriage return) with spaces.
    content = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]').sub(' ', content)

    # cut_all=False selects jieba's accurate mode (not full mode).
    for word in jieba.cut(content, cut_all=False):
        # Skip stop words, punctuation, bare whitespace tokens and numbers.
        if (word in stopWords or word in punctuations
                or word == '\n' or word == ' ' or word.isdigit()):
            continue

        post_word = {"word": word}
        if not MongoUtil.isExist("word_table", post_word):
            MongoUtil.insert("word_table", post_word)

        wordid = MongoUtil.find_one("word_table", post_word)['_id']
        if wordid is None:
            # Log the word that has no id; the record is still queued.
            print(post_word)

        posts.append({"appid": appid, "wordid": wordid})