def classifyMethod(aa, trainData, model, stop):
    """Score every sentence found in the review rows against the training groups.

    Each row's HTML is taken from index 4, parsed with ``BeautifulSoup``, and
    the children of ``contents[0].contents[0]`` are treated as individual
    sentences. Each child's text is normalised with ``dealSentence`` and then
    scored with ``classifyCate``.

    Args:
        aa: Iterable of rows; ``row[4]`` must hold the HTML of one review.
        trainData: Iterable of sentence groups, forwarded to ``classifyCate``.
        model: Forwarded unchanged to ``classifyCate``.
        stop: Forwarded unchanged to ``classifyCate``.

    Returns:
        A list with one entry per extracted sentence; each entry is the list
        of per-group scores returned by ``classifyCate`` (e.g. ``[2, 1, 2, 3]``).
    """
    simples = []
    for row in aa:
        # NOTE(review): assumes the parsed document nests the sentence nodes
        # two levels down (contents[0].contents[0]) - confirm against the data.
        container = BeautifulSoup(row[4]).contents[0].contents[0]
        simples.extend(dealSentence(node.get_text()) for node in container)

    a = []
    for nn, sentence in enumerate(simples):
        # Delegate to classifyCate instead of duplicating its scoring loop.
        a.append(classifyCate(sentence, trainData, model, stop))
        # Progress line: timestamp plus the index of the sentence just scored.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(time.strftime("%X", time.localtime()) + "\t" + str(nn))

    print(time.strftime("%X", time.localtime()) + "\t end of the train process")
    return a
def falseReviewSimilar(typereviews, review, model):
    """Compare one review against each review in a collection.

    Both the target review and every candidate are normalised with
    ``dealSentence`` before being handed to ``similarSentence``. The
    stop-word data comes from a single ``createStopword()`` call.

    Args:
        typereviews: Iterable of reviews to compare against.
        review: The review every candidate is compared with.
        model: Passed through unchanged to ``similarSentence``.

    Returns:
        A list of ``similarSentence`` results, one per item of
        ``typereviews`` and in the same order.
    """
    stopwords = createStopword()
    target = dealSentence(review)
    savedArray = [
        similarSentence(target, dealSentence(candidate), distance, model, stopwords)
        for candidate in typereviews
    ]
    # Timestamped completion marker on stdout.
    print(time.strftime("%X", time.localtime()) + "\tend of deal similar false reviews")
    return savedArray
def classifyCate(line, trainData, model, stop):
    """Score one sentence against every group of training sentences.

    For each group in ``trainData`` the result holds the smallest value that
    ``similarSentence`` returns for any sentence of that group paired with
    ``line``, capped at 2. An empty group therefore scores 2.

    Args:
        line: The sentence to score.
        trainData: Iterable of groups, each an iterable of sentences.
        model: Passed through unchanged to ``similarSentence``.
        stop: Passed through unchanged to ``similarSentence``.

    Returns:
        A list with one score per group, in ``trainData`` order.
    """
    scores = []
    for group in trainData:
        # Seed with the cap so it is kept unless a sentence scores strictly lower.
        candidates = [2]
        candidates.extend(
            similarSentence(sentence, line, distance, model, stop)
            for sentence in group
        )
        scores.append(min(candidates))
    return scores
def falseReviews(reviews, model):
    """Compare the last review of a JSON array with every review before it.

    ``reviews`` is decoded with ``json.loads``. The final element is the
    reference review; each earlier element is normalised with
    ``dealSentence`` and compared to it via ``similarSentence``.

    Args:
        reviews: JSON text encoding a sequence of reviews.
        model: Passed through unchanged to ``similarSentence``.

    Returns:
        A list of ``similarSentence`` results, one per element preceding the
        last, in order. An empty array (or a single-element one) gives ``[]``.
    """
    reviewsArray = json.loads(reviews)
    savedArray = []
    # len() is kept (rather than a truthiness test) so non-sized JSON values
    # such as null or numbers still raise TypeError instead of passing silently.
    if len(reviewsArray) > 0:
        stop = createStopword()
        reference = dealSentence(reviewsArray[-1])
        for earlier in reviewsArray[:-1]:
            candidate = dealSentence(earlier)
            savedArray.append(
                similarSentence(candidate, reference, distance, model, stop)
            )
    # Timestamped completion marker on stdout; single-argument print(...)
    # behaves the same on Python 2 and 3.
    print(time.strftime("%X", time.localtime()) + "\tend of deal false review")
    return savedArray
def reclassifyMethod(aa, trainData, model, stop):
    """Re-score every sentence of the review rows against ``trainData``.

    ``trainData`` is the trained sentence and ``aa`` is the whole review
    data set (translated from the original inline comment). Each row's HTML
    is read from index 4, parsed with ``BeautifulSoup``, and the children of
    ``contents[0].contents[0]`` are normalised with ``dealSentence``.

    Args:
        aa: Iterable of rows; ``row[4]`` must hold the HTML of one review.
        trainData: First argument handed to ``similarSentence`` for every
            sentence.
        model: Passed through unchanged to ``similarSentence``.
        stop: Passed through unchanged to ``similarSentence``.

    Returns:
        A list with one score per extracted sentence: the
        ``similarSentence`` result when it is below 2, otherwise 2.
    """
    print(time.strftime("%X", time.localtime()) + "\tReclassify the reviews data")

    # Phase 1: collect every normalised sentence before any scoring happens.
    sentences = []
    for row in aa:
        container = BeautifulSoup(row[4]).contents[0].contents[0]
        sentences.extend(dealSentence(node.get_text()) for node in container)

    # Phase 2: score each sentence, capping the result at 2.
    scores = []
    for sentence in sentences:
        result = similarSentence(trainData, sentence, distance, model, stop)
        scores.append(result if result < 2 else 2)
    return scores