def _bin_progress_line(action, sign, done, total):
    """Return one progress line, e.g. ``Scanning +ve review  1 .......... 50.0 %``."""
    percent = float(done) * 100 / float(total)
    # Joining with single spaces reproduces the spacing of a comma-separated
    # ``print`` statement, including the double space after "review ".
    return ' '.join([
        '%s %s review ' % (action, sign),
        str(done),
        '.' * 10,
        str(percent),
        '%',
    ])


def bins_svm_approach(llimit, ulimit, isphrase, pathname):
    """Build the bin-feature training set from movie reviews and save it as JSON.

    For the positive and then the negative ``movie_reviews`` category, the
    file ids in the slice ``[llimit:ulimit]`` are each passed to
    ``proximity_tagger.bin_list`` and the returned values are collected.
    Positive entries are labelled ``1`` and negative entries ``0``.  The pair
    ``[features, labels]`` is written with ``json.dump`` to
    ``train_result/proximity_bin_train_result_<isphrase>``.

    Args:
        llimit: Start index of the slice taken from each category's file ids.
        ulimit: End index (exclusive) of that slice.
        isphrase: Forwarded unchanged to ``proximity_tagger.bin_list``; its
            string form is also used as the suffix of the output file name.
        pathname: Forwarded unchanged to ``proximity_tagger.bin_list``.

    Returns:
        None.  Progress is printed to stdout and the result is written to disk.

    Raises:
        IOError: If the output file cannot be opened for writing (for example
            when the ``train_result`` directory does not exist).
    """
    # (corpus category, sign shown in progress output,
    #  flag handed to bin_list as its 4th argument, class label)
    # NOTE(review): the meaning of the 4th bin_list argument (0 for positive,
    # 1 for negative) is defined in proximity_tagger -- confirm there.
    category_specs = (
        ('pos', '+ve', 0, 1),
        ('neg', '-ve', 1, 0),
    )

    # Phase 1: extract one bin list per review, category by category.
    bins_per_category = []
    for category, sign, tagger_flag, _label in category_specs:
        print('\nNo of %s reviews scanned for training : ' % sign)
        fileids = movie_reviews.fileids(categories=[category])[llimit:ulimit]
        # Use the number of reviews actually selected so the percentage is
        # right even when the slice is shorter than ``ulimit - llimit``.
        total = len(fileids)
        bins = []
        for index, fid in enumerate(fileids):
            # ``index`` is the zero-based position of the review within its
            # category's slice.
            bins.append(proximity_tagger.bin_list(
                movie_reviews.abspath(fid), isphrase, index, tagger_flag,
                pathname))
            print(_bin_progress_line('Scanning', sign, index + 1, total))
        bins_per_category.append(bins)

    # Phase 2: assemble the feature rows and their class labels, keeping the
    # positive rows first so rows and labels stay aligned.
    trainingdata = []
    trainingclass = []
    for (_category, sign, _flag, label), bins in zip(category_specs,
                                                     bins_per_category):
        print('\nNo of %s reviews trained : ' % sign)
        trainingdata.extend(bins)
        total = len(bins)
        for done in range(1, total + 1):
            trainingclass.append(label)
            print(_bin_progress_line('Training', sign, done, total))

    bin_train_set = [trainingdata, trainingclass]

    # Forward slash keeps this path identical to the one review_test() reads;
    # a backslash here produces a different file name on non-Windows systems.
    result_path = 'train_result/proximity_bin_train_result_' + str(isphrase)
    with open(result_path, 'w') as f:
        json.dump(bin_train_set, f)
def review_test(isphrase):
    """Classify ``samplereview.txt`` by combining three votes.

    Two ``LinearSVC`` classifiers are fitted from the JSON training results
    stored under ``train_result/`` (bin features and pattern features; each
    file holds ``[features, labels]``).  Their predictions for
    ``samplereview.txt`` are turned into votes of ``1`` or ``-1`` and added
    to the value returned by ``median_result``.

    Args:
        isphrase: Forwarded to ``median_result``, ``proximity_tagger.bin_list``
            and ``proximity_tagger.pattern_list``; its string form is also the
            suffix of both training-result file names.

    Returns:
        ``1`` if the sum of the three votes is greater than zero, else ``0``.

    Raises:
        IOError: If either training-result file cannot be opened.
    """
    suffix = str(isphrase)

    # Bin-feature classifier.
    with open('train_result/proximity_bin_train_result_' + suffix, 'r') as f:
        bin_trainset = json.load(f)
    bin_clf = LinearSVC()
    bin_clf.fit(bin_trainset[0], bin_trainset[1])

    # Pattern-feature classifier.
    with open('train_result/proximity_pattern_train_result_' + suffix, 'r') as f:
        pattern_trainset = json.load(f)
    pattern_clf = LinearSVC()
    pattern_clf.fit(pattern_trainset[0], pattern_trainset[1])

    # NOTE(review): presumably a signed vote like the two below -- confirm
    # against median_result().
    med_val = median_result('samplereview.txt', isphrase)

    # Bin vote: 1 only when the prediction equals [1], otherwise -1.
    # NOTE(review): this comparison assumes predict() yields a single value
    # for the sample review -- confirm the shape returned by bin_list().
    bin_val = -1
    bin_prediction = bin_clf.predict(
        proximity_tagger.bin_list('samplereview.txt', isphrase))
    if bin_prediction == [1]:
        bin_val = 1

    # Pattern vote: 1 when more than half of the predictions sum up as
    # positive, otherwise -1.  ``//`` keeps the integer division explicit.
    pat_val = -1
    pattern_predictions = pattern_clf.predict(
        proximity_tagger.pattern_list('samplereview.txt', isphrase))
    if sum(pattern_predictions) > (len(pattern_predictions) // 2):
        pat_val = 1

    if (med_val + bin_val + pat_val) > 0:
        return 1
    return 0