def Advanced(self, string, html, link):
    """Classify a page as SAFE/unsafe from its stemmed text and link content."""
    global i
    global data2

    # Stem the page body and the anchor text of its links.
    stemmed_text, link_stemmed_text, c = ContentAnalyzer_Utils(string, html, link).read_text()

    if i == 0:
        # First call only: load and stem the reference word lists.
        self.news_list = self.openandstem('news.txt')
        self.games_list = self.openandstem('games.txt')
        self.med_list = self.openandstem('medicine.txt')

    # Hand the stemmed tokens to the Bayesian learner via pickle files.
    with open('pfile_text.p', 'wb') as f:
        pickle.dump(stemmed_text, f)
    with open('pfile_link.p', 'wb') as f:
        pickle.dump(link_stemmed_text, f)

    region_text, region_link, length_list_link_text, length_link_text = Bayesian(i, c).learning()
    p = Content_Classifier_2(region_text, region_link, length_list_link_text, length_link_text)

    # Collect one text verdict and one link verdict per region (1..6),
    # interleaved into the 12-slot global result list.
    k = 0
    for u in range(1, 7):
        data2[k] = p.classify_text(u)
        data2[k + 1] = p.classify_link(u)
        k += 2

    i = 1  # word lists are loaded; later calls skip the setup above

    s = list(data2)  # snapshot of the verdicts to return to the caller

    value = self.check_advanced(data2, stemmed_text)
    if sum(data2) != 12:
        value = value + ' ' + self.reason(data2)
    else:
        # All 12 region verdicts passed.
        value = 'SAFE'

    if sum(data2) == 0 and len(stemmed_text) <= 20:
        value = 'Not-Enough-Text-on-Site'
    elif len(stemmed_text) == 0:
        value = 'ERROR'

    return s, value
def openandstem(self, file1):
    """Read a word-list file from the text/ directory and return it stemmed."""
    path = directory.path()
    with open(path + '/text/' + file1, 'r') as f:
        doc = f.read()
    return Stemming().stem(doc)
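# Hedged usage sketch, not part of the original module: the enclosing class
# is not shown in this excerpt, so `SiteClassifier` below is a hypothetical
# stand-in for whatever class defines Advanced() and openandstem().
# Advanced() relies on module-level state (declared via `global` above), so
# the defining module needs something like:
#
#     import pickle             # used for the pfile_text.p / pfile_link.p handoff
#     i = 0                     # 0 -> reference word lists not loaded yet
#     data2 = [0] * 12          # text + link verdicts for regions 1..6
#
# A call then looks like:
#
#     clf = SiteClassifier()
#     scores, verdict = clf.Advanced(page_text, page_html, page_links)
#     if verdict != 'SAFE':
#         print('flagged:', verdict)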