def initialize(self, article):
    """Score how strongly *article* is corroborated by other web sources.

    Pipeline: summarize the input article, Google-search it, scrape the
    article body from each result page, summarize each scraped body, then
    compare every summary against the original using LSA similarity and
    Word Mover's Distance, fused into one score by a fuzzy scorer.

    Args:
        article: The raw article text to verify.

    Returns:
        Tuple ``(score, sites)`` where ``score`` is the sum of the two
        best per-site scores, rounded and scaled to 0-100 (capped at
        100), and ``sites`` is the list of URLs that were searched.
        Returns ``(0, [])`` if a fetched page fails to parse, and
        ``(0, sites)`` if no site yielded any article text.
    """
    similarity_array = []
    test = self.articleSummerization(article, 1)  # one-line summary of the input
    print('-------Summerized Title-------')
    print(test)
    # googleSearch returns the candidate URLs plus, per URL, the CSS class
    # of the <div> holding the article body on that site — presumably a
    # per-site scraping hint; verify against googleSearch's implementation.
    sitesContainingArticle, scrapId = self.googleSearch(article)
    print('sites_length_after_google search', len(sitesContainingArticle))
    for index, url in enumerate(sitesContainingArticle):
        print('URL ', url, scrapId[index], '\n')
        raw_html = self.simple_get(url)  # full page site content
        try:
            soup = BeautifulSoup(raw_html, 'html.parser')
        except Exception as e:
            # NOTE(review): one unparseable page aborts the whole run and
            # discards everything scraped so far; kept for compatibility,
            # but `continue` is probably what was intended — confirm.
            print(e)
            return 0, []
        # Strip <script> tags so they do not pollute the extracted text.
        for script_tag in soup('script'):
            script_tag.extract()
        soup_article = soup.find_all('div', {"class": scrapId[index]})
        # str.join avoids the quadratic cost of repeated `+=` on strings.
        article_string = ''.join(data.text for data in soup_article)
        if article_string:
            similarity_array.append(
                self.articleSummerization(article_string, 5))
        else:
            print('nahi aaya\n')  # debug: nothing scraped for this site
    mylsa = LSA()
    wmdinit = WordMoverDistance()
    if not similarity_array:
        # No site produced any text to compare against.
        return 0, sitesContainingArticle
    score_array = []
    for count, summary in enumerate(similarity_array):
        print('\n\n', summary)
        # NOTE(review): the input text is doubled ('article + " " + article');
        # assumed to be deliberate weighting of the original — confirm.
        lsa_similarity = mylsa.start(
            [article + ' ' + article] + similarity_array, count + 1)
        wmdinit.data_accept(summary, article)
        wmddistance = wmdinit.model()
        print('wordmover distance is', wmddistance)
        fuzzy = Fuzzy(lsa_similarity, wmddistance)
        score = fuzzy.get_score_data()
        print('final score ', score)
        score_array.append(score)
    # Keep only the two best per-site scores, scale to 0-100, cap at 100.
    score_array.sort(reverse=True)
    return min(100, np.around(sum(score_array[:2]), decimals=2) * 100), sitesContainingArticle