def calculate_hoax_score(url, query, searched_content):
    """Score how likely *url*'s article is a hoax by cross-checking trusted RSS feeds.

    For each RSS feed returned by ``getTrusted.get_hoax_links()``, the first 10
    entries are compared against *query* by fuzzy title match; on a title match
    (ratio > 80) the linked article is downloaded and its body fuzzy-compared to
    *searched_content*. Each matched-title article whose body does NOT match
    (ratio <= 80) adds 1 to the score. The results of ``url_check`` and
    ``sentiment_check`` are then added.

    Parameters
    ----------
    url : str
        The URL being evaluated (passed through to url_check / sentiment_check).
    query : str
        Search query / headline used for fuzzy title matching.
    searched_content : str
        Full text of the article under evaluation.

    Returns
    -------
    int
        Accumulated hoax score; higher means more hoax-like.

    Notes
    -----
    Relies on module-level names ``getTrusted``, ``feedparser``, ``fuzz``,
    ``Article``, ``url_check``, ``sentiment_check``, ``trusted``, ``hoax``,
    ``sid`` defined elsewhere in this file. Performs network I/O.
    """
    hoax_score = 0

    # Search for similar content from trusted RSS feed sites.
    for url_rss in getTrusted.get_hoax_links():
        d = feedparser.parse(url_rss)
        # Only consider the first 10 entries of each feed, matching the
        # original rss_limit cutoff.
        for news in d['entries'][:10]:
            # TODO: replace fuzzy matching with gensim or another method.
            if fuzz.token_set_ratio(query, news['title_detail']['value']) > 80:
                paper = Article(news['link'], language='en')
                paper.download()
                paper.parse()
                text = ' '.join(paper.text.split('\n'))
                # TODO: replace with gensim or other method for efficient comparison.
                # A matched title whose body disagrees with the searched
                # content counts as evidence of a hoax.
                if fuzz.token_set_ratio(text, searched_content) <= 80:
                    hoax_score += 1

    hoax_score += url_check(trusted, hoax, url)
    hoax_score += sentiment_check(url, sid)
    return hoax_score
def calculate_hoax_score(url, query, searched_content):
    """Score how likely *url*'s article is a hoax by cross-checking trusted RSS feeds.

    For each RSS feed returned by ``getTrusted.get_hoax_links()``, the first 10
    entries are compared against *query* by fuzzy title match; on a title match
    (ratio > 80) the linked article is downloaded and its body fuzzy-compared to
    *searched_content*. Each matched-title article whose body does NOT match
    (ratio <= 80) adds 1 to the score. The results of ``url_check`` and
    ``sentiment_check`` are then added.

    Parameters
    ----------
    url : str
        The URL being evaluated (passed through to url_check / sentiment_check).
    query : str
        Search query / headline used for fuzzy title matching.
    searched_content : str
        Full text of the article under evaluation.

    Returns
    -------
    int
        Accumulated hoax score; higher means more hoax-like.

    Notes
    -----
    Relies on module-level names ``getTrusted``, ``feedparser``, ``fuzz``,
    ``Article``, ``url_check``, ``sentiment_check``, ``trusted``, ``hoax``,
    ``sid`` defined elsewhere in this file. Performs network I/O. The dead
    scaffolding left over from a removed ``search(query)`` loop (stop_count,
    commented-out download of *url*) has been deleted; the caller now supplies
    ``searched_content`` directly.
    """
    hoax_score = 0

    # Search for similar content from trusted RSS feed sites.
    for url_rss in getTrusted.get_hoax_links():
        d = feedparser.parse(url_rss)
        # Only consider the first 10 entries of each feed, matching the
        # original rss_limit cutoff.
        for news in d['entries'][:10]:
            # TODO: replace fuzzy matching with gensim or another method.
            if fuzz.token_set_ratio(query, news['title_detail']['value']) > 80:
                paper = Article(news['link'], language='en')
                paper.download()
                paper.parse()
                text = ' '.join(paper.text.split('\n'))
                # TODO: replace with gensim or other method for efficient comparison.
                # A matched title whose body disagrees with the searched
                # content counts as evidence of a hoax.
                if fuzz.token_set_ratio(text, searched_content) <= 80:
                    hoax_score += 1

    hoax_score += url_check(trusted, hoax, url)
    hoax_score += sentiment_check(url, sid)
    return hoax_score
# Script section: build local corpora from authentic and hoax RSS feeds.
# NOTE(review): this chunk starts and (apparently) ends mid-script —
# `authentic_folder`, `authentic_urls`, `titles`, `web_gen`, `getTrusted`
# are defined elsewhere in the file. Line breaks below were reconstructed
# from a flattened source; verify against the original layout.
os.chdir(authentic_folder)
url_count = 1
for url in authentic_urls:
    print("URL Count = {0}".format(url_count))
    url_count += 1
    # NOTE(review): the counter is incremented BEFORE this check, so the
    # loop prints counts 1..10 but fully processes only 9 feeds — the 10th
    # iteration breaks before parsing. Confirm whether 9 or 10 is intended.
    if (url_count > 10):
        break
    d = feedparser.parse(url)
    # Collect titles from the parsed feed; 0 presumably tags these as the
    # authentic (non-hoax) class — TODO confirm against web_gen.get_news_data.
    titles.extend(web_gen.get_news_data(d, 0))
    # create_files(d)
os.chdir('../..')

# Same procedure for the hoax feeds, written into a sibling 'Hoax' folder.
hoax_urls = getTrusted.get_hoax_links()
print("Number of hoax links = {0}".format(len(hoax_urls)))
hoax_folder = r'Hoax'
if not os.path.exists(hoax_folder):
    os.makedirs(hoax_folder)
os.chdir(hoax_folder)
url_count = 1
count = 1  # NOTE(review): `count` is not used in the visible lines; likely used further down.
for url in hoax_urls:
    print("URL Count = {0}".format(url_count))
    url_count += 1
    # Same increment-before-check cutoff as the authentic loop above.
    if (url_count > 10):
        break
    d = feedparser.parse(url)