Example no. 1
0
def calculate_hoax_score(url, query, searched_content):
    """Score how likely the story at *url* is a hoax.

    Fuzzy-matches *query* against headlines from trusted RSS feeds; when a
    headline matches, downloads that trusted article and compares its body
    to *searched_content*. A matching title with a non-matching body counts
    as hoax evidence. URL-reputation and sentiment penalties are added last.

    Parameters:
        url (str): address of the article being checked.
        query (str): headline / search query describing the story.
        searched_content (str): full text of the article being checked.

    Returns:
        int: accumulated hoax score; higher means more likely a hoax.
    """
    hoax_score = 0
    # Inspect only the first few entries per feed; fuzzy-matching every
    # entry of every feed would be too slow.
    max_entries_per_feed = 10
    match_threshold = 80  # token_set_ratio percentage for title and body

    # Search for similar content from trusted RSS feed sites.
    for url_rss in getTrusted.get_hoax_links():
        feed = feedparser.parse(url_rss)
        for news in feed['entries'][:max_entries_per_feed]:
            # TODO: replace with gensim or other method
            if (fuzz.token_set_ratio(query, news['title_detail']['value']) >
                    match_threshold):
                paper = Article(news['link'], language='en')
                paper.download()
                paper.parse()
                text = ' '.join(paper.text.split('\n'))
                # TODO: replace with gensim or other method for efficient
                # comparison
                if fuzz.token_set_ratio(text, searched_content) <= match_threshold:
                    # Title matches a trusted story but the body differs:
                    # treat as evidence of a hoax.
                    hoax_score += 1
    hoax_score += url_check(trusted, hoax, url)
    hoax_score += sentiment_check(url, sid)
    return hoax_score
Example no. 2
0
def calculate_hoax_score(url, query, searched_content):
    """Score how likely the story at *url* is a hoax.

    Fuzzy-matches *query* against headlines from trusted RSS feeds; when a
    headline matches, downloads that trusted article and compares its body
    to *searched_content*. A matching title with a non-matching body counts
    as hoax evidence. URL-reputation and sentiment penalties are added last.

    Parameters:
        url (str): address of the article being checked.
        query (str): headline / search query describing the story.
        searched_content (str): full text of the article being checked.

    Returns:
        int: accumulated hoax score; higher means more likely a hoax.
    """
    hoax_score = 0
    # Cap the number of entries examined per feed to keep the fuzzy
    # comparisons from dominating the run time.
    max_entries_per_feed = 10
    match_threshold = 80  # token_set_ratio percentage for title and body

    # Search for similar content from trusted RSS feed sites.
    for url_rss in getTrusted.get_hoax_links():
        feed = feedparser.parse(url_rss)
        for news in feed['entries'][:max_entries_per_feed]:
            # TODO: replace with gensim or other method
            if (fuzz.token_set_ratio(query, news['title_detail']['value']) >
                    match_threshold):
                paper = Article(news['link'], language='en')
                paper.download()
                paper.parse()
                text = ' '.join(paper.text.split('\n'))
                # TODO: replace with gensim or other method for efficient
                # comparison
                if fuzz.token_set_ratio(text, searched_content) <= match_threshold:
                    # Title matched a trusted headline but the body does
                    # not: count it as hoax evidence.
                    hoax_score += 1
    hoax_score += url_check(trusted, hoax, url)
    hoax_score += sentiment_check(url, sid)
    return hoax_score
Example no. 3
0
os.chdir(authentic_folder)

url_count = 1

# Pull at most 10 authentic feeds. The limit is checked BEFORE printing and
# incrementing: the original incremented first and broke on `> 10`, which
# printed the tenth URL but never parsed it (only 9 feeds processed) —
# inconsistent with the rss_limit pattern used in calculate_hoax_score.
for url in authentic_urls:
    if url_count > 10:
        break
    print("URL Count = {0}".format(url_count))
    url_count += 1
    d = feedparser.parse(url)
    # Label 0 = authentic news.
    titles.extend(web_gen.get_news_data(d, 0))

os.chdir('../..')

# Collect hoax feeds into their own folder, mirroring the authentic set.
hoax_urls = getTrusted.get_hoax_links()
print("Number of hoax links = {0}".format(len(hoax_urls)))
hoax_folder = r'Hoax'
# exist_ok avoids the check-then-create race of `if not exists: makedirs`.
os.makedirs(hoax_folder, exist_ok=True)
os.chdir(hoax_folder)

url_count = 1
count = 1

for url in hoax_urls:
    print("URL Count = {0}".format(url_count))
    url_count += 1
    if (url_count > 10):
        break
    d = feedparser.parse(url)