# Requires module-level: json, os, and the project's gd (get_data) and dbase modules.
def __init__(self):
    get = gd.get_data()
    concerns = get.concerns()
    # Start with the first three concerns gathered this run.
    final_concerns = []
    limit = 0
    for con in concerns:
        if limit < 3:
            final_concerns.append(con)
            limit += 1
    # Merge this run's concern counts with the historical copy stored in the database.
    dbs = dbase.access_db()
    dbs.get_file('twitter_concerns_inf', 'DB/twitter_concerns_inf.json')
    with open('DB/twitter_concerns_inf.json', 'r') as db_file:
        db_data = json.load(db_file)
    with open('raw/twitter_concerns.json', 'r') as tc_file:
        tc_data = json.load(tc_file)
    with open('raw/twitter_concerns_inf.json', 'w') as js_file:
        js_data = {}
        for i in db_data:
            js_data[i] = db_data[i] + tc_data[i]
        # Rank the merged counts and add up to three top concerns not already listed.
        top_list = sorted(js_data.items(), key=lambda kv: kv[1], reverse=True)
        limit = 0
        for i in range(len(top_list)):
            if limit < 3:
                print(top_list[i][0], final_concerns)
                if top_list[i][0] not in final_concerns:
                    final_concerns.append(top_list[i][0])
                limit += 1
        json.dump(js_data, js_file, indent=4, sort_keys=True)
    with open('clean/final_concerns_inf.txt', 'a') as final:
        for final_con in final_concerns:
            final.write(final_con + '\n')
    os.remove('DB/twitter_concerns_inf.json')
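# A minimal sketch of the merge-and-rank step above, using made-up concern
# counts (placeholders, not project data), for reference:
def _demo_merge_and_rank():
    db_data = {'health': 4, 'education': 2, 'traffic': 7}   # hypothetical DB counts
    tc_data = {'health': 1, 'education': 5, 'traffic': 2}   # hypothetical raw counts
    merged = {concern: db_data[concern] + tc_data[concern] for concern in db_data}
    # Highest combined count first: [('traffic', 9), ('education', 7), ('health', 5)]
    return sorted(merged.items(), key=lambda kv: kv[1], reverse=True)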
# Requires module-level: json, os, re, nltk (word_tokenize, pos_tag, map_tag),
# textblob.TextBlob, collections.Counter, and the project's gd, md, dbase modules.
def __init__(self):
    get = gd.get_data()
    mod = md.modify_data()
    dbs = dbase.access_db()
    json_data = {}
    # Pull the historical (inferred) scores from the database, plus this run's tweets.
    dbs.get_file('tweet_scores_inf', 'DB/clean/tweet_scores_inf.json')
    with open('DB/clean/tweet_scores_inf.json', 'r') as json_file:
        dbs_data = json.load(json_file)
    with open('clean/final_tweets.json', 'r') as json_file:
        data = json.load(json_file)
    senators = get.senators()
    concerns = get.concerns()
    for sen in senators:
        for con in concerns:
            key = sen + ' - ' + con
            json_data[key] = []
            total_tweets = len(data[sen][con])
            pos = 0
            neg = 0
            neu = 0
            pos_tweets = []
            neg_tweets = []
            neu_tweets = []
            for i in range(total_tweets):
                tweet = data[sen][con][i]['tweet_text2']
                polarity = TextBlob(tweet).sentiment.polarity
                # Weight each tweet by how credible the posting account looks.
                score = self.check_score(data[sen][con][i]['user_verified'],
                                         data[sen][con][i]['user_created'],
                                         data[sen][con][i]['user_follower'],
                                         data[sen][con][i]['is_retweet'])
                if polarity >= 0.1:
                    pos += score
                    pos_tweets.append(tweet)
                    print('POSITIVE', polarity, tweet)
                elif polarity <= -0.1:
                    neg += score
                    neg_tweets.append(tweet)
                    print('NEGATIVE', polarity, tweet)
                else:
                    neu += score
                    neu_tweets.append(tweet)
                    print('NEUTRAL', polarity, tweet)
                # Collect content words (nouns, verbs, adjectives) for the keyword summary.
                with open('common_words.txt', 'a') as common_words:
                    cleaned = mod.translate(tweet)
                    cleaned = mod.remove_stopwords(cleaned)
                    tokens = nltk.word_tokenize(cleaned)
                    tagged = pos_tag(tokens)
                    universal = [(word, map_tag('en-ptb', 'universal', tag))
                                 for word, tag in tagged]
                    for word, tag in universal:
                        if tag in ('NOUN', 'VERB', 'ADJ') and word != sen and word not in con:
                            common_words.write(word + ' ')
            total = pos + neg + neu
            json_data[key].append({
                'pos': pos,
                'neg': neg,
                'neu': neu,
                'total': total,
                'num_tweets': total_tweets,
                'pos_tweets': pos_tweets,
                'neg_tweets': neg_tweets,
                'neu_tweets': neu_tweets
            })
            try:
                # Fold this run's results into the historical copy.
                for pt in pos_tweets:
                    dbs_data[key][0]['pos_tweets'].append(pt)
                for nt in neg_tweets:
                    dbs_data[key][0]['neg_tweets'].append(nt)
                for nt in neu_tweets:
                    dbs_data[key][0]['neu_tweets'].append(nt)
                dbs_data[key][0]['pos'] += pos
                dbs_data[key][0]['neg'] += neg
                dbs_data[key][0]['neu'] += neu
            except KeyError:
                # First appearance of this senator/concern pair: seed the historical copy.
                dbs_data[key] = []
                dbs_data[key].append({
                    'pos': pos,
                    'neg': neg,
                    'neu': neu,
                    'total': total,
                    'num_tweets': total_tweets,
                    'pos_tweets': pos_tweets,
                    'neg_tweets': neg_tweets,
                    'neu_tweets': neu_tweets
                })
            if total != 0:
                print(key)
                print('Positive: ' + str(round(pos / total * 100, 2)) + '%\n' +
                      'Negative: ' + str(round(neg / total * 100, 2)) + '%\n' +
                      'Neutral: ' + str(round(neu / total * 100, 2)) + '%')
                with open('common_words.txt') as cw_file:
                    words = re.findall(r'\w+', cw_file.read().lower())
                count = Counter(words).most_common(3)
                common = ''
                for cnt in count:
                    common = common + cnt[0] + ' '
                print('General Keywords: ' + common)
                os.remove('common_words.txt')
                print('From ' + str(total_tweets) + ' tweets.\n')
    # Write this run's scores and the updated historical copy, then drop the DB temp file.
    with open('clean/tweet_scores.json', 'w') as json_file:
        json.dump(json_data, json_file, indent=4, sort_keys=True)
    with open('clean/tweet_scores_inf.json', 'w') as json_file:
        json.dump(dbs_data, json_file, indent=4, sort_keys=True)
    os.remove('DB/clean/tweet_scores_inf.json')
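# A minimal sketch of the polarity thresholds used above: a TextBlob polarity of
# 0.1 or higher counts as positive, -0.1 or lower as negative, and anything in
# between as neutral. The helper name is illustrative only.
def _demo_polarity_label(tweet_text):
    from textblob import TextBlob

    polarity = TextBlob(tweet_text).sentiment.polarity
    if polarity >= 0.1:
        return 'POSITIVE'
    if polarity <= -0.1:
        return 'NEGATIVE'
    return 'NEUTRAL'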
# from modules import search_twitter as st
# from modules import rss
# from modules import triangulation as tr
# from modules import sentiment_analysis as sa
from modules import dbase

# st.gather_concerns()      # CHANGE IP
# st.gather_tweets()        # CHANGE IP
# rss.gather_rss()          # CHANGE IP
# tr.compare_tweet_rss()
# sa.analyze_tweets()

dbs = dbase.access_db()
dbs.insert_all_file()
# Requires module-level: feedparser, sqlite3, os, datetime (dt), and the project's md and dbase modules.
def __init__(self, week):
    dbs = dbase.access_db()
    # RSS feeds of Philippine news outlets to poll.
    news_urls = {
        'gmanews1': 'https://data.gmanews.tv/gno/rss/news/nation/feed.xml',
        'gmanews2': 'https://data.gmanews.tv/gno/rss/news/regions/feed.xml',
        'gmanews3': 'https://data.gmanews.tv/gno/rss/news/ulatfilipino/feed.xml',
        'gmanews4': 'https://data.gmanews.tv/gno/rss/news/specialreports/feed.xml',
        'philstar1': 'https://www.philstar.com/rss/headlines',
        'philstar2': 'https://www.philstar.com/rss/nation',
        'philstar3': 'https://www.philstar.com/rss/agriculture',
        'inquirer': 'https://www.inquirer.net/fullfeed',
        'manilatimes': 'https://www.manilatimes.net/feed/',
        'businessworld': 'http://www.bworldonline.com/feed/',
        'eaglenews': 'https://www.eaglenews.ph/feed/',
        'sunstarDav': 'https://www.sunstar.com.ph/rssFeed/67/29',
        'sunstarDav2': 'https://www.sunstar.com.ph/rssFeed/67',
        'sunstarMnl': 'https://www.sunstar.com.ph/rssFeed/70',
        'sunstarMnl2': 'https://www.sunstar.com.ph/rssFeed/70/50',
        'sunstarZam': 'https://www.sunstar.com.ph/rssFeed/76',
        'sunstarZam2': 'https://www.sunstar.com.ph/rssFeed/76/78',
        'sunstarCeb': 'https://www.sunstar.com.ph/rssFeed/63/1',
        'sunstarCeb2': 'https://www.sunstar.com.ph/rssFeed/63',
        'sunstar1': 'https://www.sunstar.com.ph/rssFeed/81',
        'sunstar2': 'https://www.sunstar.com.ph/rssFeed/81/97',
        'sunstar3': 'https://www.sunstar.com.ph/rssFeed/selected',
        'businessmirror': 'https://businessmirror.com.ph/feed/',
        'PhilNewAgency': 'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.pna.gov.ph%2Flatest.rss',
        'interaksyon': 'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.interaksyon.com%2Ffeed'
    }
    print('Gathering rss feed on news sources...')
    mod = md.modify_data()
    raw_rss = []
    if week == 'same_week':
        # Reload headlines already collected this week so duplicates are skipped below.
        try:
            dbs.get_file('raw_rss', 'raw/raw_rss.txt')
            dbs.get_file('clean_rss', 'clean/clean_rss.txt')
            with open('raw/raw_rss.txt', 'r') as raw_file:
                for raw in raw_file:
                    raw = raw.split('\n')[0]
                    raw_rss.append(raw)
        except FileNotFoundError:
            pass
    for key, url in news_urls.items():
        feed = feedparser.parse(url)
        for newsitem in feed['items']:
            news = newsitem.title.encode('ascii', 'ignore').decode('utf-8')
            if news not in raw_rss:
                raw_rss.append(news)
                with open('raw/raw_rss.txt', 'a') as raw_file:
                    raw = news + '\n'
                    raw_file.write(raw)
                # Translate the headline and strip stopwords before saving the clean copy.
                news2 = mod.translate(news)
                news2 = mod.remove_stopwords(news2)
                with open('clean/clean_rss.txt', 'a') as clean_file:
                    clean = news2 + '\n'
                    clean_file.write(clean)
    print('Saved raw rss data on "raw_rss.txt"...')
    print('Saved clean rss data on "clean_rss.txt"...')
    print('Finished gathering rss data...')
    # Store both text files as blobs in the local SQLite database, then remove the temp files.
    conn = sqlite3.connect('policalc.db')
    db_con = conn.cursor()
    with open('raw/raw_rss.txt', 'rb') as file:
        blob_file = file.read()
    db_con.execute(
        "INSERT INTO {} VALUES (:id, :date, :file)".format('raw_rss'),
        {'id': None, 'date': dt.now(), 'file': blob_file})
    conn.commit()
    with open('clean/clean_rss.txt', 'rb') as file:
        blob_file2 = file.read()
    db_con.execute(
        "INSERT INTO {} VALUES (:id, :date, :file)".format('clean_rss'),
        {'id': None, 'date': dt.now(), 'file': blob_file2})
    conn.commit()
    conn.close()
    os.remove('raw/raw_rss.txt')
    os.remove('clean/clean_rss.txt')
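# The INSERT statements above assume that the raw_rss and clean_rss tables
# already exist with an (id, date, file) layout. A schema along these lines
# would satisfy them; the column types are an assumption, not taken from the project.
def _demo_create_rss_tables(db_path='policalc.db'):
    import sqlite3

    conn = sqlite3.connect(db_path)
    for table in ('raw_rss', 'clean_rss'):
        conn.execute(
            'CREATE TABLE IF NOT EXISTS {} ('
            'id INTEGER PRIMARY KEY AUTOINCREMENT, '
            'date TEXT, '
            'file BLOB)'.format(table))
    conn.commit()
    conn.close()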