Code Example #1
File: get_fc_inf.py  Project: rjaragon53/Policalc
    def __init__(self):
        get = gd.get_data()
        concerns = get.concerns()
        final_concerns = []

        # Keep only the first three locally gathered concerns.
        for con in concerns:
            if len(final_concerns) >= 3:
                break
            final_concerns.append(con)

        # Pull the stored influencer concern counts out of the database.
        dbs = dbase.access_db()
        dbs.get_file('twitter_concerns_inf', 'DB/twitter_concerns_inf.json')
        with open('DB/twitter_concerns_inf.json', 'r') as db_file:
            db_data = json.load(db_file)

            with open('raw/twitter_concerns.json', 'r') as tc_file:
                tc_data = json.load(tc_file)

                with open('raw/twitter_concerns_inf.json', 'w') as js_file:
                    js_data = {}

                    # Merge the stored counts with this run's raw counts,
                    # then rank concerns from most to least mentioned.
                    for i in db_data:
                        js_data[i] = db_data[i] + tc_data[i]

                    top_list = sorted(js_data.items(),
                                      key=lambda kv: kv[1],
                                      reverse=True)

                    # Add up to three of the top-ranked concerns, skipping
                    # any that were already picked locally.
                    limit = 0
                    for name, _ in top_list:
                        if limit >= 3:
                            break
                        print(name, final_concerns)
                        if name not in final_concerns:
                            final_concerns.append(name)
                        limit += 1

                    json.dump(js_data, js_file, indent=4, sort_keys=True)

        # Append the final concern list for this run, then drop the
        # temporary copy of the database file.
        with open('clean/final_concerns_inf.txt', 'a') as final:
            for final_con in final_concerns:
                final.write(final_con + '\n')

        os.remove('DB/twitter_concerns_inf.json')
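The merge-and-rank step is the core of this example: add the stored counts to the fresh counts, sort, and keep the top three. A minimal standalone sketch of just that step, assuming both JSON files map concern names to non-negative integer counts as they do here; the function name is hypothetical.

import json
from collections import Counter

def merge_top_concerns(db_path, raw_path, k=3):
    # Counter addition sums counts per key, mirroring
    # db_data[i] + tc_data[i] in the example above.
    with open(db_path) as f:
        db_counts = Counter(json.load(f))
    with open(raw_path) as f:
        raw_counts = Counter(json.load(f))
    merged = db_counts + raw_counts
    # most_common(k) replaces the manual sort-and-slice.
    return [name for name, _ in merged.most_common(k)]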
Code Example #2
    def __init__(self):

        get = gd.get_data()
        mod = md.modify_data()
        dbs = dbase.access_db()
        json_data = {}

        # Pull the stored influencer scores out of the database.
        dbs.get_file('tweet_scores_inf', 'DB/clean/tweet_scores_inf.json')
        with open('DB/clean/tweet_scores_inf.json', 'r') as json_file:
            dbs_data = json.load(json_file)

        with open('clean/final_tweets.json', 'r') as json_file:
            data = json.load(json_file)

            senators = get.senators()
            concerns = get.concerns()

            for sen in senators:
                for con in concerns:
                    json_data[sen + ' - ' + con] = []
                    total_tweets = len(data[sen][con])
                    pos = 0
                    neg = 0
                    neu = 0
                    pos_tweets = []
                    neg_tweets = []
                    neu_tweets = []

                    for i in range(total_tweets):
                        tweet = data[sen][con][i]['tweet_text2']
                        polarity = TextBlob(tweet).sentiment.polarity
                        # Score the tweet from its author's account attributes.
                        score = self.check_score(data[sen][con][i]['user_verified'],
                                                 data[sen][con][i]['user_created'],
                                                 data[sen][con][i]['user_follower'],
                                                 data[sen][con][i]['is_retweet'])

                        # Bucket the tweet by polarity; anything within
                        # 0.1 of zero counts as neutral.
                        if polarity >= 0.1:
                            pos += score
                            pos_tweets.append(tweet)
                            print('POSITIVE', polarity, tweet)
                        elif polarity <= -0.1:
                            neg += score
                            neg_tweets.append(tweet)
                            print('NEGATIVE', polarity, tweet)
                        else:
                            neu += score
                            neu_tweets.append(tweet)
                            print('NEUTRAL', polarity, tweet)

                        # Collect content words (nouns, verbs, adjectives)
                        # from the cleaned tweet for the keyword summary.
                        with open('common_words.txt', 'a') as common_words:
                            tweet = mod.translate(tweet)
                            tweet = mod.remove_stopwords(tweet)
                            tokens = nltk.word_tokenize(tweet)
                            pos_tagged = pos_tag(tokens)
                            tagged = [(word, map_tag('en-ptb', 'universal', tag)) for word, tag in pos_tagged]

                            for word, tag in tagged:
                                if tag in ('NOUN', 'VERB', 'ADJ'):
                                    if word != sen and word not in con:
                                        common_words.write(word + ' ')

                    total = pos + neg + neu

                    json_data[sen + ' - ' + con].append({
                        'pos': pos, 'neg': neg, 'neu': neu, 'total': total, 'num_tweets': total_tweets,
                        'pos_tweets': pos_tweets, 'neg_tweets': neg_tweets, 'neu_tweets': neu_tweets
                    })
                    # Fold this run's tweets and scores into the stored
                    # influencer data; on a KeyError the pair is new, so
                    # seed the stored entry with this run's stats.
                    try:
                        for pt in pos_tweets:
                            dbs_data[sen + ' - ' + con][0]['pos_tweets'].append(pt)
                        for nt in neg_tweets:
                            dbs_data[sen + ' - ' + con][0]['neg_tweets'].append(nt)
                        for nt in neu_tweets:
                            dbs_data[sen + ' - ' + con][0]['neu_tweets'].append(nt)

                        dbs_data[sen + ' - ' + con][0]['pos'] += pos
                        dbs_data[sen + ' - ' + con][0]['neg'] += neg
                        dbs_data[sen + ' - ' + con][0]['neu'] += neu

                    except KeyError:
                        dbs_data[sen + ' - ' + con] = [{
                            'pos': pos, 'neg': neg, 'neu': neu, 'total': total, 'num_tweets': total_tweets,
                            'pos_tweets': pos_tweets, 'neg_tweets': neg_tweets, 'neu_tweets': neu_tweets
                        }]

                    if total != 0:
                        print(sen + ' - ' + con)
                        print('Positive: ' + str(round(pos/total*100, 2)) +
                              '%\nNegative: ' + str(round(neg/total*100, 2)) +
                              '%\nNeutral: ' + str(round(neu/total*100, 2)) + '%')

                        # Summarize the three most frequent content words
                        # gathered for this senator-concern pair.
                        with open('common_words.txt') as cw_file:
                            words = re.findall(r'\w+', cw_file.read().lower())
                        count = Counter(words).most_common(3)
                        common = ' '.join(cnt[0] for cnt in count)
                        print('General Keywords: ' + common)
                        os.remove("common_words.txt")

                        print('From ' + str(total_tweets) + ' tweets.\n')

        with open('clean/tweet_scores.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4, sort_keys=True)

        with open('clean/tweet_scores_inf.json', 'w') as json_file:
            json.dump(dbs_data, json_file, indent=4, sort_keys=True)

        os.remove("DB/clean/tweet_scores_inf.json")
Code Example #3
# from modules import search_twitter as st
# from modules import rss
# from modules import triangulation as tr
# from modules import sentiment_analysis as sa
from modules import dbase

# st.gather_concerns()
# CHANGE IP
# st.gather_tweets()
# CHANGE IP
# rss.gather_rss()
# CHANGE IP
# tr.compare_tweet_rss()
# sa.analyze_tweets()
dbs = dbase.access_db()
dbs.insert_all_file()
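Every example on this page goes through dbase.access_db(), whose implementation is not shown. A plausible sketch of its get_file() helper, assuming the (id, date, file BLOB) schema visible in rss.py below: fetch the newest stored BLOB for a table and write it to a local path. The body is an assumption for illustration, not the project's actual code.

import sqlite3

class access_db:
    def get_file(self, table, dest_path):
        # Restore the most recent stored copy of a file. The table name
        # comes from trusted call sites; SQLite cannot bind identifiers
        # as parameters, hence the string formatting.
        conn = sqlite3.connect('policalc.db')
        row = conn.execute(
            'SELECT file FROM {} ORDER BY date DESC LIMIT 1'.format(table)
        ).fetchone()
        conn.close()
        if row is None:
            # Callers in rss.py catch FileNotFoundError when no copy
            # exists yet.
            raise FileNotFoundError(dest_path)
        with open(dest_path, 'wb') as out:
            out.write(row[0])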
Code Example #4
File: rss.py  Project: rjaragon53/Policalc
    def __init__(self, week):
        dbs = dbase.access_db()
        news_urls = {
            'gmanews1': 'https://data.gmanews.tv/gno/rss/news/nation/feed.xml',
            'gmanews2': 'https://data.gmanews.tv/gno/rss/news/regions/feed.xml',
            'gmanews3': 'https://data.gmanews.tv/gno/rss/news/ulatfilipino/feed.xml',
            'gmanews4': 'https://data.gmanews.tv/gno/rss/news/specialreports/feed.xml',
            'philstar1': 'https://www.philstar.com/rss/headlines',
            'philstar2': 'https://www.philstar.com/rss/nation',
            'philstar3': 'https://www.philstar.com/rss/agriculture',
            'inquirer': 'https://www.inquirer.net/fullfeed',
            'manilatimes': 'https://www.manilatimes.net/feed/',
            'businessworld': 'http://www.bworldonline.com/feed/',
            'eaglenews': 'https://www.eaglenews.ph/feed/',
            'sunstarDav': 'https://www.sunstar.com.ph/rssFeed/67/29',
            'sunstarDav2': 'https://www.sunstar.com.ph/rssFeed/67',
            'sunstarMnl': 'https://www.sunstar.com.ph/rssFeed/70',
            'sunstarMnl2': 'https://www.sunstar.com.ph/rssFeed/70/50',
            'sunstarZam': 'https://www.sunstar.com.ph/rssFeed/76',
            'sunstarZam2': 'https://www.sunstar.com.ph/rssFeed/76/78',
            'sunstarCeb': 'https://www.sunstar.com.ph/rssFeed/63/1',
            'sunstarCeb2': 'https://www.sunstar.com.ph/rssFeed/63',
            'sunstar1': 'https://www.sunstar.com.ph/rssFeed/81',
            'sunstar2': 'https://www.sunstar.com.ph/rssFeed/81/97',
            'sunstar3': 'https://www.sunstar.com.ph/rssFeed/selected',
            'businessmirror': 'https://businessmirror.com.ph/feed/',
            'PhilNewAgency': 'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.pna.gov.ph%2Flatest.rss',
            'interaksyon': 'https://www.feedspot.com/infiniterss.php?q=site:http%3A%2F%2Fwww.interaksyon.com%2Ffeed'
        }

        print('Gathering rss feed on news sources...')
        mod = md.modify_data()
        raw_rss = []

        # On a same-week run, reload the feeds already stored so that
        # previously seen headlines are not duplicated.
        if week == 'same_week':
            try:
                dbs.get_file('raw_rss', 'raw/raw_rss.txt')
                dbs.get_file('clean_rss', 'clean/clean_rss.txt')
                with open('raw/raw_rss.txt', 'r') as raw_file:
                    for raw in raw_file:
                        raw_rss.append(raw.rstrip('\n'))
            except FileNotFoundError:
                pass

        for key, url in news_urls.items():
            feed = feedparser.parse(url)

            for newsitem in feed['items']:
                # Drop any non-ASCII characters from the headline.
                news = newsitem.title.encode('ascii', 'ignore').decode('ascii')

                if news not in raw_rss:
                    raw_rss.append(news)

                    with open('raw/raw_rss.txt', 'a') as raw_file:
                        raw = news + '\n'
                        raw_file.write(raw)

                    news2 = mod.translate(news)
                    news2 = mod.remove_stopwords(news2)

                    with open('clean/clean_rss.txt', 'a') as clean_file:
                        clean = news2 + '\n'
                        clean_file.write(clean)

        print('Saved raw rss data to "raw_rss.txt"...')
        print('Saved clean rss data to "clean_rss.txt"...')
        print('Finished gathering rss data...')

        # Store both files as BLOBs in the project's SQLite database.
        conn = sqlite3.connect('policalc.db')
        db_con = conn.cursor()

        with open('raw/raw_rss.txt', 'rb') as file:
            blob_file = file.read()
            db_con.execute(
                "INSERT INTO raw_rss VALUES (:id, :date, :file)",
                {
                    'id': None,
                    'date': dt.now(),
                    'file': blob_file
                })
            conn.commit()

        with open('clean/clean_rss.txt', 'rb') as file:
            blob_file2 = file.read()
            db_con.execute(
                "INSERT INTO clean_rss VALUES (:id, :date, :file)",
                {
                    'id': None,
                    'date': dt.now(),
                    'file': blob_file2
                })
            conn.commit()

        conn.close()

        # The local copies are now persisted in the database; remove them.
        os.remove('raw/raw_rss.txt')
        os.remove('clean/clean_rss.txt')
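The feed loop in rss.py does the real gathering: parse each URL with feedparser, normalize the headline to ASCII, and skip anything already seen. A standalone sketch of that step, assuming feedparser is installed; the function name is hypothetical and the URL is one of the feeds from the example.

import feedparser

def new_headlines(url, seen):
    # Parse one RSS feed and return only headlines not seen before.
    feed = feedparser.parse(url)
    fresh = []
    for item in feed['items']:
        # Same ASCII normalization as the example above.
        title = item.title.encode('ascii', 'ignore').decode('ascii')
        if title not in seen:
            seen.add(title)
            fresh.append(title)
    return fresh

titles = new_headlines('https://www.philstar.com/rss/headlines', set())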