Example #1
0
def find_matching_tweets(num_tweets=100,
                         fname="matching_tweets.csv",
                         shownum=50):
    """Stream tweets matching the keyword "trump" and save them to CSV.

    Streams *num_tweets* tweets from the Twitter Streaming API filtered on
    the (case-insensitive) track keyword "trump", writes their ``id`` and
    ``text`` fields to the CSV file *fname*, and returns the first
    *shownum* rows as a pandas DataFrame.  Retweets are NOT removed.

    :param num_tweets: number of tweets to collect before stopping.
    :param fname: path of the CSV file to write.
    :param shownum: number of rows of the resulting DataFrame to return.
    :return: ``pandas.DataFrame`` holding the first *shownum* tweets.
    """
    oauth = credsfromfile()
    # Create and register a streamer that writes incoming tweets to a
    # timestamped JSON file and stops after num_tweets tweets.
    client = Streamer(**oauth)
    writer = TweetWriter(limit=num_tweets)
    client.register(writer)
    # Name of the JSON file the writer will create.
    input_file = writer.timestamped_file()
    # BUG FIX: the original also called client.sample() right after
    # filter().  sample() opens a second, UNFILTERED stream, which
    # contradicts the keyword filtering this function documents.
    # filter() alone blocks until the writer's limit is reached.
    client.filter(track="trump")  # case-insensitive

    with open(input_file) as fp:
        # Keep just these two fields for now.
        json2csv(fp, fname, [
            'id',
            'text',
        ])

    # Pretty-print using pandas.
    tweets = pd.read_csv(fname, encoding="utf8")
    return tweets.head(shownum)
def senti_score_realtime(keyword,
                         client,
                         classifier,
                         twt_num,
                         step,
                         step_num,
                         verb=0):
    """Collect and sentiment-score tweets for *keyword* over timed steps.

    For each of *step_num* steps: stream up to *twt_num* tweets matching
    *keyword*, score their text with *classifier*, store the scores in
    column ``i`` of the result matrix, then sleep until the *step*-minute
    window ends before starting the next step.

    :param keyword: track keyword passed to the streaming client.
    :param client: streaming client exposing ``register`` and ``filter``
        (e.g. ``nltk.twitter.Streamer``).
    :param classifier: object whose ``test(tweets)`` returns a 2-D array
        of scores, one row per tweet.
    :param twt_num: maximum number of tweets to collect per step.
    :param step: length of each step, in minutes.
    :param step_num: number of steps to run.
    :param verb: if 1, print each step's mean score.
    :return: ``(twt_num, step_num)`` array of per-tweet scores; rows
        beyond the number of tweets actually scored remain 0.
    """
    score_all = np.zeros((twt_num, step_num))

    for i in range(step_num):
        origin = datetime.datetime.today()
        end_t = origin + datetime.timedelta(minutes=step)

        # Don't store files in this case.
        client.register(TweetWriter(limit=twt_num))
        tweets_gen = client.filter(track=keyword)
        tweets = [t['text'] for t in tweets_gen]

        score = classifier.test(tweets)

        # Fill only the rows we actually have scores for.
        score_all[0:len(score[:, 0]), i] = score[:, 0]
        if verb == 1:
            # BUG FIX: the original used a Python 2 print *statement*,
            # which is a SyntaxError under Python 3 (the rest of this
            # file uses the print() function).
            print(keyword + ' : ' + str(origin) + ' to ' + str(end_t)
                  + ' score=' + str(np.mean(score_all[:, i], axis=0)))

        # Wait out the remainder of the step window.
        time.sleep((end_t - origin).seconds)

    return score_all
Example #3
0
def streamtofile_demo(limit=20):
    """
    Sample *limit* tweets from the public Streaming API and write them
    to a file via a registered TweetWriter (default limit: 20).
    """
    creds = credsfromfile()
    streamer = Streamer(**creds)
    writer = TweetWriter(limit=limit, repeat=False)
    streamer.register(writer)
    streamer.statuses.sample()
Example #4
0
def tweets_by_user_demo(user="******", count=200):
    """
    Use the REST API to search for past tweets by a given user.
    """
    creds = credsfromfile()
    rest_client = Query(**creds)
    rest_client.register(TweetWriter())
    rest_client.user_tweets(user, count)
Example #5
0
def limit_by_time_demo(limit=20):
    """
    Sample from the Streaming API, stopping at *limit* tweets or at the
    DATE cut-off, whichever comes first; output goes to the registered
    TweetWriter.
    """
    creds = credsfromfile()
    stream = Streamer(**creds)
    writer = TweetWriter(limit=limit, date_limit=DATE)
    stream.register(writer)
    stream.sample()
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

oauth = credsfromfile()
n = 10  # 設定拿取 tweets 資料則數
username = '******'

# Query
client = Query(**oauth)  # 歷史資料
client.register(TweetWriter())  # 寫入
client.user_tweets(username, n)  # 拿取 tweets 資料(n則)

'''
使用 json2csv 存取 tweets 資料 (text欄位)
input_file 的 abspath 需參考上述 Query 寫入資料的路徑做修改
'''

input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')
with open(input_file) as fp:
    json2csv(fp, 'tweets_text.csv', ['text'])

# 讀取
data = pd.read_csv('tweets_text.csv')
for line in data.text:
    print('Trump tweets content: ')
    print(line)

# 斷詞
Example #7
0
 def save_tweetes_file(self):
     """Sample 100 tweets from the Streaming API and save them under the
     'twitter_samples_files' subdirectory via a TweetWriter."""
     # Relies on a module-level `oauth` credentials mapping.
     stream_client = Streamer(**oauth)
     writer = TweetWriter(limit=100, subdir='twitter_samples_files')
     stream_client.register(writer)
     stream_client.statuses.sample()
Example #8
0
 def __init__(self, lim=100):
     """Initialize as a TweetWriter capped at *lim* tweets, writing
     output into the "Tweets" subdirectory.

     :param lim: maximum number of tweets to write (default 100).
     """
     TweetWriter.__init__(self, limit=lim, subdir="Tweets")