Example #1
def tweets_by_user_demo(user='******', count=200):
    """
    Use the REST API to search for past tweets by a given user.
    """
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(TweetWriter())
    client.user_tweets(user, count)
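Running this demo only needs NLTK's twitter helpers and a credentials file: credsfromfile() reads a credentials.txt from the directory named by the TWITTER environment variable by default. A minimal usage sketch (the handle is a hypothetical placeholder):

from nltk.twitter import Query, TweetWriter, credsfromfile

# Assumes the TWITTER environment variable points at a directory
# containing credentials.txt with the OAuth keys.
tweets_by_user_demo(user='some_handle', count=50)  # hypothetical handle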
Example #3
def limit_by_time_demo(keywords="nltk"):
    """
    Query the REST API for Tweets about NLTK since yesterday and send
    the output to terminal.

    This example makes the assumption that there are sufficient Tweets since
    yesterday for the date to be an effective cut-off.
    """
    date = yesterday()
    dt_date = datetime.datetime(*date)
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(TweetViewer(limit=100, lower_date_limit=date))

    print(f"Cutoff date: {dt_date}\n")

    for tweet in client.search_tweets(keywords=keywords):
        print("{} ".format(tweet["created_at"]), end="")
        client.handler.handle(tweet)
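The demo leans on a yesterday() helper that returns a timestamp tuple which datetime.datetime(*date) can unpack; NLTK ships a similar helper in its twitter demo module. A sketch consistent with that usage:

import datetime

def yesterday():
    # Yesterday's local time as a (year, month, day, hour, minute, second)
    # tuple, matching the datetime.datetime(*date) unpacking above.
    date = datetime.datetime.now() - datetime.timedelta(days=1)
    return date.timetuple()[:6]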
Example #5
from nltk.twitter import Query, credsfromfile, TweetViewer
import process_twt
from NBClassifier import NBClassifier
from SCClassifier import SCClassifier
from BGClassifier import BGClassifier
from nltk.corpus import twitter_samples, TwitterCorpusReader
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np

# settings
oauth = credsfromfile()
client = Query(**oauth)
twtNum = 10
client.register(TweetViewer(limit=twtNum))
tweets_gen = client.search_tweets(keywords='hearthstone', lang='en')
tweets = []
slangdict = process_twt.get_slang_dict()
twt_list = []
for t in tweets_gen:
    twt_list.append(process_twt.preprocess(t['text'], slangdict=slangdict))
twt_list = list(set(twt_list))

for t in twt_list[:twtNum]:
    print(t)

fileIds = twitter_samples.fileids()
root = twitter_samples.root

Example #6
# read tweet data from corpus
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

oauth = credsfromfile()
n = 10  # number of tweets to fetch
username = '******'

# Query
client = Query(**oauth)  # REST client for historical data
client.register(TweetWriter())  # write results to file
client.user_tweets(username, n)  # fetch n tweets from the user's timeline

'''
Use json2csv to extract the text field from the stored tweets.
Adjust the abspath of input_file below to match the path where the
Query above wrote its output.
'''

input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')
with open(input_file) as fp:
    json2csv(fp, 'tweets_text.csv', ['text'])

# read the CSV back
data = pd.read_csv('tweets_text.csv')
print('Trump tweets content: ')
for line in data.text:
    print(line)
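json2csv_entities is imported above but never used; for completeness, a sketch that pulls hashtag entities out of the same dump (the output filename is hypothetical):

# Sketch: tweet id/text plus hashtag entities into a second CSV,
# reusing the same input_file as above.
with open(input_file) as fp:
    json2csv_entities(fp, 'tweets_hashtags.csv', ['id', 'text'], 'hashtags', ['text'])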

Example #7
# tokenization
from nltk.twitter import Query, credsfromfile, TweetViewer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import sys

if len(sys.argv) < 4:
    print('Usage:', sys.argv[0], 'twitter_username max_tweets_to_search max_top_words_to_print lemmatize(optional)')
    quit()

# capture the output of TweetViewer to a file for processing
sys.stdout = open('tweets.txt', 'w')

oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetViewer(limit=int(sys.argv[2])))
client.user_tweets(sys.argv[1], int(sys.argv[2]))


# give back control to stdout
sys.stdout = sys.__stdout__
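# An alternative sketch: contextlib.redirect_stdout restores the stream
# automatically even if an exception fires mid-capture, avoiding the manual
# sys.stdout juggling above (shown as a comment so the script is unchanged):
#
#     import contextlib
#     with open('tweets.txt', 'w') as out, contextlib.redirect_stdout(out):
#         client.register(TweetViewer(limit=int(sys.argv[2])))
#         client.user_tweets(sys.argv[1], int(sys.argv[2]))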
lemmatizer = WordNetLemmatizer()

if len(sys.argv) > 4 and sys.argv[4].lower() == 'lemmatize':
    lemmatize = True
else:
    lemmatize = False


def text_cleaner(documents):
    text_cleaned = []
    for document in documents:
Example #8
        lsa.display_comparison()
        kl_sum.display_comparison()
        luhn.display_comparison()
        sumbasic.display_comparison()

        res = input("Press 'r' to restart\n")
        if res != 'r':
            restart = False

    elif choice == '2':  # summarize a twitter topic
        tweet_topic = input("Enter the topic you want a summary for\n")

        # Authenticate and retrieve tweets based on user entered topic
        oauth = credsfromfile()
        client = Query(**oauth)
        client.register(TweetWriter())
        tweets = client.search_tweets(keywords=tweet_topic,
                                      limit=100,
                                      lang='en')

        tweetSummarizer = TweetSummarizer()

        # clean tweets and store in tweets.csv
        rows = []
        usable_rows = []
        for tweet in tweets:
            rows.append(str(tweet['text']))
        if len(rows) > 0:
            usable_rows = rows.copy()
            for i in range(0, len(rows)):
                rows[i] = clean_tweet(rows[i])
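clean_tweet is called above but defined elsewhere in the project; a hypothetical stand-in showing the kind of normalization such helpers usually perform:

import re

def clean_tweet(text):
    # Hypothetical stand-in: strip URLs, @mentions and a leading "RT",
    # then collapse runs of whitespace.
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'^RT\s+', '', text)
    return ' '.join(text.split())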