def tweets_by_user_demo(user='******', count=200):
    """
    Use the REST API to search for past tweets by a given user.
    """
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(TweetWriter())
    client.user_tweets(user, count)
def tweets_by_user_demo(user="******", count=200): """ Use the REST API to search for past tweets by a given user. """ oauth = credsfromfile() client = Query(**oauth) client.register(TweetWriter()) client.user_tweets(user, count)
def limit_by_time_demo(keywords="nltk"):
    """
    Query the REST API for Tweets about NLTK since yesterday and send
    the output to terminal.

    This example makes the assumption that there are sufficient Tweets
    since yesterday for the date to be an effective cut-off.
    """
    date = yesterday()
    dt_date = datetime.datetime(*date)
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(TweetViewer(limit=100, lower_date_limit=date))

    print(f"Cutoff date: {dt_date}\n")

    for tweet in client.search_tweets(keywords=keywords):
        print("{} ".format(tweet["created_at"]), end="")
        client.handler.handle(tweet)
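# A hedged variation on limit_by_time_demo: search within an explicit date
# window instead of relying on yesterday(). This assumes the handler's
# lower_date_limit/upper_date_limit arguments accept (year, month, day, hour,
# minute) tuples, as in the demo above; verify against your nltk.twitter
# version. The window values below are illustrative only.
def limit_by_window_demo(keywords="nltk"):
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(
        TweetViewer(
            limit=100,
            lower_date_limit=(2018, 7, 1, 0, 0),   # window start (illustrative)
            upper_date_limit=(2018, 7, 7, 0, 0),   # window end (illustrative)
        )
    )
    for tweet in client.search_tweets(keywords=keywords):
        client.handler.handle(tweet)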
from nltk.twitter import Query, credsfromfile, TweetViewer
import process_twt
from NBClassifier import NBClassifier
from SCClassifier import SCClassifier
from BGClassifier import BGClassifier
from nltk.corpus import twitter_samples, TwitterCorpusReader
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np

# settings
oauth = credsfromfile()
client = Query(**oauth)
twtNum = 10
client.register(TweetViewer(limit=twtNum))
tweets_gen = client.search_tweets(keywords='hearthstone', lang='en')
tweets = []
slangdict = process_twt.get_slang_dict()

# preprocess the fetched tweets and drop duplicates
twt_list = []
for t in tweets_gen:
    twt_list.append(process_twt.preprocess(t['text'], slangdict=slangdict))
twt_list = list(set(twt_list))
for t in twt_list[:twtNum]:
    print(t)

fileIds = twitter_samples.fileids()
root = twitter_samples.root
# read tweet data from corpus
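# A hedged sketch of how the "read tweet data from corpus" step could continue,
# using the sample files shipped with nltk.corpus.twitter_samples. strings()
# returns the raw tweet texts; the file names below are the standard sample
# fileids, but check twitter_samples.fileids() on your installation.
pos_tweets = twitter_samples.strings('positive_tweets.json')
neg_tweets = twitter_samples.strings('negative_tweets.json')
print(len(pos_tweets), 'positive /', len(neg_tweets), 'negative sample tweets')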
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

oauth = credsfromfile()
n = 10                # number of tweets to fetch
username = '******'

# Query
client = Query(**oauth)            # historical data
client.register(TweetWriter())     # write to file
client.user_tweets(username, n)    # fetch n tweets

'''
Use json2csv to extract the tweet data (the text field).
The abspath passed as input_file must be adjusted to match the path
that the Query step above wrote the tweets to.
'''
input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')

with open(input_file) as fp:
    json2csv(fp, 'tweets_text.csv', ['text'])

# read the CSV back
data = pd.read_csv('tweets_text.csv')
for line in data.text:
    print('Trump tweets content: ')
    print(line)

# tokenization
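# json2csv_entities is imported above but never used. A hedged sketch of pulling
# hashtag entities from the same tweet file into a separate CSV, following the
# column-list pattern shown in the NLTK Twitter how-to; confirm the exact
# signature in your NLTK version.
with open(input_file) as fp:
    json2csv_entities(fp, 'tweets_hashtags.csv',
                      ['id', 'text'], 'hashtags', ['text'])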
from nltk.twitter import Query, credsfromfile, TweetViewer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import sys

if len(sys.argv) < 4:
    print('Usage:', sys.argv[0],
          'twitter_username max_tweets_to_search max_top_words_to_print lemmatize(optional)')
    quit()

# capture the output of TweetViewer to a file for processing
sys.stdout = open('tweets.txt', 'w')
oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetViewer(limit=int(sys.argv[2])))
client.user_tweets(sys.argv[1], int(sys.argv[2]))
# give back control to stdout
sys.stdout = sys.__stdout__

lemmatizer = WordNetLemmatizer()
if len(sys.argv) > 4 and sys.argv[4].lower() == 'lemmatize':
    lemmatize = True
else:
    lemmatize = False

def text_cleaner(documents):
    text_cleaned = []
    for document in documents:
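# Redirecting sys.stdout to capture TweetViewer's printed output is fragile. A
# hedged alternative sketch (not the original author's approach): register a
# TweetWriter instead, which writes the fetched tweets as JSON under NLTK's
# twitter-files directory; the exact output path and defaults depend on your
# NLTK version.
from nltk.twitter import Query, TweetWriter, credsfromfile
import sys

oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetWriter(limit=int(sys.argv[2])))
client.user_tweets(sys.argv[1], int(sys.argv[2]))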
            lsa.display_comparison()
            kl_sum.display_comparison()
            luhn.display_comparison()
            sumbasic.display_comparison()
            res = input("Press 'r' to restart\n")
            if res != 'r':
                restart = False
        elif choice == '2':
            # summarize a twitter topic
            tweet_topic = input("Enter the topic you want a summary for\n")

            # Authenticate and retrieve tweets based on user entered topic
            oauth = credsfromfile()
            client = Query(**oauth)
            client.register(TweetWriter())
            tweets = client.search_tweets(keywords=tweet_topic, limit=100, lang='en')
            tweetSummarizer = TweetSummarizer()

            # clean tweets and store in tweets.csv
            rows = []
            usable_rows = []
            for tweet in tweets:
                rows.append(str(tweet['text']))
            if len(rows) > 0:
                usable_rows = rows.copy()
            for i in range(0, len(rows)):
                rows[i] = clean_tweet(rows[i])
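# clean_tweet() is referenced above but not defined in this fragment. A
# hypothetical, minimal sketch of such a helper (the project's real
# implementation may differ): drop URLs and @mentions, keep hashtag words,
# collapse whitespace.
import re

def clean_tweet(text):
    text = re.sub(r'http\S+|www\.\S+', '', text)   # remove URLs
    text = re.sub(r'@\w+', '', text)               # remove @mentions
    text = text.replace('#', '')                   # keep the word, drop the '#'
    return re.sub(r'\s+', ' ', text).strip()       # normalize whitespace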