def streamtofile_demo(limit=20):
    """
    Write 20 tweets sampled from the public Streaming API to a file.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetWriter(limit=limit, repeat=False))
    client.statuses.sample()
def tracktoscreen_demo(track="taylor swift", limit=10):
    """
    Track keywords from the public Streaming API and send output to terminal.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.filter(track=track)
def sampletoscreen_demo(limit=20):
    """
    Sample from the Streaming API and send output to terminal.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.sample()
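# The limit_by_time_demo() and followtoscreen_demo() functions below rely on
# module-level DATE and USERIDS constants that are not shown in these
# snippets. A hypothetical sketch of what they might look like; the date and
# the user ids are placeholder values only.
import datetime

DATE = datetime.datetime(2020, 1, 1, 12, 0)   # date limit passed to TweetWriter below
USERIDS = ["1234567890", "2345678901"]        # ids of the users to follow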
def limit_by_time_demo(limit=20):
    """
    Sample from the Streaming API and write tweets to a file, stopping once
    the date limit ``DATE`` is reached.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetWriter(limit=limit, date_limit=DATE))
    client.sample()
def followtoscreen_demo(limit=10):
    """
    Using the Streaming API, select just the tweets from a specified list of
    user IDs.

    This will only give results in a reasonable time if the users in question
    produce a high volume of tweets, and may show some delay even so.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.statuses.filter(follow=USERIDS)
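# One possible way to run the demos above; it assumes Twitter credentials are
# already configured where credsfromfile() can find them, and the keyword is
# an arbitrary example.
if __name__ == "__main__":
    tracktoscreen_demo(track="nltk", limit=5)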
def find_matching_tweets(num_tweets=100, fname="matching_tweets.csv", shownum=50):
    """
    Given the number of tweets to retrieve, stream that number of tweets
    containing the keyword "trump" and save the tweet id and text to the csv
    file ``fname``. Return the first ``shownum`` rows as a pandas DataFrame.
    Does not remove retweets.
    """
    oauth = credsfromfile()
    # create and register a streamer
    client = Streamer(**oauth)
    writer = TweetWriter(limit=num_tweets)
    client.register(writer)
    # get the name of the newly-created json file
    input_file = writer.timestamped_file()
    client.filter(track="trump")  # case-insensitive
    with open(input_file) as fp:
        # these two fields for now
        json2csv(fp, fname, ['id', 'text'])
    # pretty print using pandas
    tweets = pd.read_csv(fname, encoding="utf8")
    return tweets.head(shownum)
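# A minimal usage sketch for find_matching_tweets(); it streams live tweets,
# so it assumes valid Twitter credentials are available to credsfromfile().
# The file name and counts below are illustrative only.
if __name__ == "__main__":
    preview = find_matching_tweets(num_tweets=50, fname="trump_tweets.csv", shownum=10)
    print(preview)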
print("Importing") from DangerTweetWriter import DangerTweetWriter from nltk.twitter import Streamer, credsfromfile from nltk.corpus import stopwords print("Building auth") oauth = credsfromfile() twitterPass = "" alreadyAdded = [] stopWords = set(stopwords.words('english')) for phrase in DangerTweetWriter.dangerPhrases: split = phrase.split(" ") for word in split: if (word not in alreadyAdded and word not in stopWords): if (len(alreadyAdded) > 0): twitterPass = twitterPass + ", " twitterPass = twitterPass + word alreadyAdded.append(word) print("Words to pass to Twitter's filter: " + twitterPass) client = Streamer(**oauth) client.register(DangerTweetWriter(10)) print("Attempting to stream") client.filter(twitterPass)
    # fragment: tail of the handler's handle(data) method
    self.check_date_limit(data)
    if self.do_stop:
        return
    self.startingup = False


print("Building auth")
oauth = credsfromfile()

# Build a comma-separated keyword string from the danger phrases,
# skipping stopwords and words already added.
twitterPass = ""
alreadyAdded = []
stopWords = set(stopwords.words('english'))
for phrase in DangerTweetWriter.dangerPhrases:
    for word in phrase.split(" "):
        if word not in alreadyAdded and word not in stopWords:
            if len(alreadyAdded) > 0:
                twitterPass = twitterPass + ", "
            twitterPass = twitterPass + word
            alreadyAdded.append(word)
print("Words to pass to Twitter's filter: " + twitterPass)

client = Streamer(**oauth)
client.register(LiveTweetClassifierAndWriter(10000))
print("Attempting to stream")
client.filter(track=twitterPass)
def save_tweets_file(self):
    oauth = credsfromfile()  # load the Twitter credentials
    client = Streamer(**oauth)
    client.register(TweetWriter(limit=100, subdir='twitter_samples_files'))
    client.statuses.sample()
class TwitterAPI:
    oauth = credsfromfile()
    path = BASE_DIR

    # streams 10 sampled tweets to the terminal when the class is defined
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=10))
    client.sample()

    def __init__(self, keywords, to_screen=True, follow='', limit=10):
        self.follow = follow
        self.keywords = keywords
        tw = Twitter()
        tw.tweets(keywords=keywords, follow=follow, to_screen=to_screen, limit=limit)

    def get_twitter(self, keywords, limit=10):
        client = Query(**self.oauth)
        tweets = client.search_tweets(keywords, limit=limit)
        tweet = next(tweets)
        return tweet

    def get_users(self, *args):  # by user id
        client = Query(**self.oauth)
        user_info = client.user_info_from_id(list(args))
        users = []
        for user in user_info:
            name, followers, following = (user['name'],
                                          user['followers_count'],
                                          user['friends_count'])
            users.append(user)
            print(f'{name} {followers} {following}\n')
        return users

    def save_tweets_file(self):
        client = Streamer(**self.oauth)
        client.register(TweetWriter(limit=100, subdir='twitter_samples_files'))
        client.statuses.sample()

    def get_tweet_JsonFiles(self, json_file2=None):
        if json_file2 is None:
            all_tweet_samples = twitter_samples.fileids()
            json_file = all_tweet_samples[2]  # json file
            tweet_string = twitter_samples.strings(json_file)
            return tweet_string
        tweet_string = json_file2
        return tweet_string

    def tokenize_tweets(self, string_tweet):
        toked_tweet = twitter_samples.tokenized(string_tweet)
        return toked_tweet

    def convert_csv_tweet_file(self, input_file, args=[
            'created_at', 'favorite_count', 'id', 'in_reply_to_status_id',
            'in_reply_to_user_id', 'retweet_count', 'text', 'truncated', 'user.id',
    ]):
        with open(input_file) as file:
            json2csv(file, self.path + 'tweets_text.csv', args)
        return open(self.path + 'tweets_text.csv', 'r').readlines()

    def read_csv_tweets(self, filepath, *args):
        tw = pd.read_csv(filepath, index_col=1, encoding='utf-8').head()
        return tw

    def get_tweet_by_id(self, filepath, tw_id):
        ids = StringIO(str(tw_id))
        client = Query(**self.oauth)
        hydrated = client.expand_tweetids(ids)
        tw = self.read_csv_tweets(filepath)
        for tweet in hydrated:
            yield tw.loc[tw['user.id'] == tweet['user']['id']]['text']
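# A minimal usage sketch for the TwitterAPI class above. It assumes valid
# credentials are available to credsfromfile(), that BASE_DIR is defined
# before the class body runs, and that the keyword and the user id below are
# arbitrary example values.
if __name__ == '__main__':
    api = TwitterAPI(keywords='python', limit=5)   # streams a few matching tweets to the terminal
    print(api.get_twitter('python', limit=5))      # first tweet returned by the REST search
    api.get_users('1234567890')                    # prints name / followers / following for the id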