def stream_tweets(bearer_token):
    """
    Write a filtered stream of tweets directly to a new line delimited
    JSON file, named with today's date in "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")
    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(oauth2)

    # Add all tweet fields
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Add streaming rules
    rules = [{"value": "coronavirus", "tag": "all coronavirus tweets"},
             {"value": "indiana", "tag": "all indiana tweets"}]
    add_rules = {"add": rules}
    response = ot.set_filtered_stream_rule(rules=add_rules)
    print(f"API response from adding two rules:\n{response}\n")

    # Retrieve active streaming rules
    current_rules = ot.get_filtered_stream_rule()
    print(f'The current filtered stream rules are:\n{current_rules}\n')

    # Remove a streaming rule by using its tag
    indiana_rule = [
        rule["id"] for rule in current_rules["data"]
        if 'all indiana tweets' in rule["tag"]
    ]
    delete_rule = {'delete': {'ids': indiana_rule}}
    response = ot.set_filtered_stream_rule(rules=delete_rule)
    print(f"API response from deleting one rule:\n{response}\n")

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open a single output file for the streamed tweet data.
    # (Append mode so a restarted stream keeps earlier tweets.)
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # stream is a Generator
        stream = ot.filtered_stream(fields=all_tweet_fields)
        # We have to iterate over the stream to fetch streamed tweets
        for tweet in stream.iter_lines():
            try:
                data = json.loads(tweet).get("data")
                # When data is found, we write it to the open file
                if data:
                    json.dump(data, data_file)
                    data_file.write("\n")
            except json.JSONDecodeError:
                # Keep-alive lines / partial payloads are not valid
                # JSON; skip them and keep streaming.
                pass
def initialize_osometweet(bearer_token):
    """
    Return an authorized osometweet API object from which we can make
    API calls.

    Parameters:
    ----------
    - bearer_token (str) : Your secret Twitter bearer token.
    """
    print("Initializing osometweet...")
    # `manage_rate_limits=True` makes the client wait whenever Twitter
    # sends a rate-limit message instead of erroring out.
    auth = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=True
    )
    return osometweet.OsomeTweet(auth)
def gather_data(bearer_token, chunked_list):
    """
    Gather tweets based on the chunked list of tweet IDs
    with the provided bearer_token.
    """
    print("Gathering Data...")
    ot = osometweet.OsomeTweet(osometweet.OAuth2(bearer_token=bearer_token))

    # Request every available tweet field
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Timestamp used to name the output files
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Two files: one for good data, the other for tweet errors.
    with open(f"tweet_data--{today}.json", 'w') as data_file,\
         open(f"tweet_errors--{today}.json", 'w') as error_file:

        # Each item in chunked_list is a batch of (up to) 100 tweet IDs
        for batch in chunked_list:
            response = ot.tweet_lookup(tids=batch, fields=all_tweet_fields)

            # Both keys are always present in the response, but either
            # may hold `None` (no data / no errors), which makes the
            # writelines iteration raise TypeError.
            data = response["data"]
            errors = response["errors"]

            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print("No data found in this set of tweets, "
                      "skipping to the next set.")

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print("No problematic tweets found in this set of tweets, "
                      "skipping to the next set.")
def stream_tweets(bearer_token):
    """
    Stream a 1% sample of tweets from twitter and write them directly
    to a new line delimited JSON file, named with today's date in
    "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")
    auth = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(auth)

    # Request every available tweet field
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Timestamp used to name the output file
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Append streamed tweets to a single newline-delimited JSON file.
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # sampled_stream yields raw lines we must iterate to consume
        stream = ot.sampled_stream(fields=all_tweet_fields)
        for raw_line in stream.iter_lines():
            # Non-JSON lines (e.g. keep-alives) are simply skipped
            try:
                payload = json.loads(raw_line).get("data")
            except json.JSONDecodeError:
                continue
            # Only write lines that actually carry tweet data
            if payload:
                json.dump(payload, data_file)
                data_file.write("\n")
def setUp(self):
    # Authenticate with the module-level bearer token and keep an API
    # client on the test instance for the test methods to use.
    self.ot = osometweet.OsomeTweet(
        osometweet.OAuth2(bearer_token=bearer_token)
    )
def gather_data(
    access_token,
    access_token_secret,
    api_key,
    api_key_secret,
    chunked_user_list
):
    """
    Gather user info based on the chunked list of usernames with the
    provided user-context (OAuth1a) credentials.

    Parameters
    ----------
    - access_token (str) : Twitter app access token.
    - access_token_secret (str) : Twitter app access token secret.
    - api_key (str) : Twitter app API key.
    - api_key_secret (str) : Twitter app API key secret.
    - chunked_user_list (list) : list of lists, each containing up to
      one hundred usernames.
    """
    print("Gathering Data...")
    oauth1a = osometweet.OAuth1a(
        api_key=api_key,
        api_key_secret=api_key_secret,
        access_token=access_token,
        access_token_secret=access_token_secret
    )
    ot = osometweet.OsomeTweet(oauth1a)

    # Add all user_fields
    all_user_fields = osometweet.UserFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for account errors.
    with open(f"account_data--{today}.json", 'w') as data_file,\
         open(f"account_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_users in chunked_user_list:
            response = ot.user_lookup_usernames(
                usernames=one_hundred_users,
                fields=all_user_fields
            )

            # Whereas the user_ids endpoint always returns both "data"
            # and "errors", the usernames endpoint only includes these
            # keys when they hold content — so default missing keys
            # to None.
            data = response.get("data")
            errors = response.get("errors")

            # Iterating over None raises TypeError, which here signals
            # an empty batch rather than a real failure.
            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print(
                    "No USER data found in this set of users, "
                    "skipping to the next set."
                )

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print(
                    "No problematic users found in this set of users, "
                    "skipping to the next set."
                )
def gather_data(
    access_token,
    access_token_secret,
    api_key,
    api_key_secret,
    chunked_user_list
):
    """
    Gather user info based on the chunked list of user IDs with the
    provided user-context (OAuth1a) credentials.

    Parameters
    ----------
    - access_token (str) : Twitter app access token.
    - access_token_secret (str) : Twitter app access token secret.
    - api_key (str) : Twitter app API key.
    - api_key_secret (str) : Twitter app API key secret.
    - chunked_user_list (list) : list of lists, each containing up to
      one hundred user IDs.
    """
    print("Gathering Data...")
    oauth1a = osometweet.OAuth1a(
        api_key=api_key,
        api_key_secret=api_key_secret,
        access_token=access_token,
        access_token_secret=access_token_secret
    )
    ot = osometweet.OsomeTweet(oauth1a)

    # Add all user_fields
    all_user_fields = osometweet.UserFields(everything=True)

    # Get today's date
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for account errors.
    with open(f"account_data--{today}.json", 'w') as data_file,\
         open(f"account_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_users in chunked_user_list:
            response = ot.user_lookup_ids(
                user_ids=one_hundred_users,
                fields=all_user_fields
            )

            # The user_ids endpoint always returns both "data" and
            # "errors", but either may be `None` (no data / no
            # errors); `.get` also guards against a missing key so
            # both cases flow into the TypeError handling below.
            data = response.get("data")
            errors = response.get("errors")

            try:
                data_file.writelines(f"{json.dumps(line)}\n" for line in data)
            except TypeError:
                print(
                    "No USER data found in this set of users, "
                    "skipping to the next set."
                )

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print(
                    "No problematic users found in this set of users, "
                    "skipping to the next set."
                )