def gather_data(user_id):
    """
    Gather tweets (in reverse chronological order) from the timeline
    of the user_id provided, writing tweet data and errors to dated files.

    Parameters:
    ----------
    - user_id (str) : the user ID whose tweets we want to download
    """
    # Load bearer token and authorize osometweet to gather data
    bearer_token = load_bearer_token()
    ot = initialize_osometweet(bearer_token)

    # Create tweet fields object with all fields
    # NOTE: if you include other fields/expansions you will
    # need to ensure that you parse them properly from the
    # response object below
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Timestamped file names so repeated runs don't clobber earlier output
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")
    data_file_name = f"timeline_data--{today}.json"
    errors_file_name = f"timeline_errors--{today}.json"

    print("Gathering data...")

    # Open a file for data and errors
    with open(data_file_name, 'w') as data_file,\
         open(errors_file_name, 'w') as error_file:

        # Make first request and write data and/or errors
        response = ot.get_tweet_timeline(
            user_id=user_id,
            fields=all_tweet_fields,  # Get all tweet fields
            max_results=100           # Request 100 tweets per call
        )
        write_data(response, data_file, error_file)

        # Continually make requests until up to 3,200 tweets (or all
        # tweets) have been returned for the user_id provided. Twitter
        # signals more data by including "next_token" in the "meta" object.
        # BUGFIX: use .get() so a response without a "meta" object
        # (e.g. an all-errors response) ends the loop instead of
        # raising KeyError.
        while "next_token" in response.get("meta", {}):
            response = ot.get_tweet_timeline(
                user_id=user_id,
                fields=all_tweet_fields,
                max_results=100,
                pagination_token=response["meta"]["next_token"])
            write_data(response, data_file, error_file)

    # Now that the loop has finished, remove any files that might be
    # empty (for example, if we received no errors)
    delete_if_empty(data_file_name)
    delete_if_empty(errors_file_name)
def stream_tweets(bearer_token):
    """
    Write a filtered stream of tweets directly to a new line
    delimited JSON file, named with today's date in
    "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(oauth2)

    # Request every available tweet field
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Register two streaming rules with the API
    new_rules = [
        {"value": "coronavirus", "tag": "all coronavirus tweets"},
        {"value": "indiana", "tag": "all indiana tweets"}
    ]
    response = ot.set_filtered_stream_rule(rules={"add": new_rules})
    print(f"API response from adding two rules:\n{response}\n")

    # Retrieve the rules currently active on the stream
    current_rules = ot.get_filtered_stream_rule()
    print(f'The current filtered stream rules are:\n{current_rules}\n')

    # Remove one streaming rule, located via its tag
    indiana_rule = [
        rule["id"]
        for rule in current_rules["data"]
        if 'all indiana tweets' in rule["tag"]
    ]
    response = ot.set_filtered_stream_rule(
        rules={'delete': {'ids': indiana_rule}}
    )
    print(f"API response from deleting one rule:\n{response}\n")

    # Timestamp used in the output file name
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Append matched tweets to a newline-delimited JSON file
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # filtered_stream returns a generator; iterate it to pull tweets
        stream = ot.filtered_stream(fields=all_tweet_fields)
        for tweet in stream.iter_lines():
            try:
                data = json.loads(tweet).get("data")
            except json.JSONDecodeError:
                # Non-JSON lines (e.g. keep-alives) are silently skipped
                continue
            if data:
                json.dump(data, data_file)
                data_file.write("\n")
def test_tweet_fields(self):
    """
    Test tweet fields.

    Test case borrowed from
    https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
    """
    requested = [
        "attachments", "author_id", "context_annotations",
        "created_at", "entities", "id", "in_reply_to_user_id",
        "lang", "possibly_sensitive", "public_metrics",
        "referenced_tweets", "source", "text"
    ]
    # Build a fields object carrying exactly the requested fields
    fields_obj = osometweet.TweetFields()
    fields_obj.fields = requested
    resp = self.ot.tweet_lookup(
        ['1212092628029698048'],
        fields=fields_obj
    )
    # Every requested field should be present on the returned tweet
    for name in requested:
        self.assertIn(name, resp['data'][0])
def gather_data(bearer_token, chunked_list):
    """
    Gather tweets based on the chunked list of tweet IDs
    with the provided bearer_token, writing tweet data and
    tweet errors to separate dated files.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    - chunked_list (list) : list of lists, each holding at most
      100 tweet IDs (the per-request maximum for tweet_lookup).
    """
    print("Gathering Data...")

    oauth2 = osometweet.OAuth2(bearer_token=bearer_token)
    ot = osometweet.OsomeTweet(oauth2)

    # Request every available tweet field
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Timestamp for the output file names
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Open two files. One for good data, the other for tweet errors.
    with open(f"tweet_data--{today}.json", 'w') as data_file,\
         open(f"tweet_errors--{today}.json", 'w') as error_file:

        # Iterate through the list of lists
        for one_hundred_tweets in chunked_list:
            response = ot.tweet_lookup(tids=one_hundred_tweets,
                                       fields=all_tweet_fields)
            # BUGFIX: use .get() — the API can omit "data"/"errors"
            # entirely when empty, so bare indexing raised KeyError
            # instead of reaching the TypeError handlers below.
            data = response.get("data")
            errors = response.get("errors")

            # `data`/`errors` may be None (i.e. no data/errors), which
            # makes iterating raise TypeError; treat that as nothing
            # to write for this chunk.
            try:
                data_file.writelines(
                    f"{json.dumps(line)}\n" for line in data
                )
            except TypeError:
                print("No data found in this set of tweets, "
                      "skipping to the next set.")

            try:
                error_file.writelines(
                    f"{json.dumps(line)}\n" for line in errors
                )
            except TypeError:
                print("No problematic tweets found in this set of tweets, "
                      "skipping to the next set.")
def stream_tweets(bearer_token):
    """
    Stream a 1% sample of tweets from twitter and write
    them directly to a new line delimited JSON file, named
    with today's date in "%Y-%m-%d_%H-%M" format.

    Parameters
    ----------
    - bearer_token (str) : Twitter V2 bearer token.
    """
    print("Streaming tweets...")

    # Rate-limit management is turned off for the streaming endpoint
    oauth2 = osometweet.OAuth2(
        bearer_token=bearer_token,
        manage_rate_limits=False
    )
    ot = osometweet.OsomeTweet(oauth2)

    # Request every available tweet field
    all_tweet_fields = osometweet.TweetFields(everything=True)

    # Timestamp used in the output file name
    today = dt.strftime(dt.today(), "%Y-%m-%d_%H-%M")

    # Append sampled tweets to a newline-delimited JSON file
    with open(f"tweet_stream--{today}.json", "a") as data_file:
        # sampled_stream returns a generator; iterate it to pull tweets
        stream = ot.sampled_stream(fields=all_tweet_fields)
        for tweet in stream.iter_lines():
            try:
                data = json.loads(tweet).get("data")
            except json.JSONDecodeError:
                # Non-JSON lines (e.g. keep-alives) are silently skipped
                continue
            if data:
                json.dump(data, data_file)
                data_file.write("\n")