def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    Read tweet IDs from a file and store the not-yet-known tweets.

    Every non-blank line of ``idlist_file`` is parsed as an integer tweet ID.
    IDs already present in ``database.Tweet`` are dropped, the remaining ones
    are requested page by page via ``rest.fetch_tweet_list`` and each returned
    tweet is handed to ``database.create_tweet_from_dict``.

    Prepare to wait quite a bit, depending on the size of the dataset.

    Args:
        idlist_file: Path to a text file containing one tweet ID per line.

    Raises:
        ValueError: If a non-blank line cannot be converted to ``int``.
    """
    ids_to_fetch = set()
    # A context manager guarantees the file handle is closed, even when a
    # malformed line raises while parsing.
    with open(idlist_file, "r") as id_file:
        for line in id_file:
            # Remove the newline character (and surrounding whitespace).
            stripped = line.strip()
            # Blank lines (e.g. a trailing empty line) carry no ID; skip them
            # instead of letting int("") raise.
            if not stripped:
                continue
            # Convert to int since that's what the database uses.
            ids_to_fetch.add(int(stripped))

    # Collect the IDs of tweets that we already have.
    ids_in_db = {t.id for t in database.Tweet.select(database.Tweet.id)}
    # Set difference keeps only the IDs present in the file but not in the
    # database.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progress bar sized to the number of IDs still to fetch.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        # Advance the bar by the size of the page before storing its tweets.
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
def hydrate(idlist_file="data/example_dataset_tweet_ids.txt"):
    """
    Read tweet IDs from a file and store the not-yet-known tweets.

    Every non-blank line of ``idlist_file`` is parsed as an integer tweet ID.
    IDs already present in ``database.Tweet`` are dropped, the remaining ones
    are requested page by page via ``rest.fetch_tweet_list`` and each returned
    tweet is handed to ``database.create_tweet_from_dict``.

    Prepare to wait quite a bit, depending on the size of the dataset.

    Args:
        idlist_file: Path to a text file containing one tweet ID per line.

    Raises:
        ValueError: If a non-blank line cannot be converted to ``int``.
    """
    ids_to_fetch = set()
    # A context manager guarantees the file handle is closed, even when a
    # malformed line raises while parsing.
    with open(idlist_file, "r") as id_file:
        for line in id_file:
            # Remove the newline character (and surrounding whitespace).
            stripped = line.strip()
            # Blank lines (e.g. a trailing empty line) carry no ID; skip them
            # instead of letting int("") raise.
            if not stripped:
                continue
            # Convert to int since that's what the database uses.
            ids_to_fetch.add(int(stripped))

    # Collect the IDs of tweets that we already have.
    ids_in_db = {t.id for t in database.Tweet.select(database.Tweet.id)}
    # Set difference keeps only the IDs present in the file but not in the
    # database.
    ids_to_fetch = ids_to_fetch.difference(ids_in_db)
    logging.warning(
        "\nLoaded a list of {0} tweet IDs to hydrate".format(len(ids_to_fetch)))

    # Set up a progress bar sized to the number of IDs still to fetch.
    bar = Bar('Fetching tweets', max=len(ids_to_fetch), suffix='%(eta)ds')
    for page in rest.fetch_tweet_list(ids_to_fetch):
        # Advance the bar by the size of the page before storing its tweets.
        bar.next(len(page))
        for tweet in page:
            database.create_tweet_from_dict(tweet)
    bar.finish()
    logging.warning("Done hydrating!")
def print_list_of_tweets():
    """
    Request three hard-coded tweets by ID and print each one.

    The IDs are passed to ``rest.fetch_tweet_list``; every tweet of every
    page it produces is handed to ``print_tweet``. The same loop can be
    adapted to write the tweets to a file instead of printing them.
    """
    tweet_ids = [
        62154131600224256,
        662025716746354688,
        661931648171302912,
    ]
    # Results arrive in pages, so walk each page and print its tweets in order.
    for page in rest.fetch_tweet_list(tweet_ids):
        for tweet in page:
            print_tweet(tweet)
def print_list_of_tweets():
    """
    Request three hard-coded tweets by ID and print each one.

    The IDs are passed to ``rest.fetch_tweet_list``; every tweet of every
    page it produces is handed to ``print_tweet``. The same loop can be
    adapted to write the tweets to a file instead of printing them.
    """
    tweet_ids = [
        62154131600224256,
        662025716746354688,
        661931648171302912,
    ]
    # Results arrive in pages, so walk each page and print its tweets in order.
    for page in rest.fetch_tweet_list(tweet_ids):
        for tweet in page:
            print_tweet(tweet)