def main(keyword):
    """Collect tweets matching *keyword* and store those with media in MongoDB.

    Repeatedly calls ``get_tweets_by_keyword``, following the ``max_id``
    embedded in each response's ``search_metadata['next_results']`` until no
    further page is advertised.  Every status whose ``entities`` contain a
    ``media`` key is inserted into the collection
    ``collection_name + '_' + keyword`` of database ``db_name`` on the
    MongoDB server at localhost:27017.

    Args:
        keyword: Search term; also used as the suffix of the target
            collection name, so it must be a string.
    """
    db_client = MongoClient('localhost', 27017)
    # The target collection does not change between pages; look it up once.
    collection = db_client[db_name][collection_name + '_' + keyword]

    max_id = None
    while True:
        api = tw.get_api_client()
        tweets = get_tweets_by_keyword(api, keyword, max_id)
        if not tweets:
            # Nothing came back: stop rather than re-issue the same request
            # forever.
            break

        # Store this page BEFORE looking for the next one, so the final page
        # (the one without 'next_results') is not thrown away.
        # NOTE(review): assumes the response is dict-like -- confirm against
        # get_tweets_by_keyword.
        for status in tweets.get('statuses', []):
            if 'media' in status['entities']:
                collection.insert(status)

        # 'next_results' is a URL query string; pull out its max_id value to
        # pass to the following request.  A missing key means no more pages.
        next_results = tweets.get('search_metadata', {}).get('next_results')
        if not next_results or 'max_id=' not in next_results:
            break
        max_id = next_results.split('max_id=')[1].split('&')[0]
def main(users):
    """Download each user's tweets and store every one of them in MongoDB.

    For every entry in *users*, repeatedly calls ``get_tweets_by_user``,
    passing the id of the last tweet of the previous page as ``max_id``,
    until a request yields nothing new.  Tweets are inserted into the
    collection ``collection_name + '_' + user`` of database ``db_name`` on
    the MongoDB server at localhost:27017, and each inserted id is printed.

    Args:
        users: Iterable of user identifiers.  Each one is concatenated into
            the collection name, so they must be strings.
    """
    db_client = MongoClient('localhost', 27017)
    db = db_client[db_name]

    for user in users:
        collection = db[collection_name + '_' + user]
        max_id = None
        while True:
            api = tw.get_api_client()
            tweets = get_tweets_by_user(api, user, max_id)

            # Twitter's max_id is inclusive.  Unless get_tweets_by_user
            # compensates (not visible here -- TODO confirm), the boundary
            # tweet of the previous page is returned again.  Skip it so it is
            # not stored twice and so the loop can terminate.
            fresh = [tweet for tweet in tweets if tweet['id'] != max_id]
            if not fresh:
                break

            max_id = tweets[-1]['id']
            for tweet in fresh:
                collection.insert(tweet)
                # Same output as the Python 2 statement
                # "print 'Inserted', id", but also valid on Python 3.
                print('Inserted %s' % (tweet['id'],))