def test_since_id():
    """Check that searching with ``since_id`` only yields tweets with larger ids."""
    # Start from None instead of a local called ``id``: if the search yields
    # nothing, the builtin ``id`` would be truthy and the guard below would
    # pass by accident.
    first_id = None
    for tweet in twarc.search('obama'):
        first_id = tweet['id_str']
        break
    assert first_id

    # presumably gives newer tweets a chance to appear before searching again
    time.sleep(5)

    # Compare numerically; string comparison is lexicographic and misorders
    # ids that differ in length (e.g. '9' > '10').
    lower_bound = int(first_id)
    for tweet in twarc.search('obama', since_id=first_id):
        assert int(tweet['id_str']) > lower_bound
def test_max_id():
    """Check that searching with ``max_id`` only yields tweets with ids <= it."""
    # Start from None instead of a local called ``id``: if the search yields
    # nothing, the builtin ``id`` would be truthy and the guard below would
    # pass by accident.
    first_id = None
    for tweet in twarc.search('obama'):
        first_id = tweet['id_str']
        break
    assert first_id

    time.sleep(5)

    # Compare numerically; string comparison is lexicographic and misorders
    # ids that differ in length.
    upper_bound = int(first_id)
    for seen, tweet in enumerate(twarc.search('obama', max_id=first_id), 1):
        assert int(tweet['id_str']) <= upper_bound
        # a bit more than one result page is enough; stop after 101 tweets
        if seen > 100:
            break
def test_max_and_since_ids():
    """Check that ``max_id`` and ``since_id`` together bound the results.

    A first search collects the id of the first tweet returned (used as
    ``max_id``) and the id of the last tweet seen within 501 results (used
    as ``since_id``). A second search with both bounds must only yield
    tweets whose id is in ``(since_id, max_id]``.
    """
    max_id = since_id = None
    for count, tweet in enumerate(twarc.search('obama'), 1):
        if not max_id:
            max_id = tweet['id_str']
        since_id = tweet['id_str']
        if count > 500:
            break

    # An empty seed search would leave both bounds as None and make the
    # comparisons below meaningless, so fail clearly here instead.
    assert max_id and since_id

    # Compare numerically; string comparison is lexicographic and misorders
    # ids that differ in length.
    upper_bound = int(max_id)
    lower_bound = int(since_id)
    for tweet in twarc.search('obama', max_id=max_id, since_id=since_id):
        tweet_id = int(tweet['id_str'])
        assert tweet_id <= upper_bound
        assert tweet_id > lower_bound
def __init__(self, text, images_dir, dbname):
    """
    Initialize Epheme & then kick off the twarc search.

    text       -- search text passed to twarc.search; kept as self.hashtag
    images_dir -- stored as self.images_dir
    dbname     -- name of the MongoDB database to use; when None it is
                  derived from text with spaces replaced by underscores

    Every tweet returned by the search is passed to insert_tweet_into_db;
    tweets that raise DuplicateKeyError are reported and skipped. Once the
    search is exhausted, download_all_imgs is called.
    """
    # store tweet JSON in a db with same name as search text
    if dbname is None:
        dbname = text.replace(' ', '_')

    self.db = MongoClient()[dbname]
    self.images_dir = images_dir
    self.hashtag = text

    # Single-argument print(...) with %-formatting emits the same text as the
    # former multi-argument print statements and parses on Python 2 and 3.
    print('Beginning to search Twitter API for %s' % self.hashtag)

    # note: first run through is a special case, most_recent_id()
    # returns None & twarc will return a bunch of recent tweets
    # then start stepping backwards in time. It's limited by the
    # Search API in that it cannot go back that far, a couple of
    # weeks is the limit I think.
    for tweet in twarc.search(self.hashtag, since_id=self.most_recent_id()):
        try:
            self.insert_tweet_into_db(tweet)
        except errors.DuplicateKeyError:
            # tweet already in db
            print('Skipping duplicate Tweet %s' % tweet['id'])

    # done getting all the metadata? now grab the referenced images
    self.download_all_imgs()
    print('Done searching Twitter for %s & downloading images.' % self.hashtag)
def test_paging():
    """Check that a search can be iterated past a single page of results."""
    # pages are 100 tweets big so if we can get 500 paging is working
    wanted = 500
    seen = 0
    for seen, _tweet in enumerate(twarc.search('obama'), 1):
        if seen == wanted:
            break
    assert seen == wanted
def test_search():
    """Check that a basic search yields at least 10 tweets, each with an id_str."""
    wanted = 10
    seen = 0
    for seen, tweet in enumerate(twarc.search('obama'), 1):
        assert tweet['id_str']
        if seen == wanted:
            break
    assert seen == wanted
#!/usr/bin/env python
"""Print creation time, author screen name and text of tweets matching "hmd2014"."""

import twarc

for status in twarc.search("hmd2014"):
    # one line per tweet: <created_at> @<screen_name>: <text>
    print(
        status["created_at"]
        + " @" + status["user"]["screen_name"]
        + ": " + status["text"]
    )