import sys
from pymongo import MongoClient

# login, makeTwitterRequest, and getNextQueryMaxIdParam are assumed to be the
# helpers imported from twitter__util elsewhere in this codebase.

def get_stream(self, TIMELINE_NAME, MAX_PAGES):
    USER = None
    KW = {  # Parameters for the Twitter API call
        'count': 200,
        'trim_user': 'true',  # value redacted in the source; 'true' assumed
        'include_rts': 'true',
        'since_id': 1,
    }
    if TIMELINE_NAME == 'user':
        USER = sys.argv[3]
        KW['screen_name'] = USER
    if TIMELINE_NAME == 'home' and MAX_PAGES > 4:
        MAX_PAGES = 4
    if TIMELINE_NAME == 'user' and MAX_PAGES > 16:
        MAX_PAGES = 16

    t = login()
    client = MongoClient('localhost', 27017)
    db = client.test_database
    posts = db.tw_data  # Collection name
    posts.drop()

    # Fetch the first page of the timeline and store the English tweets.
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(api_call, **KW)
    for tweet in tweets:
        if tweet['lang'] == 'en':
            post_id = posts.insert(tweet)

    page_num = 1
    while page_num < MAX_PAGES and len(tweets) > 0:
        # Necessary for traversing the timeline in Twitter's v1.1 API.
        # See https://dev.twitter.com/docs/working-with-timelines
        KW['max_id'] = getNextQueryMaxIdParam(tweets)
        api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
        tweets = makeTwitterRequest(api_call, **KW)
        for tweet in tweets:
            if tweet['lang'] == 'en':
                post_id = posts.insert(tweet)
        page_num += 1
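
# getNextQueryMaxIdParam is not defined in this file. A minimal sketch,
# assuming it returns one less than the smallest id in the batch so that the
# next request pages further back in the timeline, per the v1.1 timeline docs:
def getNextQueryMaxIdParam(tweets):
    return min([t['id'] for t in tweets]) - 1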
def getFriendIds(screen_name=None, user_id=None, friends_limit=10000):
    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
        response = makeTwitterRequest(t, t.friends.ids, **params)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print >> sys.stderr, \
            'Fetched %i ids for %s' % (len(ids), screen_name or user_id)
        if len(ids) >= friends_limit:
            break
    return ids
def getFriends(screen_name=None, user_id=None, friends_limit=10000):
    assert screen_name is not None or user_id is not None
    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
        response = makeTwitterRequest(t, t.friends.ids, **params)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print >> sys.stderr, \
            'Fetched %i ids for %s' % (len(ids), screen_name or user_id)
        if len(ids) >= friends_limit:
            break
    return ids
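
# A quick usage sketch ('example_user' is a placeholder screen name; t must
# already be bound to an authenticated twitter.Twitter instance as above):
if __name__ == '__main__':
    friend_ids = getFriendIds(screen_name='example_user', friends_limit=5000)
    print 'Retrieved %i friend ids' % len(friend_ids)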
# The CouchDB map/reduce view from the original example, kept for reference:
#
# def idMapper(doc):
#     yield (None, doc['id'])
#
# def maxFindingReducer(keys, values, rereduce):
#     return max(values)
#
# view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer,
#                       language='python')
# view.sync(db)
#
# KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)

api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
tweets = makeTwitterRequest(api_call, **KW)
post_id = posts.insert(tweets)
print '# post id'
print post_id
print 'Fetched %i tweets' % len(tweets)

page_num = 1
while page_num < MAX_PAGES and len(tweets) > 0:
    # Necessary for traversing the timeline in Twitter's v1.1 API.
    # See https://dev.twitter.com/docs/working-with-timelines
    KW['max_id'] = getNextQueryMaxIdParam(tweets)
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(api_call, **KW)
    post_id = posts.insert(tweets)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
min_conversation_id = min([int(i[1]) for i in conversation if i[1] is not None])
max_conversation_id = max([int(i[1]) for i in conversation if i[1] is not None])

# Pull tweets from the other user via the user timeline API to minimize API expenses...
t = login()
reply_tweets = []
results = []
page = 1
while True:
    results = makeTwitterRequest(t.statuses.user_timeline,
                                 count=200,
                                 # Per <http://dev.twitter.com/doc/get/statuses/user_timeline>,
                                 # some caveats apply to the oldest id you can
                                 # fetch using "since_id"
                                 since_id=min_conversation_id,
                                 max_id=max_conversation_id,
                                 skip_users='true',
                                 screen_name=USER,
                                 page=page)
    reply_tweets += results
    page += 1
    if len(results) == 0:
        break

# During testing, it was observed that some tweets may not resolve or may even
# come back with null id values -- possibly a temporary fluke. Work around it.
missing_tweets = []
for (doc_id, in_reply_to_id) in conversation:
    try:
        print [rt for rt in reply_tweets if rt['id'] == in_reply_to_id][0]['text']
    except IndexError:  # No matching tweet was fetched; track it as missing.
        missing_tweets.append((doc_id, in_reply_to_id))
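
# For reference: `conversation` above is assumed to be a list of
# (doc_id, in_reply_to_status_id) pairs, e.g. assembled from stored tweets:
#   conversation = [(tw['_id'], tw['in_reply_to_status_id'])
#                   for tw in stored_tweets]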
import sys
import twitter
import couchdb
from couchdb.design import ViewDefinition
from twitter__util import makeTwitterRequest

SEARCH_TERM = sys.argv[1]
MAX_PAGES = 15

KW = {
    'domain': 'search.twitter.com',
    'count': 200,
    'rpp': 100,
    'q': SEARCH_TERM,
}

# Credentials redacted in the source; the host is assumed to be a local
# CouchDB instance. The DB-name normalization is likewise assumed.
server = couchdb.Server('http://*****:*****@localhost:5984')
DB = 'search-%s' % (SEARCH_TERM.lower().replace('@', ''),)

try:
    db = server.create(DB)
except couchdb.http.PreconditionFailed, e:
    # The database already exists, so append to it, and be mindful of duplicates.
    db = server[DB]

t = twitter.Twitter(domain='search.twitter.com')

for page in range(1, MAX_PAGES + 1):
    KW['page'] = page
    tweets = makeTwitterRequest(t, t.search, **KW)
    db.update(tweets['results'], all_or_nothing=True)
    if len(tweets['results']) == 0:
        break
    print 'Fetched %i tweets' % len(tweets['results'])
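
# Example invocation (the script name is hypothetical):
#   $ python search_to_couchdb.py 'open source'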
def idMapper(doc):
    yield (None, doc['id'])

def maxFindingReducer(keys, values, rereduce):
    return max(values)

view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer,
                      language='python')
view.sync(db)

KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limit is about 800 statuses,
# although other documentation may state otherwise. The public timeline only
# returns 20 statuses and gets updated every 60 seconds.
# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
# Note that the count and since_id params have no effect for the public timeline.
page_num = 1
while page_num <= MAX_PAGES:
    KW['page'] = page_num
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
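
# The snippets in this file call makeTwitterRequest with two signatures,
# makeTwitterRequest(api_call, **kw) and makeTwitterRequest(t, api_call, **kw);
# the real implementation is imported from twitter__util. A minimal sketch of
# the latter form, assuming its job is retry-with-backoff on HTTP errors:
import time
import twitter

def makeTwitterRequest(t, twitterFunction, max_errors=3, *args, **kwArgs):
    wait_period = 2
    error_count = 0
    while True:
        try:
            return twitterFunction(*args, **kwArgs)
        except twitter.api.TwitterHTTPError, e:
            error_count += 1
            if error_count > max_errors:
                raise
            time.sleep(wait_period)  # back off and retry
            wait_period *= 2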
try:
    db = server.create(DB)
except couchdb.http.PreconditionFailed, e:
    # The database already exists, so append to it.
    db = server[DB]

t = twitter.Twitter(domain='search.twitter.com')

# Set up the logger.
logger = logging.getLogger('trafico_chile_service')
fl = logging.FileHandler(LOGGER_OUT)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
fl.setFormatter(formatter)
logger.addHandler(fl)
logger.setLevel(logging.DEBUG)

print 'Starting fetching service; you can stop it with ctrl-c'

while True:
    print 'Starting fetch of tweets at %s' % time.strftime('%Y-%m-%d %H:%M')
    total_fetched = 0
    for page in range(1, 16):
        KW['page'] = page
        tweets = makeTwitterRequest(t, t.search, **KW)
        for tweet in tweets['results']:
            # Use the tweet's own id as the CouchDB document id to avoid duplicates.
            tweet['_id'] = tweet['id_str']
        db.update(tweets['results'], all_or_nothing=True)
        print 'Fetched %i tweets' % len(tweets['results'])
        total_fetched += len(tweets['results'])
    print 'Fetched %d tweets, now waiting 1 minute' % total_fetched
    logger.info('Fetched %d tweets, no problem' % total_fetched)
    time.sleep(SLEEP_TIME)
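
# The banner above promises a ctrl-c stop; a minimal sketch of the graceful
# version, assuming the body of the polling loop is factored into a function
# (fetch_once here is hypothetical, standing in for one polling pass):
import time

def fetch_once():
    pass  # one pass of the fetch loop above

try:
    while True:
        fetch_once()
        time.sleep(60)  # SLEEP_TIME in the service above
except KeyboardInterrupt:
    print 'Service stopped by user'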
# See http://code.google.com/p/twitter-api/issues/detail?id=214
#############################################

t = login()
r = redis.Redis()

count = 0
for screen_name in screen_names:
    ids = []
    cursor = -1
    while cursor != 0:
        try:
            response = makeTwitterRequest(t.friends.ids,
                                          screen_name=screen_name,
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids), screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, \
                    "404 Error with screen_name '%s'. Continuing." % screen_name
                break
            raise  # Re-raise anything else so the loop cannot spin forever.

    # Store the ids in a Redis set keyed by screen name.
    for _id in ids:
        r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id)
    count += 1
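
# getRedisIdByScreenName is not defined in this file; a minimal sketch,
# assuming it just namespaces Redis keys per user and data type:
def getRedisIdByScreenName(screen_name, key_name):
    return 'screen_name$' + screen_name + '$' + key_name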