def get_stream(self, TIMELINE_NAME, MAX_PAGES):
    """Harvest up to MAX_PAGES pages of a Twitter timeline into MongoDB.

    TIMELINE_NAME -- 'user' or 'home'; selects t.statuses.<name>_timeline
    MAX_PAGES     -- requested page count; clamped to Twitter v1.1 limits

    Only English-language tweets are stored.  The target collection
    (test_database.tw_data) is dropped first, so every run starts clean.
    """
    USER = None
    KW = {  # Keyword args for the Twitter API call
        'count': 200,
        'trim_user': 'true',  # fixed: was a redacted placeholder ('******'); API expects 'true'/'false'
        'include_rts': 'true',
        'since_id': 1,
    }
    if TIMELINE_NAME == 'user':
        # NOTE(review): assumes the screen name is the 3rd CLI argument -- confirm caller contract
        USER = sys.argv[3]
        KW['screen_name'] = USER
    # Twitter v1.1 caps: home timeline ~800 tweets (4 pages of 200),
    # user timeline ~3200 tweets (16 pages of 200).
    if TIMELINE_NAME == 'home' and MAX_PAGES > 4:
        MAX_PAGES = 4
    if TIMELINE_NAME == 'user' and MAX_PAGES > 16:
        MAX_PAGES = 16

    t = login()
    client = MongoClient('localhost', 27017)
    db = client.test_database
    posts = db.tw_data  # Collection name
    posts.drop()        # start from an empty collection each run

    # First page.
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(api_call, **KW)
    self._store_english_tweets(posts, tweets)

    # Remaining pages.
    page_num = 1
    while page_num < MAX_PAGES and len(tweets) > 0:
        # Necessary for traversing the timeline in Twitter's v1.1 API.
        # See https://dev.twitter.com/docs/working-with-timelines
        KW['max_id'] = getNextQueryMaxIdParam(tweets)
        api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
        tweets = makeTwitterRequest(api_call, **KW)
        self._store_english_tweets(posts, tweets)
        page_num += 1

def _store_english_tweets(self, posts, tweets):
    """Insert each English-language tweet into the given collection.

    Uses .get() so tweets without a 'lang' field are skipped instead of
    raising KeyError.
    """
    for tweet in tweets:
        if tweet.get('lang') == 'en':
            posts.insert(tweet)
# # view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer, # language='python') # view.sync(db) # # KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value) api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline') tweets = makeTwitterRequest(api_call, **KW) #print json.dumps(tweets,indent=3) #db.update(tweets, all_or_nothing=True) post_id = posts.insert(tweets) print '# post id' print post_id print 'Fetched %i tweets' % len(tweets) page_num = 1 while page_num < MAX_PAGES and len(tweets) > 0: # Necessary for traversing the timeline in Twitter's v1.1 API. # See https://dev.twitter.com/docs/working-with-timelines KW['max_id'] = getNextQueryMaxIdParam(tweets) api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline') tweets = makeTwitterRequest(api_call, **KW) #db.update(tweets, all_or_nothing=True) print json.dumps(tweets,indent = 3) posts.insert(tweets) print 'Fetched %i tweets' % len(tweets) page_num += 1
# NOTE(review): CouchDB-flavored variant of the same harvesting loop
# (db.update / ViewDefinition).  `view` is never defined in this chunk, so
# view.sync(db) would raise NameError as written -- the ViewDefinition
# construction presumably lives elsewhere (a commented-out copy appears above).
view.sync(db)
# Resume harvesting from the newest tweet already stored, as reported by the
# max_tweet_id CouchDB view.
KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)
api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
# NOTE(review): here makeTwitterRequest takes `t` as a leading argument, unlike
# the two-argument form used earlier in this file -- confirm which signature is
# current before reviving this code path.
tweets = makeTwitterRequest(t, api_call, **KW)
db.update(tweets, all_or_nothing=True)  # CouchDB bulk-document insert
print 'Fetched %i tweets' % len(tweets)
# Page through the remainder of the timeline.
page_num = 1
while page_num < MAX_PAGES and len(tweets) > 0:
    # Necessary for traversing the timeline in Twitter's v1.1 API.
    # See https://dev.twitter.com/docs/working-with-timelines
    KW['max_id'] = getNextQueryMaxIdParam(tweets)
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1

# <markdowncell>

# Example 5-4. Extracting entities from tweets and performing simple frequency analysis (the_tweet__count_entities_in_tweets.py)

# <codecell>

# Note: The Twitter v1.1 API includes tweet entities by default, so the use of the
# twitter_text package for parsing out tweet entities in this chapter is no longer