Code example #1
File: network.py  Project: elferrus7/QMBlogging
    def get_stream(self, TIMELINE_NAME, MAX_PAGES):
        USER = None

        KW = {  # Keyword arguments for the Twitter API call
            'count': 200,
            'trim_user': '******',  # value redacted in the source listing
            'include_rts': 'true',
            'since_id': 1,
        }
        
        if TIMELINE_NAME == 'user':
            USER = sys.argv[3]
            KW['screen_name'] = USER
        # Twitter's v1.1 API caps the home timeline at roughly 800 tweets
        # (4 pages of 200) and the user timeline at roughly 3,200 (16 pages).
        if TIMELINE_NAME == 'home' and MAX_PAGES > 4:
            MAX_PAGES = 4
        if TIMELINE_NAME == 'user' and MAX_PAGES > 16:
            MAX_PAGES = 16
            
        t = login()

        # Store the harvested tweets in a local MongoDB instance, dropping
        # whatever a previous run left in the collection.
        client = MongoClient('localhost', 27017)
        db = client.test_database
        posts = db.tw_data  # collection name
        posts.drop()
        
        # Resolve e.g. t.statuses.home_timeline dynamically, then fetch the
        # first page and keep only the English-language tweets.
        api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
        tweets = makeTwitterRequest(api_call, **KW)
        for tweet in tweets:
            if tweet.get('lang') == 'en':
                post_id = posts.insert(tweet)
        
        page_num = 1
        while page_num < MAX_PAGES and len(tweets) > 0:

            # Necessary for traversing the timeline in Twitter's v1.1 API.
            # See https://dev.twitter.com/docs/working-with-timelines
            KW['max_id'] = getNextQueryMaxIdParam(tweets)

            api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
            tweets = makeTwitterRequest(api_call, **KW)
            for tweet in tweets:
                if tweet.get('lang') == 'en':
                    post_id = posts.insert(tweet)

            page_num += 1
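
The getNextQueryMaxIdParam helper is not shown in this listing. Here is a minimal sketch of what it presumably does, following the cursoring rule in Twitter's working-with-timelines documentation (request the next page starting just below the oldest id already fetched); the actual helper in the project may differ:

def getNextQueryMaxIdParam(tweets):
    # The next page should begin one below the smallest (oldest) tweet id
    # in the batch just fetched, so no tweet is returned twice.
    return min(tweet['id'] for tweet in tweets) - 1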
Code example #2
File: twitter_steam.py  Project: elferrus7/QMBlogging
# Optionally resume where a previous harvest left off by reading the
# maximum stored tweet id back out of CouchDB (see the sketch after
# example #3):
#     view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer,
#                           language='python')
#     view.sync(db)
# 
#     KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)

api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
tweets = makeTwitterRequest(api_call, **KW)
post_id = posts.insert(tweets)  # bulk-insert the whole page into MongoDB
print '# post id'
print post_id
print 'Fetched %i tweets' % len(tweets)

page_num = 1
while page_num < MAX_PAGES and len(tweets) > 0:

    # Necessary for traversing the timeline in Twitter's v1.1 API.
    # See https://dev.twitter.com/docs/working-with-timelines
    KW['max_id'] = getNextQueryMaxIdParam(tweets)

    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(api_call, **KW)
    print json.dumps(tweets, indent=3)
    posts.insert(tweets)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
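
All of these examples depend on a makeTwitterRequest wrapper. Below is a minimal sketch matching the two-argument form used in examples #1 and #2, assuming the twitter package and a simple back-off on rate-limit errors; example #3 passes the connection t as an extra first argument, so its version of the helper evidently differs:

import time
from twitter.api import TwitterHTTPError

def makeTwitterRequest(api_call, max_errors=3, **kw):
    # Invoke the bound API method, retrying a few times when Twitter
    # signals rate limiting (HTTP 429) or a transient server error.
    errors = 0
    while True:
        try:
            return api_call(**kw)
        except TwitterHTTPError as e:
            errors += 1
            if errors > max_errors:
                raise
            time.sleep(60 if e.e.code == 429 else 3)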
Code example #3
    # Resume harvesting above the highest tweet id already stored.
    view.sync(db)

    KW['since_id'] = int([_id
                          for _id in db.view('index/max_tweet_id')][0].value)

api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
tweets = makeTwitterRequest(t, api_call, **KW)
db.update(tweets, all_or_nothing=True)
print 'Fetched %i tweets' % len(tweets)

page_num = 1
while page_num < MAX_PAGES and len(tweets) > 0:

    # Necessary for traversing the timeline in Twitter's v1.1 API.
    # See https://dev.twitter.com/docs/working-with-timelines
    KW['max_id'] = getNextQueryMaxIdParam(tweets)

    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
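
Example #3 (and the commented-out block in example #2) reads the highest stored tweet id back out of CouchDB through an index/max_tweet_id view. A minimal sketch of the idMapper/maxFindingReducer pair those lines reference, assuming the couchdb package's Python view server:

from couchdb.design import ViewDefinition

def idMapper(doc):
    # Emit every stored tweet's id under a single key so the reducer
    # can fold the whole collection down to one maximum.
    yield (None, doc['id'])

def maxFindingReducer(keys, values, rereduce):
    # max() works for both the reduce and rereduce passes.
    return max(values)

view = ViewDefinition('index', 'max_tweet_id', idMapper,
                      maxFindingReducer, language='python')
view.sync(db)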

# <markdowncell>

# Example 5-4. Extracting entities from tweets and performing simple frequency analysis (the_tweet__count_entities_in_tweets.py)

# <codecell>

# Note: The Twitter v1.1 API includes tweet entities by default, so the use of the
# twitter_text package for parsing out tweet entities in this chapter is no longer