Example #1
    def get_stream(self, TIMELINE_NAME, MAX_PAGES):
        USER = None

        KW = {  # For the Twitter API call
            'count': 200,
            'trim_user': 'true',
            'include_rts': 'true',
            'since_id': 1,
        }
        
        if TIMELINE_NAME == 'user':
            USER = sys.argv[3]
            KW['screen_name'] = USER
        if TIMELINE_NAME == 'home' and MAX_PAGES > 4:
            MAX_PAGES = 4
        if TIMELINE_NAME == 'user' and MAX_PAGES > 16:
            MAX_PAGES = 16
            
        t = login()

        client = MongoClient('localhost', 27017)

        db = client.test_database
        posts = db.tw_data  # Collection name
        posts.drop()
        
        api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
        tweets = makeTwitterRequest(api_call, **KW)
        for tweet in tweets:
            if tweet['lang'] == 'en':
                #print tweet['text']
                post_id = posts.insert(tweet)
                #print '# post id'
                #print post_id
        #print 'Fetched %i tweets' % len(tweets)
        
        page_num = 1
        while page_num < MAX_PAGES and len(tweets) > 0:
        
            # Necessary for traversing the timeline in Twitter's v1.1 API.
            # See https://dev.twitter.com/docs/working-with-timelines
            KW['max_id'] = getNextQueryMaxIdParam(tweets)
        
            api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
            tweets = makeTwitterRequest(api_call, **KW)
            #print json.dumps(tweets,indent = 3)
            for tweet in tweets:
                if tweet['lang'] == 'en':
                    #print tweet['text']
                    post_id = posts.insert(tweet)
                    #print '# post id'
                    #print post_id
                
            #print 'Fetched %i tweets' % len(tweets)
            page_num += 1
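These harvesters page backwards through a timeline by passing a max_id computed by getNextQueryMaxIdParam, which is imported from twitter__util but never shown in the snippets. Per the timeline docs linked in the comments, the next page should request ids strictly smaller than the oldest tweet already fetched; a minimal sketch of such a helper (only the name comes from the snippets, the body is an assumption):

def getNextQueryMaxIdParam(tweets):
    # Assumed implementation: the next request should return tweets older
    # than anything in this batch, so take the smallest id seen and
    # subtract 1 to exclude that tweet itself from the next page.
    return min([tweet['id'] for tweet in tweets]) - 1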
Example #2
def getFriendIds(screen_name=None, user_id=None, friends_limit=10000):

    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params["screen_name"] = screen_name
        else:
            params["user_id"] = user_id

        response = makeTwitterRequest(t, t.friends.ids, **params)

        ids.extend(response["ids"])
        cursor = response["next_cursor"]
        print >> sys.stderr, "Fetched %i ids for %s" % (len(ids), screen_name or user_id)
        if len(ids) >= friends_limit:
            break

    return ids
Example #3
def getFriendIds(screen_name=None, user_id=None, friends_limit=10000):

    ids = []
    cursor = -1
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id

        response = makeTwitterRequest(t, t.friends.ids, **params)

        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print >> sys.stderr, \
            'Fetched %i ids for %s' % (len(ids), screen_name or user_id)
        if len(ids) >= friends_limit:
            break

    return ids
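Every example routes its API calls through makeTwitterRequest, a retry wrapper imported from twitter__util but not shown here. Note that the snippets disagree on its signature: some pass the connection t as a first argument, others call it with the bound API method alone. A minimal sketch of the single-argument variant, assuming a simple sleep-and-retry on rate limiting and transient server errors (the body is an assumption):

import sys
import time
import twitter

def makeTwitterRequest(twitterFunction, max_errors=3, *args, **kwArgs):
    # Hypothetical retry loop: sleep through rate limits (429) and
    # transient server errors (502/503), re-raise anything else.
    error_count = 0
    while error_count < max_errors:
        try:
            return twitterFunction(*args, **kwArgs)
        except twitter.api.TwitterHTTPError, e:
            error_count += 1
            if e.e.code == 429:
                print >> sys.stderr, 'Rate limited; sleeping for 60 seconds'
                time.sleep(60)
            elif e.e.code in (502, 503):
                print >> sys.stderr, 'Error %i; retrying in 5 seconds' % e.e.code
                time.sleep(5)
            else:
                raise
    raise Exception('Giving up after %i consecutive errors' % max_errors)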
Example #4
def getFriends(screen_name=None, user_id=None, friends_limit=10000):
    
    assert screen_name is not None or user_id is not None
    
    ids = []
    cursor = -1
    
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
            
        response = makeTwitterRequest(t, t.friends.ids, **params)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print >> sys.stderr, \
            'Fetched %i ids for %s ' % (len(ids), screen_name or user_id)
            
        if len(ids) >= friends_limit:
            break
    return ids
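Examples #2 through #4 are near-identical forks of the same cursoring pattern: start at cursor=-1 and follow next_cursor until the API returns 0. All three read the connection t from an enclosing scope rather than taking it as a parameter, so a hypothetical call looks like this (the screen name is a placeholder):

t = login()  # assumes the same login() helper used by the other examples
friend_ids = getFriends(screen_name='some_user', friends_limit=2000)
print 'Collected %i friend ids' % len(friend_ids)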
Example #5
#     def idMapper(doc):
#         yield (None, doc['id'])
# 
# 
#     def maxFindingReducer(keys, values, rereduce):
#         return max(values)
# 
# 
#     view = ViewDefinition('index', 'max_tweet_id', idMapper, maxFindingReducer,
#                           language='python')
#     view.sync(db)
# 
#     KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)

api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
tweets = makeTwitterRequest(api_call, **KW)
#print json.dumps(tweets,indent=3)
#db.update(tweets, all_or_nothing=True)
post_id = posts.insert(tweets)
print '# post id'
print post_id
print 'Fetched %i tweets' % len(tweets)

page_num = 1
while page_num < MAX_PAGES and len(tweets) > 0:

    # Necessary for traversing the timeline in Twitter's v1.1 API.
    # See https://dev.twitter.com/docs/working-with-timelines
    KW['max_id'] = getNextQueryMaxIdParam(tweets)

    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(api_call, **KW)
    post_id = posts.insert(tweets)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
Example #6
min_conversation_id = min([int(i[1]) for i in conversation if i[1] is not None])
max_conversation_id = max([int(i[1]) for i in conversation if i[1] is not None])

# Pull tweets from other user using user timeline API to minimize API expenses...

t = login()

reply_tweets = []
results = []
page = 1
while True:
    results = makeTwitterRequest(t.statuses.user_timeline,
        count=200,
        # Per <http://dev.twitter.com/doc/get/statuses/user_timeline>, some
        # caveats apply with the oldest id you can fetch using "since_id"
        since_id=min_conversation_id,
        max_id=max_conversation_id,
        skip_users='true',
        screen_name=USER,
        page=page)
    reply_tweets += results
    page += 1
    if len(results) == 0: 
        break

# During testing, it was observed that some tweets may not resolve or possibly
# even come back with null id values -- possibly a temporary fluke. Workaround.
missing_tweets = []
for (doc_id, in_reply_to_id) in conversation:
    try:
        print [rt for rt in reply_tweets if rt['id'] == in_reply_to_id][0]['text']
    except IndexError:
        # The tweet didn't resolve (see the comment above); remember it.
        missing_tweets.append(in_reply_to_id)
Example #7
import sys
import twitter
import couchdb
from couchdb.design import ViewDefinition
from twitter__util import makeTwitterRequest

SEARCH_TERM = sys.argv[1]
MAX_PAGES = 15

KW = {"domain": "search.twitter.com", "count": 200, "rpp": 100, "q": SEARCH_TERM}

server = couchdb.Server("http://*****:*****@localhost:5984")
DB = "search-%s" % (SEARCH_TERM.lower().replace(" ", ""),)

try:
    db = server.create(DB)
except couchdb.http.PreconditionFailed, e:

    # already exists, so append to it, and be mindful of duplicates

    db = server[DB]

t = twitter.Twitter(domain="search.twitter.com")

for page in range(1, MAX_PAGES + 1):
    KW["page"] = page
    tweets = makeTwitterRequest(t, t.search, **KW)
    db.update(tweets["results"], all_or_nothing=True)
    if len(tweets["results"]) == 0:
        break
    print "Fetched %i tweets" % len(tweets["results"])
Example #8
# See http://code.google.com/p/twitter-api/issues/detail?id=214
#############################################

t = login()

r = redis.Redis()

count = 0
for screen_name in screen_names:

    ids = []
    cursor = -1
    while cursor != 0:
        try:
            response = makeTwitterRequest(t.friends.ids,
                                          screen_name=screen_name,
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids),
                                                            screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, "404 Error with screen_name '%s'. Continuing." % screen_name
                break

    # Store the ids into Redis

    [
        r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id)
        for _id in ids
    ]
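getRedisIdByScreenName is imported from twitter__util but not shown; it only needs to build a per-user, per-collection Redis key. A plausible sketch (the key format is an assumption), followed by reading the set back with standard redis-py calls:

def getRedisIdByScreenName(screen_name, key_name):
    # Assumed key format: namespace keys by user and collection, e.g.
    # 'screen_name$some_user$friend_ids'.
    return 'screen_name$' + screen_name + '$' + key_name

# Reading the stored ids back out of the Redis set:
friend_ids = r.smembers(getRedisIdByScreenName('some_user', 'friend_ids'))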
Example #9
    def idMapper(doc):
        yield (None, doc['id'])

    def maxFindingReducer(keys, values, rereduce):
        return max(values)

    view = ViewDefinition('index',
                          'max_tweet_id',
                          idMapper,
                          maxFindingReducer,
                          language='python')
    view.sync(db)
    KW['since_id'] = int([_id
                          for _id in db.view('index/max_tweet_id')][0].value)

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limit is about 800 statuses,
# although other documentation may state otherwise. The public timeline only
# returns 20 statuses and is updated every 60 seconds.
# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
# Note that the count and since_id params have no effect for the public timeline.

page_num = 1
while page_num <= MAX_PAGES:
    KW['page'] = page_num
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
try:
    db = server.create(DB)
except couchdb.http.PreconditionFailed, e:
    db = server[DB]

t = twitter.Twitter(domain='search.twitter.com')

# Set up logging
logger = logging.getLogger('trafico_chile_service')
fl = logging.FileHandler(LOGGER_OUT)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
fl.setFormatter(formatter)
logger.addHandler(fl)
logger.setLevel(logging.DEBUG)

print 'Starting fetch service; you can stop it with Ctrl-C'
while True:
    actual_time = time.localtime(time.time())
    print 'Starting fetch of tweets at %d-%d-%d %d:%d' % (
        actual_time[0], actual_time[1], actual_time[2],
        actual_time[3], actual_time[4])
    total_fetched = 0
    tweets_with_ids = []
    for page in range(1, 16):
        KW['page'] = page
        tweets = makeTwitterRequest(t, t.search, **KW)
        for tweet in tweets['results']:
            tweet['_id'] = tweet['id_str']
        db.update(tweets['results'], all_or_nothing=True)
        print 'Fetched %i tweets' % len(tweets['results'])
        total_fetched += len(tweets['results'])
    print 'Fetched %d tweets, now waiting 1 minute' % total_fetched
    logger.info('Fetched %d tweets, no problem' % total_fetched)
    time.sleep(SLEEP_TIME)
# See http://code.google.com/p/twitter-api/issues/detail?id=214
#############################################

t = login()

r = redis.Redis()

count = 0
for screen_name in screen_names:

    ids = []
    cursor = -1
    while cursor != 0:
        try:
            response = makeTwitterRequest(t.friends.ids, 
                                          screen_name=screen_name, 
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids), screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, "404 Error with screen_name '%s'. Continuing." % screen_name
                break

    # Store the ids into Redis

    [r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id) for _id in
     ids]

    count += 1
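Once the sets are populated, Redis set primitives make follow-up queries cheap; for instance, counting a user's friends or intersecting two users' friend sets takes one call each (a sketch using standard redis-py operations; the screen names are placeholders):

print r.scard(getRedisIdByScreenName('user_a', 'friend_ids'))

# Friend ids that user_a and user_b have in common:
common = r.sinter(getRedisIdByScreenName('user_a', 'friend_ids'),
                  getRedisIdByScreenName('user_b', 'friend_ids'))
print 'They share %i friends' % len(common)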