def search_tweets(t, r, keywords, level="country"):
    # redisId = tu.getRedisIdByScreenName(keywords, 'index')
    # Country-level searches are keyed by the keywords alone; other levels
    # get a "<level> <keywords>" Redis key
    if level == "country":
        index_name = keywords
    else:
        index_name = "%s %s" % (level, keywords)
    redisTweetId = tu.getRedisIdByScreenName(index_name, 'search')

    params = {"q": keywords, "count": 100}
    search_results = tu.makeTwitterRequest(t.search.tweets, **params)
    tweets = search_results['statuses']

    # Page through the remaining results via the next_results cursor
    for i in range(MAX_PAGES - 1):
        print "page %d" % (i + 1)
        next_results = search_results['search_metadata'].get('next_results')
        if next_results is None:
            break
        # next_results is a query string like "?max_id=...&q=...": strip the
        # leading "?" and parse it into keyword arguments
        kwargs = dict([kv.split('=') for kv in next_results[1:].split('&')])
        kwargs['max_id'] = str(long(kwargs['max_id']) - 1)
        search_results = tu.makeTwitterRequest(t.search.tweets, **kwargs)
        tweets += search_results['statuses']
        if len(search_results['statuses']) == 0:
            break
        print "Fetched %d tweets so far" % len(tweets)

    for tweet in tweets:
        r.sadd(redisTweetId, tweet)
    return tweets
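# A minimal wiring sketch for search_tweets, assuming the sixohsix `twitter`
# package and a local Redis server. The MAX_PAGES constant (which the
# function reads at module level) and the OAuth placeholders are assumptions,
# not part of the listing above.
import redis
import twitter

MAX_PAGES = 5  # assumed module-level constant read by search_tweets

t = twitter.Twitter(auth=twitter.OAuth('TOKEN', 'TOKEN_SECRET',
                                       'CONSUMER_KEY', 'CONSUMER_SECRET'))
r = redis.Redis(host='localhost', port=6379, db=0)

tweets = search_tweets(t, r, 'python', level='country')
print 'Stored %d tweets' % len(tweets)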
def handle(self, *args, **options):
    # Get the configuration parameters
    c = config.Config()
    d = c.cfg

    # Load the pickled classifier (binary mode for pickle files)
    f = open('bayesclass.pickle', 'rb')
    classifier = pickle.load(f)
    f.close()

    t = login()
    KW = {
        'user': d.get('api', 'user'),
        'count': d.get('api', 'count'),
        'skip_users': d.get('api', 'skip_users'),
        'include_entities': 'true',
        'since_id': 1,
        'id': 2
    }

    # Resume from the most recent tweet id already stored in the database
    p = Tweet.objects.aggregate(Max('tweet_id'))
    latestId = p['tweet_id__max']
    if latestId is None:
        latestId = 1
    KW['since_id'] = int(latestId)

    api_call = getattr(t.statuses, 'user_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    print 'Fetched %i tweets' % len(tweets)

    # Classify each fetched tweet and persist it
    for tweet in tweets:
        txt = tweet['text']
        ref = tweet['id']
        src = tweet['source']
        outc = int(classifier.classify(word_feats(txt)))
        created = mysql_date(tweet['created_at'])
        q = Tweet(
            datetime=created,
            user=KW['user'],
            content=txt,
            source=src,
            tweet_id=ref,
            prop=outc
        )
        q.save()
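# The command above leans on word_feats and mysql_date helpers defined
# elsewhere in the project. A plausible sketch of both, following the usual
# NLTK bag-of-words convention; the project's real helpers may differ.
import email.utils
from datetime import datetime

def word_feats(text):
    # Bag-of-words feature dict, the shape NaiveBayesClassifier expects
    return dict((word, True) for word in text.split())

def mysql_date(created_at):
    # Twitter's created_at, e.g. "Wed Aug 27 13:08:45 +0000 2008",
    # reformatted as a MySQL DATETIME string
    ts = email.utils.parsedate_tz(created_at)
    return datetime(*ts[:6]).strftime('%Y-%m-%d %H:%M:%S')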
def getFriendIds(t, screen_name=None, user_id=None, friends_limit=10000):
    assert screen_name or user_id

    ids = []
    cursor = -1
    # Twitter signals the last page of ids with next_cursor == 0
    while cursor != 0:
        params = dict(cursor=cursor)
        if screen_name is not None:
            params['screen_name'] = screen_name
        else:
            params['user_id'] = user_id
        response = makeTwitterRequest(t, t.friends.ids, **params)
        ids.extend(response['ids'])
        cursor = response['next_cursor']
        print >> sys.stderr, 'Fetched %d ids for %s' % (len(ids),
                                                        screen_name or user_id)
        if len(ids) >= friends_limit:
            break
    return ids
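# Usage sketch, assuming an authenticated connection t as in the earlier
# listings; the screen name and limit here are illustrative.
import sys

friend_ids = getFriendIds(t, screen_name='timoreilly', friends_limit=2000)
print >> sys.stderr, 'Collected %d friend ids' % len(friend_ids)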
def handle(self, *args, **options):
    n = 10  # Number of training tweets
    SEARCH_TERM = ':)'
    LANGUAGE = 'en'
    INCLUDE_ENTITIES = 'true'
    KW = {
        'rpp': 100,
        'q': SEARCH_TERM,
        'lang': LANGUAGE,
        'include_entities': INCLUDE_ENTITIES
    }
    t = twitter.Twitter(domain='search.twitter.com')

    # Tweets containing ':)' serve as positively labeled training examples:
    # issue one search and label the first n results
    tweets = makeTwitterRequest(t, t.search, **KW)
    posfeats = []
    for result in tweets['results'][:n]:
        itemb = extractwords(result['text'])
        posfeats.append((word_feats(itemb), '1'))

    classifier = NaiveBayesClassifier.train(posfeats)
    f = open('bayesclass.pickle', 'wb')
    pickle.dump(classifier, f)
    f.close()
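# Round-trip sketch: load the pickled classifier back and score a sample
# tweet with the same feature pipeline used during training. The sample text
# is illustrative.
import pickle

f = open('bayesclass.pickle', 'rb')
classifier = pickle.load(f)
f.close()

feats = word_feats(extractwords('having a great day :)'))
print classifier.classify(feats)  # '1' for the positive class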
# home and user timelines. It has no effect for the public timeline.
def idMapper(doc):
    yield (None, doc['id'])

def maxFindingReducer(keys, values, rereduce):
    return max(values)

view = ViewDefinition('index', 'max_tweet_id', idMapper,
                      maxFindingReducer, language='python')
view.sync(db)

KW['since_id'] = int([_id for _id in db.view('index/max_tweet_id')][0].value)

# Harvest tweets for the given timeline.
# For friend and home timelines, the unofficial limitation is about 800
# statuses, although other documentation may state otherwise. The public
# timeline only returns 20 statuses and gets updated every 60 seconds.
# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
# Note that the count and since_id params have no effect for the public timeline.
page_num = 1
while page_num <= MAX_PAGES:
    KW['page'] = page_num
    api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')
    tweets = makeTwitterRequest(t, api_call, **KW)
    db.update(tweets, all_or_nothing=True)
    print 'Fetched %i tweets' % len(tweets)
    page_num += 1
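# Every listing above delegates to a makeTwitterRequest helper. A minimal
# sketch of what such a wrapper typically does (sleep and retry when the API
# rate-limits or fails transiently); the retry policy here is an assumption,
# not the project's actual implementation, and the t argument is accepted
# only to match the call sites above.
import sys
import time
from twitter.api import TwitterHTTPError

def makeTwitterRequest(t, api_call, max_errors=3, **kw):
    errors = 0
    while True:
        try:
            return api_call(**kw)
        except TwitterHTTPError, e:
            errors += 1
            if errors > max_errors:
                raise
            if e.e.code in (429, 503):  # rate limited / overloaded
                print >> sys.stderr, 'Rate limited; sleeping for 60 seconds'
                time.sleep(60)
            else:
                time.sleep(5)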