class DataCollector(object):
    '''Collects follower lists level by level and stores them via TweepModel.

    NOTE(review): this file defines ``DataCollector`` more than once; the
    later definition is the one that is bound at import time.
    '''

    def __init__(self, user_id):
        self.user_id = user_id
        self.tm = TweepModel()
        self.tl = TwitterLib()

    @staticmethod
    def _latest_history_stamp(followers_history):
        '''Return the newest timestamp key of ``followers_history`` as an int.

        The keys are converted with ``int()`` and the maximum is taken, so the
        result does not depend on the mapping's key order (``keys()[-1]`` on a
        plain dict is an arbitrary key, and string keys would not compare
        numerically).  Returns ``None`` when the history is empty.
        '''
        if not followers_history:
            return None
        return max(int(stamp) for stamp in followers_history)

    def _get_followers_helper(self, id, level, skip_new=True):
        '''Return the followers of user ``id``, refreshing stored data if stale.

        When ``skip_new`` is true and a stored row exists whose newest
        ``followers_history`` stamp is younger than ``STALE_AGE`` seconds, the
        stored ``row['followers']`` is returned without calling the API.
        Otherwise the followers and user info are fetched through
        ``self.tl``, saved through ``self.tm.save_tweep`` and the saved
        ``data['followers']`` is returned.

        Exceptions raised by the model or the Twitter library propagate to the
        caller.
        '''
        # Skip requests that have been made too recently
        if skip_new:
            row = self.tm.fetch_one(id)
            if row:
                last_updated_stamp = self._latest_history_stamp(
                    row['followers_history'])
                # A row without any history has no known age: treat it as
                # stale and fall through to a fresh fetch.
                if last_updated_stamp is not None:
                    # NOTE(review): fromtimestamp() yields local time while
                    # utcnow() is UTC, so on a non-UTC host the staleness may
                    # be off by the UTC offset depending on how the stamp was
                    # written - confirm against save_tweep before changing.
                    last_updated = datetime.datetime.fromtimestamp(
                        last_updated_stamp)
                    staleness = datetime.datetime.utcnow() - last_updated
                    if staleness.total_seconds() < STALE_AGE:
                        logger.info(
                            "Skipping user %s - last updated %s ago (not stale enough yet)"
                            % (id, staleness))
                        return row['followers']

        # Make the actual API requests
        followers = self.tl.get_followers(user_id=id)
        user_data = self.tl.get_user_info([id])[0]
        followers_count = user_data['followers_count']
        following_count = user_data['friends_count']

        # Save the data
        data = self.tm.save_tweep(id, level, followers, followers_count,
                                  following_count)
        return data['followers']

    def _update_follower_tree(self, level, followers):
        '''Recursive breadth-first function for getting all followers.

        Processes every id in ``followers`` at ``level``, gathers the union of
        their followers and recurses with ``level + 1`` until ``level``
        exceeds ``MAX_DEPTH_LEVEL``.  A failure for a single user is logged
        and that user is skipped, so one bad id does not abort the level.
        '''
        if level > MAX_DEPTH_LEVEL:
            return

        logger.info("Updating %s followers at level %s..."
                    % (len(followers), level))

        next_level_followers = set()
        for follower_id in followers:
            try:
                their_followers = self._get_followers_helper(follower_id,
                                                             level)
                next_level_followers.update(their_followers)
            except Exception as e:
                # Deliberate best-effort: log and move on to the next user.
                logger.warning(
                    "Exception caught trying to get followers for %s - gave up: %s"
                    % (follower_id, e))

        logger.info("Finished updating all followers at level %s" % level)
        self._update_follower_tree(level + 1, list(next_level_followers))
class DataCollector(object):
    '''Collects follower lists level by level and stores them via TweepModel.

    NOTE(review): this file defines ``DataCollector`` more than once; the
    later definition is the one that is bound at import time.
    '''

    def __init__(self, user_id):
        self.user_id = user_id
        self.tm = TweepModel()
        self.tl = TwitterLib()

    @staticmethod
    def _latest_history_stamp(followers_history):
        '''Return the newest timestamp key of ``followers_history`` as an int.

        The keys are converted with ``int()`` and the maximum is taken, so the
        result does not depend on the mapping's key order (``keys()[-1]`` on a
        plain dict is an arbitrary key, and string keys would not compare
        numerically).  Returns ``None`` when the history is empty.
        '''
        if not followers_history:
            return None
        return max(int(stamp) for stamp in followers_history)

    def _get_followers_helper(self, id, level, skip_new=True):
        '''Return the followers of user ``id``, refreshing stored data if stale.

        When ``skip_new`` is true and a stored row exists whose newest
        ``followers_history`` stamp is younger than ``STALE_AGE`` seconds, the
        stored ``row['followers']`` is returned without calling the API.
        Otherwise the followers and user info are fetched through
        ``self.tl``, saved through ``self.tm.save_tweep`` and the saved
        ``data['followers']`` is returned.

        Exceptions raised by the model or the Twitter library propagate to the
        caller.
        '''
        # Skip requests that have been made too recently
        if skip_new:
            row = self.tm.fetch_one(id)
            if row:
                last_updated_stamp = self._latest_history_stamp(
                    row['followers_history'])
                # A row without any history has no known age: treat it as
                # stale and fall through to a fresh fetch.
                if last_updated_stamp is not None:
                    # NOTE(review): fromtimestamp() yields local time while
                    # utcnow() is UTC, so on a non-UTC host the staleness may
                    # be off by the UTC offset depending on how the stamp was
                    # written - confirm against save_tweep before changing.
                    last_updated = datetime.datetime.fromtimestamp(
                        last_updated_stamp)
                    staleness = datetime.datetime.utcnow() - last_updated
                    if staleness.total_seconds() < STALE_AGE:
                        logger.info(
                            "Skipping user %s - last updated %s ago (not stale enough yet)"
                            % (id, staleness))
                        return row['followers']

        # Make the actual API requests
        followers = self.tl.get_followers(user_id=id)
        user_data = self.tl.get_user_info([id])[0]
        followers_count = user_data['followers_count']
        following_count = user_data['friends_count']

        # Save the data
        data = self.tm.save_tweep(id, level, followers, followers_count,
                                  following_count)
        return data['followers']

    def _update_follower_tree(self, level, followers):
        '''Recursive breadth-first function for getting all followers.

        Processes every id in ``followers`` at ``level``, gathers the union of
        their followers and recurses with ``level + 1`` until ``level``
        exceeds ``MAX_DEPTH_LEVEL``.  A failure for a single user is logged
        and that user is skipped, so one bad id does not abort the level.
        '''
        if level > MAX_DEPTH_LEVEL:
            return

        logger.info("Updating %s followers at level %s..."
                    % (len(followers), level))

        next_level_followers = set()
        for follower_id in followers:
            try:
                their_followers = self._get_followers_helper(follower_id,
                                                             level)
                next_level_followers.update(their_followers)
            except Exception as e:
                # Deliberate best-effort: log and move on to the next user.
                logger.warning(
                    "Exception caught trying to get followers for %s - gave up: %s"
                    % (follower_id, e))

        logger.info("Finished updating all followers at level %s" % level)
        self._update_follower_tree(level + 1, list(next_level_followers))
class DataAnalyzer(object):
    '''Scores the stored tweeps and emails the top recommendations.

    ``analyze_data()`` fills ``self.recommended_tweeps``;
    ``send_recommendation_email()`` renders and sends whatever that list
    currently holds.
    '''

    def __init__(self):
        self.tm = TweepModel()
        self.tl = TwitterLib()
        # Filled in by analyze_data(); starts empty so that
        # send_recommendation_email() never hits a missing attribute.
        self.recommended_tweeps = []

    @staticmethod
    def _boost_effect(followers_count, following_count):
        '''Return the multiplier for the following/followers ratio.

        Returns ``None`` when ``followers_count`` is falsy (no ratio can be
        computed).  The ratio is clamped to a minimum of 10 before taking the
        base-10 logarithm, so the multiplier is never below 1.
        '''
        if not followers_count:
            return None
        ratio = 1.0 * following_count / followers_count
        if ratio < 10:
            ratio = 10
        # Taking the log because we actually want to minimize this boost
        # factor.  log10() is exact for powers of ten, unlike log(x, 10).
        return math.log10(ratio)

    @staticmethod
    def _penalize_effect(following_count):
        '''Return the divisor for the total number of people followed.

        Returns ``None`` when ``following_count`` is below 10, in which case
        no penalty applies; otherwise the base-10 logarithm (always >= 1).
        '''
        if following_count < 10:
            return None
        return math.log10(following_count)

    def analyze_data(self):
        '''Score every stored tweep and keep the best ones.

        Reads all rows from ``self.tm.fetch_all()``, skips ids we already
        follow and rows at level 0, accumulates a per-id score, applies the
        boost/penalty heuristics and stores the user info of the top
        ``NUM_OF_RECOMMENDED_TWEEPS`` ids (each annotated with a
        ``'tweep_score'`` key) in ``self.recommended_tweeps``.
        '''
        logger.info("Analyzing tweeps...")

        # First let's grab all of the people that we already follow
        # (because we don't want to recommend them, duh).  A set keeps the
        # membership test below cheap for every stored row.
        following = set(self.tl.get_following(user_id=settings.TWITTER_ID))

        # Let's calculate us some scores!
        tweep_scores = {}
        tweep_data = {}
        all_tweeps = self.tm.fetch_all()
        for tweep in all_tweeps:
            tweep_id = tweep['id']

            # Ignore people who we already follow
            if tweep_id in following:
                continue

            # Ignore people who already follow us
            if tweep['level'] == 0:
                continue

            # For everyone else, add some juice
            tweep_scores[tweep_id] = tweep_scores.get(tweep_id, 0) + pow(
                DEPRECIATION_PER_LEVEL, tweep['level'] - 1)

            # Get a ratio to be applied later (first row seen for an id wins)
            if tweep_id not in tweep_data:
                tweep_data[tweep_id] = {
                    'followers_count': tweep['followers_count'],
                    'following_count': tweep['following_count'],
                }

        logger.info("Applying some heuristics...")

        # Now penalize scores based on:
        # 1. followers/following ratio
        # 2. total people following
        # Only values of existing keys are reassigned below, which is safe
        # while iterating over the dict.
        for tweep_id in tweep_scores:
            data = tweep_data[tweep_id]
            logger.debug("OriginalScore=%s" % tweep_scores[tweep_id])

            boost_effect = self._boost_effect(data['followers_count'],
                                              data['following_count'])
            if boost_effect is not None:
                tweep_scores[tweep_id] *= boost_effect
                logger.debug(
                    "Following=%s Followers=%s BoostEffect=%s NewScore=%s" % (
                        data['following_count'],
                        data['followers_count'],
                        boost_effect,
                        tweep_scores[tweep_id],
                    ))

            penalize_effect = self._penalize_effect(data['following_count'])
            if penalize_effect is not None:
                tweep_scores[tweep_id] /= penalize_effect
                logger.debug("PenalizeEffect=%s NewScore=%s" % (
                    penalize_effect,
                    tweep_scores[tweep_id],
                ))

        logger.info("Sorting...")
        sorted_tweeps = sorted(tweep_scores.items(),
                               key=operator.itemgetter(1), reverse=True)
        top_tweeps = sorted_tweeps[:NUM_OF_RECOMMENDED_TWEEPS]
        tweep_ids = [tweep_id for tweep_id, _ in top_tweeps]

        self.recommended_tweeps = []
        # Nothing scored means nothing to look up; skip the lookup call.
        tweeps = self.tl.get_user_info(tweep_ids) if tweep_ids else []
        # NOTE(review): scores are attached by position, which assumes
        # get_user_info() returns one entry per requested id in request
        # order - confirm against TwitterLib.
        for (_, score), tweep in zip(top_tweeps, tweeps):
            tweep['tweep_score'] = score
            self.recommended_tweeps.append(tweep)

        logger.info("OK!")

    def send_recommendation_email(self):
        '''Render the recommendation template and send it by email.

        Uses ``self.recommended_tweeps`` as produced by ``analyze_data()``;
        if that has not run yet the list is empty.
        '''
        logger.info("Emailing tweep recommendations now...")

        today = datetime.date.today()
        subject = "Tweeps to follow for %s" % today
        context = {
            'twitter_user_name': settings.TWITTER_SCREEN_NAME,
            'day_of_week': today.strftime("%A"),
            'tweeps': self.recommended_tweeps,
        }

        # Close the template file deterministically instead of leaking the
        # handle until garbage collection.
        with open('templates/who_to_follow_email.txt', 'r') as template_file:
            template_text = template_file.read()

        email_txt = Template(template_text).render(**context).encode('utf-8')
        logger.debug("plain text mail:\n%s" % email_txt)

        send_email(settings.EMAIL_FROM, settings.EMAIL_TO, subject, email_txt)