class DataAnalyzer(object):
    """Score stored tweeps and email the top-ranked ones as follow suggestions.

    Typical use is ``analyze_data()`` followed by
    ``send_recommendation_email()``.
    """

    def __init__(self):
        self.tm = TweepModel()
        self.tl = TwitterLib()
        # Filled in by analyze_data(). Starting with an empty list keeps
        # send_recommendation_email() from raising AttributeError when it is
        # called before any analysis has run.
        self.recommended_tweeps = []

    def analyze_data(self):
        """Rank candidate tweeps and store the best in ``recommended_tweeps``.

        Every tweep returned by ``self.tm.fetch_all()`` that we do not already
        follow and whose level is non-zero contributes to a per-id score. The
        scores are then adjusted by follower/following heuristics, sorted in
        descending order, and the user info of the top
        ``NUM_OF_RECOMMENDED_TWEEPS`` ids is fetched. Each fetched entry gets
        a ``'tweep_score'`` key and is appended to ``self.recommended_tweeps``.
        """
        logger.info("Analyzing tweeps...")

        # First let's grab all of the people that we already follow
        # (because we don't want to recommend them, duh).
        # Converted to a set once so the per-tweep membership test does not
        # rescan the whole collection (the ids are already used as dict keys
        # below, so they are hashable).
        following = set(self.tl.get_following(user_id=settings.TWITTER_ID))

        # Let's calculate us some scores!
        tweep_scores, tweep_data = self._collect_scores(following)

        logger.info("Applying some heuristics...")
        self._apply_heuristics(tweep_scores, tweep_data)

        logger.info("Sorting...")
        sorted_tweeps = sorted(
            tweep_scores.items(),
            key=operator.itemgetter(1),
            reverse=True,
        )
        top_tweeps = sorted_tweeps[:NUM_OF_RECOMMENDED_TWEEPS]

        self.recommended_tweeps = []
        tweeps = self.tl.get_user_info(
            [tweep_id for tweep_id, _ in top_tweeps])
        # NOTE(review): scores are matched to the fetched users by position.
        # That is only correct if get_user_info() returns exactly one entry
        # per requested id, in the requested order -- confirm against
        # TwitterLib before relying on 'tweep_score'.
        for tweep, (_, score) in zip(tweeps, top_tweeps):
            tweep['tweep_score'] = score
            self.recommended_tweeps.append(tweep)
        logger.info("OK!")

    def _collect_scores(self, following):
        """Return ``(scores, data)`` dicts keyed by tweep id.

        ``following`` is the collection of ids to skip. The same id may show
        up several times in ``fetch_all()``; each appearance adds
        ``DEPRECIATION_PER_LEVEL ** (level - 1)`` to that id's score, while
        the follower/following counts are taken from its first appearance.
        """
        tweep_scores = {}
        tweep_data = {}
        for tweep in self.tm.fetch_all():
            tweep_id = tweep['id']

            # Ignore people who we already follow
            if tweep_id in following:
                continue

            # Ignore people who already follow us
            if tweep['level'] == 0:
                continue

            # For everyone else, add some juice
            tweep_scores[tweep_id] = (
                tweep_scores.get(tweep_id, 0)
                + pow(DEPRECIATION_PER_LEVEL, tweep['level'] - 1)
            )

            # Keep the counts around; they feed the ratio applied later
            if tweep_id not in tweep_data:
                tweep_data[tweep_id] = {
                    'followers_count': tweep['followers_count'],
                    'following_count': tweep['following_count'],
                }
        return tweep_scores, tweep_data

    @staticmethod
    def _apply_heuristics(tweep_scores, tweep_data):
        """Adjust ``tweep_scores`` in place using the counts in ``tweep_data``.

        Scores are adjusted based on:
          1. the following/followers ratio (boost)
          2. the total number of people the tweep follows (penalty)
        """
        # items() instead of the Python-2-only iteritems(): it works on both
        # major versions, and only values of existing keys are reassigned
        # while iterating, so the dict never changes size.
        for tweep_id, score in tweep_scores.items():
            data = tweep_data[tweep_id]
            logger.debug("OriginalScore=%s", score)

            if data['followers_count']:
                boost_effect = (
                    1.0 * data['following_count'] / data['followers_count'])
                # Clamp to 10 so the log below is never less than 1, i.e.
                # this step can raise a score but never lower it.
                if boost_effect < 10:
                    boost_effect = 10
                # Taking the log because we actually want to minimize this
                # boost factor
                boost_effect = math.log(boost_effect, 10)
                tweep_scores[tweep_id] *= boost_effect
                logger.debug(
                    "Following=%s Followers=%s BoostEffect=%s NewScore=%s",
                    data['following_count'],
                    data['followers_count'],
                    boost_effect,
                    tweep_scores[tweep_id],
                )

            # Only applied from 10 upwards, where the log is at least 1, so
            # the division can never inflate a score or divide by zero.
            if data['following_count'] >= 10:
                penalize_effect = math.log(data['following_count'], 10)
                tweep_scores[tweep_id] /= penalize_effect
                logger.debug(
                    "PenalizeEffect=%s NewScore=%s",
                    penalize_effect,
                    tweep_scores[tweep_id],
                )

    def send_recommendation_email(self):
        """Render the recommendation template and send it via ``send_email``.

        The template is read from ``templates/who_to_follow_email.txt``
        (a relative path, so it is resolved against the current working
        directory) and rendered with the configured screen name, today's
        weekday name and ``self.recommended_tweeps``.
        """
        logger.info("Emailing tweep recommendations now...")
        today = datetime.date.today()
        subject = "Tweeps to follow for %s" % today
        context = {
            'twitter_user_name': settings.TWITTER_SCREEN_NAME,
            'day_of_week': today.strftime("%A"),
            'tweeps': self.recommended_tweeps,
        }

        # Context manager so the template file handle is closed promptly
        # instead of being left to the garbage collector.
        with open('templates/who_to_follow_email.txt', 'r') as template_file:
            template_text = template_file.read()

        email_txt = Template(template_text).render(**context).encode('utf-8')
        logger.debug("plain text mail:\n%s", email_txt)
        send_email(settings.EMAIL_FROM, settings.EMAIL_TO, subject, email_txt)