def user_populate_history(self, user, follow_user):
    """Add ``follow_user``'s visits from the last week to ``user``'s feed.

    For every eye-history entry of ``follow_user`` newer than one week
    whose (truncated) URL already has a ``PopularHistoryInfo`` row, the
    per-user ``PopularHistory`` row for ``user`` is fetched or created and
    the visit is handed to ``_add_users_and_messages``.  Visits to pages
    without a ``PopularHistoryInfo`` row are skipped.  Once all visits are
    processed, ``_calculate_scores`` is run for ``user``.

    Args:
        user: the user whose popular-history items are being populated.
        follow_user: the newly followed user; its ``eyehistory_set`` is
            the source of visits.
    """
    # timezone.make_aware() returns a new datetime instead of mutating its
    # argument, so calling it and dropping the result leaves a naive
    # cutoff.  timezone.now() already matches the project's USE_TZ setting.
    week_ago = timezone.now() - datetime.timedelta(weeks=1)

    # get all the visits that the new followee has
    eyehists = follow_user.eyehistory_set.filter(
        start_time__gt=week_ago).select_related()

    for e in queryset_iterator(eyehists):
        # Slicing never overruns, so no min()/len() guard is needed.
        # NOTE(review): 255 presumably mirrors the max length of
        # PopularHistoryInfo.url -- confirm against the model.
        url = e.url[:255]

        # popularhistoryinfo stores general information about this page
        # such as description, title, domain, image, etc.
        matches = PopularHistoryInfo.objects.filter(url=url)
        if not matches.exists():
            continue
        info = matches[0]

        # create a popular history item for the user and the visit that
        # that user's followee has been to
        user_pop, _ = PopularHistory.objects.get_or_create(
            popular_history=info, user=user)
        self._add_users_and_messages(user_pop, e)

    # Next, go through all the popular history items created for this user
    # and score them
    self._calculate_scores(user)
def remove_user_populate_history(self, user, remove_user):
    """Strip ``remove_user`` from ``user``'s popular-history items, then rescore.

    Every ``PopularHistory`` row belonging to ``user`` is passed, together
    with ``user`` and ``remove_user``, to ``_remove_users_and_messages``.
    After all rows have been visited, ``_calculate_scores`` is run once
    for ``user``.

    Args:
        user: owner of the popular-history items being cleaned up.
        remove_user: the user handed to ``_remove_users_and_messages`` for
            each item (presumably someone ``user`` stopped following --
            verify against callers).
    """
    owned_items = PopularHistory.objects.filter(user=user)
    owned_items = owned_items.select_related()

    for item in queryset_iterator(owned_items):
        self._remove_users_and_messages(user, item, remove_user)

    # Scores are refreshed only after every item has been updated.
    self._calculate_scores(user)
def _update_ratings_time_spent(self):
    """Derive page ratings from each user's average time spent per visit.

    Works in three passes:

    1. Walk every ``EyeHistory`` row and accumulate, per (user id, page
       id), the summed ``total_time / 1000`` and the number of visits.
       Pairs that already have a ``Ratings`` row with
       ``from_time_distribution=False`` are skipped entirely.
    2. Turn each accumulator into an average time per visit and collect
       every user's averages into one list (that user's distribution).
    3. For each pair, rank its average inside the user's distribution
       with ``stats.percentileofscore`` and map the rounded percentile
       onto the range 1.0-5.0; the score is written to the existing
       ``Ratings`` row, or a new row is created if none exists.

    Progress is reported through ``_log_updates`` every ``CHUNK_SIZE``
    iterations of each pass.
    """
    eye_hists = EyeHistory.objects.all().select_related("user", "page")
    total_updates = eye_hists.count()

    # (user_id, page_id) -> [summed total_time / 1000, visit count]
    time_totals = {}
    # pairs known to have a rating with from_time_distribution=False
    filled_ratings = set()

    for i, eye_hist in enumerate(queryset_iterator(eye_hists)):
        user = eye_hist.user
        domain, _ = Domain.objects.get_or_create(url=eye_hist.domain)
        page, _ = Page.objects.get_or_create(
            url=eye_hist.url, domain=domain)
        key = (user.id, page.id)

        # The set short-circuits the database lookup for repeat visits.
        if key in filled_ratings or Ratings.objects.filter(
                user=user, page=page,
                from_time_distribution=False).exists():
            filled_ratings.add(key)
            continue

        # Count visits per key.  Storing the global loop index here (as
        # was done before) gives a meaningless divisor and a
        # ZeroDivisionError when the key's last visit is row 0.
        totals = time_totals.setdefault(key, [0.0, 0])
        totals[0] += 1.0 * eye_hist.total_time / 1000
        totals[1] += 1

        if i != 0 and i % CHUNK_SIZE == 0:
            self._log_updates(i, total_updates, 'avg_time_spent_for_pages')

    total_updates = len(time_totals)

    # (user_id, page_id) -> average time per visit
    averages = {}
    # user_id -> list of that user's per-page averages
    users = {}
    for i, (key, (time_sum, visits)) in enumerate(time_totals.items()):
        avg_time_spent = time_sum / visits  # visits >= 1 by construction
        users.setdefault(key[0], []).append(avg_time_spent)
        averages[key] = avg_time_spent
        if i != 0 and i % CHUNK_SIZE == 0:
            self._log_updates(
                i, total_updates,
                'forming_time_spent_distributions_for_users')

    for i, (key, avg_time_spent) in enumerate(averages.items()):
        user_id, page_id = key
        # Percentile is 0-100; rounding then scaling gives 1.0-5.0.
        percentile = stats.percentileofscore(users[user_id], avg_time_spent)
        score = round(percentile) * 4.0 / 100 + 1

        user = User.objects.get(id=user_id)
        page = Page.objects.get(id=page_id)
        try:
            rating = Ratings.objects.get(user=user, page=page)
        except Ratings.DoesNotExist:
            Ratings.objects.create(user=user, page=page, score=score)
        else:
            rating.score = score
            rating.save()

        if i != 0 and i % CHUNK_SIZE == 0:
            self._log_updates(
                i, total_updates, 'calculating_left_over_ratings')
def user_populate_history(self, user, follow_user):
    """Add ``follow_user``'s visits from the last week to ``user``'s feed.

    For every eye-history entry of ``follow_user`` newer than one week
    whose (truncated) URL already has a ``PopularHistoryInfo`` row, the
    per-user ``PopularHistory`` row for ``user`` is fetched or created and
    the visit is handed to ``_add_users_and_messages``.  Visits to pages
    without a ``PopularHistoryInfo`` row are skipped.

    Args:
        user: the user whose popular-history items are being populated.
        follow_user: the newly followed user; its ``eyehistory_set`` is
            the source of visits.
    """
    # timezone.make_aware() returns a new datetime instead of mutating its
    # argument, so calling it and dropping the result leaves a naive
    # cutoff.  timezone.now() already matches the project's USE_TZ setting.
    week_ago = timezone.now() - datetime.timedelta(weeks=1)

    # get all the visits that the new followee has
    eyehists = follow_user.eyehistory_set.filter(
        start_time__gt=week_ago).select_related()

    for e in queryset_iterator(eyehists):
        # Slicing never overruns, so no min()/len() guard is needed.
        # NOTE(review): 255 presumably mirrors the max length of
        # PopularHistoryInfo.url -- confirm against the model.
        url = e.url[:255]

        # popularhistoryinfo stores general information about this page
        # such as description, title, domain, image, etc.
        matches = PopularHistoryInfo.objects.filter(url=url)
        if not matches.exists():
            continue
        info = matches[0]

        # create a popular history item for the user and the visit that
        # that user's followee has been to
        user_pop, _ = PopularHistory.objects.get_or_create(
            popular_history=info, user=user)
        self._add_users_and_messages(user_pop, e)
def _populate_popular_history(self):
    """Rebuild popular-history items from the last 10 weeks of visits.

    For every ``EyeHistory`` row newer than the 10-week cutoff:

    * find the ``PopularHistoryInfo`` for the (truncated) URL, or create
      it through ``_create_pop``;
    * ensure exactly one user-less ``PopularHistory`` row exists for it
      (extra duplicates are deleted) and attach the visit to it;
    * attach the visit to the visitor's own ``PopularHistory`` row and to
      the row of every profile that follows the visitor.

    Afterwards it calls ``_delete_old``, deletes per-user items whose only
    visitor is the owner, detaches eye histories older than the cutoff
    from every item, and deletes ``PopularHistoryInfo`` rows that no
    ``PopularHistory`` references any more.
    """
    self.log('_populate_popular_history')

    # timezone.make_aware() returns a new datetime instead of mutating its
    # argument, so calling it and dropping the result leaves a naive
    # cutoff.  timezone.now() already matches the project's USE_TZ setting.
    cutoff = timezone.now() - datetime.timedelta(weeks=10)

    # get all eyehistory events from 10 weeks ago to today
    eye_history = EyeHistory.objects.filter(
        start_time__gt=cutoff).select_related()

    i = 0  # in case we try to log but there is no eye_history
    total_updates = eye_history.count()

    for i, e in enumerate(queryset_iterator(eye_history)):
        # NOTE(review): 255 presumably mirrors the max length of
        # PopularHistoryInfo.url -- confirm against the model.
        url = e.url[:255]

        # popularhistoryinfo stores general information about this page
        # such as description, title, domain, image, etc.
        matches = PopularHistoryInfo.objects.filter(url=url)
        if matches.exists():
            info = matches[0]
        else:
            # try to extract description, title, etc from the page and
            # create a popularhistoryinfo item from it
            info = self._create_pop(e, url)

        # each popularhistory is associated with a popularhistoryinfo.
        # popularhistory contains scoring info and is tied either to
        # no user (so scoring for the firehose) or to a particular user
        # (so scoring for their following feed)
        #
        # Evaluate the queryset once: separate count()/[0]/[1:] queries on
        # an unordered queryset need not agree on which row comes first.
        firehose_items = list(PopularHistory.objects.filter(
            popular_history=info,
            user=None).prefetch_related('eye_hists'))
        if firehose_items:
            total_pop = firehose_items[0]
            # The loop variable must not be called ``i``: that would
            # clobber the enumerate counter used for progress logging.
            for duplicate in firehose_items[1:]:
                duplicate.delete()
        else:
            total_pop = PopularHistory.objects.create(
                popular_history=info, user=None)

        self._add_users_and_messages(total_pop, e)

        # for each of the users that are following the person
        # in this eyehistory, we add this eyehistory to the
        # popularhistory item for that user
        follow_users = UserProfile.objects.filter(
            follows=e.user.profile).select_related()

        # the visitor's own item is handled outside of the loop so the
        # followers can be streamed through an iterator
        user_pop, _ = PopularHistory.objects.get_or_create(
            popular_history=info, user=e.user)
        self._add_users_and_messages(user_pop, e)

        for user_prof in queryset_iterator(follow_users):
            user_pop, _ = PopularHistory.objects.get_or_create(
                popular_history=info, user=user_prof.user)
            self._add_users_and_messages(user_pop, e)

        if i != 0 and i % CHUNK_SIZE == 0:
            self._log_updates(i, total_updates, 'popular_history')

    self._log_updates(i, total_updates, 'popular_history')

    self._delete_old()

    # we're interested in including one's own visits to the score in
    # one's own feed, but don't want to include in list of users
    popular_history = PopularHistory.objects.filter(
        user__isnull=False).prefetch_related(
        'visitors').select_related()
    for pop in queryset_iterator(popular_history):
        if pop.visitors.count() == 1 and pop.visitors.all()[0] == pop.user:
            pop.delete()

    # detach eyehistories that are older than the 10-week cutoff.
    # NOTE: an item left with no eye histories is not deleted here.
    popular_history = PopularHistory.objects.all().prefetch_related(
        'eye_hists')
    for pop in queryset_iterator(popular_history):
        pop.eye_hists.remove(
            *pop.eye_hists.filter(start_time__lt=cutoff))

    # if there are any popularhistoryinfo items now that have
    # no corresponding popularhistory objects (because they've been
    # deleted for being stale presumably) then delete
    PopularHistoryInfo.objects.filter(popularhistory=None).delete()
def remove_user_populate_history(self, user, remove_user):
    """Strip ``remove_user`` from each of ``user``'s popular-history items.

    Every ``PopularHistory`` row belonging to ``user`` is passed, together
    with ``user`` and ``remove_user``, to ``_remove_users_and_messages``.

    Args:
        user: owner of the popular-history items being cleaned up.
        remove_user: the user handed to ``_remove_users_and_messages`` for
            each item (presumably someone ``user`` stopped following --
            verify against callers).
    """
    owned_items = PopularHistory.objects.filter(user=user)
    owned_items = owned_items.select_related()

    for item in queryset_iterator(owned_items):
        self._remove_users_and_messages(user, item, remove_user)