def user_populate_history(self, user, follow_user):
        """Seed ``user``'s feed with the recent visits of a newly followed user.

        For every visit ``follow_user`` made during the last week whose
        (truncated) URL already has a ``PopularHistoryInfo`` record, a
        ``PopularHistory`` row for ``user`` is fetched or created and the
        visit is attached to it through ``_add_users_and_messages``.  The
        user's items are then rescored with ``_calculate_scores``.

        Args:
            user: the user whose feed is being populated.
            follow_user: the user that ``user`` has started following; its
                ``eyehistory_set`` supplies the visits.
        """
        # timezone.make_aware() returns a new datetime instead of modifying
        # its argument, so the original call had no effect and a naive
        # cutoff reached the query.  timezone.now() yields a cutoff that
        # already matches the project's USE_TZ setting.
        week_ago = timezone.now() - datetime.timedelta(weeks=1)

        # get all the visits that the new followee has
        eyehists = follow_user.eyehistory_set.filter(
            start_time__gt=week_ago).select_related()
        for e in queryset_iterator(eyehists):
            # lookups are done on the first 255 characters of the url
            url = e.url[:255]

            # popularhistoryinfo stores general information about this page
            # such as description, title, domain, image, etc.
            infos = PopularHistoryInfo.objects.filter(url=url)
            if not infos.exists():
                # pages without stored info are skipped here
                continue
            info = infos[0]

            # create a popular history item for the user and the visit
            # that user's followee has been to
            user_pop, _ = PopularHistory.objects.get_or_create(
                popular_history=info, user=user)
            self._add_users_and_messages(user_pop, e)

        # Next, go through all the popular history items created for this
        # user and score them
        self._calculate_scores(user)
 def remove_user_populate_history(self, user, remove_user):
     """Detach ``remove_user`` from ``user``'s popular history and rescore.

     Each ``PopularHistory`` row belonging to ``user`` is handed to
     ``_remove_users_and_messages`` together with ``remove_user``; once all
     rows have been processed the user's items are rescored through
     ``_calculate_scores``.
     """
     owned_items = PopularHistory.objects.filter(user=user)

     for item in queryset_iterator(owned_items.select_related()):
         self._remove_users_and_messages(user, item, remove_user)

     # scores depend on the attached visits, so refresh them afterwards
     self._calculate_scores(user)
    def _update_ratings_time_spent(self):
        """Derive ratings from the average time users spent on pages.

        Works in three passes:

        1. Walk every ``EyeHistory`` row and, for each (user, page) pair
           that has no ``Ratings`` row with ``from_time_distribution=False``,
           accumulate the total time spent (``total_time / 1000``) and the
           number of visits.
        2. Turn the totals into a per-visit average and collect, per user,
           the list of that user's averages.
        3. For each pair, compute ``round(percentile) * 4.0 / 100 + 1`` from
           the percentile of the pair's average within the user's own list,
           and store it on the existing ``Ratings`` row or a new one.

        Progress is reported through ``_log_updates`` every ``CHUNK_SIZE``
        items in each pass.
        """
        eye_hists = EyeHistory.objects.all().select_related("user", "page")
        total_updates = eye_hists.count()
        # (user_id, page_id) -> [accumulated time, number of visits]
        totals = {}
        # pairs known to already have a rating that must not be replaced
        filled_ratings = set()
        for i, eye_hist in enumerate(queryset_iterator(eye_hists)):
            user = eye_hist.user
            domain, _ = Domain.objects.get_or_create(url=eye_hist.domain)
            page, _ = Page.objects.get_or_create(url=eye_hist.url,
                                                 domain=domain)
            key = (user.id, page.id)

            # A pair that is already being accumulated was checked on its
            # first visit, so the database is only queried for unseen pairs.
            if key not in totals:
                if key in filled_ratings or Ratings.objects.filter(
                        user=user, page=page,
                        from_time_distribution=False).exists():
                    filled_ratings.add(key)
                    continue
                totals[key] = [0.0, 0]

            # Track the visit count explicitly.  The original stored the
            # enumerate index here, which is not a count and is 0 for the
            # very first row, leading to a division by zero below.
            totals[key][0] += 1.0 * eye_hist.total_time / 1000
            totals[key][1] += 1

            if i != 0 and i % CHUNK_SIZE == 0:
                self._log_updates(i, total_updates, 'avg_time_spent_for_pages')

        total_updates = len(totals)
        averages = {}  # (user_id, page_id) -> average time per visit
        users = {}     # user_id -> list of that user's per-page averages
        for i, (key, (time_spent, visits)) in enumerate(totals.items()):
            avg_time_spent = time_spent / visits
            users.setdefault(key[0], []).append(avg_time_spent)
            averages[key] = avg_time_spent

            if i != 0 and i % CHUNK_SIZE == 0:
                self._log_updates(i, total_updates, 'forming_time_spent_distributions_for_users')

        for i, (key, avg_time_spent) in enumerate(averages.items()):
            user_id, page_id = key
            # percentile (0-100) of this page within the user's own
            # distribution, mapped linearly onto the 1-5 range
            score = round(stats.percentileofscore(
                users[user_id], avg_time_spent)) * 4.0 / 100 + 1
            user = User.objects.get(id=user_id)
            page = Page.objects.get(id=page_id)
            try:
                rating = Ratings.objects.get(user=user, page=page)
            except Ratings.DoesNotExist:
                Ratings.objects.create(user=user, page=page, score=score)
            else:
                rating.score = score
                rating.save()

            if i != 0 and i % CHUNK_SIZE == 0:
                self._log_updates(i, total_updates, 'calculating_left_over_ratings')
Example #4
0
    def user_populate_history(self, user, follow_user):
        """Seed ``user``'s feed with the recent visits of a newly followed user.

        For every visit ``follow_user`` made during the last week whose
        (truncated) URL already has a ``PopularHistoryInfo`` record, a
        ``PopularHistory`` row for ``user`` is fetched or created and the
        visit is attached to it through ``_add_users_and_messages``.

        Args:
            user: the user whose feed is being populated.
            follow_user: the user that ``user`` has started following; its
                ``eyehistory_set`` supplies the visits.
        """
        # timezone.make_aware() returns a new datetime instead of modifying
        # its argument, so the original call had no effect and a naive
        # cutoff reached the query.  timezone.now() yields a cutoff that
        # already matches the project's USE_TZ setting.
        week_ago = timezone.now() - datetime.timedelta(weeks=1)

        # get all the visits that the new followee has
        eyehists = follow_user.eyehistory_set.filter(
            start_time__gt=week_ago).select_related()
        for e in queryset_iterator(eyehists):
            # lookups are done on the first 255 characters of the url
            url = e.url[:255]

            # popularhistoryinfo stores general information about this page
            # such as description, title, domain, image, etc.
            infos = PopularHistoryInfo.objects.filter(url=url)
            if not infos.exists():
                # pages without stored info are skipped here
                continue
            info = infos[0]

            # create a popular history item for the user and the visit
            # that user's followee has been to
            user_pop, _ = PopularHistory.objects.get_or_create(
                popular_history=info, user=user)
            self._add_users_and_messages(user_pop, e)
    def _populate_popular_history(self):
        """Rebuild popular-history items from the last ten weeks of visits.

        For every ``EyeHistory`` row newer than the cutoff this method:

        * finds the ``PopularHistoryInfo`` for the (truncated) URL, or
          builds one with ``_create_pop``;
        * makes sure exactly one ``PopularHistory`` with ``user=None``
          exists for it (extras are deleted) and attaches the visit;
        * attaches the visit to the ``PopularHistory`` of the visitor and
          of every profile that follows the visitor.

        Afterwards it calls ``_delete_old``, deletes per-user items whose
        only visitor is the owner, detaches visits older than the cutoff
        and deletes ``PopularHistoryInfo`` rows that no longer have any
        ``PopularHistory``.
        """
        self.log('_populate_popular_history')

        # timezone.make_aware() returns a new datetime instead of modifying
        # its argument, so the original call had no effect.  timezone.now()
        # yields a cutoff that already matches the project's USE_TZ setting.
        cutoff = timezone.now() - datetime.timedelta(weeks=10)

        # get all eyehistory events from 10 weeks ago to today
        eye_history = EyeHistory.objects.filter(
            start_time__gt=cutoff).select_related()

        index = 0  # in case we try to log but there is no eye_history
        total_updates = eye_history.count()
        for index, e in enumerate(queryset_iterator(eye_history)):
            # lookups are done on the first 255 characters of the url
            url = e.url[:255]

            # popularhistoryinfo stores general information about this page
            # such as description, title, domain, image, etc.
            infos = PopularHistoryInfo.objects.filter(url=url)
            if infos.exists():
                p = infos[0]
            else:
                # try to extract description, title, etc from the page and
                # create a popularhistoryinfo item from it
                p = self._create_pop(e, url)

            # each popularhistory is associated with a popularhistoryitem
            # popularhistory contains scoring info and is tied either to
            # no user (so scoring for the firehose) or to a particular user
            # (so scoring for their following feed)
            #
            # The matches are materialized once: indexing and slicing the
            # unordered queryset separately issues independent queries, so
            # the row that is kept could also be among the rows deleted.
            firehose_items = list(PopularHistory.objects.filter(
                popular_history=p, user=None).prefetch_related('eye_hists'))
            if firehose_items:
                total_pop = firehose_items[0]
                # Do not reuse the outer loop index as the loop variable:
                # the original did, which broke the progress logging below.
                for duplicate in firehose_items[1:]:
                    duplicate.delete()
            else:
                total_pop = PopularHistory.objects.create(
                    popular_history=p, user=None)
            self._add_users_and_messages(total_pop, e)

            # for each of the users that are following the person
            # in this eyehistory, we add this eyehistory to the
            # the popularhistory item for that user
            follow_users = UserProfile.objects.filter(
                follows=e.user.profile).select_related()

            # do this outside of the loop so we can use an iterator
            user_pop, _ = PopularHistory.objects.get_or_create(
                popular_history=p, user=e.user)
            self._add_users_and_messages(user_pop, e)

            for user_prof in queryset_iterator(follow_users):
                user_pop, _ = PopularHistory.objects.get_or_create(
                    popular_history=p, user=user_prof.user)
                self._add_users_and_messages(user_pop, e)

            if index != 0 and index % CHUNK_SIZE == 0:
                self._log_updates(index, total_updates, 'popular_history')

        self._log_updates(index, total_updates, 'popular_history')
        self._delete_old()

        # we're interested in including one's own visits to the score in
        # one's own feed, but don't want to include in list of users
        popular_history = PopularHistory.objects.filter(
            user__isnull=False).prefetch_related(
                'visitors').select_related()

        for pop in queryset_iterator(popular_history):
            if pop.visitors.count() == 1:
                if pop.visitors.all()[0] == pop.user:
                    pop.delete()

        # remove eyehistories that are from over 10 weeks ago
        # if everything gets removed then delete the popularhistory
        # though this shouldn't happen (see above)
        popular_history = PopularHistory.objects.all().prefetch_related(
            'eye_hists')
        for pop in queryset_iterator(popular_history):
            pop.eye_hists.remove(
                *pop.eye_hists.filter(start_time__lt=cutoff))

        # if there are any popularhistoryinfo items now that have
        # no corresponding popularhistory objects (because they've been
        # deleted for being stale presumably) then delete
        PopularHistoryInfo.objects.filter(popularhistory=None).delete()
Example #6
0
    def _populate_popular_history(self):
        """Rebuild popular-history items from the last ten weeks of visits.

        For every ``EyeHistory`` row newer than the cutoff this method:

        * finds the ``PopularHistoryInfo`` for the (truncated) URL, or
          builds one with ``_create_pop``;
        * makes sure exactly one ``PopularHistory`` with ``user=None``
          exists for it (extras are deleted) and attaches the visit;
        * attaches the visit to the ``PopularHistory`` of the visitor and
          of every profile that follows the visitor.

        Afterwards it calls ``_delete_old``, deletes per-user items whose
        only visitor is the owner, detaches visits older than the cutoff
        and deletes ``PopularHistoryInfo`` rows that no longer have any
        ``PopularHistory``.
        """
        self.log('_populate_popular_history')

        # timezone.make_aware() returns a new datetime instead of modifying
        # its argument, so the original call had no effect.  timezone.now()
        # yields a cutoff that already matches the project's USE_TZ setting.
        cutoff = timezone.now() - datetime.timedelta(weeks=10)

        # get all eyehistory events from 10 weeks ago to today
        eye_history = EyeHistory.objects.filter(
            start_time__gt=cutoff).select_related()

        index = 0  # in case we try to log but there is no eye_history
        total_updates = eye_history.count()
        for index, e in enumerate(queryset_iterator(eye_history)):
            # lookups are done on the first 255 characters of the url
            url = e.url[:255]

            # popularhistoryinfo stores general information about this page
            # such as description, title, domain, image, etc.
            infos = PopularHistoryInfo.objects.filter(url=url)
            if infos.exists():
                p = infos[0]
            else:
                # try to extract description, title, etc from the page and
                # create a popularhistoryinfo item from it
                p = self._create_pop(e, url)

            # each popularhistory is associated with a popularhistoryitem
            # popularhistory contains scoring info and is tied either to
            # no user (so scoring for the firehose) or to a particular user
            # (so scoring for their following feed)
            #
            # The matches are materialized once: indexing and slicing the
            # unordered queryset separately issues independent queries, so
            # the row that is kept could also be among the rows deleted.
            firehose_items = list(PopularHistory.objects.filter(
                popular_history=p, user=None).prefetch_related('eye_hists'))
            if firehose_items:
                total_pop = firehose_items[0]
                # Do not reuse the outer loop index as the loop variable:
                # the original did, which broke the progress logging below.
                for duplicate in firehose_items[1:]:
                    duplicate.delete()
            else:
                total_pop = PopularHistory.objects.create(
                    popular_history=p, user=None)
            self._add_users_and_messages(total_pop, e)

            # for each of the users that are following the person
            # in this eyehistory, we add this eyehistory to the
            # the popularhistory item for that user
            follow_users = UserProfile.objects.filter(
                follows=e.user.profile).select_related()

            # do this outside of the loop so we can use an iterator
            user_pop, _ = PopularHistory.objects.get_or_create(
                popular_history=p, user=e.user)
            self._add_users_and_messages(user_pop, e)

            for user_prof in queryset_iterator(follow_users):
                user_pop, _ = PopularHistory.objects.get_or_create(
                    popular_history=p, user=user_prof.user)
                self._add_users_and_messages(user_pop, e)

            if index != 0 and index % CHUNK_SIZE == 0:
                self._log_updates(index, total_updates, 'popular_history')

        self._log_updates(index, total_updates, 'popular_history')
        self._delete_old()

        # we're interested in including one's own visits to the score in
        # one's own feed, but don't want to include in list of users
        popular_history = PopularHistory.objects.filter(
            user__isnull=False).prefetch_related(
                'visitors').select_related()

        for pop in queryset_iterator(popular_history):
            if pop.visitors.count() == 1:
                if pop.visitors.all()[0] == pop.user:
                    pop.delete()

        # remove eyehistories that are from over 10 weeks ago
        # if everything gets removed then delete the popularhistory
        # though this shouldn't happen (see above)
        popular_history = PopularHistory.objects.all().prefetch_related(
            'eye_hists')
        for pop in queryset_iterator(popular_history):
            pop.eye_hists.remove(
                *pop.eye_hists.filter(start_time__lt=cutoff))

        # if there are any popularhistoryinfo items now that have
        # no corresponding popularhistory objects (because they've been
        # deleted for being stale presumably) then delete
        PopularHistoryInfo.objects.filter(popularhistory=None).delete()
Example #7
0
 def remove_user_populate_history(self, user, remove_user):
     """Pass each of ``user``'s PopularHistory rows to
     ``_remove_users_and_messages`` together with ``remove_user``.

     NOTE(review): presumably called when ``user`` stops following
     ``remove_user`` -- confirm against the caller.
     """
     
     # every popular history item owned by ``user``
     user_pops = PopularHistory.objects.filter(user=user).select_related()
     
     # iterate through the project's queryset_iterator helper
     for user_pop in queryset_iterator(user_pops):
         self._remove_users_and_messages(user, user_pop, remove_user)