import pickle
from datetime import datetime

from google.appengine.api import memcache
from google.appengine.ext import deferred
from google.appengine.ext import ndb

# Application-local modules and module-level helpers/constants (e.g.
# PAST_RECOMMENDATIONS_LIMIT, TIME_TO_COMMIT_PAST_RECOMMENDATIONS,
# _CommitPastRecommendations, _TimeDecayScore) are assumed to be defined in
# this app; these import lines are a best guess at the originals.
import items
import models
import time_periods
from models import PageRating, PastRecommendation, PopularPage


@ndb.tasklet
def _UpdatePastRecommendationItemIdsCacheAsync(user_id, time_period,
                                               new_item_ids=None):
  client = memcache.Client()
  name = 'prid:' + str(user_id)
  if time_period:
    name += ':' + time_period
  cached = yield client.get_multi_async([name])
  if cached:
    item_ids = cached[name]
  else:
    query = models.PastRecommendation.query(
        models.PastRecommendation.user_id == user_id,
        models.PastRecommendation.committed == True,
        projection=['item_id'])
    if time_period:
      time_period_numeric = time_periods.Get(time_period)['numeric']
      query = query.filter(
          models.PastRecommendation.time_period_numeric == time_period_numeric)
    past_recommendations = yield query.order(
        -models.PastRecommendation.date).fetch_async(
            PAST_RECOMMENDATIONS_LIMIT)
    item_ids = [r.item_id for r in past_recommendations]
    # The query returns newest items first; we reverse the list of seen item
    # ids so that the newest items are at the end. We do this because we
    # append new item ids to the end and trim extra items from the front.
    item_ids = list(reversed(item_ids))
  if new_item_ids:
    for item_id in new_item_ids:
      if item_id not in item_ids:
        item_ids.append(item_id)
  item_ids = item_ids[-PAST_RECOMMENDATIONS_LIMIT:]
  if new_item_ids or not cached:
    memcache.set(name, item_ids)
  raise ndb.Return(set(item_ids))
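
# Illustrative only (not part of the original module): the tasklet above is
# meant to be yielded from another ndb tasklet. A minimal sketch of a caller,
# assuming `candidates` is a list of objects with an `item_id` attribute:
@ndb.tasklet
def _ExampleFilterSeenItemsAsync(user_id, time_period, candidates):
  # Yield the tasklet to get the set of item ids already recommended.
  seen_item_ids = yield _UpdatePastRecommendationItemIdsCacheAsync(
      user_id, time_period)
  # Keep only the candidates the user has not been shown before.
  raise ndb.Return([c for c in candidates if c.item_id not in seen_item_ids])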
def SavePastRecommendations(user_id, time_period, recommendations):
  time_period_numeric = time_periods.Get(time_period)['numeric']
  user_key = models.UserKey(user_id)
  save_time = datetime.now()
  past_recommendations = [
      models.PastRecommendation(
          key=ndb.Key(
              models.PastRecommendation,
              str(time_period_numeric) + ':' + r.destination_url,
              parent=user_key),
          user_id=user_id,
          item_id=r.item_id or items.UrlToItemId(r.destination_url),
          url=r.destination_url,
          weight=r.weight,
          time_period_numeric=time_period_numeric,
          serialized_recommendation=r.Serialize(),
          committed=False,
          date=save_time,
          index_within_page=i) for i, r in enumerate(recommendations)
  ]
  ndb.put_multi(past_recommendations)
  # Commit newly saved recommendations after 30 minutes of inactivity.
  deferred.defer(
      _CommitPastRecommendations,
      user_id,
      time_period_numeric,
      save_time,
      _countdown=TIME_TO_COMMIT_PAST_RECOMMENDATIONS.total_seconds())
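
# Note (added commentary): because each PastRecommendation key id embeds the
# time period and destination URL under the user's key, re-saving the same
# page in the same period overwrites the earlier uncommitted entity instead
# of creating a duplicate. A sketch of the resulting key, assuming a numeric
# period of 1 and an example URL:
#
#   ndb.Key(models.PastRecommendation, '1:http://example.com/a',
#           parent=models.UserKey(user_id))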
def MarkUnread(user_id, start_url, time_period):
  """Removes past recommendations from the list to be committed.

  Args:
    user_id: The user.
    start_url: This recommended item and the ones below it should not be
        committed.
    time_period: The time period the user is reading recommendations in.

  Returns:
    A tuple with the number of items affected and whether the visit will not
    be counted.
  """
  time_period_numeric = time_periods.Get(time_period)['numeric']
  most_recently_saved = PastRecommendation.query(
      PastRecommendation.user_id == user_id,
      PastRecommendation.time_period_numeric == time_period_numeric,
      PastRecommendation.committed == False).order(
          -PastRecommendation.date).fetch()
  start_item = None
  for item in most_recently_saved:
    if item.url == start_url:
      start_item = item
  unread_item_keys = [
      item.key for item in most_recently_saved
      if ((not start_item) or
          # In case the item for start_url is already committed.
          (item.date > start_item.date and
           item.weight <= start_item.weight) or
          (item.date == start_item.date and
           item.index_within_page >= start_item.index_within_page))
  ]
  # If all items were marked as unread then we don't count this visit.
  # For example, when the user opens Recommender just to recommend something.
  visit_discarded = bool(most_recently_saved) and (
      len(unread_item_keys) == len(most_recently_saved))
  ndb.delete_multi(unread_item_keys)
  return (len(unread_item_keys), visit_discarded)
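
# Worked example (added commentary): suppose one uncommitted page of ten
# recommendations was saved with the same `date`, and the user marks the
# fourth item (index_within_page == 3) as unread. Every item with the same
# date and index_within_page >= 3 matches the predicate above, so items
# 4 through 10 are deleted; items saved later whose weight is <= the start
# item's weight are deleted as well.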
def PopularPagesReduce(key, values):
  url, time_period = pickle.loads(key)
  popular_page = models.PopularPage(
      url=url,
      time_period=time_period,
      score=0,
      positive_ratings=0,
      negative_ratings=0)
  half_life_seconds = time_periods.Get(
      time_period)['timedelta'].total_seconds()
  for value in values:
    rating, time_passed = pickle.loads(value)
    if rating < 0:
      popular_page.negative_ratings += 1
    else:
      popular_page.positive_ratings += 1
    popular_page.score += rating * (
        0.5**(time_passed.total_seconds() / half_life_seconds))
  existing = models.PopularPage.query(
      models.PopularPage.url == url,
      models.PopularPage.time_period == popular_page.time_period).get()
  # Update the existing entry or create a new one.
  if popular_page.score > 0:
    if existing is not None:
      popular_page.key = existing.key
    popular_page.put()
  # Or delete the existing entry.
  elif existing is not None:
    existing.key.delete()
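
# Decay illustration (added commentary): each rating is exponentially
# decayed, losing half its weight per half-life of the time period. With an
# assumed half-life of 7 days, a +1 rating left 7 days ago contributes 0.5
# to the score, and one left 14 days ago contributes 0.25:
#
#   from datetime import timedelta
#   half_life = timedelta(days=7).total_seconds()
#   0.5 ** (timedelta(days=7).total_seconds() / half_life)    # == 0.5
#   0.5 ** (timedelta(days=14).total_seconds() / half_life)   # == 0.25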
def GetPastRecommendations(user_id, time_period, offset, limit):
  time_period_numeric = time_periods.Get(time_period)['numeric']
  past_recommendations = PastRecommendation.query(
      PastRecommendation.user_id == user_id,
      PastRecommendation.time_period_numeric == time_period_numeric,
      PastRecommendation.committed == True,
  ).order(-PastRecommendation.session_number,
          -PastRecommendation.weight).fetch(limit, offset=offset)
  recommendations = [
      DeserializeRecommendation(r.serialized_recommendation)
      for r in past_recommendations
  ]
  return DecorateRecommendations(user_id, recommendations)
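
# Usage sketch (illustrative): committed history is paginated directly via
# offset/limit, newest sessions first; for example, the second page of
# twenty recommendations:
#
#   GetPastRecommendations(user_id, time_period, offset=20, limit=20)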
def PopularPages(user, time_period, offset, limit):
  """Returns a list of popular items.

  Args:
    user: The current user.
    time_period: The time period outside of which votes don't count.
    offset: The pagination offset.
    limit: The pagination page size.

  Returns:
    A list of popular items.
  """
  # Get all top connections.
  # We find all ratings that were left within the time period.
  # Find the weights of top connections.
  # The score of an item is the sum of:
  #   vote.value * (trust(vote.user) + nominal_weight)
  is_recent = time_period == time_periods.RECENT
  if is_recent:
    now_date = datetime.now()
    query = PageRating.query(
        projection=['url', 'user_id', 'rating', 'category', 'date', 'source'])
    if time_period != time_periods.ALL and time_period != time_periods.RECENT:
      query = query.filter(
          PageRating.date >
          now_date - time_periods.Get(time_period)['timedelta'])
    fetch_limit = 1000
    if is_recent:
      fetch_limit = 200
    user_ratings = query.order(-PageRating.date).fetch(fetch_limit)
    url_to_popular_page = {}
    nominal_weight = 1
    for r in user_ratings:
      if r.rating == 0:
        continue
      if r.url in url_to_popular_page:
        popular_page = url_to_popular_page[r.url]
      else:
        popular_page = PopularPage(
            url=r.url,
            score=0,
            positive_ratings=0,
            negative_ratings=0,
            updated_datetime=r.date)
        url_to_popular_page[r.url] = popular_page
      if r.rating > 0:
        popular_page.positive_ratings += 1
      if r.rating < 0:
        popular_page.negative_ratings += 1
      if is_recent:
        popular_page.updated_datetime = min(popular_page.updated_datetime,
                                            r.date)
      else:
        popular_page.updated_datetime = max(popular_page.updated_datetime,
                                            r.date)
      weight = nominal_weight
      popular_page.score += r.rating * weight
    result = [v for v in url_to_popular_page.values() if v.score > 0]
    for v in result:
      v.score = _TimeDecayScore(v.score, v.updated_datetime, now_date)
    result.sort(key=lambda v: (v.score, v.updated_datetime), reverse=True)
    result = result[offset:offset + limit]
  else:
    query = PopularPage.query(PopularPage.time_period == time_period)
    result = query.order(-PopularPage.score).fetch(limit, offset=offset)
  urls = set()
  for p in result:
    urls.add(p.url)
  rated_pages_future = GetRatedPagesAsync(user, urls)
  PopulatePageInfos(result)
  rated_pages = rated_pages_future.get_result()
  for p in result:
    if p.url in rated_pages:
      rating = rated_pages[p.url]
      p.rating = rating.rating
      if rating.category is not None:
        p.category = rating.category.get()
  return result
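
# _TimeDecayScore is defined elsewhere in this module. A plausible sketch,
# assuming it applies the same half-life decay that PopularPagesReduce uses,
# keyed on the RECENT period (an assumption, not the actual implementation):
#
#   def _TimeDecayScore(score, updated_datetime, now_date):
#     half_life_seconds = time_periods.Get(
#         time_periods.RECENT)['timedelta'].total_seconds()
#     elapsed_seconds = (now_date - updated_datetime).total_seconds()
#     return score * (0.5 ** (elapsed_seconds / half_life_seconds))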
def _GetSinceTime(time_period, now):
  if time_period == time_periods.ALL:
    return datetime.min
  return now - time_periods.Get(time_period)['timedelta']
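
# Example (added commentary): for time_periods.ALL the returned datetime.min
# makes a `PageRating.date > since` filter match every rating; for an
# assumed 7-day period it returns `now - timedelta(days=7)`.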