def _UpdatePastRecommendationItemIdsCacheAsync(user_id,
                                               time_period,
                                               new_item_ids=None):
    """Returns the set of item ids already recommended to the user.

    Reads the id list from memcache when present; otherwise rebuilds it from
    committed PastRecommendation entities and populates the cache. Optional
    new_item_ids are appended (deduplicated) and the list is trimmed to
    PAST_RECOMMENDATIONS_LIMIT before the cache is refreshed.

    NOTE(review): the bare `yield`s on async futures imply this is decorated
    with @ndb.tasklet above this chunk — confirm.

    Args:
      user_id: The user whose past recommendation ids are cached.
      time_period: Optional time period name; when set it scopes both the
        cache key and the datastore query. Falsy means all time periods.
      new_item_ids: Optional iterable of item ids to merge into the cache.

    Returns:
      (via ndb.Return) A set of the item ids now in the cache.
    """
    client = memcache.Client()
    # Cache key: 'prid:<user_id>' or 'prid:<user_id>:<time_period>'.
    name = 'prid:' + str(user_id)
    if time_period:
        name += ':' + time_period
    # get_multi_async resolves to a dict; an empty dict means cache miss.
    cached = yield client.get_multi_async([name])
    if cached:
        item_ids = cached[name]
    else:
        # Cache miss: rebuild the list from committed recommendations.
        # Projection query — only item_id is needed.
        query = models.PastRecommendation.query(
            models.PastRecommendation.user_id == user_id,
            models.PastRecommendation.committed == True,
            projection=['item_id'])
        if time_period:
            time_period_numeric = time_periods.Get(time_period)['numeric']
            query = query.filter(models.PastRecommendation.time_period_numeric
                                 == time_period_numeric)
        # Newest first, capped at the cache size limit.
        past_recommendations = yield query.order(
            -models.PastRecommendation.date).fetch_async(
                PAST_RECOMMENDATIONS_LIMIT)
        item_ids = [r.item_id for r in past_recommendations]
        # We reverse the list of seen item ids so that the newest items are in the
        # end. We do this because we append new items ids to the end and remove
        # extra items from the back.
        item_ids = list(reversed(item_ids))
    if new_item_ids:
        # Append only ids not already present, then keep the newest
        # PAST_RECOMMENDATIONS_LIMIT entries (newest are at the end).
        for item_id in new_item_ids:
            if item_id not in item_ids:
                item_ids.append(item_id)
        item_ids = item_ids[-PAST_RECOMMENDATIONS_LIMIT:]
    if new_item_ids or not cached:
        # NOTE(review): uses the module-level memcache.set while the read used
        # a Client instance — presumably equivalent here; confirm.
        memcache.set(name, item_ids)
    raise ndb.Return(set(item_ids))
def SavePastRecommendations(user_id, time_period, recommendations):
    """Persists recommendations as uncommitted PastRecommendation entities.

    Each recommendation is keyed by '<numeric period>:<url>' under the user's
    key, stamped with the current time, and saved with committed=False. A
    deferred task later commits them after a period of inactivity.

    Args:
      user_id: The user the recommendations were shown to.
      time_period: The time period name the recommendations belong to.
      recommendations: The recommendation objects to save, in page order.
    """
    numeric_period = time_periods.Get(time_period)['numeric']
    parent_key = models.UserKey(user_id)
    save_time = datetime.now()
    entities = []
    for position, rec in enumerate(recommendations):
        entity_key = ndb.Key(models.PastRecommendation,
                             str(numeric_period) + ':' + rec.destination_url,
                             parent=parent_key)
        entities.append(
            models.PastRecommendation(
                key=entity_key,
                user_id=user_id,
                item_id=rec.item_id or items.UrlToItemId(rec.destination_url),
                url=rec.destination_url,
                weight=rec.weight,
                time_period_numeric=numeric_period,
                serialized_recommendation=rec.Serialize(),
                committed=False,
                date=save_time,
                index_within_page=position))
    ndb.put_multi(entities)
    # Commit newly saved recommendations after 30 minutes of inactivity.
    deferred.defer(
        _CommitPastRecommendations,
        user_id,
        numeric_period,
        save_time,
        _countdown=TIME_TO_COMMIT_PAST_RECOMMENDATIONS.total_seconds())
Example #3
0
def MarkUnread(user_id, start_url, time_period):
    """Removes past recommendations from the list to be committed.

  Args:
    user_id: The user.
    start_url: This recommended item and below should not be committed.
    time_period: The time period the user is reading recommendation in.

  Returns:
    A tuple with the number of items affected and whether the visit will not be
    counted.
  """
    time_period_numeric = time_periods.Get(time_period)['numeric']
    # Uncommitted recommendations for this user and period, newest first.
    most_recently_saved = PastRecommendation.query(
        PastRecommendation.user_id == user_id,
        PastRecommendation.time_period_numeric == time_period_numeric,
        PastRecommendation.committed == False).order(
            -PastRecommendation.date).fetch()
    # Find the saved entity matching start_url (if it is still uncommitted).
    start_item = None
    for item in most_recently_saved:
        if item.url == start_url:
            start_item = item
    # Everything at or below start_item on the page is considered unread:
    # same save batch (date) with an equal-or-later position, or a newer
    # batch with an equal-or-lower weight.
    unread_item_keys = [
        item.key for item in most_recently_saved
        if ((not start_item)
            or  # In case the item for start_url is already committed.
            (item.date > start_item.date and item.weight <= start_item.weight
             ) or (item.date == start_item.date
                   and item.index_within_page >= start_item.index_within_page))
    ]
    # If all items were marked as unread then we don't count this visit.
    # For example, when the user opens Recommender just to recommend something.
    # BUGFIX: this flag was previously hard-coded to False, so the visit was
    # always counted even when every saved item was marked unread.
    visit_discarded = bool(most_recently_saved) and (len(unread_item_keys)
                                                    == len(most_recently_saved))
    ndb.delete_multi(unread_item_keys)
    return (len(unread_item_keys), visit_discarded)
Example #4
0
def PopularPagesReduce(key, values):
    """Reduce step: folds pickled (rating, time_passed) pairs for one
    (url, time_period) key into a single PopularPage entity.

    Each rating's contribution decays exponentially with the time passed,
    halving once per the period's timedelta. The resulting entity replaces
    any existing one; entities whose score drops to zero or below are
    deleted instead.
    """
    url, time_period = pickle.loads(key)
    page = models.PopularPage(url=url,
                              time_period=time_period,
                              score=0,
                              positive_ratings=0,
                              negative_ratings=0)
    half_life_seconds = time_periods.Get(
        time_period)['timedelta'].total_seconds()
    for raw_value in values:
        rating, time_passed = pickle.loads(raw_value)
        if rating < 0:
            page.negative_ratings += 1
        else:
            page.positive_ratings += 1
        decay = 0.5**(time_passed.total_seconds() / half_life_seconds)
        page.score += rating * decay
    existing = models.PopularPage.query(
        models.PopularPage.url == url,
        models.PopularPage.time_period == page.time_period).get()
    if page.score > 0:
        # Update the existing entry or create a new one.
        if existing is not None:
            page.key = existing.key
        page.put()
    elif existing is not None:
        # Or delete the existing entry.
        existing.key.delete()
Example #5
0
def GetPastRecommendations(user_id, time_period, offset, limit):
    """Fetches a page of the user's committed past recommendations.

    Results are ordered newest session first and, within a session, by
    descending weight; each stored row is deserialized and decorated
    before being returned.
    """
    numeric_period = time_periods.Get(time_period)['numeric']
    query = PastRecommendation.query(
        PastRecommendation.user_id == user_id,
        PastRecommendation.time_period_numeric == numeric_period,
        PastRecommendation.committed == True,
    )
    ordered = query.order(-PastRecommendation.session_number,
                          -PastRecommendation.weight)
    rows = ordered.fetch(limit, offset=offset)
    recommendations = []
    for row in rows:
        recommendations.append(
            DeserializeRecommendation(row.serialized_recommendation))
    return DecorateRecommendations(user_id, recommendations)
Example #6
0
def PopularPages(user, time_period, offset, limit):
    """Returns a list of popular items.

  Args:
    user: The current user.
    time_period: The time period outside of which votes don't count.
    offset: The pagination offset.
    limit: The pagination page size.

  Returns:
    A list of popular items.
  """
    # The RECENT view is aggregated on the fly from individual ratings;
    # every other period is served from precomputed PopularPage entities.
    # An item's score is sum of: vote.value * nominal_weight, time-decayed.
    is_recent = time_period == time_periods.RECENT
    if is_recent:
        now_date = datetime.now()
        query = PageRating.query(projection=[
            'url', 'user_id', 'rating', 'category', 'date', 'source'
        ])
        # BUGFIX(cleanup): this branch only runs for RECENT, so the previous
        # date filter for other periods and the fetch_limit=1000 default were
        # unreachable; the recent view always fetches the newest 200 ratings.
        fetch_limit = 200
        user_ratings = query.order(-PageRating.date).fetch(fetch_limit)

        url_to_popular_page = {}
        # Every rater currently carries the same weight.
        nominal_weight = 1
        for r in user_ratings:
            if r.rating == 0:
                continue
            if r.url in url_to_popular_page:
                popular_page = url_to_popular_page[r.url]
            else:
                popular_page = PopularPage(url=r.url,
                                           score=0,
                                           positive_ratings=0,
                                           negative_ratings=0,
                                           updated_datetime=r.date)
                url_to_popular_page[r.url] = popular_page
            if r.rating > 0:
                popular_page.positive_ratings += 1
            if r.rating < 0:
                popular_page.negative_ratings += 1
            # Track the oldest rating date in the window; the score is
            # decayed from that timestamp below. (The max() variant for
            # non-recent periods was dead code in this branch.)
            popular_page.updated_datetime = min(popular_page.updated_datetime,
                                                r.date)
            popular_page.score += r.rating * nominal_weight

        # Keep only positively-scored pages, apply time decay, rank, paginate.
        result = [v for v in url_to_popular_page.values() if v.score > 0]
        for v in result:
            v.score = _TimeDecayScore(v.score, v.updated_datetime, now_date)
        result.sort(key=lambda v: (v.score, v.updated_datetime), reverse=True)
        result = result[offset:offset + limit]
    else:
        query = PopularPage.query(PopularPage.time_period == time_period)
        result = query.order(-PopularPage.score).fetch(limit, offset=offset)
    # Annotate each page with the current user's own rating and category.
    urls = set()
    for p in result:
        urls.add(p.url)
    rated_pages_future = GetRatedPagesAsync(user, urls)
    PopulatePageInfos(result)
    rated_pages = rated_pages_future.get_result()
    for p in result:
        if p.url in rated_pages:
            rating = rated_pages[p.url]
            p.rating = rating.rating
            if rating.category is not None:
                p.category = rating.category.get()
    return result
def _GetSinceTime(time_period, now):
    """Returns the earliest datetime whose ratings count for time_period.

    Args:
      time_period: A time period name understood by time_periods.Get.
      now: The current datetime.

    Returns:
      datetime.min for the ALL period (no cutoff), otherwise now minus the
      period's timedelta.
    """
    # Reindented to the file's 4-space convention (was 2-space).
    if time_period == time_periods.ALL:
        return datetime.min
    return now - time_periods.Get(time_period)['timedelta']