Example #1
    def filter_popular(self, queryset, value):
        """
        Recommend content that is popular with all users.

        :param queryset: all content nodes for this channel
        :param value: id of currently logged in user, or None if user is anonymous
        :return: 10 most popular content nodes
        """
        if ContentSessionLog.objects.count() < 50:
            # return 25 random content nodes if not enough session logs
            pks = queryset.values_list('pk', flat=True).exclude(kind=content_kinds.TOPIC)
            # .count scales with table size, so can get slow on larger channels
            count_cache_key = 'content_count_for_{}'.format(get_active_content_database())
            count = cache.get(count_cache_key) or min(pks.count(), 25)
            return queryset.filter(pk__in=sample(list(pks), count))

        cache_key = 'popular_for_{}'.format(get_active_content_database())
        if cache.get(cache_key):
            return cache.get(cache_key)

        # get the most accessed content nodes
        content_counts_sorted = ContentSessionLog.objects \
            .filter(channel_id=get_active_content_database()) \
            .values_list('content_id', flat=True) \
            .annotate(Count('content_id')) \
            .order_by('-content_id__count')

        most_popular = queryset.filter(content_id__in=list(content_counts_sorted[:10]))

        # cache the popular results queryset for 10 minutes, for efficiency
        cache.set(cache_key, most_popular, 60 * 10)
        return most_popular
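Note: count_cache_key is read but never written in this excerpt, so the pks.count() fallback runs on every cold call unless other code populates the key. A hedged one-liner that would populate it, reusing the 10-minute timeout the snippet already uses for its popular-results cache (the timeout choice is an assumption):

    # Assumption: cache the expensive count for 10 minutes, mirroring the
    # 60 * 10 timeout used for the popular-results cache above.
    cache.set(count_cache_key, count, 60 * 10)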
Example #2
    def filter_popular(self, queryset, value):
        """
        Recommend content that is popular with all users.

        :param queryset: all content nodes for this channel
        :param value: id of currently logged in user, or None if user is anonymous
        :return: 10 most popular content nodes
        """
        if ContentSessionLog.objects.count() < 50:
            # return 25 random content nodes if not enough session logs
            pks = queryset.values_list('pk', flat=True).exclude(kind__in=['topic', ''])
            count = min(pks.count(), 25)
            return queryset.filter(pk__in=sample(list(pks), count))

        cache_key = 'popular_for_{}'.format(get_active_content_database())
        if cache.get(cache_key):
            return cache.get(cache_key)

        # get the most accessed content nodes
        content_counts_sorted = ContentSessionLog.objects \
            .filter(channel_id=get_active_content_database()) \
            .values_list('content_id', flat=True) \
            .annotate(Count('content_id')) \
            .order_by('-content_id__count')

        most_popular = queryset.filter(content_id__in=list(content_counts_sorted[:10]))

        # cache the popular results queryset for 10 minutes, for efficiency
        cache.set(cache_key, most_popular, 60 * 10)
        return most_popular
Example #3
    def filter_resume(self, queryset, value):
        """
        Recommend content that the user has recently engaged with, but not finished.

        :param queryset: all content nodes for this channel
        :param value: id of currently logged in user, or None if user is anonymous
        :return: 10 most recently viewed content nodes
        """

        # if user is anonymous, return no nodes
        if not value:
            return queryset.none()

        # get the most recently viewed, but not finished, content nodes
        content_ids = ContentSummaryLog.objects \
            .filter(user=value, channel_id=get_active_content_database()) \
            .exclude(progress=1) \
            .order_by('-end_timestamp') \
            .values_list('content_id', flat=True) \
            .distinct()

        # If no logs, don't bother doing the other queries
        if not content_ids:
            return queryset.none()

        resume = queryset.filter(content_id__in=list(content_ids[:10]))

        return resume
Example #4
 def list(self, request, **kwargs):
     with using_content_database(kwargs['channel_id']):
         file_summary = models.File.objects.aggregate(
             total_files=Count('pk'), total_file_size=Sum('file_size'))
         file_summary['channel_id'] = get_active_content_database()
         # Need to wrap in an array to be fetchable as a Collection on client
         return Response([file_summary])
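Because the aggregate dict is wrapped in a list, the client fetches a one-element collection; the response body looks roughly like the following (all values made up for illustration):

    # Illustrative payload only; real values depend on the channel.
    [{'total_files': 1042, 'total_file_size': 73400320, 'channel_id': '6199dde695db4ee4ab392222d5af1e5c'}]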
Example #5
    def filter_by_content_ids(self, content_ids, content_id_lookup="content_id"):
        """
        Filter a set of logs by content_id, using content_ids from the provided list or queryset.
        """

        if default_database_is_attached():
            # perform the query using an efficient cross-database join, if possible
            return self.using(get_active_content_database()).filter(**{content_id_lookup + "__in": content_ids})
        else:
            # if the databases can't be joined, convert the content_ids into a list and pass in
            return self.filter(**{content_id_lookup + "__in": list(content_ids)})
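A hedged usage sketch, assuming this method lives on the log models' custom queryset/manager (as the filter_by_topic call in Example #7 suggests) and that topic_node is some ContentNode instance:

    # Hypothetical call: restrict summary logs to a topic's descendants.
    content_ids = topic_node.get_descendants(include_self=True) \
        .values_list('content_id', flat=True)
    logs = ContentSummaryLog.objects.filter_by_content_ids(content_ids)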
Example #6
    def ancestors(self, request, **kwargs):
        cache_key = 'contentnode_ancestors_{db}_{pk}'.format(db=get_active_content_database(), pk=kwargs.get('pk'))

        if cache.get(cache_key) is not None:
            return Response(cache.get(cache_key))

        ancestors = list(self.get_object().get_ancestors().values('pk', 'title'))

        cache.set(cache_key, ancestors, 60 * 10)

        return Response(ancestors)
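The cache key embeds both the active content database and the node pk, so entries from different channels never collide. If a node's ancestors change, the stale entry can be dropped by rebuilding the same key; a minimal sketch (node is an assumed ContentNode instance):

    # Hypothetical invalidation: rebuild the key and delete the cached list.
    cache_key = 'contentnode_ancestors_{db}_{pk}'.format(
        db=get_active_content_database(), pk=node.pk)
    cache.delete(cache_key)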
Example #7
 def get_queryset(self):
      attempted_mastery_logs = MasteryLog.objects.filter(attemptlogs__isnull=False)
     query_node = ContentNode.objects.get(pk=self.kwargs['content_node_id'])
     if self.request.query_params.get('last_active_time'):
         # Last active time specified
         datetime_cutoff = parse(
             self.request.query_params.get('last_active_time'))
     else:
         datetime_cutoff = timezone.now() - datetime.timedelta(7)
     # Set on the kwargs to pass into the serializer
     self.kwargs['last_active_time'] = datetime_cutoff.isoformat()
      if default_database_is_attached():
          # if possible, do a direct join between the content and default databases
          channel_alias = get_active_content_database()
          SummaryLogManager = ContentSummaryLog.objects.using(channel_alias)
     else:
         SummaryLogManager = ContentSummaryLog.objects
      recent_content_items = SummaryLogManager \
          .filter_by_topic(query_node) \
          .filter(
              Q(progress__gt=0) | Q(masterylogs__in=attempted_mastery_logs),
              user__in=list(get_members_or_user(self.kwargs['collection_kind'], self.kwargs['collection_id'])),
              end_timestamp__gte=datetime_cutoff) \
          .values_list('content_id', flat=True)
     if not default_database_is_attached():
         recent_content_items = list(recent_content_items)
     # note from rtibbles:
     # As good as either I or jamalex could come up with to ensure that we only return
     # unique content_id'ed ContentNodes from the coach recent report endpoint.
     # Would have loved to use distinct('content_id'), but unfortunately DISTINCT ON is Postgresql only
      pks_with_unique_content_ids = ContentNode.objects \
          .filter(content_id__in=recent_content_items) \
          .values('content_id') \
          .order_by('lft') \
          .annotate(pk=Min('pk')) \
          .values_list('pk', flat=True)
      return ContentNode.objects.filter(pk__in=pks_with_unique_content_ids).order_by('lft')
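For contrast, the distinct('content_id') that the comment wishes for would look like this; a sketch only, since Django's field-based distinct() is Postgresql-only and so unusable here:

    # Postgresql-only equivalent of the Min('pk') grouping above.
    # DISTINCT ON requires the ordering to start with the distinct field.
    unique_nodes = ContentNode.objects \
        .filter(content_id__in=recent_content_items) \
        .order_by('content_id', 'lft') \
        .distinct('content_id')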
Example #8
    def filter_resume(self, queryset, value):
        """
        Recommend content that the user has recently engaged with, but not finished.

        :param queryset: all content nodes for this channel
        :param value: id of currently logged in user, or None if user is anonymous
        :return: 10 most recently viewed content nodes
        """

        # if user is anonymous, return no nodes
        if not value:
            return queryset.none()

        # get the most recently viewed, but not finished, content nodes
        content_ids = ContentSummaryLog.objects \
            .filter(user=value, channel_id=get_active_content_database()) \
            .exclude(progress=1) \
            .order_by('-end_timestamp') \
            .values_list('content_id', flat=True) \
            .distinct()

        resume = queryset.filter(content_id__in=list(content_ids[:10]))

        return resume
Example #9
def get_progress_and_last_active(target_nodes, **kwargs):
    # Prepare dictionaries to output the progress and last active, keyed by content_id
    output_progress_dict = {}
    output_last_active_dict = {}
    # Get a list of all the users that we are querying
    users = list(get_members_or_user(kwargs['collection_kind'], kwargs['collection_id']))

    # Get a list of all content ids for all target nodes and their descendants
    content_ids = target_nodes.get_descendants(include_self=True).order_by().values_list("content_id", flat=True)
    # get all summary logs for the current user that correspond to the content nodes and descendant content nodes
    if default_database_is_attached():  # if possible, do a direct join between the content and default databases
        channel_alias = get_active_content_database()
        SummaryLogManager = ContentSummaryLog.objects.using(channel_alias)
    else:  # otherwise, convert the leaf queryset into a flat list of ids and use that
        SummaryLogManager = ContentSummaryLog.objects
        content_ids = list(content_ids)
    # Filter by users and the content ids
    progress_query = SummaryLogManager \
        .filter(user__in=users, content_id__in=content_ids)
    # Conditionally filter by last active time
    if kwargs.get('last_active_time'):
        # Reassign: filter() returns a new queryset rather than mutating in place
        progress_query = progress_query.filter(end_timestamp__gte=parse(kwargs.get('last_active_time')))
    # Get an annotated list of dicts of type:
    # {
    #   'content_id': <content_id>,
    #   'kind': <kind>,
    #   'total_progress': <sum of all progress for this content>,
    #   'log_count_total': <number of summary logs for this content>,
    #   'log_count_complete': <number of complete summary logs for this content>,
    #   'last_active': <most recent end_timestamp for this content>,
    # }
    progress_list = progress_query.values('content_id', 'kind').annotate(
        total_progress=Sum('progress'),
        log_count_total=Count('pk'),
        log_count_complete=Sum(Case(When(progress=1, then=1), default=0, output_field=IntegerField())),
        last_active=Max('end_timestamp'))
    # Evaluate the query and build a lookup dict of all progress, keyed by content_id
    progress_dict = {item.get('content_id'): item for item in progress_list}
    if isinstance(target_nodes, ContentNode):
        # Have been passed an individual model
        target_nodes = [target_nodes]
    # Loop through each node to add progress and last active information to the output dicts
    for target_node in target_nodes:
        # In the case of a topic, we need to look at the progress and last active from each of its descendants
        if target_node.kind == content_kinds.TOPIC:
            # Get all the content_ids and kinds of each leaf node as a tuple
            # (about half the size of the dict from 'values' method)
            # Remove topics in generator comprehension, rather than using .exclude as kind is not indexed
            # Use set to remove repeated content
            leaf_nodes = set(node for node in target_node.get_descendants(include_self=False).order_by().values_list(
                'content_id', 'kind') if node[1] != content_kinds.TOPIC)
            # Get a unique set of all non-topic content kinds
            leaf_kinds = sorted(set(leaf_node[1] for leaf_node in leaf_nodes))
            # Create a list of progress summary dicts for each content kind
            progress = [{
                # For total progress sum across all the progress dicts for the descendant content leaf nodes
                'total_progress': reduce(
                    # Reduce with a function that just adds the total_progress of the passed in dict to the accumulator
                    sum_progress_dicts,
                    # Get all dicts of progress for every leaf_id that has some progress recorded
                    # and matches the kind we are aggregating over
                    (progress_dict.get(leaf_node[0]) for leaf_node in leaf_nodes
                        if leaf_node[0] in progress_dict and leaf_node[1] == kind),
                    # Pass in an initial value of total_progress as zero to initialize the reduce
                    0.0,
                ),
                'kind': kind,
                # Count the number of leaf nodes of this particular kind
                'node_count': reduce(lambda x, y: x + int(y[1] == kind), leaf_nodes, 0)
            } for kind in leaf_kinds]
            # Set the output progress for this topic to this list of progress dicts
            output_progress_dict[target_node.content_id] = progress
            # Create a generator of last active times for the leaf_ids
            last_active_times = map(
                # Return the last active time for this leaf_node
                lambda leaf_node: progress_dict[leaf_node[0]]['last_active'],
                filter(
                    # Filter leaf_nodes to those that are in the progress_dict
                    lambda leaf_node: leaf_node[0] in progress_dict,
                    leaf_nodes))
            # Max does not handle empty iterables, so try this
            try:
                # If it is not empty, great!
                output_last_active_dict[target_node.content_id] = max(last_active_times)
            except (ValueError, TypeError):
                # If it is empty, catch the value error and set the last active time to None
                # If they are all none, catch the TypeError and also set to None
                output_last_active_dict[target_node.content_id] = None
        else:
            if target_node.content_id in progress_dict:
                progress = progress_dict.pop(target_node.content_id)
                output_last_active_dict[target_node.content_id] = progress.pop('last_active')
                # return as array for consistency in api
                output_progress_dict[target_node.content_id] = [{
                    'total_progress': progress['total_progress'],
                    'log_count_total': progress['log_count_total'],
                    'log_count_complete': progress['log_count_complete'],
                }]
            elif target_node.content_id not in output_progress_dict:
                # Not in the progress dict, but also not in our output, so supply default values
                output_last_active_dict[target_node.content_id] = None
                output_progress_dict[target_node.content_id] = [{
                    'total_progress': 0.0,
                    'log_count_total': 0,
                    'log_count_complete': 0,
                }]
    return output_progress_dict, output_last_active_dict
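The reduce above leans on a sum_progress_dicts helper that the excerpt does not show; a minimal sketch consistent with how it is called here (a float accumulator plus one progress dict per step):

    def sum_progress_dicts(total_progress, progress_dict):
        # Add this dict's total_progress onto the running float accumulator.
        return total_progress + progress_dict.get('total_progress', 0.0)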