Beispiel #1
0
 def create_random_video_file(self):
     """
     Write an empty ``.mp4`` placeholder for one video from the node cache.

     The video used is whichever key the "Video" node cache lists first.
     The file is created under ``settings.CONTENT_ROOT`` and named after
     that video's youtube_id; the test fails if the file does not exist
     afterwards.

     Returns:
         tuple: ``(path_to_file, video_id, youtube_id)``.
     """
     video_cache = get_node_cache("Video")
     video_id = video_cache.keys()[0]
     youtube_id = video_cache[video_id]["youtube_id"]

     file_name = "%s.mp4" % youtube_id
     placeholder_path = os.path.join(settings.CONTENT_ROOT, file_name)

     # Writing an empty string is just a way of creating the file.
     with open(placeholder_path, "w") as placeholder:
         placeholder.write("")

     failure_message = "Make sure the video file was created, youtube_id='%s'." % youtube_id
     self.assertTrue(os.path.exists(placeholder_path), failure_message)
     return (placeholder_path, video_id, youtube_id)
Beispiel #2
0
def exercise_dashboard(request):
    """
    Build the template context for the exercise dashboard.

    The optional ``topic`` query parameter selects a topic slug. Without
    it the generic knowledge-map title is used; with a slug found in the
    "Topic" node cache, the translated title of that topic's first node
    is used.

    Returns:
        dict: ``{"title": <title>}``.

    Raises:
        Http404: if a slug is given but is not in the "Topic" node cache.
    """
    slug = request.GET.get("topic")

    if slug:
        topic_cache = topic_tools.get_node_cache("Topic")
        if slug not in topic_cache:
            raise Http404
        title = _(topic_cache[slug][0]["title"])
    else:
        title = _("Your Knowledge Map")

    return {"title": title}
Beispiel #3
0
 def create_random_video_file(self):
     """
     Test helper: fake a video on disk by creating an empty file.

     Picks the first key of the "Video" node cache, looks up its
     youtube_id, and creates ``<youtube_id>.mp4`` inside
     ``settings.CONTENT_ROOT``. Asserts that the file exists before
     returning.

     Returns a ``(file path, video_id, youtube_id)`` tuple.
     """
     videos = get_node_cache("Video")
     video_id = videos.keys()[0]
     youtube_id = videos[video_id]["youtube_id"]
     target = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % youtube_id)

     with open(target, "w") as out:
         out.write("")

     was_created = os.path.exists(target)
     self.assertTrue(
         was_created,
         "Make sure the video file was created, youtube_id='%s'." % youtube_id,
     )
     return (target, video_id, youtube_id)
Beispiel #4
0
    def handle(self, *args, **options):
        """
        Scrape exercises for a single language.

        Options read: ``lang_code`` (required), ``exercise_ids``
        (comma-separated string), ``topic_id`` and ``force``.

        The exercises to scrape are chosen in this order of preference:
        the ids given in ``exercise_ids``; otherwise the exercises returned
        by ``get_topic_exercises`` for ``topic_id``; otherwise every key in
        the "Exercise" node cache.

        Raises:
            CommandError: if no language code was given.
        """
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        lang_code = lcode_to_ietf(options["lang_code"])

        if lang_code in AVAILABLE_EXERCISE_LANGUAGE_CODES:
            # Work out which exercises to fetch, most specific source first.
            exercise_ids = None
            if options["exercise_ids"]:
                exercise_ids = options["exercise_ids"].split(",")
            if not exercise_ids and options["topic_id"]:
                topic_exercises = get_topic_exercises(topic_id=options["topic_id"])
                exercise_ids = [ex["id"] for ex in topic_exercises]
            if not exercise_ids:
                exercise_ids = get_node_cache("Exercise").keys()

            # Download the exercises
            for exercise_id in exercise_ids:
                scrape_exercise(
                    exercise_id=exercise_id,
                    lang_code=lang_code,
                    force=options["force"],
                )
        else:
            logging.info("No exercises available for language %s" % lang_code)

        logging.info("Process complete.")
def update_all_distributed_callback(request):
    """
    Save the video and exercise logs POSTed to this endpoint.

    Expected POST fields:
        video_logs: JSON-encoded list of dicts, each with at least
            ``video_id`` and ``youtube_id``.
        exercise_logs: JSON-encoded list of dicts, each with at least
            ``exercise_id``.
        user_id: id of the FacilityUser the logs belong to.

    Entries whose id is not in the node cache are skipped with a warning.
    Entries missing a required key are logged as errors and skipped.

    Returns:
        JsonResponse with a "success" message counting the saved logs, or
        JsonResponseMessageError as soon as an unexpected error occurs.

    Raises:
        PermissionDenied: if the request is not a POST.
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # NOTE(review): every key of each posted dict is applied to the log
    # object with setattr below; there is no whitelist of attribute names.

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        # Read the ids inside a KeyError guard, so that an entry without them
        # is logged and skipped instead of aborting the whole request.
        try:
            video_id = video['video_id']
            youtube_id = video['youtube_id']
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _created) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        try:
            exercise_id = exercise['exercise_id']
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
            continue

        # Only save exercise logs for exercises that we recognize.
        if exercise_id not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise_id)
            continue

        try:
            (el, _created) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise_id)
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise_id, el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})
Beispiel #6
0
    def show_cache(self, force=False):
        """
        Write the paths of all currently cached pages to ``self.stdout``.

        For each node type (Topic, Video, Exercise) a heading is written,
        followed by one tab-indented line per node whose path has a cache
        key. Nodes without a cache key are not listed.

        ``force`` is accepted but not used by this method.
        """
        for node_type in ('Topic', 'Video', 'Exercise'):
            self.stdout.write("Cached %ss:\n" % node_type)

            cached_nodes = topic_tools.get_node_cache(node_type)
            for node_list in cached_nodes.values():
                for node in node_list:
                    path = node["path"]
                    if not caching.has_cache_key(path=path):
                        continue
                    self.stdout.write("\t%s\n" % path)
Beispiel #7
0
    def show_cache(self, force=False):
        """
        List every cacheable page that is currently cached.

        Walks the Topic, Video and Exercise node caches in that order and
        writes, under a per-type heading, the path of each node for which
        ``caching.has_cache_key`` is true. Pages that are not cached are
        not written. The ``force`` argument is not used here.
        """
        for node_type in ['Topic', 'Video', 'Exercise']:
            self.stdout.write("Cached %ss:\n" % node_type)

            # Flatten the lists of nodes lazily, so that the cache checks and
            # the writes stay interleaved node by node.
            node_lists = topic_tools.get_node_cache(node_type).values()
            all_nodes = (node for nodes in node_lists for node in nodes)
            for node in all_nodes:
                if caching.has_cache_key(path=node["path"]):
                    self.stdout.write("\t%s\n" % node["path"])
    def setUp(self):
        """
        Prepare a logged-in student who is looking at the exercise page.

        Creates the student, logs them in through the browser, browses to
        the path of the first node cached for ``self.EXERCISE_SLUG``, and
        checks that no Django messages are displayed.
        """
        super(StudentExerciseTest, self).setUp()

        self.student = self.create_student(facility_name=self.facility_name)
        self.browser_login_student(
            self.student_username,
            self.student_password,
            facility_name=self.facility_name,
        )

        base_url = self.live_server_url
        exercise_node = get_node_cache("Exercise")[self.EXERCISE_SLUG][0]
        self.browse_to(base_url + exercise_node["path"])

        # make sure no messages
        self.browser_check_django_message(num_messages=0)
Beispiel #9
0
def search(request, topics):
    """
    Search topic, video and exercise titles for the ``query`` GET parameter.

    GET parameters:
        query: text to look for (compared case-insensitively against the
            translated title of the first node of each cache entry).
        category: optional node type; when given, only that type is searched.
        max_results: optional cap on matches per node type (default 25).
            Values that are not integers fall back to the default.

    ``topics`` is not used in the body; per the original author's note,
    accepting it makes the surrounding setup refresh the node cache.

    Returns:
        HttpResponseRedirect to the node's path when a title matches the
        query exactly; otherwise a context dict with the (possibly empty)
        substring matches per node type.
    """
    default_max_results = 25

    # Inputs
    query = request.GET.get('query')
    category = request.GET.get('category')
    # Query-string values arrive as strings. Without the conversion, the
    # length comparisons below compare an int against a str and the cap is
    # never applied.
    try:
        max_results_per_category = int(request.GET.get('max_results', default_max_results))
    except (TypeError, ValueError):
        max_results_per_category = default_max_results

    # Outputs
    query_error = None
    possible_matches = {}
    hit_max = {}

    if query is None:
        query_error = _("Error: query not specified.")

    else:
        query = query.lower()
        # search for topic, video or exercise with matching title
        for node_type, node_dict in topic_tools.get_node_cache().iteritems():
            if category and node_type != category:
                # Skip categories that don't match (if specified)
                continue

            possible_matches[node_type] = []  # make dict only for non-skipped categories
            for nodearr in node_dict.values():
                node = nodearr[0]
                title = _(node['title']).lower()  # this could be done once and stored.
                if title == query:
                    # Redirect to an exact match
                    return HttpResponseRedirect(node['path'])

                elif len(possible_matches[node_type]) < max_results_per_category and query in title:
                    # For efficiency, don't do substring matches when we've got lots of results
                    possible_matches[node_type].append(node)

            hit_max[node_type] = len(possible_matches[node_type]) == max_results_per_category

    return {
        'title': _("Search results for '%s'") % (query if query else ""),
        'query_error': query_error,
        'results': possible_matches,
        'hit_max': hit_max,
        'query': query,
        'max_results': max_results_per_category,
        'category': category,
    }
    def handle(self, *args, **options):
        """
        Download exercises in the language given by ``options["lang_code"]``.

        Which exercises are scraped depends on the options, tried in order:
        ``exercise_ids`` (a comma-separated string), then the exercises of
        ``topic_id``, then all keys of the "Exercise" node cache. If the
        language is not in AVAILABLE_EXERCISE_LANGUAGE_CODES, nothing is
        scraped and this is only logged.

        Raises:
            CommandError: when ``lang_code`` is empty.
        """
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        lang_code = lcode_to_ietf(options["lang_code"])
        language_supported = lang_code in AVAILABLE_EXERCISE_LANGUAGE_CODES

        if not language_supported:
            logging.info("No exercises available for language %s" % lang_code)

        else:
            # Get list of exercises
            requested = options["exercise_ids"]
            exercise_ids = requested.split(",") if requested else []
            if not exercise_ids and options["topic_id"]:
                exercise_ids = [
                    node["id"]
                    for node in get_topic_exercises(topic_id=options["topic_id"])
                ]
            exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

            # Download the exercises
            scrape_options = {"lang_code": lang_code, "force": options["force"]}
            for exercise_id in exercise_ids:
                scrape_exercise(exercise_id=exercise_id, **scrape_options)

        logging.info("Process complete.")
Beispiel #11
0
    def _setup(self, num_logs=50, **kwargs):
        super(OneHundredRandomLogUpdates, self)._setup(**kwargs)
        node_cache = get_node_cache()

        try:
            self.user = FacilityUser.objects.get(username=self.username)
        except:
            #take username from ExerciseLog
            all_exercises = ExerciseLog.objects.all()
            self.user = FacilityUser.objects.get(id=all_exercises[0].user_id)
            print self.username, " not in FacilityUsers, using ", self.user
        self.num_logs = num_logs
        #give the platform a chance to cache the logs
        ExerciseLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                ex_idx = int(self.random.random() *
                             len(node_cache["Exercise"].keys()))
                ex_id = node_cache["Exercise"].keys()[ex_idx]
                if not ExerciseLog.objects.filter(user=self.user,
                                                  exercise_id=ex_id):
                    break
            ex = ExerciseLog(user=self.user, exercise_id=ex_id)
            ex.save()
        self.exercise_list = ExerciseLog.objects.filter(user=self.user)
        self.exercise_count = self.exercise_list.count()

        VideoLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                vid_idx = int(self.random.random() *
                              len(node_cache["Video"].keys()))
                vid_id = node_cache["Video"].keys()[vid_idx]
                if not VideoLog.objects.filter(user=self.user,
                                               video_id=vid_id):
                    break
            vid = VideoLog(user=self.user, video_id=vid_id)
            vid.save()
        self.video_list = VideoLog.objects.filter(user=self.user)
        self.video_count = self.video_list.count()
Beispiel #12
0
    def handle(self, *args, **options):
        """
        Download dubbed videos for the language in ``options["lang_code"]``.

        Options read: ``lang_code`` (required), ``video_ids``
        (comma-separated string), ``topic_id``, ``format`` and ``force``.

        The videos to download are, in order of preference: the ids in
        ``video_ids``; the videos of ``topic_id``; every key of the dubbed
        video map for the language. Each id may be either a key of that map
        or one of its values (a youtube id); ids that are neither are logged
        and skipped. A failed download is logged and does not stop the run.

        Raises:
            CommandError: on the central server, or if no language code was
                given.
        """
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # presumably makes sure the download directory exists -- confirm
        # against ensure_dir.
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or ([vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None)
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            # Keep the try body down to the download itself, so that a problem
            # while building the log line below is not reported as a failed
            # download.
            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                #scrape_thumbnail(youtube_id=youtube_id)
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))
                continue

            # video_id may be a youtube id (see above) and then need not be a
            # key of the node cache; fall back to a plain success message.
            try:
                video_path = get_node_cache("Video")[video_id][0]["path"]
            except (KeyError, IndexError):
                logging.info("Downloaded video %s" % youtube_id)
            else:
                logging.info("Access video %s at %s" % (youtube_id, video_path))

        logging.info("Process complete.")
    def _setup(self, num_logs=50, **kwargs):
        super(OneHundredRandomLogUpdates, self)._setup(**kwargs)
        node_cache = get_node_cache()

        try:
            self.user = FacilityUser.objects.get(username=self.username)
        except:
            #take username from ExerciseLog
            all_exercises = ExerciseLog.objects.all()
            self.user = FacilityUser.objects.get(id=all_exercises[0].user_id)
            print self.username, " not in FacilityUsers, using ", self.user
        self.num_logs = num_logs
        #give the platform a chance to cache the logs
        ExerciseLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                ex_idx = int(self.random.random() * len(node_cache["Exercise"].keys()))
                ex_id = node_cache["Exercise"].keys()[ex_idx]
                if not ExerciseLog.objects.filter(user=self.user, exercise_id=ex_id):
                    break
            ex = ExerciseLog(user=self.user, exercise_id=ex_id)
            ex.save()
        self.exercise_list = ExerciseLog.objects.filter(user=self.user)
        self.exercise_count = self.exercise_list.count()

        VideoLog.objects.filter(user=self.user).delete()
        for x in range(num_logs):
            while True:
                vid_idx = int(self.random.random() * len(node_cache["Video"].keys()))
                vid_id = node_cache["Video"].keys()[vid_idx]
                if not VideoLog.objects.filter(user=self.user, video_id=vid_id):
                    break
            vid = VideoLog(user=self.user, video_id=vid_id)
            vid.save()
        self.video_list = VideoLog.objects.filter(user=self.user)
        self.video_count = self.video_list.count()
Beispiel #14
0
def _get_user_usage_data(users, period_start=None, period_end=None):
    """
    Summarize exercise, video and login activity per user and per group.

    Args:
        users: the users to report on. Each one is read for ``pk``,
            ``first_name``, ``last_name``, ``username`` and ``group``; the
            collection is iterated twice and also used in ``user__in``
            filters.
        period_start: if given, only logs at or after this moment count.
        period_end: if given, only logs at or before this moment count.

    Returns:
        tuple: ``(user_data, group_data)``. ``user_data`` is an OrderedDict
        keyed by user pk; ``group_data`` is an OrderedDict keyed by group pk,
        with key ``None`` collecting users that have no group.
    """
    # Mastery is expressed as a fraction of all exercises in the node cache.
    num_exercises = len(get_node_cache('Exercise'))

    # Only completed exercise logs are counted.
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # Narrow to the requested period, if any.
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Materialize each queryset once; only the listed columns are needed.
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    # Start every user off with zeroed counters, in a fixed key order.
    user_data = OrderedDict()
    for user in users:
        user_data[user.pk] = OrderedDict([
            ("first_name", user.first_name),
            ("last_name", user.last_name),
            ("username", user.username),
            ("group", user.group),
            ("total_report_views", 0),
            ("total_logins", 0),
            ("total_hours", 0),
            ("total_exercises", 0),
            ("pct_mastery", 0.),
            ("exercises_mastered", []),
            ("total_videos", 0),
            ("videos_watched", []),
        ])

    for elog in exercise_logs:
        summary = user_data[elog["user__pk"]]
        summary["total_exercises"] += 1
        summary["pct_mastery"] += 1. / num_exercises
        summary["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        summary = user_data[vlog["user__pk"]]
        summary["total_videos"] += 1
        summary["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        activity_type = llog["activity_type"]
        if activity_type == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif activity_type == UserLog.get_activity_int("login"):
            summary = user_data[llog["user__pk"]]
            summary["total_hours"] += (llog["total_seconds"]) / 3600.
            summary["total_logins"] += 1

    # Add group data.  Allow a fake group "Ungrouped"
    group_data = OrderedDict()
    summed_fields = ("total_logins", "total_hours", "total_videos", "total_exercises")
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        group_name = getattr(user.group, "name", _("Ungrouped"))
        if group_pk not in group_data:
            group_data[group_pk] = {
                "name": group_name,
                "total_logins": 0,
                "total_hours": 0,
                "total_users": 0,
                "total_videos": 0,
                "total_exercises": 0,
                "pct_mastery": 0,
            }

        group_summary = group_data[group_pk]
        user_summary = user_data[user.pk]

        group_summary["total_users"] += 1
        for field in summed_fields:
            group_summary[field] += user_summary[field]

        # Running mean: rebuild the sum over the previous members, add this
        # user's mastery, and divide by the new member count.
        member_count = group_summary["total_users"]
        total_mastery_so_far = (group_summary["pct_mastery"] * (member_count - 1) + user_summary["pct_mastery"])
        group_summary["pct_mastery"] = total_mastery_so_far / member_count

    return (user_data, group_data)
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.

    After that, the user's exercise and video data are fetched through
    ``get_api_resource``, converted into log dicts (only for ids found in
    the node cache, and only for items with progress), and POSTed as JSON to
    ``request.session["distributed_callback_url"]``.

    Returns:
        HttpResponseRedirect to ``request.session["distributed_redirect_url"]``
        in every case, with ``message_type``, ``message`` and ``message_id``
        added as query parameters to report the outcome.
    """
    def redirect_with_message(message_type, message):
        # Every exit path reports back through the same redirect.
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": message_type,
            "message": message,
            "message_id": "id_khanload",
        }))

    if "ACCESS_TOKEN" not in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        video_id = youtube_id = video.get('video', {}).get('youtube_id', "")

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            # A streak of 10 or more counts as completed.
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak']/10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(basepoints, ncorrect=exercise['streak'], add_randomness=False),  # no randomness when importing from KA
                "complete": completed,
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,  #can't figure this out if they practiced after mastery.
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    try:
        # json.dumps cannot serialize datetimes itself; emit ISO strings for
        # them (any other unsupported object becomes null).
        dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
        logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={ "csrftoken": request.session["distributed_csrf_token"] },
            data = {
                "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=dthandler),
                "exercise_logs": json.dumps(exercise_logs, default=dthandler),
                "user_id": request.session["distributed_user_id"],
            }
        )
        logging.debug("Response (%d): %s" % (response.status_code, response.content))
    except requests.exceptions.ConnectionError:
        return redirect_with_message(
            "error",
            _("Could not connect to your KA Lite installation to share Khan Academy data."),
        )
    except Exception as e:
        return redirect_with_message(
            "error",
            _("Failure to send data to your KA Lite installation: %s") % e,
        )

    # The response is expected to be a JSON object with exactly one
    # key-value pair: {message_type: message}.
    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate the message is a single key-value pair
            raise Exception(_("Unexpected response format from your KA Lite installation."))
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, all we can do for now is
    # show the error to the user. Note that response.status_code is not
    # checked here; whatever message was parsed above is passed along.
    return redirect_with_message(message_type, message)
Beispiel #16
0
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.

    Collects the requesting user's exercise and video logs, assigns each log
    to one of the displayed topics (the knowledge-map topics plus the
    children of every non-"math" top-level node of the topic tree), and
    computes per-topic statistics.

    Args:
        request: the request; the user is resolved via
            ``get_user_from_request``.
        xaxis, yaxis: accepted but not used in this function.

    Returns:
        dict: template context with the student, the topics, the logs grouped
        by topic, sparkline data, the per-topic ``stats`` and the ``stat_defs``
        describing how to display them.

    Raises:
        Http404: if no user could be determined from the request.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    # Copy before extending: "+=" on the returned list would modify it in
    # place, which would leak into later calls if the callee hands out a
    # shared list.
    topic_ids = list(get_knowledgemap_topics())
    topic_ids += [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topic_id_set = set(topic_ids)
    topics = [node_cache["Topic"][topic_id][0] for topic_id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        if elog["exercise_id"] not in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(topic_id_set)
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        # If several displayed topics are ancestors, an arbitrary one is used.
        topic = topic.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic.setdefault(topic, []).append(elog)

    # Categorize every video log into a "midlevel" topic.
    for vlog in video_logs:
        if vlog["video_id"] not in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(topic_id_set)
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic.setdefault(topic, []).append(vlog)

    # Now compute stats
    for topic_id in topic_ids:
        # "or []" also covers a topic whose lookup returned nothing.
        n_exercises = len(topic_exercises.get(topic_id) or [])
        n_videos = len(topic_videos.get(topic_id) or [])

        exercises = exercises_by_topic.get(topic_id, [])
        videos = videos_by_topic.get(topic_id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        # Ratios over the topic's totals are only defined when the total is
        # non-zero; report 0 otherwise instead of dividing by zero.
        has_exercise_ratio = bool(n_exercises_touched and n_exercises)
        has_video_ratio = bool(n_videos_touched and n_videos)

        exercise_sparklines[topic_id] = [el["completion_timestamp"] for el in exercises if el["complete"]]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[topic_id] = {
            "ex:pct_mastery":      0 if not has_exercise_ratio else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not has_exercise_ratio else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not has_video_ratio else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not has_video_ratio else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
Beispiel #17
0
def tabular_view(request, facility, report_type="exercise"):
    """
    Build the template context for the tabular coach report (data is gathered server-side).

    Args:
        request: the current request.  The "topic" and "group" querystring
            parameters select the topic and, optionally, the group to report on.
        facility: facility used to narrow the set of students; when falsy,
            students from every accessible group (and ungrouped ones) are shown.
        report_type: "exercise" or "video".

    Returns:
        The context dictionary.  When no valid topic is requested, only the
        generic entries (report types, topic list, plotting metadata) are set;
        otherwise "students" plus "exercises" or "videos" are added.

    Raises:
        Http404: if report_type is neither "exercise" nor "video".
    """
    # Define how students are ordered in the table.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics and groups
    topic_cache = get_node_cache("Topic")
    topics = [topic_cache.get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        # For translators: the following two translations are nouns
        "report_types": (_("exercise"), _("video")),
        "request_report_type": report_type,
        "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id, is_teacher=False)

    elif facility:
        # Narrow by facility
        matches = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(matches) <= 1, "Should only have one or zero matches."

        # Zero matches is allowed by the assertion above, so don't index blindly.
        search_groups = matches[0] if matches else []
        # Return groups and ungrouped students of this facility
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups.extend(groups_dict["groups"])
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False)

    users = users.order_by(*student_ordering)

    def student_rows(logs, node_field, table_key):
        """Return one row per student, with that student's logs keyed by node_field under table_key."""
        # Group by user id rather than walking users and logs in lock-step:
        # the lock-step walk silently drops logs whenever the two queries
        # disagree on ordering (e.g. students with identical sort keys).
        logs_by_user = {}
        for log in logs:
            logs_by_user.setdefault(log["user__id"], {})[log[node_field]] = log

        return [{
            "first_name": student.first_name,
            "last_name": student.last_name,
            "username": student.username,
            "name": student.get_name(),
            "id": student.id,
            table_key: logs_by_user.get(student.id, {}),
        } for student in users]

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises, in knowledge-map position order
        exercises = sorted(get_topic_exercises(topic_id=topic_id), key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # One query for all students' logs, instead of one query per student
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=[ex["name"] for ex in exercises]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        context["students"] = student_rows(exlogs, "exercise_id", "exercise_logs")

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # One query for all students' logs, instead of one query per student
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=[vid["id"] for vid in context["videos"]]) \
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        context["students"] = student_rows(vidlogs, "video_id", "video_logs")

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            viewer = request.session["facility_user"]
            UserLog.begin_user_activity(viewer, activity_type="coachreport")
            UserLog.update_user_activity(viewer, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(viewer, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
def generate_dubbed_video_mappings(download_url=None, csv_data=None):
    """
    Function to do the heavy lifting in getting the dubbed videos map.

    Could be moved into utils

    Args:
        download_url: URL to fetch the CSV from.  When omitted,
            SPREADSHEET_BASE_URL is queried with the spreadsheet key, gid and
            export format.
        csv_data: CSV text to parse.  When given, nothing is downloaded.

    Returns:
        A (video_map, csv_data) tuple.  video_map is a two-level dictionary:
        first key is the lowercased language name from the header row, second
        key is the english youtube ID, and the value is the corresponding
        youtube ID in that language.

    Raises:
        CommandError: if the download does not answer with HTTP status 200.
        ValueError: if the header row has no "titled id" or "english" column.
        AssertionError: if a data row has the wrong length, or lacks a slug or
            an english video ID.
    """
    if not download_url:
        download_url = SPREADSHEET_BASE_URL
        params = {'key': SPREADSHEET_ID, 'gid': SPREADSHEET_GID, 'output': SPREADSHEET_EXPORT_FORMAT}
    else:
        params = {}

    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url, params=params)
        if response.status_code != 200:
            # requests exposes the HTTP status as `status_code`; there is no `status` attribute.
            raise CommandError("Failed to download dubbed video CSV data: status=%s" % response.status_code)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: english youtube ID
    #   Value: corresponding youtube ID in the new language.
    video_map = {}
    header_row = None
    slug_idx = english_idx = None

    for row_num, row in enumerate(reader):
        if row_num < 4:
            # Rows 1-4 carry no data.
            continue

        if row_num == 4:
            # Row 5 is the header row.
            header_row = [v.lower() for v in row]  # lcase all header row values (including language names)
            # list.index raises ValueError when a column header is missing.
            slug_idx = header_row.index("titled id")
            english_idx = header_row.index("english")
            continue

        # Rows 6 and beyond are data.
        assert len(row) == len(header_row), "Values line length equals headers line length"

        # Grab the slug and english video ID.
        video_slug = row[slug_idx]
        english_video_id = row[english_idx]
        assert english_video_id, "English Video ID should not be empty"
        assert video_slug, "Slug should not be empty"

        # English video is the first video ID column,
        #   and following columns (until the end) are other languages.
        # Loop through those columns and, if a video exists,
        #   add it to the dictionary.
        for lang, dubbed_youtube_id in zip(header_row[english_idx:], row[english_idx:]):
            if not dubbed_youtube_id:  # make sure there's a dubbed video
                continue

            lang_map = video_map.setdefault(lang, {})  # add the first level if it doesn't exist
            if english_video_id == dubbed_youtube_id and lang != "english":
                logging.error("Removing entry for (%s, %s): dubbed and english youtube ID are the same." % (lang, english_video_id))
            else:
                # The same dubbed ID appearing under several english IDs is
                # deliberately accepted; no de-duplication is done here.
                lang_map[english_video_id] = dubbed_youtube_id

    # Now, validate the mappings with our topic data.
    # The sheet may contain no English rows at all, so don't assume the key exists.
    english_map = video_map.setdefault("english", {})
    known_videos = set(get_node_cache("Video").keys())
    mapped_videos = set(english_map)
    missing_videos = known_videos - mapped_videos
    extra_videos = mapped_videos - known_videos
    if missing_videos:
        logging.warning("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
        logging.warning("Adding missing English videos to English dubbed video map")
        for video in missing_videos:
            english_map[video] = video
    if extra_videos:
        logging.warning("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return (video_map, csv_data)
Beispiel #19
0
 def create_cache(self, force=False):
     """
     Create the page cache for every Topic, Video and Exercise node.

     A header line is written to stdout per node type; `force` is passed
     through unchanged to create_page_cache for each node path.
     """
     for kind in ('Topic', 'Video', 'Exercise'):
         self.stdout.write("Caching %ss:\n" % kind)
         nodes_by_id = topic_tools.get_node_cache(kind)
         # Each cache value is a collection of nodes; cache each node's path.
         for node_group in nodes_by_id.values():
             for node in node_group:
                 self.create_page_cache(path=node["path"], force=force)
Beispiel #20
0
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    Args:
        data_types: a single type name or a list of type names.  Handled here:
            "pct_mastery", "effort",
            "ex:<ExerciseLog field>", "vid:<VideoLog field>",
            and, only while UserLog is enabled,
            "user:<UserLog field>" and "usersum:<UserLogSummary field>".
            The argument itself is never modified.
        who: list of users
        where: topic path(s); a node is selected when its "path" starts with
            any of the entries of tuple(where).

    Returns:
        A dictionary with
            "data": OrderedDict of user id -> {data type: value}, in the order of "who"
            "topics": always None (topics are not queried by this function)
            "exercises": list of selected exercise ids (empty list if none)
            "videos": list of selected video ids (empty list if none)

    Raises:
        Exception: if a requested data type is not recognized.
    """

    # Topics are never queried here; the key is still returned to callers.
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    user_ids = [w.id for w in who]
    data = OrderedDict((uid, {}) for uid in user_ids)  # maintain the order of the users
    vid_logs = dict((uid, []) for uid in user_ids)
    ex_logs = dict((uid, []) for uid in user_ids)
    if UserLog.is_enabled():
        activity_logs = dict((uid, []) for uid in user_ids)

    path_prefixes = tuple(where)

    def selected_node_ids(node_type):
        """Return the id of every cached node of node_type having a path under one of the selected prefixes."""
        return [
            nodes[0]["id"]
            for nodes in get_node_cache(node_type).values()
            if any(node["path"].startswith(path_prefixes) for node in nodes)
        ]

    # No users, don't bother.
    if who:

        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = selected_node_ids("Exercise")
        videos = selected_node_ids("Video")

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)

        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        # Work on a private list: accept a bare string, and allow "effort" to
        # queue up its prerequisites without touching the caller's argument.
        pending = [data_types] if hasattr(data_types, "lower") else list(data_types)

        # If the first user has a data type, then all do; no need to calc again.
        first_user_data = data[next(iter(data))]

        for data_type in pending:  # the list may grow while it is being iterated
            if data_type in first_user_data:
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":

                # Efficient query out, spread out to dict
                for user in data:
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in first_user_data and "vid:total_seconds_watched" in first_user_data:
                    # exercises and videos would be initialized already
                    for user in data:
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    # Compute the prerequisites first, then come back to "effort".
                    pending += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #
            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:

                for user in data:
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:

                for user in data:
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            # NOTE(review): this condition mirrors the "usersum:" branch below,
            #   checking the field name against UserLog -- confirm.
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)

                for user in data:
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)

                for user in data:
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])

            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []

    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
Beispiel #21
0
def tabular_view(request, facility, report_type="exercise"):
    """
    Build the template context for the tabular coach report (data is gathered server-side).

    Args:
        request: the current request.  The "topic" and "group" querystring
            parameters select the topic and, optionally, the group to report on.
        facility: facility used to narrow the set of students; when falsy,
            students from every accessible group (and ungrouped ones) are shown.
        report_type: "exercise" or "video".

    Returns:
        The context dictionary.  When no valid topic is requested, only the
        generic entries (report types, topic list, plotting metadata) are set;
        otherwise "students" plus "exercises" or "videos" are added.

    Raises:
        Http404: if report_type is neither "exercise" nor "video".
    """
    # Define how students are ordered in the table.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics and groups
    topic_cache = get_node_cache("Topic")
    topics = [topic_cache.get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        # For translators: the following two translations are nouns
        "report_types": (_("exercise"), _("video")),
        "request_report_type": report_type,
        "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id, is_teacher=False)

    elif facility:
        # Narrow by facility
        matches = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(matches) <= 1, "Should only have one or zero matches."

        # Zero matches is allowed by the assertion above, so don't index blindly.
        search_groups = matches[0] if matches else []
        # Return groups and ungrouped students of this facility
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups.extend(groups_dict["groups"])
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False)

    users = users.order_by(*student_ordering)

    def student_rows(logs, node_field, table_key):
        """Return one row per student, with that student's logs keyed by node_field under table_key."""
        # Group by user id rather than walking users and logs in lock-step:
        # the lock-step walk silently drops logs whenever the two queries
        # disagree on ordering (e.g. students with identical sort keys).
        logs_by_user = {}
        for log in logs:
            logs_by_user.setdefault(log["user__id"], {})[log[node_field]] = log

        return [{
            "first_name": student.first_name,
            "last_name": student.last_name,
            "username": student.username,
            "name": student.get_name(),
            "id": student.id,
            table_key: logs_by_user.get(student.id, {}),
        } for student in users]

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises, in knowledge-map position order
        exercises = sorted(get_topic_exercises(topic_id=topic_id), key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # One query for all students' logs, instead of one query per student
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=[ex["name"] for ex in exercises]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        context["students"] = student_rows(exlogs, "exercise_id", "exercise_logs")

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # One query for all students' logs, instead of one query per student
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=[vid["id"] for vid in context["videos"]]) \
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        context["students"] = student_rows(vidlogs, "video_id", "video_logs")

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            viewer = request.session["facility_user"]
            UserLog.begin_user_activity(viewer, activity_type="coachreport")
            UserLog.update_user_activity(viewer, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(viewer, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
Beispiel #22
0
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.

    Gathers the requesting user's exercise and video logs, buckets them by
    topic, and computes per-topic summary statistics.

    Args:
        request: the current request; the user is resolved from it.
        xaxis: not used by this function; kept for interface compatibility.
        yaxis: not used by this function; kept for interface compatibility.

    Returns:
        A dictionary holding the plotting form/groups/facilities, the student,
        the topic nodes, the exercises per topic, the exercise and video logs
        per topic, exercise sparklines, the per-topic "stats", a "no_data"
        flag, and "stat_defs" describing how each stat is displayed.

    Raises:
        Http404: if no user can be resolved from the request.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    # Copy before extending, so that the list handed out by
    # get_knowledgemap_topics() is never grown in place.
    topic_ids = list(get_knowledgemap_topics())
    topic_ids += [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topics = [node_cache["Topic"][tid][0] for tid in topic_ids]
    known_topic_ids = set(topic_ids)  # built once; used for every log below

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    def bucket_by_topic(logs, kind, node_type, id_field, fetch_topic_nodes):
        """Return (nodes per topic, logs per topic) after assigning each log to one of the known topics."""
        nodes_by_topic = {}
        logs_by_topic = {}
        for log in logs:
            node_id = log[id_field]
            if node_id not in node_cache[node_type]:
                # Sometimes KA updates their topic tree and eliminates nodes;
                #   we also want to support 3rd party switching of trees arbitrarily.
                logging.debug("Skip unknown %s log for %s/%s" % (kind, user_id, node_id))
                continue

            parent_ids = [anc for node in node_cache[node_type][node_id] for anc in node["ancestor_ids"]]
            candidates = set(parent_ids).intersection(known_topic_ids)
            if not candidates:
                logging.error("Could not find a topic for %s %s (parents=%s)" % (kind, node_id, parent_ids))
                continue

            topic = candidates.pop()
            if topic not in nodes_by_topic:
                nodes_by_topic[topic] = fetch_topic_nodes(path=node_cache["Topic"][topic][0]["path"])
            logs_by_topic.setdefault(topic, []).append(log)
        return nodes_by_topic, logs_by_topic

    # Categorize every exercise log and video log into a "midlevel" topic
    topic_exercises, exercises_by_topic = bucket_by_topic(
        exercise_logs, "exercise", "Exercise", "exercise_id", get_topic_exercises)
    topic_videos, videos_by_topic = bucket_by_topic(
        video_logs, "video", "Video", "video_id", get_topic_videos)

    exercise_sparklines = dict()
    stats = dict()

    # Now compute stats
    for tid in topic_ids:
        n_exercises = len(topic_exercises.get(tid) or [])
        n_videos = len(topic_videos.get(tid) or [])

        exercises = exercises_by_topic.get(tid, [])
        videos = videos_by_topic.get(tid, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        # Percentages are relative to everything in the topic; guard against a
        # topic that has logs but for which no exercises/videos were found.
        ex_pct_ok = bool(n_exercises_touched and n_exercises)
        vid_pct_ok = bool(n_videos_touched and n_videos)

        exercise_sparklines[tid] = [el["completion_timestamp"] for el in exercises if el["complete"]]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[tid] = {
            "ex:pct_mastery":      0 if not ex_pct_ok else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not ex_pct_ok else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not vid_pct_ok else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not vid_pct_ok else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            # NOTE(review): the value computed above is a total, while the title says "Average" -- confirm which is intended.
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
 def validate_mapping(srt_file, srt_issues):
     """
     Append an issue to srt_issues when the youtube ID taken from srt_file's
     name is not a key of the "Video" node cache.
     """
     file_name = os.path.basename(srt_file)
     youtube_id = file_name[:-4]  # file name without its last four characters
     known_videos = get_node_cache("Video")
     if youtube_id in known_videos:
         return
     srt_issues.append("youtube ID unknown: %s" % youtube_id)
Beispiel #24
0
 def validate_mapping(srt_file, srt_issues):
     """
     Append an issue to srt_issues when the youtube ID taken from srt_file's
     name is not a key of the "Video" node cache.
     """
     file_name = os.path.basename(srt_file)
     youtube_id = file_name[:-4]  # file name without its last four characters
     known_videos = get_node_cache("Video")
     if youtube_id in known_videos:
         return
     srt_issues.append("youtube ID unknown: %s" % youtube_id)
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users (each must have an "id" attribute)
    where: topic path(s); converted to a tuple of path prefixes
    data_types: a single name (string) or a list of names; can include:
        pct_mastery
        effort
        ex:<name of an ExerciseLog field>
        vid:<name of a VideoLog field>
        user:<name of a UserLog field>            (only when UserLog is enabled)
        usersum:<name of a UserLogSummary field>  (only when UserLog is enabled)

    Returns a dict with keys:
        "data": OrderedDict mapping user id => {data_type: value}, in the order of "who"
        "topics": always None (topics are never queried here)
        "exercises": list of exercise ids under "where" (empty list if none / no users)
        "videos": list of video ids under "where" (empty list if none / no users)

    Raises Exception if a requested data type is not recognized.
    The caller's "data_types" list is not modified.
    """

    # None indicates that the data hasn't been queried yet.
    topics = None
    exercises = None
    videos = None

    user_ids = [w.id for w in who]

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict((uid, dict()) for uid in user_ids)  # maintain the order of the users
    vid_logs = dict((uid, []) for uid in user_ids)
    ex_logs = dict((uid, []) for uid in user_ids)
    if UserLog.is_enabled():
        # Only referenced by the "user:" / "usersum:" branches, which also require UserLog to be enabled.
        activity_logs = dict((uid, []) for uid in user_ids)

    path_prefixes = tuple(where)

    def node_ids_under_paths(node_type):
        """Return the id of every cached node of node_type having at least one path that starts with a selected prefix."""
        # Each cache value is a list of nodes; the id is read from the first one.
        return [
            nodes[0]["id"]
            for nodes in get_node_cache(node_type).values()
            if any([node["path"].startswith(path_prefixes) for node in nodes])
        ]

    # No users, don't bother.
    if user_ids:

        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = node_ids_under_paths('Exercise')
        videos = node_ids_under_paths('Video')

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)

        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        # Work on a private list: a bare string is wrapped, and the "effort" branch below
        # appends to this list while it is being iterated (which must neither fail for a
        # string argument nor leak into the caller's list).
        if hasattr(data_types, "lower"):
            pending_types = [data_types]
        else:
            pending_types = list(data_types)

        first_user_data = data[user_ids[0]]

        for data_type in pending_types:
            if data_type in first_user_data:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":

                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in first_user_data and "vid:total_seconds_watched" in first_user_data:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    # Prerequisites are missing: queue them, then queue "effort" again so that
                    # it is computed once they are available (the loop picks up appended items).
                    pending_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #
            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:

                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():

                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)

                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])

            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
Beispiel #26
0
 def create_cache(self, force=False):
     """
     Create the page cache for every node in the Topic, Video and Exercise node caches.

     force: handed through unchanged to create_page_cache for each node path.
     """
     for kind in ('Topic', 'Video', 'Exercise'):
         self.stdout.write("Caching %ss:\n" % kind)
         cached_nodes = topic_tools.get_node_cache(kind)
         # Each cache value is a list of nodes; every node's path gets its own cached page.
         for node_list in cached_nodes.values():
             for node in node_list:
                 self.create_page_cache(path=node["path"], force=force)
Beispiel #27
0
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.

    Afterwards:
      1. fetches the user's exercise and video data through get_api_resource,
      2. converts the entries that have progress and that we recognize
         (i.e. are in the node cache) into log dictionaries,
      3. POSTs those logs, JSON-encoded, to the distributed server's callback URL
         (read from the session),
      4. redirects to the distributed server's redirect URL (also from the session)
         with a "message_type" / "message" pair describing the outcome.

    Always returns an HttpResponseRedirect; failures while POSTing or while
    parsing the response are reported as an "error" message in the redirect.
    """

    def redirect_with_message(message_type, message):
        # Every exit path returns to the distributed server, carrying a message for the user.
        return HttpResponseRedirect(
            set_query_params(
                request.session["distributed_redirect_url"], {
                    "message_type": message_type,
                    "message": message,
                    "message_id": "id_khanload",
                }))

    def serialize_datetime(obj):
        # json.dumps fallback: datetimes become ISO strings, anything else unknown becomes null.
        return obj.isoformat() if isinstance(obj, datetime.datetime) else None

    if "ACCESS_TOKEN" not in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        video_id = youtube_id = video.get('video', {}).get('youtube_id', "")

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            # An expected field is missing from the API data: report it and move on.
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak'] / 10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(
                    basepoints,
                    ncorrect=exercise['streak'],
                    add_randomness=False,  # no randomness when importing from KA
                ),
                # can't figure this out if they practiced after mastery.
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            # An expected field is missing from the API data: report it and move on.
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    try:
        logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={"csrftoken": request.session["distributed_csrf_token"]},
            data={
                "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=serialize_datetime),
                "exercise_logs": json.dumps(exercise_logs, default=serialize_datetime),
                "user_id": request.session["distributed_user_id"],
            })
        logging.debug("Response (%d): %s" % (response.status_code, response.content))
    except requests.exceptions.ConnectionError:
        return redirect_with_message(
            "error",
            _("Could not connect to your KA Lite installation to share Khan Academy data."))
    except Exception as e:
        return redirect_with_message(
            "error",
            _("Failure to send data to your KA Lite installation: %s") % e)

    # The response is expected to be a JSON object with exactly one {message_type: message} pair.
    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate the message is a single key-value pair
            raise Exception(_("Unexpected response format from your KA Lite installation."))
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, there is nothing more we can do here.
    #   For now, just show the error to users.
    #   (The response status code is deliberately not checked; a non-200 response body
    #   is simply passed through the JSON parsing above.)
    #
    # Ultimately, we have a message, would like to share with the distributed server.
    return redirect_with_message(message_type, message)
def generate_dubbed_video_mappings(download_url=None, csv_data=None):
    """
    Function to do the heavy lifting in getting the dubbed videos map.

    Could be moved into utils

    download_url: where to download the CSV from; defaults to the spreadsheet
        export described by the SPREADSHEET_* constants.
    csv_data: CSV text to parse; when given, nothing is downloaded.

    Returns a tuple (video_map, csv_data), where video_map is a two-level dict:
        first key: language name (lower-cased header value),
        second key: english youtube ID,
        value: the corresponding youtube ID in that language.

    Raises CommandError if the download does not return status 200,
    ValueError if the header row lacks a "titled id" or "english" column,
    and AssertionError if a data row is malformed.
    """
    if not download_url:
        download_url = SPREADSHEET_BASE_URL
        params = {
            'key': SPREADSHEET_ID,
            'gid': SPREADSHEET_GID,
            'output': SPREADSHEET_EXPORT_FORMAT,
        }
    else:
        params = {}

    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url, params=params)
        if response.status_code != 200:
            raise CommandError(
                "Failed to download dubbed video CSV data: status=%s" %
                response.status_code)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: english youtube ID
    #   Value: corresponding youtube ID in the new language.
    video_map = {}

    # Loop through each row in the spreadsheet.
    for row_num, row in enumerate(reader):
        if row_num < 4:
            # Rows 1-4 carry no usable data.
            continue

        if row_num == 4:
            # Row 5 is the header row.
            header_row = [v.lower() for v in row]  # lcase all header row values (including language names)
            # list.index raises ValueError when a required column header is missing.
            slug_idx = header_row.index("titled id")
            english_idx = header_row.index("english")
            continue

        # Rows 6 and beyond are data.
        assert len(row) == len(header_row), "Values line length equals headers line length"

        # Grab the slug and english video ID.
        video_slug = row[slug_idx]
        english_video_id = row[english_idx]
        assert english_video_id, "English Video ID should not be empty"
        assert video_slug, "Slug should not be empty"

        # English video is the first video ID column,
        #   and following columns (until the end) are other languages.
        # Loop through those columns and, if a video exists,
        #   add it to the dictionary.
        for idx in range(english_idx, len(row)):
            dubbed_youtube_id = row[idx]
            if not dubbed_youtube_id:  # make sure there's a dubbed video
                continue

            lang = header_row[idx]
            lang_map = video_map.setdefault(lang, {})  # add the first level if it doesn't exist

            if english_video_id == dubbed_youtube_id and lang != "english":
                logging.error(
                    "Removing entry for (%s, %s): dubbed and english youtube ID are the same."
                    % (lang, english_video_id))
            else:
                # NOTE: the same dubbed ID may legitimately appear for several english
                #   videos, so duplicates within a language are deliberately not removed.
                lang_map[english_video_id] = dubbed_youtube_id  # add the corresponding video id for the video, in this language.

    # Now, validate the mappings with our topic data
    # (setdefault: the english map does not exist if no data row was parsed)
    english_map = video_map.setdefault("english", {})
    known_videos = set(get_node_cache("Video").keys())
    mapped_videos = set(english_map.keys())
    missing_videos = known_videos - mapped_videos
    extra_videos = mapped_videos - known_videos
    if missing_videos:
        logging.warn(
            "There are %d known videos not in the list of dubbed videos" %
            len(missing_videos))
        logging.warn(
            "Adding missing English videos to English dubbed video map")
        for video in missing_videos:
            english_map[video] = video
    if extra_videos:
        logging.warn(
            "There are %d videos in the list of dubbed videos that we have never heard of."
            % len(extra_videos))

    return (video_map, csv_data)
Beispiel #29
0
def update_all_distributed_callback(request):
    """
    Save the video and exercise logs POSTed to this endpoint for one facility user.

    Expects a POST with:
        "video_logs": JSON list of dicts, each with at least "video_id" and "youtube_id"
        "exercise_logs": JSON list of dicts, each with at least "exercise_id"
        "user_id": id of the FacilityUser the logs belong to

    Logs for videos / exercises that are not in the node cache are skipped.
    For the rest, every key/value of the dict is set as an attribute on the
    corresponding VideoLog / ExerciseLog, which is then saved.

    Returns a JsonResponse with a "success" message counting what was saved,
    or a JsonResponseMessageError as soon as an unexpected error occurs.
    Raises PermissionDenied for non-POST requests.
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # NOTE(review): below, attribute names come straight from the POSTed dicts and are
    #   applied with setattr -- confirm that the sender is trusted, or whitelist the keys.

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            # Second tuple element is unused (named so as not to rebind "_").
            (vl, _unused) = VideoLog.get_or_initialize(user=user,
                                                       video_id=video_id,
                                                       youtube_id=youtube_id)
            for key, val in video.items():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error(
                "Could not save video log for data with missing values: %s" %
                video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        exercise_id = exercise['exercise_id']

        # Only save exercise logs for exercises that we recognize.
        if exercise_id not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise_id)
            continue

        try:
            (el, _unused) = ExerciseLog.get_or_initialize(
                user=user, exercise_id=exercise_id)
            for key, val in exercise.items():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" %
                          (exercise_id, el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error(
                "Could not save exercise log for data with missing values: %s"
                % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({
        "success":
        "Uploaded %d exercises and %d videos" %
        (n_exercises_uploaded, n_videos_uploaded)
    })
Beispiel #30
0
class CachingTest(KALiteTestCase):
    """Tests that page-cache entries for video pages can be created, found and expired."""

    # Looked up once, when the class is defined, and shared by all tests.
    video_cache = get_node_cache("Video")

    def _assert_cached_then_expire(self, video_path, cached_message):
        """Assert that video_path is cached, expire it, then assert that the cache key is gone."""
        self.assertTrue(caching.has_cache_key(path=video_path), cached_message)
        caching.expire_page(path=video_path)  # clean cache
        self.assertFalse(caching.has_cache_key(path=video_path),
                         "expect: No cache key after expiring the page")

    @unittest.skipIf(settings.CACHE_TIME == 0,
                     "Test only relevant when caching is enabled")
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Use a fixed video (the 11th key) rather than a random one.
        video_id = self.video_cache.keys()[10]
        sys.stdout.write("Testing on video_id = %s\n" % video_id)
        video_path = self.video_cache[video_id][0]['path']

        # Clean the cache for this item
        caching.expire_page(path=video_path, failure_ok=True)

        # Create the cache item, and check it
        self.assertFalse(caching.has_cache_key(path=video_path),
                         "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[video_id])
        self.assertTrue(caching.has_cache_key(path=video_path),
                        "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_caches()  # test the convenience function
        self.assertFalse(caching.has_cache_key(path=video_path),
                         "expect: no cache key after expiring the page")

    @unittest.skipIf(settings.CACHE_TIME == 0,
                     "Test only relevant when caching is enabled")
    def test_cache_across_clients(self):
        """Show that caching is accessible across all clients
        (i.e. that different clients don't generate different cache keys)"""

        # Get a random video id
        video_id = random.choice(self.video_cache.keys())
        sys.stdout.write("Testing on video_id = %s\n" % video_id)
        video_path = self.video_cache[video_id][0]['path']

        # Clean the cache for this item
        caching.expire_page(path=video_path, failure_ok=True)
        self.assertFalse(caching.has_cache_key(path=video_path),
                         "expect: No cache key after expiring the page")

        # Set up the cache with Django client
        Client().get(video_path)
        self._assert_cached_then_expire(
            video_path, "expect: Cache key exists after Django Client get")

        # Get the same cache key when getting with urllib, and make sure the cache is created again
        urllib.urlopen(self.live_server_url + video_path).close()
        self._assert_cached_then_expire(
            video_path, "expect: Cache key exists after urllib get")

        # Same deal, now using requests library
        requests.get(self.live_server_url + video_path)
        self._assert_cached_then_expire(
            video_path, "expect: Cache key exists after requestsget")
Beispiel #31
0
def _get_user_usage_data(users, period_start=None, period_end=None):
    """
    Returns facility user data, within the given date range.

    users: the facility users to report on
    period_start / period_end: optional bounds; when given, exercise and video logs
        are filtered on completion_timestamp, and login summaries on
        start_datetime / end_datetime.

    Returns a tuple (user_data, group_data):
        user_data: OrderedDict, user pk => OrderedDict of per-user values
            (names, group, login / report-view counts, hours, exercises mastered,
            videos watched, and "pct_mastery" as a fraction of all cached exercises)
        group_data: OrderedDict, group pk (None for ungrouped users) => dict of
            totals over the group's users, with "pct_mastery" averaged over them.
    """

    # Each completed exercise contributes an equal share to a user's mastery.
    #   With an empty exercise cache there is nothing to master, so the share is 0
    #   (rather than dividing by zero).
    num_exercises = len(get_node_cache('Exercise'))
    mastery_per_exercise = 1. / num_exercises if num_exercises else 0.

    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    # Start every user at zero, so that users without any logs still show up.
    for user in users:
        user_data[user.pk] = OrderedDict([
            ("first_name", user.first_name),
            ("last_name", user.last_name),
            ("username", user.username),
            ("group", user.group),

            ("total_report_views", 0),
            ("total_logins", 0),
            ("total_hours", 0),

            ("total_exercises", 0),
            ("pct_mastery", 0.),
            ("exercises_mastered", []),

            ("total_videos", 0),
            ("videos_watched", []),
        ])

    for elog in exercise_logs:
        stats = user_data[elog["user__pk"]]
        stats["total_exercises"] += 1
        stats["pct_mastery"] += mastery_per_exercise
        stats["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        stats = user_data[vlog["user__pk"]]
        stats["total_videos"] += 1
        stats["videos_watched"].append(vlog["video_id"])

    coachreport_activity = UserLog.get_activity_int("coachreport")
    login_activity = UserLog.get_activity_int("login")
    for llog in login_logs:
        stats = user_data[llog["user__pk"]]
        if llog["activity_type"] == coachreport_activity:
            stats["total_report_views"] += 1
        elif llog["activity_type"] == login_activity:
            stats["total_hours"] += (llog["total_seconds"]) / 3600.
            stats["total_logins"] += 1

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        group_name = getattr(user.group, "name", _("Ungrouped"))
        if group_pk not in group_data:
            group_data[group_pk] = {
                "name": group_name,
                "total_logins": 0,
                "total_hours": 0,
                "total_users": 0,
                "total_videos": 0,
                "total_exercises": 0,
                "pct_mastery": 0,
            }
        group = group_data[group_pk]
        stats = user_data[user.pk]

        group["total_users"] += 1
        group["total_logins"] += stats["total_logins"]
        group["total_hours"] += stats["total_hours"]
        group["total_videos"] += stats["total_videos"]
        group["total_exercises"] += stats["total_exercises"]

        # Running mean: rebuild the sum over the previous users, add this user, re-average.
        total_mastery_so_far = (group["pct_mastery"] * (group["total_users"] - 1) + stats["pct_mastery"])
        group["pct_mastery"] = total_mastery_so_far / group["total_users"]

    return (user_data, group_data)