def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        #
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or (
            [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None
        )
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                # scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        #
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or ([vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None)
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                #scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
Exemple #3
0
def tabular_view(request, report_type="exercise"):
    """Tabular view also gets data server-side."""
    # important for setting the defaults for the coach nav bar

    language = lcode_to_django_lang(request.language)

    facility, group_id, context = coach_nav_context(request, "tabular")

    # Define how students are ordered--used to be as efficient as possible.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topics = [get_node_cache("Topic", language=language).get(tid["id"]) for tid in get_knowledgemap_topics(language=language) if report_type.title() in tid["contains"]]
    playlists = Playlist.all()

    (groups, facilities, ungrouped_available) = get_accessible_objects_from_logged_in_user(request, facility=facility)

    context.update(plotting_metadata_context(request, facility=facility))

    context.update({
        # For translators: the following two translations are nouns
        "report_types": ({"value": "exercise", "name":_("exercise")}, {"value": "video", "name": _("video")}),
        "request_report_type": report_type,
        "topics": [{"id": t["id"], "title": t["title"]} for t in topics if t],
        "playlists": [{"id": p.id, "title": p.title, "tag": p.tag} for p in playlists if p],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    playlist_id = request.GET.get("playlist", "")
    # No valid data; just show generic
    # Exactly one of topic_id or playlist_id should be present
    if not ((topic_id or playlist_id) and not (topic_id and playlist_id)):
        if playlists:
            messages.add_message(request, WARNING, _("Please select a playlist."))
        elif topics:
            messages.add_message(request, WARNING, _("Please select a topic."))
        return context

    playlist = (filter(lambda p: p.id == playlist_id, Playlist.all()) or [None])[0]

    if group_id:
        # Narrow by group
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            users = FacilityUser.objects.filter(group__isnull=True, is_teacher=False)
            if facility:
                # filter only those ungrouped students for the facility
                users = users.filter(facility=facility)
                users = users.order_by(*student_ordering)
            else:
                # filter all ungroup students
                users = FacilityUser.objects.filter(group__isnull=True, is_teacher=False).order_by(*student_ordering)

        else:
            users = FacilityUser.objects.filter(
                group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        if topic_id:
            exercises = get_topic_exercises(topic_id=topic_id)
        elif playlist:
            exercises = playlist.get_playlist_entries("Exercise", language=language)

        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["id"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by(*["user__%s" % field for field in student_ordering]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        if topic_id:
            context["videos"] = get_topic_videos(topic_id=topic_id)
        elif playlist:
            context["videos"] = playlist.get_playlist_entries("Video", language=language)

        # More code, but much faster
        video_ids = [vid["id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=video_ids) \
            .order_by(*["user__%s" % field for field in student_ordering])\
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]["video_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })
    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    # Validate results by showing user messages.
    if not users:
        # 1. check group facility groups
        if len(groups) > 0 and not groups[0]['groups']:
            # 1. No groups available (for facility) and "no students" returned.
            messages.add_message(request, WARNING,
                                 _("No learner accounts have been created for selected facility/group."))
        elif topic_id and playlist_id:
            # 2. Both topic and playlist are selected.
            messages.add_message(request, WARNING, _("Please select either a topic or a playlist above, but not both."))
        elif not topic_id and not playlist_id:
            # 3. Group was selected, but data not queried because a topic or playlist was not selected.
            if playlists:
                # 4. No playlist was selected.
                messages.add_message(request, WARNING, _("Please select a playlist."))
            elif topics:
                # 5. No topic was selected.
                messages.add_message(request, WARNING, _("Please select a topic."))
        else:
            # 6. Everything specified, but no users fit the query.
            messages.add_message(request, WARNING, _("No learner accounts in this group have been created."))
    # End: Validate results by showing user messages.

    log_coach_report_view(request)

    return context
def generate_fake_video_logs(
    facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)
):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                video_logs.append(generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            try:
                facility_user.save()
            except Exception as e:
                logging.error("Error saving facility user: %s" % e)

        date_diff_started = datetime.timedelta(
            seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"]
        )  # when this user started in the program, relative to NOW

        # contains the video duration key
        video_cache = get_content_cache()

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [ex["id"] if "id" in ex else ex["name"] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug(
                "# videos: %d; p(videos)=%4.3f, user settings: %s\n"
                % (len(videos), p_video_outer, json.dumps(user_settings))
            )

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(
                            user=facility_user, id=video["related_exercise"]["id"]
                        )
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.0
                else:  # Slower students will use videos more.  Effort also important.
                    pct_completed = 100.0 * min(
                        1.0,
                        sqrt(
                            random.random()
                            * sqrt(
                                user_settings["effort_level"]
                                * user_settings["time_in_program"]
                                / sqrt(user_settings["speed_of_learning"])
                            )
                        ),
                    )

                # get the video duration on the video cache
                video_id = video.get("id", "")
                video_duration = 0
                if video_id and video_cache:
                    video_item = video_cache.get(video_id, None)
                    if video_item:
                        video_duration = video_item.get("duration", 0)

                # Compute quantities based on sample
                total_seconds_watched = int(video_duration * pct_completed / 100.0)
                points = int(750 * pct_completed / 100.0)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(
                        days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"]))
                    )
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = (
                        0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]
                    )  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(
                        seconds=random.randint(
                            int(time_for_watching), int(datediff(date_diff_started, units="seconds"))
                        )
                    )
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user, video_id=video_id)
                except VideoLog.DoesNotExist:

                    logging.info(
                        "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                        % (
                            facility_user.first_name,
                            video["title"],
                            pct_completed,
                            points,
                            " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                        )
                    )
                    youtube_id = video.get("youtube_id", video_id)
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video_id,
                        youtube_id=youtube_id,
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.0),
                        completion_timestamp=date_completed,
                        latest_activity_timestamp=date_completed,
                    )
                    try:
                        vlog.save()  # avoid userlog issues
                    except Exception as e:
                        logging.error("Error saving video log: %s" % e)
                        continue

                video_logs.append(vlog)

    return video_logs
Exemple #5
0
def tabular_view(request, report_type="exercise"):
    """Tabular view also gets data server-side."""
    # important for setting the defaults for the coach nav bar

    language = lcode_to_django_lang(request.language)

    facility, group_id, context = coach_nav_context(request, "tabular")

    # Define how students are ordered--used to be as efficient as possible.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topics = [
        get_node_cache("Topic", language=language).get(tid["id"])
        for tid in get_knowledgemap_topics(language=language)
        if report_type.title() in tid["contains"]
    ]
    playlists = Playlist.all()

    (groups, facilities,
     ungrouped_available) = get_accessible_objects_from_logged_in_user(
         request, facility=facility)

    context.update(plotting_metadata_context(request, facility=facility))

    context.update({
        # For translators: the following two translations are nouns
        "report_types": ({
            "value": "exercise",
            "name": _("exercise")
        }, {
            "value": "video",
            "name": _("video")
        }),
        "request_report_type":
        report_type,
        "topics": [{
            "id": t["id"],
            "title": t["title"]
        } for t in topics if t],
        "playlists": [{
            "id": p.id,
            "title": p.title,
            "tag": p.tag
        } for p in playlists if p],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    playlist_id = request.GET.get("playlist", "")
    # No valid data; just show generic
    # Exactly one of topic_id or playlist_id should be present
    if not ((topic_id or playlist_id) and not (topic_id and playlist_id)):
        if playlists:
            messages.add_message(request, WARNING,
                                 _("Please select a playlist."))
        elif topics:
            messages.add_message(request, WARNING, _("Please select a topic."))
        return context

    playlist = (filter(lambda p: p.id == playlist_id, Playlist.all())
                or [None])[0]

    if group_id:
        # Narrow by group
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            users = FacilityUser.objects.filter(group__isnull=True,
                                                is_teacher=False)
            if facility:
                # filter only those ungrouped students for the facility
                users = users.filter(facility=facility)
                users = users.order_by(*student_ordering)
            else:
                # filter all ungroup students
                users = FacilityUser.objects.filter(
                    group__isnull=True,
                    is_teacher=False).order_by(*student_ordering)

        else:
            users = FacilityUser.objects.filter(
                group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [
            groups_dict["groups"] for groups_dict in groups
            if groups_dict["facility"] == facility.id
        ]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[
            0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility),
            is_teacher=False).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None),
            is_teacher=False).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        if topic_id:
            exercises = get_topic_exercises(topic_id=topic_id)
        elif playlist:
            exercises = playlist.get_playlist_entries("Exercise",
                                                      language=language)

        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["id"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by(*["user__%s" % field for field in student_ordering]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(
                    exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        if topic_id:
            context["videos"] = get_topic_videos(topic_id=topic_id)
        elif playlist:
            context["videos"] = playlist.get_playlist_entries(
                "Video", language=language)

        # More code, but much faster
        video_ids = [vid["id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=video_ids) \
            .order_by(*["user__%s" % field for field in student_ordering])\
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(
                    vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]
                          ["video_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })
    else:
        raise Http404(
            _("Unknown report_type: %(report_type)s") %
            {"report_type": report_type})

    # Validate results by showing user messages.
    if not users:
        # 1. check group facility groups
        if len(groups) > 0 and not groups[0]['groups']:
            # 1. No groups available (for facility) and "no students" returned.
            messages.add_message(
                request, WARNING,
                _("No learner accounts have been created for selected facility/group."
                  ))
        elif topic_id and playlist_id:
            # 2. Both topic and playlist are selected.
            messages.add_message(
                request, WARNING,
                _("Please select either a topic or a playlist above, but not both."
                  ))
        elif not topic_id and not playlist_id:
            # 3. Group was selected, but data not queried because a topic or playlist was not selected.
            if playlists:
                # 4. No playlist was selected.
                messages.add_message(request, WARNING,
                                     _("Please select a playlist."))
            elif topics:
                # 5. No topic was selected.
                messages.add_message(request, WARNING,
                                     _("Please select a topic."))
        else:
            # 6. Everything specified, but no users fit the query.
            messages.add_message(
                request, WARNING,
                _("No learner accounts in this group have been created."))
    # End: Validate results by showing user messages.

    log_coach_report_view(request)

    return context
Exemple #6
0
def tabular_view(request, facility, report_type="exercise"):
    """Tabular view also gets data server-side."""
    # Define how students are ordered--used to be as efficient as possible.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topics = [get_node_cache("Topic").get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        # For translators: the following two translations are nouns
        "report_types": (_("exercise"), _("video")),
        "request_report_type": report_type,
        "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match("^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(
            group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = get_topic_exercises(topic_id=topic_id)
        exercises = sorted(exercises, key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["name"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by(*["user__%s" % field for field in student_ordering]) \
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # More code, but much faster
        video_ids = [vid["id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, video_id__in=video_ids) \
            .order_by(*["user__%s" % field for field in student_ordering])\
            .values("user__id", "complete", "video_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]["video_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
Exemple #7
0
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    topic_ids = get_knowledgemap_topics()
    topic_ids = topic_ids + [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topics = [node_cache["Topic"][id][0] for id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        if not elog["exercise_id"] in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        if not vlog["video_id"] in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]


    # Now compute stats
    for id in topic_ids:#set(topic_exercises.keys()).union(set(topic_videos.keys())):
        n_exercises = len(topic_exercises.get(id, []))
        n_videos = len(topic_videos.get(id, []))

        exercises = exercises_by_topic.get(id, [])
        videos = videos_by_topic.get(id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[id] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[id] = {
            "ex:pct_mastery":      0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
Exemple #8
0
def generate_fake_video_logs(facility_user=None,
                             topics=topics,
                             start_date=datetime.datetime.now() -
                             datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                video_logs.append(
                    generate_fake_video_logs(facility_user=user,
                                             topics=[topic],
                                             start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            try:
                facility_user.save()
            except Exception as e:
                logging.error("Error saving facility user: %s" % e)

        date_diff_started = datetime.timedelta(
            seconds=datediff(date_diff, units="seconds") *
            user_settings["time_in_program"]
        )  # when this user started in the program, relative to NOW

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [
                ex["id"] if "id" in ex else ex['name'] for ex in exercises
            ]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user,
                                                       id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video",
                                           user_settings=user_settings)
            logging.debug(
                "# videos: %d; p(videos)=%4.3f, user settings: %s\n" %
                (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed",
                                             user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(
                            user=facility_user,
                            id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:  # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(
                        1.,
                        sqrt(random.random() *
                             sqrt(user_settings["effort_level"] *
                                  user_settings["time_in_program"] /
                                  sqrt(user_settings["speed_of_learning"]))))
                # Compute quantities based on sample
                total_seconds_watched = int(video["duration"] * pct_completed /
                                            100.)
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(
                        0,
                        random.gauss(
                            1, user_settings["speed_of_learning"] /
                            user_settings["effort_level"])))
                    date_completed = exercise_log[
                        0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = 0.66 * user_settings[
                        "effort_level"] + 0.33 * user_settings[
                            "speed_of_learning"]  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(
                        seconds=random.randint(
                            int(time_for_watching),
                            int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now(
                    ) - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user,
                                                video_id=video["id"])
                except VideoLog.DoesNotExist:

                    logging.info(
                        "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                        % (
                            facility_user.first_name,
                            video["title"],
                            pct_completed,
                            points,
                            " COMPLETE on %s!" %
                            date_completed if pct_completed == 100 else "",
                        ))
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video["id"],
                        youtube_id=video["youtube_id"],
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                    )
                    try:
                        vlog.save()  # avoid userlog issues
                    except Exception as e:
                        logging.error("Error saving video log: %s" % e)
                        continue

                video_logs.append(vlog)

    return video_logs