Example #1
    def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, *args, **kwargs):
        """
        Centralized logic for how to refresh the topic cache, for each type of object.

        When the object is about to be used, this code runs to refresh its data,
        balancing correctness and efficiency.
        """
        if not cached_nodes:
            cached_nodes = {"topics": topicdata.TOPICS}

        for node in cached_nodes.values():
            if not node:
                continue
            has_children = bool(node.get("children"))
            has_grandchildren = has_children and any(["children" in child for child in node["children"]])

            # Properties not yet marked
            if node["kind"] == "Video":
                # NOTE: `force` (like `handler` below) comes from the enclosing
                #   decorator's scope, which is not included in this snippet.
                if force or "urls" not in node:
                    # stamp_urls_on_video(node, force=force)  # handled by the forced recount below
                    recount_videos_and_invalidate_parents(node["parent"], force=True)

            elif node["kind"] == "Topic":
                if not force and (not has_grandchildren or "nvideos_local" not in node):
                    # if forcing, would do this here, and again below--so skip if forcing.
                    logging.debug("cache miss: stamping urls on videos")
                    for video in topic_tools.get_topic_videos(path=node["path"]):
                        stamp_urls_on_video(video, force=force)
                recount_videos_and_invalidate_parents(node, force=force or not has_grandchildren)

        kwargs.update(cached_nodes)
        return handler(request, *args, **kwargs)
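
Note that `force` and `handler` are free variables inside this wrapper: they would be supplied by an enclosing decorator/factory that is not part of the snippet. A minimal sketch of what such a factory could look like (the name and the `force` default are assumptions, not the original source):

def refresh_topic_cache(handler, force=False):
    # Hypothetical enclosing factory: binds `handler` and `force` for the wrapper
    # above, then returns the wrapper as the replacement view function.
    def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, *args, **kwargs):
        ...  # body as shown in the example above
        return handler(request, *args, **kwargs)
    return refresh_topic_cache_wrapper_fn
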
Example #2
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or (
            [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None
        )
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                # scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
Example #3
def tabular_view(request, facility, report_type="exercise"):
    """Tabular view also gets data server-side."""

    # Get a list of topics (sorted) and groups
    topics = get_knowledgemap_topics()
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "report_types": ("exercise", "video"),
        "request_report_type": report_type,
        "topics": topics,
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id,
                                            is_teacher=False).order_by(
                                                "last_name", "first_name")

    elif facility:
        # Narrow by facility
        search_groups = [
            dict["groups"] for dict in groups
            if dict["facility"] == facility.id
        ]
        assert len(search_groups) <= 1, "should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[
            0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility),
            is_teacher=False).order_by("last_name", "first_name")

    else:
        # Show all (including ungrouped)
        search_groups = []  # must be initialized before accumulating
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None),
            is_teacher=False).order_by("last_name", "first_name")

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = get_topic_exercises(topic_id=topic_id)
        exercises = sorted(exercises,
                           key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["name"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by("user__last_name", "user__first_name")\
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(
                    exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # More code, but much faster
        video_ids = [vid["youtube_id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, youtube_id__in=video_ids) \
            .order_by("user__last_name", "user__first_name")\
            .values("user__id", "complete", "youtube_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(
                    vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]
                          ["youtube_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })

    else:
        raise Http404("Unknown report_type: %s" % report_type)

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(
                user, activity_type="login"
            )  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error(
                "Failed to update Teacher userlog activity login: %s" % e)

    return context
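
The "More code, but much faster" blocks above avoid issuing one query per student: all logs are fetched in a single queryset ordered by the same (last name, first name) key as the users, and the two sorted sequences are then walked in lockstep. A stripped-down sketch of that merge pattern with made-up data (independent of the Django models above):

# Both sequences must be sorted consistently for this single pass to work.
users = [{"id": 1, "name": "Ada"}, {"id": 2, "name": "Grace"}]
logs = [  # already ordered to match the user ordering
    {"user_id": 1, "exercise_id": "addition_1", "complete": True},
    {"user_id": 2, "exercise_id": "addition_1", "complete": False},
    {"user_id": 2, "exercise_id": "subtraction_1", "complete": True},
]

log_idx = 0
for user in users:
    log_table = {}
    while log_idx < len(logs) and logs[log_idx]["user_id"] == user["id"]:
        log_table[logs[log_idx]["exercise_id"]] = logs[log_idx]
        log_idx += 1
    print(user["name"], sorted(log_table))
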
Example #4
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    topic_slugs = [t["id"] for t in get_knowledgemap_topics()]
    topics = [NODE_CACHE["Topic"][slug] for slug in topic_slugs]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("youtube_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" topic
    for elog in exercise_logs:
        parents = NODE_CACHE["Exercise"][elog["exercise_id"]]["parents"]
        topic = set(parents).intersection(set(topic_slugs))
        if not topic:
            logging.error(
                "Could not find a topic for exercise %s (parents=%s)" %
                (elog["exercise_id"], parents))
            continue
        topic = topic.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(
                path=NODE_CACHE["Topic"][topic]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" topic.
    for vlog in video_logs:
        parents = NODE_CACHE["Video"][ID2SLUG_MAP[
            vlog["youtube_id"]]]["parents"]
        topic = set(parents).intersection(set(topic_slugs))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" %
                          (vlog["youtube_id"], parents))
            continue
        topic = topic.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(
                path=NODE_CACHE["Topic"][topic]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]

    # Now compute stats
    for topic in topic_slugs:  #set(topic_exercises.keys()).union(set(topic_videos.keys())):
        n_exercises = len(topic_exercises.get(topic, []))
        n_videos = len(topic_videos.get(topic, []))

        exercises = exercises_by_topic.get(topic, [])
        videos = videos_by_topic.get(topic, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[topic] = [
            el["completion_timestamp"]
            for el in filter(lambda n: n["complete"], exercises)
        ]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[topic] = {
            "ex:pct_mastery":
            0 if not n_exercises_touched else
            sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":
            0 if not n_exercises_touched else n_exercises_touched /
            float(n_exercises),
            "ex:average_points":
            0 if not n_exercises_touched else
            sum([el["points"]
                 for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts":
            0 if not n_exercises_touched else
            sum([el["attempts"]
                 for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":
            0 if not n_exercises_touched else
            sum([el["streak_progress"]
                 for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling":
            0 if not n_exercises_touched else sum(
                [el["struggling"] for el in exercises]),
            "ex:last_completed":
            None if not n_exercises_touched else max_none(
                [el["completion_timestamp"] or None for el in exercises]),
            "vid:pct_started":
            0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed":
            0 if not n_videos_touched else
            sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":
            0 if not n_videos_touched else
            sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":
            0. if not n_videos_touched else float(
                sum([vl["points"]
                     for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed":
            None if not n_videos_touched else max_none(
                [vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {
                "key": "ex:pct_mastery",
                "title": _("% Mastery"),
                "type": "pct"
            },
            {
                "key": "ex:pct_started",
                "title": _("% Started"),
                "type": "pct"
            },
            {
                "key": "ex:average_points",
                "title": _("Average Points"),
                "type": "float"
            },
            {
                "key": "ex:average_attempts",
                "title": _("Average Attempts"),
                "type": "float"
            },
            {
                "key": "ex:average_streak",
                "title": _("Average Streak"),
                "type": "pct"
            },
            {
                "key": "ex:total_struggling",
                "title": _("Struggling"),
                "type": "int"
            },
            {
                "key": "ex:last_completed",
                "title": _("Last Completed"),
                "type": "date"
            },
            {
                "key": "vid:pct_completed",
                "title": _("% Completed"),
                "type": "pct"
            },
            {
                "key": "vid:pct_started",
                "title": _("% Started"),
                "type": "pct"
            },
            {
                "key": "vid:total_minutes",
                "title": _("Average Minutes Watched"),
                "type": "float"
            },
            {
                "key": "vid:average_points",
                "title": _("Average Points"),
                "type": "float"
            },
            {
                "key": "vid:last_completed",
                "title": _("Last Completed"),
                "type": "date"
            },
        ]
    }
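
The max_none() helper used for the "last_completed" stats is not defined in this snippet; a plausible minimal implementation (an assumption, not the original) ignores None entries and returns None when nothing is left:

def max_none(values):
    # Assumed behavior: maximum of the non-None values, or None if there are none.
    values = [v for v in values if v is not None]
    return max(values) if values else None
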
Example #5
def tabular_view(request, facility, report_type="exercise"):
    """Tabular view also gets data server-side."""

    # Get a list of topics (sorted) and groups
    topics = get_knowledgemap_topics()
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request)
    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "report_types": ("exercise", "video"),
        "request_report_type": report_type,
        "topics": topics,
    })

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(
            group=group_id, is_teacher=False).order_by("last_name", "first_name")

    elif facility:
        # Narrow by facility
        search_groups = [dict["groups"] for dict in groups if dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "should only have one or zero matches."

        # Return groups and ungrouped
        search_groups = search_groups[0]  # make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False).order_by("last_name", "first_name")

    else:
        # Show all (including ungrouped)
        search_groups = []  # must be initialized before accumulating
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False).order_by("last_name", "first_name")

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = get_topic_exercises(topic_id=topic_id)
        exercises = sorted(exercises, key=lambda e: (e["h_position"], e["v_position"]))
        context["exercises"] = exercises

        # More code, but much faster
        exercise_names = [ex["name"] for ex in context["exercises"]]
        # Get students
        context["students"] = []
        exlogs = ExerciseLog.objects \
            .filter(user__in=users, exercise_id__in=exercise_names) \
            .order_by("user__last_name", "user__first_name")\
            .values("user__id", "struggling", "complete", "exercise_id")
        exlogs = list(exlogs)  # force the query to be evaluated

        exlog_idx = 0
        for user in users:
            log_table = {}
            while exlog_idx < len(exlogs) and exlogs[exlog_idx]["user__id"] == user.id:
                log_table[exlogs[exlog_idx]["exercise_id"]] = exlogs[exlog_idx]
                exlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "exercise_logs": log_table,
            })

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # More code, but much faster
        video_ids = [vid["youtube_id"] for vid in context["videos"]]
        # Get students
        context["students"] = []
        vidlogs = VideoLog.objects \
            .filter(user__in=users, youtube_id__in=video_ids) \
            .order_by("user__last_name", "user__first_name")\
            .values("user__id", "complete", "youtube_id", "total_seconds_watched", "points")
        vidlogs = list(vidlogs)  # force the query to be executed now

        vidlog_idx = 0
        for user in users:
            log_table = {}
            while vidlog_idx < len(vidlogs) and vidlogs[vidlog_idx]["user__id"] == user.id:
                log_table[vidlogs[vidlog_idx]["youtube_id"]] = vidlogs[vidlog_idx]
                vidlog_idx += 1

            context["students"].append({  # this could be DRYer
                "first_name": user.first_name,
                "last_name": user.last_name,
                "username": user.username,
                "name": user.get_name(),
                "id": user.id,
                "video_logs": log_table,
            })

    else:
        raise Http404("Unknown report_type: %s" % report_type)

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
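
For reference, the user query above pulls grouped and ungrouped students in one filter by OR-ing two Q objects; positional Q arguments and keyword arguments passed to .filter() are ANDed together. The lines below merely restate that filter and assume the surrounding view's names (FacilityUser, search_groups, facility):

from django.db.models import Q

# Students in any of the listed groups OR ungrouped students of this facility,
# excluding teachers, sorted the same way as above.
users = FacilityUser.objects.filter(
    (Q(group__in=search_groups) | Q(group=None, facility=facility)) & Q(is_teacher=False)
).order_by("last_name", "first_name")
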
Example #6
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    topic_slugs = [t["id"] for t in get_knowledgemap_topics()]
    topics = [NODE_CACHE["Topic"][slug] for slug in topic_slugs]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("youtube_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" topic
    for elog in exercise_logs:
        parents = NODE_CACHE["Exercise"][elog["exercise_id"]]["parents"]
        topic = set(parents).intersection(set(topic_slugs))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parents))
            continue
        topic = topic.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=NODE_CACHE["Topic"][topic]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" topic.
    for vlog in video_logs:
        parents = NODE_CACHE["Video"][ID2SLUG_MAP[vlog["youtube_id"]]]["parents"]
        topic = set(parents).intersection(set(topic_slugs))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["youtube_id"], parents))
            continue
        topic = topic.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(path=NODE_CACHE["Topic"][topic]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]


    # Now compute stats
    for topic in topic_slugs:#set(topic_exercises.keys()).union(set(topic_videos.keys())):
        n_exercises = len(topic_exercises.get(topic, []))
        n_videos = len(topic_videos.get(topic, []))

        exercises = exercises_by_topic.get(topic, [])
        videos = videos_by_topic.get(topic, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[topic] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[topic] = {
            "ex:pct_mastery":      0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
Example #7
import json
import os

import settings
from shared import topic_tools
from shared.videos import get_video_urls, get_video_counts


TOPICS          = topic_tools.get_topic_tree()
NODE_CACHE      = topic_tools.get_node_cache()
ID2SLUG_MAP     = topic_tools.get_id2slug_map()

# Add initial video counts
get_video_counts(topic=TOPICS, videos_path=settings.CONTENT_ROOT)

# Compute video URLs.  Must use videos from topics, as the NODE_CACHE doesn't contain all video objects. :-/
for video in topic_tools.get_topic_videos(path="/"):
    video["available"] = video["on_disk"] or bool(settings.BACKUP_VIDEO_SOURCE)
    (video["stream_url"], video["thumbnail_url"], video["subtitles_url"]) = get_video_urls(video["youtube_id"], "mp4", video["on_disk"])

# NOTE: the function below appears to be excerpted from a different module than the
#   imports above; helpers like generate_fake_facility_users, probability_of,
#   sample_user_settings, datediff, and the Device/VideoLog/ExerciseLog models, as
#   well as the module-level `topics` list used as a default argument, are defined
#   in that original module and are not part of this snippet.
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    own_device = Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                # The recursive call returns a list, so extend (rather than append) to keep video_logs flat.
                video_logs += generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date)

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug("# videos: %d; p(videos)=%4.3f, user settings: %s\n" % (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(user=facility_user, id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:      # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(1., sqrt(random.random() * sqrt(user_settings["effort_level"] * user_settings["time_in_program"] / sqrt(user_settings["speed_of_learning"]))))
                # Compute quantities based on sample
                total_seconds_watched = int(video["duration"] * pct_completed / 100.)
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"])))
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(seconds=random.randint(int(time_for_watching), int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user, video_id=video["id"])
                except VideoLog.DoesNotExist:

                    logging.info("Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s" % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    ))
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video["id"],
                        youtube_id=video["youtube_id"],
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                    )
                    vlog.save(update_userlog=False)  # avoid userlog issues

                video_logs.append(vlog)

    return video_logs
Example #9
# NOTE: as in the earlier example, `topics`, datediff, and the helper functions and
#   models used below come from the original module and are not shown in this snippet.
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=datetime.datetime.now() - datetime.timedelta(days=30 * 6)):
    """Add video logs for the given topics, for each of the given users.
    If no users are given, they are created.
    If no topics exist, they are taken from the list at the top of this file."""

    own_device = Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                # The recursive call returns a list, so extend (rather than append) to keep video_logs flat.
                video_logs += generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date)

    # Actually generate!
    else:
        # First, make videos for the associated logs

        # Then make some unassociated videos, to simulate both exploration
        #   and watching videos without finishing.
        # Get (or create) user type
        try:
            user_settings = json.loads(facility_user.notes)
        except:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])  # when this user started in the program, relative to NOW

        for topic in topics:
            videos = get_topic_videos(topic_id=topic)

            exercises = get_topic_exercises(topic_id=topic)
            exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug("# videos: %d; p(videos)=%4.3f, user settings: %s\n" % (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress non-exercise-related videos
                #   for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress

                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(user=facility_user, id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling
                if p_video < random.random():
                    continue
                    # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:      # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(1., sqrt(random.random() * sqrt(user_settings["effort_level"] * user_settings["time_in_program"] / sqrt(user_settings["speed_of_learning"]))))
                # Compute quantities based on sample
                total_seconds_watched = int(video["duration"] * pct_completed / 100.)
                points = int(750 * pct_completed / 100.)

                # Choose a rate of videos, based on their effort level.
                #   Compute the latest possible start time.
                #   Then sample a start time between their start time
                #   and the latest possible start_time
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent)
                    date_jitter = datetime.timedelta(days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"])))
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    rate_of_videos = 0.66 * user_settings["effort_level"] + 0.33 * user_settings["speed_of_learning"]  # exercises per day
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(seconds=random.randint(int(time_for_watching), int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user, youtube_id=video["youtube_id"])
                except VideoLog.DoesNotExist:

                    logging.info("Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s" % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    ))
                    vlog = VideoLog(
                        user=facility_user,
                        youtube_id=video["youtube_id"],
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                        completion_counter=datediff(date_completed, start_date, units="seconds"),
                    )
                    vlog.full_clean()
                    # TODO(bcipolli): bulk saving of logs
                    vlog.counter = own_device.increment_and_get_counter()
                    vlog.sign(own_device)  # have to sign after setting the counter
                    vlog.save(imported=True)  # avoid userlog issues


                video_logs.append(vlog)

    return video_logs
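
datediff() is used above both with a single timedelta and with two datetimes, always with units="seconds". It is not shown in these snippets; a minimal stand-in consistent with that usage (an assumption, not the original helper) could be:

def datediff(a, b=None, units="seconds"):
    # Assumed behavior: accept either a timedelta, or two datetimes whose difference
    # is taken, and return the span in the requested units.
    delta = a if b is None else (a - b)
    seconds = delta.total_seconds()
    if units == "seconds":
        return seconds
    if units == "days":
        return seconds / 86400.0
    raise ValueError("Unsupported units: %s" % units)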