def handle(self, *args, **options):
    """Download the dubbed videos for one language onto a distributed server.

    The videos to fetch are chosen, in priority order, from:
      1. the comma-separated ``video_ids`` option,
      2. every video under the ``topic_id`` option,
      3. every key of the dubbed-video map for the language.

    Each requested ID may be either a key of the dubbed-video map or one of
    its values (a YouTube ID).  IDs found in neither are logged and skipped.
    A failure on one video is logged and does not stop the remaining ones.

    Raises:
        CommandError: when run on the central server, or when the
            ``lang_code`` option is empty.
    """
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the distributed server.")

    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    # NOTE(review): presumably creates the content directory when it is
    # missing -- confirm against ensure_dir.
    ensure_dir(settings.CONTENT_ROOT)

    # Get list of videos
    lang_code = lcode_to_ietf(options["lang_code"])
    video_map = get_dubbed_video_map(lang_code) or {}

    # Reverse lookup (YouTube ID -> map key), built once.  It replaces a
    # linear scan of video_map.values() per requested ID, and lets us recover
    # the real video ID when the caller passed a YouTube ID.
    youtube_to_video_id = dict((yt_id, vid_id) for vid_id, yt_id in video_map.items())

    video_ids = options["video_ids"].split(",") if options["video_ids"] else None
    if not video_ids and options["topic_id"]:
        video_ids = [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])]
    video_ids = video_ids or video_map.keys()

    # Download the videos
    for requested_id in video_ids:
        if requested_id in video_map:
            video_id = requested_id
            youtube_id = video_map[requested_id]
        elif requested_id in youtube_to_video_id:
            # Perhaps they sent in a youtube ID?  We can handle that!
            video_id = youtube_to_video_id[requested_id]
            youtube_id = requested_id
        else:
            logging.error("No mapping for video_id=%s; skipping" % requested_id)
            continue

        try:
            scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
            #scrape_thumbnail(youtube_id=youtube_id)
        except Exception as e:
            logging.error("Failed to download video %s: %s" % (youtube_id, e))
            continue

        # The node-cache lookup is kept outside the download try-block so
        # that a missing cache entry is never reported as a failed download.
        video_nodes = get_node_cache("Video").get(video_id)
        if video_nodes:
            logging.info("Access video %s at %s" % (youtube_id, video_nodes[0]["path"]))
        else:
            logging.info("Downloaded video %s (no node cache entry for video_id=%s)" % (youtube_id, video_id))

    logging.info("Process complete.")
def handle(self, *args, **options):
    """Download the dubbed videos for one language onto a distributed server.

    The videos to fetch are chosen, in priority order, from:
      1. the comma-separated ``video_ids`` option,
      2. every video under the ``topic_id`` option,
      3. every key of the dubbed-video map for the language.

    Each requested ID may be either a key of the dubbed-video map or one of
    its values (a YouTube ID).  IDs found in neither are logged and skipped.
    A failure on one video is logged and does not stop the remaining ones.

    Raises:
        CommandError: when run on the central server, or when the
            ``lang_code`` option is empty.
    """
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the distributed server.")

    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    # NOTE(review): presumably creates the content directory when it is
    # missing -- confirm against ensure_dir.
    ensure_dir(settings.CONTENT_ROOT)

    # Get list of videos
    lang_code = lcode_to_ietf(options["lang_code"])
    video_map = get_dubbed_video_map(lang_code) or {}

    # Reverse lookup (YouTube ID -> map key), built once.  It replaces a
    # linear scan of video_map.values() per requested ID, and lets us recover
    # the real video ID when the caller passed a YouTube ID.
    youtube_to_video_id = dict((yt_id, vid_id) for vid_id, yt_id in video_map.items())

    video_ids = options["video_ids"].split(",") if options["video_ids"] else None
    if not video_ids and options["topic_id"]:
        video_ids = [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])]
    video_ids = video_ids or video_map.keys()

    # Download the videos
    for requested_id in video_ids:
        if requested_id in video_map:
            video_id = requested_id
            youtube_id = video_map[requested_id]
        elif requested_id in youtube_to_video_id:
            # Perhaps they sent in a youtube ID?  We can handle that!
            video_id = youtube_to_video_id[requested_id]
            youtube_id = requested_id
        else:
            logging.error("No mapping for video_id=%s; skipping" % requested_id)
            continue

        try:
            scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
            #scrape_thumbnail(youtube_id=youtube_id)
        except Exception as e:
            logging.error("Failed to download video %s: %s" % (youtube_id, e))
            continue

        # The node-cache lookup is kept outside the download try-block so
        # that a missing cache entry is never reported as a failed download.
        video_nodes = get_node_cache("Video").get(video_id)
        if video_nodes:
            logging.info("Access video %s at %s" % (youtube_id, video_nodes[0]["path"]))
        else:
            logging.info("Downloaded video %s (no node cache entry for video_id=%s)" % (youtube_id, video_id))

    logging.info("Process complete.")
def _tabular_student_rows(users, logs, content_id_field, logs_key):
    """Build one report row per user, attaching that user's logs keyed by content ID."""
    # Group by user ID instead of walking two separately-ordered querysets in
    # lockstep: grouping does not depend on the database returning users and
    # logs in exactly matching order.
    logs_by_user = {}
    for log in logs:
        logs_by_user.setdefault(log["user__id"], {})[log[content_id_field]] = log

    return [
        {
            "first_name": user.first_name,
            "last_name": user.last_name,
            "username": user.username,
            "name": user.get_name(),
            "id": user.id,
            logs_key: logs_by_user.get(user.id, {}),
        }
        for user in users
    ]


def tabular_view(request, facility, report_type="exercise"):
    """Build the template context for the tabular coach report.

    Tabular view also gets data server-side.

    Args:
        request: the current request; the ``topic`` and ``group`` query-string
            values select what is reported.
        facility: facility used to narrow the students when no ``group`` is
            given; a falsy value means all accessible groups are shown.
        report_type: ``"exercise"`` or ``"video"``.

    Returns:
        The context dict.  Without a valid ``topic`` query value only the
        generic metadata is returned.  Otherwise it also holds ``students``
        (one row per non-teacher user) and ``exercises`` or ``videos``.

    Raises:
        Http404: when ``report_type`` is neither ``"exercise"`` nor ``"video"``.
    """
    # Define how students are ordered.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topic_cache = get_node_cache("Topic")
    topics = [topic_cache.get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update(
        {
            # For translators: the following two translations are nouns
            "report_types": (_("exercise"), _("video")),
            "request_report_type": report_type,
            "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
        }
    )

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Zero matches is allowed by the assert above; treat it as "no groups"
        # rather than indexing into an empty list.
        search_groups = search_groups[0] if search_groups else []

        # Return groups and ungrouped; make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False
        ).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False
        ).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = sorted(
            get_topic_exercises(topic_id=topic_id),
            key=lambda e: (e["h_position"], e["v_position"]),
        )
        context["exercises"] = exercises

        # One query for all logs; the rows are grouped per user in Python, so
        # the query itself needs no ordering.
        exercise_names = [ex["name"] for ex in exercises]
        exlogs = ExerciseLog.objects.filter(user__in=users, exercise_id__in=exercise_names).values(
            "user__id", "struggling", "complete", "exercise_id"
        )
        context["students"] = _tabular_student_rows(users, exlogs, "exercise_id", "exercise_logs")

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # One query for all logs; grouped per user in Python.
        video_ids = [vid["id"] for vid in context["videos"]]
        vidlogs = VideoLog.objects.filter(user__in=users, video_id__in=video_ids).values(
            "user__id", "complete", "video_id", "total_seconds_watched", "points"
        )
        context["students"] = _tabular_student_rows(users, vidlogs, "video_id", "video_logs")

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0 when the denominator is zero."""
    if not denominator:
        return 0
    return numerator / float(denominator)


def _topic_exercise_stats(logs, n_available):
    """Summarize one topic's exercise logs; ``n_available`` is the topic's exercise count."""
    if not logs:
        return {
            "ex:pct_mastery": 0,
            "ex:pct_started": 0,
            "ex:average_points": 0,
            "ex:average_attempts": 0,
            "ex:average_streak": 0,
            "ex:total_struggling": 0,
            "ex:last_completed": None,
        }

    n_touched = float(len(logs))
    return {
        "ex:pct_mastery": _safe_ratio(sum(el["complete"] for el in logs), n_available),
        "ex:pct_started": _safe_ratio(len(logs), n_available),
        "ex:average_points": sum(el["points"] for el in logs) / n_touched,
        "ex:average_attempts": sum(el["attempts"] for el in logs) / n_touched,
        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        "ex:average_streak": sum(el["streak_progress"] for el in logs) / n_touched / 100.0,
        "ex:total_struggling": sum(el["struggling"] for el in logs),
        "ex:last_completed": max_none([el["completion_timestamp"] or None for el in logs]),
    }


def _topic_video_stats(logs, n_available):
    """Summarize one topic's video logs; ``n_available`` is the topic's video count."""
    if not logs:
        return {
            "vid:pct_started": 0,
            "vid:pct_completed": 0,
            "vid:total_minutes": 0,
            "vid:average_points": 0.0,
            "vid:last_completed": None,
        }

    n_touched = float(len(logs))
    return {
        "vid:pct_started": _safe_ratio(len(logs), n_available),
        "vid:pct_completed": _safe_ratio(sum(vl["complete"] for vl in logs), n_available),
        "vid:total_minutes": sum(vl["total_seconds_watched"] for vl in logs) / 60.0,
        "vid:average_points": sum(vl["points"] for vl in logs) / n_touched,
        "vid:last_completed": max_none([vl["completion_timestamp"] or None for vl in logs]),
    }


def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """Build the template context for the single-student report.

    Context done separately, to be importable for similar pages.

    Every exercise and video log of the user is assigned to one of the
    reported topics via the ``ancestor_ids`` of its node, then per-topic
    statistics are computed for every reported topic.

    Args:
        request: the current request; the user is resolved from it.
        xaxis: accepted for interface compatibility; not read here.
        yaxis: accepted for interface compatibility; not read here.

    Returns:
        A dict with the student, the topics, per-topic exercise lists and
        logs, sparkline timestamps, ``stats`` and the ``stat_defs`` that
        describe how to display them.

    Raises:
        Http404: when no user can be resolved from the request.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    # Copy before extending, so the list handed back by
    # get_knowledgemap_topics() is never modified in place (it may be a
    # shared/cached object -- not visible from here).
    topic_ids = list(get_knowledgemap_topics())
    topic_ids += [
        ch["id"]
        for node in get_topic_tree()["children"]
        for ch in node["children"]
        if node["id"] != "math"
    ]
    topic_id_set = set(topic_ids)  # built once; used for every log below
    topics = [node_cache["Topic"][topic_id][0] for topic_id in topic_ids]

    user_id = user.id
    exercise_logs = list(
        ExerciseLog.objects.filter(user=user).values(
            "exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"
        )
    )
    video_logs = list(
        VideoLog.objects.filter(user=user).values(
            "video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"
        )
    )

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        exercise_id = elog["exercise_id"]
        if exercise_id not in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            # we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, exercise_id))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][exercise_id] for topic in ex["ancestor_ids"]]
        matching_topics = set(parent_ids) & topic_id_set
        if not matching_topics:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (exercise_id, parent_ids))
            continue

        # When several reported topics match, set.pop() picks an arbitrary one.
        topic = matching_topics.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic.setdefault(topic, []).append(elog)

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        video_id = vlog["video_id"]
        if video_id not in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            # we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, video_id))
            continue

        parent_ids = [topic for vid in node_cache["Video"][video_id] for topic in vid["ancestor_ids"]]
        matching_topics = set(parent_ids) & topic_id_set
        if not matching_topics:
            logging.error("Could not find a topic for video %s (parents=%s)" % (video_id, parent_ids))
            continue

        # When several reported topics match, set.pop() picks an arbitrary one.
        topic = matching_topics.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic.setdefault(topic, []).append(vlog)

    # Now compute stats
    for topic_id in topic_ids:
        exercises = exercises_by_topic.get(topic_id, [])
        videos = videos_by_topic.get(topic_id, [])

        exercise_sparklines[topic_id] = [el["completion_timestamp"] for el in exercises if el["complete"]]

        # The "available" counts can be zero even when logs exist for the
        # topic; _safe_ratio keeps the percentages at 0 instead of dividing
        # by zero.
        topic_stats = _topic_exercise_stats(exercises, len(topic_exercises.get(topic_id) or []))
        topic_stats.update(_topic_video_stats(videos, len(topic_videos.get(topic_id) or [])))
        stats[topic_id] = topic_stats

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery", "title": _("% Mastery"), "type": "pct"},
            {"key": "ex:pct_started", "title": _("% Started"), "type": "pct"},
            {"key": "ex:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak", "title": _("Average Streak"), "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"), "type": "int"},
            {"key": "ex:last_completed", "title": _("Last Completed"), "type": "date"},
            {"key": "vid:pct_completed", "title": _("% Completed"), "type": "pct"},
            {"key": "vid:pct_started", "title": _("% Started"), "type": "pct"},
            # NOTE(review): the value is a total, but the title says
            # "Average"; title left unchanged because it is a translation key.
            {"key": "vid:total_minutes", "title": _("Average Minutes Watched"), "type": "float"},
            {"key": "vid:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "vid:last_completed", "title": _("Last Completed"), "type": "date"},
        ],
    }
def _tabular_student_rows(users, logs, content_id_field, logs_key):
    """Build one report row per user, attaching that user's logs keyed by content ID."""
    # Group by user ID instead of walking two separately-ordered querysets in
    # lockstep: grouping does not depend on the database returning users and
    # logs in exactly matching order.
    logs_by_user = {}
    for log in logs:
        logs_by_user.setdefault(log["user__id"], {})[log[content_id_field]] = log

    return [
        {
            "first_name": user.first_name,
            "last_name": user.last_name,
            "username": user.username,
            "name": user.get_name(),
            "id": user.id,
            logs_key: logs_by_user.get(user.id, {}),
        }
        for user in users
    ]


def tabular_view(request, facility, report_type="exercise"):
    """Build the template context for the tabular coach report.

    Tabular view also gets data server-side.

    Args:
        request: the current request; the ``topic`` and ``group`` query-string
            values select what is reported.
        facility: facility used to narrow the students when no ``group`` is
            given; a falsy value means all accessible groups are shown.
        report_type: ``"exercise"`` or ``"video"``.

    Returns:
        The context dict.  Without a valid ``topic`` query value only the
        generic metadata is returned.  Otherwise it also holds ``students``
        (one row per non-teacher user) and ``exercises`` or ``videos``.

    Raises:
        Http404: when ``report_type`` is neither ``"exercise"`` nor ``"video"``.
    """
    # Define how students are ordered.
    student_ordering = ["last_name", "first_name", "username"]

    # Get a list of topics (sorted) and groups
    topic_cache = get_node_cache("Topic")
    topics = [topic_cache.get(tid) for tid in get_knowledgemap_topics()]
    (groups, facilities) = get_accessible_objects_from_logged_in_user(request, facility=facility)
    context = plotting_metadata_context(request, facility=facility)
    context.update(
        {
            # For translators: the following two translations are nouns
            "report_types": (_("exercise"), _("video")),
            "request_report_type": report_type,
            "topics": [{"id": t[0]["id"], "title": t[0]["title"]} for t in topics if t],
        }
    )

    # get querystring info
    topic_id = request.GET.get("topic", "")
    # No valid data; just show generic
    if not topic_id or not re.match(r"^[\w\-]+$", topic_id):
        return context

    group_id = request.GET.get("group", "")
    if group_id:
        # Narrow by group
        users = FacilityUser.objects.filter(group=group_id, is_teacher=False).order_by(*student_ordering)

    elif facility:
        # Narrow by facility
        search_groups = [groups_dict["groups"] for groups_dict in groups if groups_dict["facility"] == facility.id]
        assert len(search_groups) <= 1, "Should only have one or zero matches."

        # Zero matches is allowed by the assert above; treat it as "no groups"
        # rather than indexing into an empty list.
        search_groups = search_groups[0] if search_groups else []

        # Return groups and ungrouped; make sure to include ungrouped students
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None, facility=facility), is_teacher=False
        ).order_by(*student_ordering)

    else:
        # Show all (including ungrouped)
        search_groups = []
        for groups_dict in groups:
            search_groups += groups_dict["groups"]
        users = FacilityUser.objects.filter(
            Q(group__in=search_groups) | Q(group=None), is_teacher=False
        ).order_by(*student_ordering)

    # We have enough data to render over a group of students
    # Get type-specific information
    if report_type == "exercise":
        # Fill in exercises
        exercises = sorted(
            get_topic_exercises(topic_id=topic_id),
            key=lambda e: (e["h_position"], e["v_position"]),
        )
        context["exercises"] = exercises

        # One query for all logs; the rows are grouped per user in Python, so
        # the query itself needs no ordering.
        exercise_names = [ex["name"] for ex in exercises]
        exlogs = ExerciseLog.objects.filter(user__in=users, exercise_id__in=exercise_names).values(
            "user__id", "struggling", "complete", "exercise_id"
        )
        context["students"] = _tabular_student_rows(users, exlogs, "exercise_id", "exercise_logs")

    elif report_type == "video":
        # Fill in videos
        context["videos"] = get_topic_videos(topic_id=topic_id)

        # One query for all logs; grouped per user in Python.
        video_ids = [vid["id"] for vid in context["videos"]]
        vidlogs = VideoLog.objects.filter(user__in=users, video_id__in=video_ids).values(
            "user__id", "complete", "video_id", "total_seconds_watched", "points"
        )
        context["students"] = _tabular_student_rows(users, vidlogs, "video_id", "video_logs")

    else:
        raise Http404(_("Unknown report_type: %(report_type)s") % {"report_type": report_type})

    if "facility_user" in request.session:
        try:
            # Log a "begin" and end here
            user = request.session["facility_user"]
            UserLog.begin_user_activity(user, activity_type="coachreport")
            UserLog.update_user_activity(user, activity_type="login")  # to track active login time for teachers
            UserLog.end_user_activity(user, activity_type="coachreport")
        except ValidationError as e:
            # Never report this error; don't want this logging to block other functionality.
            logging.error("Failed to update Teacher userlog activity login: %s" % e)

    return context
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0 when the denominator is zero."""
    if not denominator:
        return 0
    return numerator / float(denominator)


def _topic_exercise_stats(logs, n_available):
    """Summarize one topic's exercise logs; ``n_available`` is the topic's exercise count."""
    if not logs:
        return {
            "ex:pct_mastery": 0,
            "ex:pct_started": 0,
            "ex:average_points": 0,
            "ex:average_attempts": 0,
            "ex:average_streak": 0,
            "ex:total_struggling": 0,
            "ex:last_completed": None,
        }

    n_touched = float(len(logs))
    return {
        "ex:pct_mastery": _safe_ratio(sum(el["complete"] for el in logs), n_available),
        "ex:pct_started": _safe_ratio(len(logs), n_available),
        "ex:average_points": sum(el["points"] for el in logs) / n_touched,
        "ex:average_attempts": sum(el["attempts"] for el in logs) / n_touched,
        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        "ex:average_streak": sum(el["streak_progress"] for el in logs) / n_touched / 100.0,
        "ex:total_struggling": sum(el["struggling"] for el in logs),
        "ex:last_completed": max_none([el["completion_timestamp"] or None for el in logs]),
    }


def _topic_video_stats(logs, n_available):
    """Summarize one topic's video logs; ``n_available`` is the topic's video count."""
    if not logs:
        return {
            "vid:pct_started": 0,
            "vid:pct_completed": 0,
            "vid:total_minutes": 0,
            "vid:average_points": 0.0,
            "vid:last_completed": None,
        }

    n_touched = float(len(logs))
    return {
        "vid:pct_started": _safe_ratio(len(logs), n_available),
        "vid:pct_completed": _safe_ratio(sum(vl["complete"] for vl in logs), n_available),
        "vid:total_minutes": sum(vl["total_seconds_watched"] for vl in logs) / 60.0,
        "vid:average_points": sum(vl["points"] for vl in logs) / n_touched,
        "vid:last_completed": max_none([vl["completion_timestamp"] or None for vl in logs]),
    }


def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """Build the template context for the single-student report.

    Context done separately, to be importable for similar pages.

    Every exercise and video log of the user is assigned to one of the
    reported topics via the ``ancestor_ids`` of its node, then per-topic
    statistics are computed for every reported topic.

    Args:
        request: the current request; the user is resolved from it.
        xaxis: accepted for interface compatibility; not read here.
        yaxis: accepted for interface compatibility; not read here.

    Returns:
        A dict with the student, the topics, per-topic exercise lists and
        logs, sparkline timestamps, ``stats`` and the ``stat_defs`` that
        describe how to display them.

    Raises:
        Http404: when no user can be resolved from the request.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    # Copy before extending, so the list handed back by
    # get_knowledgemap_topics() is never modified in place (it may be a
    # shared/cached object -- not visible from here).
    topic_ids = list(get_knowledgemap_topics())
    topic_ids += [
        ch["id"]
        for node in get_topic_tree()["children"]
        for ch in node["children"]
        if node["id"] != "math"
    ]
    topic_id_set = set(topic_ids)  # built once; used for every log below
    topics = [node_cache["Topic"][topic_id][0] for topic_id in topic_ids]

    user_id = user.id
    exercise_logs = list(
        ExerciseLog.objects.filter(user=user).values(
            "exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"
        )
    )
    video_logs = list(
        VideoLog.objects.filter(user=user).values(
            "video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"
        )
    )

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        exercise_id = elog["exercise_id"]
        if exercise_id not in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            # we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, exercise_id))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][exercise_id] for topic in ex["ancestor_ids"]]
        matching_topics = set(parent_ids) & topic_id_set
        if not matching_topics:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (exercise_id, parent_ids))
            continue

        # When several reported topics match, set.pop() picks an arbitrary one.
        topic = matching_topics.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic.setdefault(topic, []).append(elog)

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        video_id = vlog["video_id"]
        if video_id not in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            # we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, video_id))
            continue

        parent_ids = [topic for vid in node_cache["Video"][video_id] for topic in vid["ancestor_ids"]]
        matching_topics = set(parent_ids) & topic_id_set
        if not matching_topics:
            logging.error("Could not find a topic for video %s (parents=%s)" % (video_id, parent_ids))
            continue

        # When several reported topics match, set.pop() picks an arbitrary one.
        topic = matching_topics.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic.setdefault(topic, []).append(vlog)

    # Now compute stats
    for topic_id in topic_ids:
        exercises = exercises_by_topic.get(topic_id, [])
        videos = videos_by_topic.get(topic_id, [])

        exercise_sparklines[topic_id] = [el["completion_timestamp"] for el in exercises if el["complete"]]

        # The "available" counts can be zero even when logs exist for the
        # topic; _safe_ratio keeps the percentages at 0 instead of dividing
        # by zero.
        topic_stats = _topic_exercise_stats(exercises, len(topic_exercises.get(topic_id) or []))
        topic_stats.update(_topic_video_stats(videos, len(topic_videos.get(topic_id) or [])))
        stats[topic_id] = topic_stats

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery", "title": _("% Mastery"), "type": "pct"},
            {"key": "ex:pct_started", "title": _("% Started"), "type": "pct"},
            {"key": "ex:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak", "title": _("Average Streak"), "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"), "type": "int"},
            {"key": "ex:last_completed", "title": _("Last Completed"), "type": "date"},
            {"key": "vid:pct_completed", "title": _("% Completed"), "type": "pct"},
            {"key": "vid:pct_started", "title": _("% Started"), "type": "pct"},
            # NOTE(review): the value is a total, but the title says
            # "Average"; title left unchanged because it is a translation key.
            {"key": "vid:total_minutes", "title": _("Average Minutes Watched"), "type": "float"},
            {"key": "vid:average_points", "title": _("Average Points"), "type": "float"},
            {"key": "vid:last_completed", "title": _("Last Completed"), "type": "date"},
        ],
    }
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=None):
    """Add video logs for the given topics, for each of the given users.

    If no users are given, they are created.  If no topics are given, the
    module-level ``topics`` list is used.

    Args:
        facility_user: a single user (anything with a ``username``
            attribute), an iterable of users, or a falsy value to have fake
            users generated first.
        topics: topic IDs to generate video logs for.
        start_date: earliest date of the simulated program.  Defaults to 180
            days before the moment of the call (computed per call, not once
            at import time).

    Returns:
        For a single user: a list of VideoLog objects (both newly created
        and already existing ones).  For several users: a list holding one
        such list per (topic, user) pair.
    """
    if start_date is None:
        start_date = datetime.datetime.now() - datetime.timedelta(days=30 * 6)

    # Result unused; the call is kept in case it has initialization side
    # effects -- TODO confirm and remove if it has none.
    Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                # NOTE(review): this appends each per-user list as one
                # element, so the result is nested.  Kept as-is because
                # callers may rely on that shape; `extend` would flatten it.
                video_logs.append(
                    generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date)
                )
        return video_logs

    # Actually generate!
    # First, make videos for the associated logs
    # Then make some unassociated videos, to simulate both exploration
    # and watching videos without finishing.

    # Get (or create) user type
    try:
        user_settings = json.loads(facility_user.notes)
    except (TypeError, ValueError):
        # notes is empty or not valid JSON: sample fresh settings and store them.
        user_settings = sample_user_settings()
        facility_user.notes = json.dumps(user_settings)
        try:
            facility_user.save()
        except Exception as e:
            logging.error("Error saving facility user: %s" % e)

    # when this user started in the program, relative to NOW
    date_diff_started = datetime.timedelta(
        seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"]
    )

    for topic in topics:
        videos = get_topic_videos(topic_id=topic)

        exercises = get_topic_exercises(topic_id=topic)
        exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
        # NOTE(review): this filters on the log's own `id`, while other code
        # in this file filters ExerciseLog on `exercise_id` -- confirm which
        # field is intended (same question for the per-video filter below).
        exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)
        # Nothing in the video loop adds exercise logs, so count only once
        # per topic instead of once per video.
        has_exercise_logs = exercise_logs.count() > 0

        # Probability of watching a video, irrespective of the context
        p_video_outer = probability_of("video", user_settings=user_settings)
        logging.debug(
            "# videos: %d; p(videos)=%4.3f, user settings: %s\n"
            % (len(videos), p_video_outer, json.dumps(user_settings))
        )

        for video in videos:
            p_completed = probability_of("completed", user_settings=user_settings)

            # If we're just doing random videos, fine.
            # If these videos relate to exercises, then suppress non-exercise-related videos
            #   for this user.
            p_video = p_video_outer  # start with the context-free value
            did_exercise = False
            exercise_log = None
            if has_exercise_logs:
                if "related_exercise" not in video:
                    # 5x less likely to watch a video if you haven't done the exercise
                    p_video /= 5  # suppress
                else:
                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    exercise_log = ExerciseLog.objects.filter(
                        user=facility_user, id=video["related_exercise"]["id"]
                    )
                    did_exercise = exercise_log.count() != 0
                    if did_exercise:
                        p_video *= 5
                        p_completed *= 2

            # Do the sampling
            if p_video < random.random():
                continue  # didn't watch it
            elif p_completed > random.random():
                pct_completed = 100.
            else:
                # Slower students will use videos more.  Effort also important.
                pct_completed = 100. * min(
                    1.,
                    sqrt(
                        random.random()
                        * sqrt(
                            user_settings["effort_level"]
                            * user_settings["time_in_program"]
                            / sqrt(user_settings["speed_of_learning"])
                        )
                    ),
                )

            # Compute quantities based on sample
            total_seconds_watched = int(video["duration"] * pct_completed / 100.)
            points = int(750 * pct_completed / 100.)

            # Compute the latest possible start time.
            # Then sample a start time between their start time
            # and the latest possible start_time
            if did_exercise:
                # More jitter if you learn fast, less jitter if you try harder (more diligent)
                date_jitter = datetime.timedelta(
                    days=max(
                        0,
                        random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"]),
                    )
                )
                date_completed = exercise_log[0].completion_timestamp - date_jitter
            else:
                earliest_offset = int(total_seconds_watched)
                latest_offset = int(datediff(date_diff_started, units="seconds"))
                # random.randint raises ValueError when the upper bound is
                # below the lower bound (watch time longer than the user's
                # time in the program), so clamp the upper bound.
                time_delta_completed = datetime.timedelta(
                    seconds=random.randint(earliest_offset, max(earliest_offset, latest_offset))
                )
                date_completed = datetime.datetime.now() - time_delta_completed

            try:
                vlog = VideoLog.objects.get(user=facility_user, video_id=video["id"])
            except VideoLog.DoesNotExist:
                logging.info(
                    "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                    % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    )
                )
                vlog = VideoLog(
                    user=facility_user,
                    video_id=video["id"],
                    youtube_id=video["youtube_id"],
                    total_seconds_watched=total_seconds_watched,
                    points=points,
                    complete=(pct_completed == 100.),
                    completion_timestamp=date_completed,
                )
                try:
                    vlog.save(update_userlog=False)  # avoid userlog issues
                except Exception as e:
                    logging.error("Error saving video log: %s" % e)
                    continue

            video_logs.append(vlog)

    return video_logs
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=None):
    """Add video logs for the given topics, for each of the given users.

    If no users are given, they are created.  If no topics are given, the
    module-level ``topics`` list is used.

    Args:
        facility_user: a single user (anything with a ``username``
            attribute), an iterable of users, or a falsy value to have fake
            users generated first.
        topics: topic IDs to generate video logs for.
        start_date: earliest date of the simulated program.  Defaults to 180
            days before the moment of the call (computed per call, not once
            at import time).

    Returns:
        For a single user: a list of VideoLog objects (both newly created
        and already existing ones).  For several users: a list holding one
        such list per (topic, user) pair.
    """
    if start_date is None:
        start_date = datetime.datetime.now() - datetime.timedelta(days=30 * 6)

    # Result unused; the call is kept in case it has initialization side
    # effects -- TODO confirm and remove if it has none.
    Device.get_own_device()
    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.
    # Recursive case
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()

        for topic in topics:
            for user in facility_user:
                # NOTE(review): this appends each per-user list as one
                # element, so the result is nested.  Kept as-is because
                # callers may rely on that shape; `extend` would flatten it.
                video_logs.append(
                    generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date)
                )
        return video_logs

    # Actually generate!
    # First, make videos for the associated logs
    # Then make some unassociated videos, to simulate both exploration
    # and watching videos without finishing.

    # Get (or create) user type
    try:
        user_settings = json.loads(facility_user.notes)
    except (TypeError, ValueError):
        # notes is empty or not valid JSON: sample fresh settings and store them.
        user_settings = sample_user_settings()
        facility_user.notes = json.dumps(user_settings)
        try:
            facility_user.save()
        except Exception as e:
            logging.error("Error saving facility user: %s" % e)

    # when this user started in the program, relative to NOW
    date_diff_started = datetime.timedelta(
        seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"]
    )

    for topic in topics:
        videos = get_topic_videos(topic_id=topic)

        exercises = get_topic_exercises(topic_id=topic)
        exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
        # NOTE(review): this filters on the log's own `id`, while other code
        # in this file filters ExerciseLog on `exercise_id` -- confirm which
        # field is intended (same question for the per-video filter below).
        exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)
        # Nothing in the video loop adds exercise logs, so count only once
        # per topic instead of once per video.
        has_exercise_logs = exercise_logs.count() > 0

        # Probability of watching a video, irrespective of the context
        p_video_outer = probability_of("video", user_settings=user_settings)
        logging.debug(
            "# videos: %d; p(videos)=%4.3f, user settings: %s\n"
            % (len(videos), p_video_outer, json.dumps(user_settings))
        )

        for video in videos:
            p_completed = probability_of("completed", user_settings=user_settings)

            # If we're just doing random videos, fine.
            # If these videos relate to exercises, then suppress non-exercise-related videos
            #   for this user.
            p_video = p_video_outer  # start with the context-free value
            did_exercise = False
            exercise_log = None
            if has_exercise_logs:
                if "related_exercise" not in video:
                    # 5x less likely to watch a video if you haven't done the exercise
                    p_video /= 5  # suppress
                else:
                    # 5x more likely to watch a video if they've done the exercise
                    # 2x more likely to have finished it.
                    exercise_log = ExerciseLog.objects.filter(
                        user=facility_user, id=video["related_exercise"]["id"]
                    )
                    did_exercise = exercise_log.count() != 0
                    if did_exercise:
                        p_video *= 5
                        p_completed *= 2

            # Do the sampling
            if p_video < random.random():
                continue  # didn't watch it
            elif p_completed > random.random():
                pct_completed = 100.
            else:
                # Slower students will use videos more.  Effort also important.
                pct_completed = 100. * min(
                    1.,
                    sqrt(
                        random.random()
                        * sqrt(
                            user_settings["effort_level"]
                            * user_settings["time_in_program"]
                            / sqrt(user_settings["speed_of_learning"])
                        )
                    ),
                )

            # Compute quantities based on sample
            total_seconds_watched = int(video["duration"] * pct_completed / 100.)
            points = int(750 * pct_completed / 100.)

            # Compute the latest possible start time.
            # Then sample a start time between their start time
            # and the latest possible start_time
            if did_exercise:
                # More jitter if you learn fast, less jitter if you try harder (more diligent)
                date_jitter = datetime.timedelta(
                    days=max(
                        0,
                        random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"]),
                    )
                )
                date_completed = exercise_log[0].completion_timestamp - date_jitter
            else:
                earliest_offset = int(total_seconds_watched)
                latest_offset = int(datediff(date_diff_started, units="seconds"))
                # random.randint raises ValueError when the upper bound is
                # below the lower bound (watch time longer than the user's
                # time in the program), so clamp the upper bound.
                time_delta_completed = datetime.timedelta(
                    seconds=random.randint(earliest_offset, max(earliest_offset, latest_offset))
                )
                date_completed = datetime.datetime.now() - time_delta_completed

            try:
                vlog = VideoLog.objects.get(user=facility_user, video_id=video["id"])
            except VideoLog.DoesNotExist:
                logging.info(
                    "Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s"
                    % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    )
                )
                vlog = VideoLog(
                    user=facility_user,
                    video_id=video["id"],
                    youtube_id=video["youtube_id"],
                    total_seconds_watched=total_seconds_watched,
                    points=points,
                    complete=(pct_completed == 100.),
                    completion_timestamp=date_completed,
                )
                try:
                    vlog.save(update_userlog=False)  # avoid userlog issues
                except Exception as e:
                    logging.error("Error saving video log: %s" % e)
                    continue

            video_logs.append(vlog)

    return video_logs