def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any content caches in memory that need
    annotation with file availability.
    """
    logging.info("Preloading content data.")
    topic_tools.get_content_cache(force=force, annotate=True)

    logging.info("Preloading topic tree data.")
    topic_tools.get_topic_tree(force=force, annotate=True)
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any content caches in memory that need
    annotation with file availability.
    """
    for lang in i18n.get_installed_language_packs(force=True).keys():
        logging.info("Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)

        logging.info("Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force, annotate=True, language=lang)

        logging.info("Preloading topic tree data for language {lang}.".format(lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
def splat_handler(request, splat):
    slugs = filter(lambda x: x, splat.split("/"))
    current_node = topic_tools.get_topic_tree()
    while current_node:
        match = [ch for ch in (current_node.get('children') or []) if request.path.startswith(ch["path"])]
        if len(match) > 1:  # can only happen for leaf nodes (only when one node is blank?)
            match = [m for m in match if request.path == m["path"]]
        if not match:
            raise Http404
        current_node = match[0]
        if request.path == current_node["path"]:
            break

    if current_node["kind"] == "Topic":
        return topic_handler(request, cached_nodes={"topic": current_node})
    elif current_node["kind"] == "Video":
        prev, next = topic_tools.get_neighbor_nodes(current_node, neighbor_kind=current_node["kind"])
        return video_handler(request, cached_nodes={"video": current_node, "prev": prev, "next": next})
    elif current_node["kind"] == "Exercise":
        cached_nodes = topic_tools.get_related_videos(current_node, limit_to_available=False)
        cached_nodes["exercise"] = current_node
        cached_nodes["prev"], cached_nodes["next"] = topic_tools.get_neighbor_nodes(current_node, neighbor_kind=current_node['kind'])
        return exercise_handler(request, cached_nodes=cached_nodes)
    else:
        raise Http404
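# A minimal, self-contained sketch of the descent pattern splat_handler uses:
# starting from the root of a nested-dict tree, repeatedly pick the child whose
# "path" is a prefix of the requested path. The function name and toy tree below
# are hypothetical, not KA Lite data.
def find_node_by_path(root, request_path):
    node = root
    while node:
        matches = [ch for ch in (node.get("children") or []) if request_path.startswith(ch["path"])]
        if not matches:
            return None  # splat_handler raises Http404 at this point instead
        node = matches[0]
        if request_path == node["path"]:
            return node
    return None

# Example usage on a toy tree:
# root = {"path": "/", "children": [{"path": "/math/", "children": [{"path": "/math/algebra/", "children": []}]}]}
# find_node_by_path(root, "/math/algebra/")  # -> the algebra node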
def get_annotated_topic_tree(request, lang_code=None):
    call_command("videoscan")  # Could potentially be very slow, blocking the request... but at least it's via an API request!

    lang_code = lang_code or request.language  # Get annotations for the current language.
    statusdict = dict(VideoFile.objects.values_list("youtube_id", "percent_complete"))

    return JsonResponse(annotate_topic_tree(get_topic_tree(language=lang_code), statusdict=statusdict, lang_code=lang_code))
def setUp(self):
    super(UpdatesTestCase, self).setUp()

    # Set up the topic tree
    stamp_availability_on_topic(get_topic_tree(), force=True, stamp_urls=True)
def regenerate_all_pages_related_to_videos(video_ids):
    """Regenerate all webpages related to a specific list of videos.
    This is useful for increasing performance on a newly set-up server."""
    paths_to_regenerate = set()  # unique set
    for video_id in video_ids:
        for video_path in topic_tools.get_video_page_paths(video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=video_path, base_path=topic_tools.get_topic_tree()["path"])  # start at the root
            )
        for exercise_path in topic_tools.get_exercise_page_paths(video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=exercise_path, base_path=topic_tools.get_topic_tree()["path"])  # start at the root
            )

    # Now, regenerate each page.
    for path in paths_to_regenerate:
        create_cache_entry(path=path, force=True)

    return paths_to_regenerate
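# generate_all_paths() is defined elsewhere in the codebase; based on how it is
# called here (a leaf path plus a base_path so traversal "starts at the root"),
# it plausibly yields every ancestor path between the root and the leaf. A
# hedged, self-contained sketch of that idea -- the name and exact behavior are
# assumptions, not the codebase's implementation:
def generate_all_paths_sketch(path, base_path):
    """Return base_path plus each successively deeper prefix of `path`."""
    assert path.startswith(base_path)
    paths = [base_path]
    current = base_path.rstrip("/")
    for part in path[len(base_path):].strip("/").split("/"):
        if part:
            current += "/" + part
            paths.append(current + "/")
    return paths

# generate_all_paths_sketch("/math/algebra/video-x/", "/")
# -> ["/", "/math/", "/math/algebra/", "/math/algebra/video-x/"]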
def invalidate_all_pages_related_to_video(video_id=None):
    """Given a video file, recurse backwards up the hierarchy and invalidate all pages.
    Also include video pages and related exercise pages.
    """
    # Expire all video files and related paths
    video_paths = topic_tools.get_video_page_paths(video_id=video_id)
    exercise_paths = topic_tools.get_exercise_page_paths(video_id=video_id)
    leaf_paths = set(video_paths).union(set(exercise_paths))

    for leaf_path in leaf_paths:
        all_paths = generate_all_paths(path=leaf_path, base_path=topic_tools.get_topic_tree()["path"])  # start at the root
        for path in filter(has_cache_key, all_paths):
            expire_page(path=path)
def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, force=False, *args, **kwargs):
    """
    Centralized logic for how to refresh the topic cache, for each type of object.

    When the object is about to be used, this code runs to refresh its data,
    balancing correctness against efficiency.
    """
    if not cached_nodes:
        cached_nodes = {"topics": topic_tools.get_topic_tree()}

    def has_computed_urls(node):
        return "subtitles" in node.get("availability", {}).get("en", {})

    for node in cached_nodes.values():
        if not node:
            continue
        has_children = bool(node.get("children"))

        # Properties not yet marked
        if node["kind"] == "Video":
            if force or not has_computed_urls(node):
                recount_videos_and_invalidate_parents(topic_tools.get_parent(node), force=True, stamp_urls=True)

        elif node["kind"] == "Exercise":
            for video in topic_tools.get_related_videos(exercise=node).values():
                if not has_computed_urls(node):
                    stamp_availability_on_video(video, force=True)  # will be done by force below

        elif node["kind"] == "Topic":
            bottom_layer_topic = "Topic" not in node["contains"]
            # always run do_video_counts_need_update_question_mark(), to make sure the (internal) counts stay up to date.
            force = do_video_counts_need_update_question_mark() or force or bottom_layer_topic
            recount_videos_and_invalidate_parents(
                node,
                force=force,
                stamp_urls=bottom_layer_topic,
            )

    kwargs.update(cached_nodes)
    return handler(request, *args, **kwargs)
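# Note that refresh_topic_cache_wrapper_fn closes over `handler`, so it only
# makes sense inside a decorator. A minimal sketch of how such a decorator
# might be wired (the decorator name and usage below are assumptions):
def refresh_topic_cache(handler):
    def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, force=False, *args, **kwargs):
        # ... refresh cached_nodes as above ...
        kwargs.update(cached_nodes)
        return handler(request, *args, **kwargs)
    return refresh_topic_cache_wrapper_fn

# @refresh_topic_cache
# def topic_handler(request, topic=None, **kwargs):
#     ...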
def get_neighbors_at_dist_1(topic_index, subtopic_index, topic):
    """Return a list of the neighbors at distance 1 from the specified subtopic."""
    neighbors = []  # neighbor list to be returned
    tree = get_topic_tree(parent="root")

    # pointers to the previous and next subtopic (list indices)
    prev = subtopic_index - 1
    next = subtopic_index + 1

    # if there is a previous subtopic (neighbor to the left)
    if prev > -1:
        neighbors.append(topic['children'][prev] + ' 1')  # neighbor on the left side
    # else check if there is a neighboring topic (left)
    else:
        if (topic_index - 1) > -1:
            neighbor_length = len(tree[topic_index - 1]['children'])
            neighbors.append(tree[topic_index - 1]['children'][neighbor_length - 1] + ' 4')
        else:
            neighbors.append(' ')  # no neighbor to the left

    # if there is a neighbor to the right
    if next < len(topic['children']):
        neighbors.append(topic['children'][next] + ' 1')  # neighbor on the right side
    # else check if there is a neighboring topic (right)
    else:
        if (topic_index + 1) < len(tree):
            # the 4 denotes the number of nodes in the path to this other node; it will always be 4
            neighbors.append(tree[topic_index + 1]['children'][0] + ' 4')
        else:
            neighbors.append(' ')  # no neighbor on the right side

    return neighbors
def get_exercise_parents_lookup_table():
    """Return a dictionary with exercise ids as keys and topic ids as values."""
    global exercise_parents_lookup_table

    if exercise_parents_lookup_table:
        return exercise_parents_lookup_table

    # topic tree for traversal
    tree = get_topic_tree(parent="root")

    # 3 possible layers
    for topic in tree:
        for subtopic_id in topic['children']:
            exercises = get_topic_exercises(subtopic_id)

            for ex in exercises:
                if ex['id'] not in exercise_parents_lookup_table:
                    exercise_parents_lookup_table[ex['id']] = {
                        "subtopic_id": subtopic_id,
                        "topic_id": topic['id'],
                    }

    return exercise_parents_lookup_table
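# get_exercise_parents_lookup_table() (and generate_recommendation_data() below)
# use a module-level variable as a build-once cache. The same pattern in a
# minimal, self-contained form; the names here are illustrative only:
_expensive_table = {}

def get_expensive_table():
    global _expensive_table
    if _expensive_table:  # already built; note a legitimately empty result would be rebuilt every call
        return _expensive_table
    for key in ("a", "b", "c"):  # stand-in for the topic-tree traversal
        _expensive_table[key] = key.upper()
    return _expensive_table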
def preload_global_data():
    logging.info("Preloading topic data.")
    stamp_availability_on_topic(get_topic_tree(), force=True, stamp_urls=True)
def topic_tree(request, channel):
    parent = request.GET.get("parent")
    return JsonResponse(get_topic_tree(channel=channel, language=request.language, parent=parent))
def watch_home(request):
    """Dummy wrapper function for topic_handler with url=/"""
    return topic_handler(request, cached_nodes={"topic": topic_tools.get_topic_tree()})
def topic_tree(request, channel):
    return JsonResponse(get_topic_tree(channel=channel))
def generate_recommendation_data():
    """Traverses the topic tree to generate a dictionary with related subtopics per subtopic."""
    global recommendation_data

    if recommendation_data:
        return recommendation_data

    # populate data exploiting the structure of the topic tree
    tree = get_topic_tree(parent="root")

    ######## DYNAMIC ALG #########

    ##
    # ITERATION 1 - grabs all immediate neighbors of each subtopic
    ##

    # array indices for the current topic and subtopic
    topic_index = 0
    subtopic_index = 0

    # for each topic
    for topic in tree:
        subtopic_index = 0
        # for each subtopic, add the neighbors at distance 0 and 1 (at distance 1 there are 2 for each)
        for subtopic_id in topic['children']:
            neighbors_dist_1 = get_neighbors_at_dist_1(topic_index, subtopic_index, topic)

            # add to recommendation_data - distance 0 (itself) + distance 1
            recommendation_data[subtopic_id] = {'related_subtopics': ([subtopic_id + ' 0'] + neighbors_dist_1)}
            subtopic_index += 1

        topic_index += 1

    ##
    # ITERATION 2 - grabs all subsequent neighbors of each subtopic via
    # breadth-first search (BFS)
    ##

    # loop through all subtopics currently in the recommendation_data dict
    for subtopic in recommendation_data:
        related = recommendation_data[subtopic]['related_subtopics']  # list of related subtopics (right now only 2)
        other_neighbors = get_subsequent_neighbors(related, recommendation_data, subtopic)
        recommendation_data[subtopic]['related_subtopics'] += other_neighbors  # append new neighbors

    ##
    # ITERATION 2.5 - sort all results by increasing distance and strip the final
    # result of all distance values in recommendation_data (note that there are only 3 possible: 0, 1, 4).
    ##

    # for each item in recommendation_data
    for subtopic in recommendation_data:
        at_dist_4 = []     # array to hold the subtopic ids of recs at distance 4
        at_dist_lt_4 = []  # array to hold subtopic ids of recs at distance 0 or 1

        # for this item, loop through all recommendations
        for recc in recommendation_data[subtopic]['related_subtopics']:
            if recc.split(" ")[1] == '4':  # if at distance 4, add to that array
                at_dist_4.append(recc.split(" ")[0])
            else:
                at_dist_lt_4.append(recc.split(" ")[0])

        sorted_related = at_dist_lt_4 + at_dist_4  # append later items at the end of earlier ones
        recommendation_data[subtopic]['related_subtopics'] = sorted_related

    return recommendation_data
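# The related_subtopics entries above encode distance by appending " 0", " 1",
# or " 4" to each subtopic id, then split it back out in iteration 2.5. A
# minimal sketch of that sort-and-strip step in isolation (the function name
# is illustrative, not from the codebase):
def sort_and_strip_distances(related):
    """Sort 'id distance' strings by distance and return the bare ids."""
    pairs = [rec.rsplit(" ", 1) for rec in related]
    pairs.sort(key=lambda p: int(p[1]))  # stable sort keeps the original order within each distance
    return [subtopic_id for subtopic_id, _dist in pairs]

# sort_and_strip_distances(["b 4", "a 0", "c 1"])  # -> ["a", "c", "b"]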
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    topic_ids = get_knowledgemap_topics()
    topic_ids = topic_ids + [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topics = [node_cache["Topic"][id][0] for id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" topic
    for elog in exercise_logs:
        if not elog["exercise_id"] in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            # we also want to support 3rd-party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" topic.
    for vlog in video_logs:
        if not vlog["video_id"] in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            # we also want to support 3rd-party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if not topic in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]

    # Now compute stats
    for id in topic_ids:  # set(topic_exercises.keys()).union(set(topic_videos.keys())):
        n_exercises = len(topic_exercises.get(id, []))
        n_videos = len(topic_videos.get(id, []))

        exercises = exercises_by_topic.get(id, [])
        videos = videos_by_topic.get(id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[id] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak is currently a pct, but expressed with max 100; convert to a
        # proportion (like the other percentages here)
        stats[id] = {
            "ex:pct_mastery": 0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started": 0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points": 0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak": 0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),
            "vid:pct_started": 0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed": 0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes": 0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points": 0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)
    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"), "type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ],
    }
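# max_none() above is assumed to be a small helper that returns the max of a
# list while tolerating None entries (and an all-None or empty list). A
# plausible sketch, not necessarily the codebase's implementation:
def max_none(values):
    values = [v for v in values if v is not None]
    return max(values) if values else None

# max_none([None, 3, 1])  # -> 3
# max_none([None, None])  # -> None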
def topic_tree(request, channel):
    return JsonResponse(get_topic_tree(channel=channel, language=request.language))