def recurse_nodes_to_extract_knowledge_map(node, node_cache):
    """
    Internal function for recursing the topic tree and building the knowledge map.
    Requires rebranding of metadata done by recurse_nodes function.

    Reads and mutates `knowledge_map` and `knowledge_topics`, which are not
    parameters and must be provided by the surrounding scope.

    For a topic flagged "in_knowledge_map":
      * if its slug is missing from knowledge_map["topics"], the flag is cleared
        on the node and on every node cached under the same slug;
      * its exercise leaves are stored in knowledge_topics[slug]; when there are
        none, the topic is dropped from both knowledge_topics and the knowledge
        map and the flag is cleared everywhere.
    For a topic not flagged, any entry for its slug is removed from the
    knowledge map.  Child topics are then processed recursively.

    :param node: topic node (dict) with at least "kind" and "slug" keys.
    :param node_cache: mapping whose "Topic" entry maps a slug to the nodes
        cached under that slug (iterated here as a sequence of node dicts).
    """
    assert node["kind"] == "Topic"

    slug = node["slug"]
    map_topics = knowledge_map["topics"]

    if node.get("in_knowledge_map", None):
        if slug not in map_topics:
            logging.debug("Not in knowledge map: %s", slug)
            node["in_knowledge_map"] = False
            # A distinct loop variable is essential: reusing `node` here would
            # rebind the parameter, and everything below (leaf collection,
            # child recursion) would run on the last cached node instead.
            for cached_node in node_cache["Topic"][slug]:
                cached_node["in_knowledge_map"] = False

        # NOTE(review): a topic demoted just above still receives a
        # knowledge_topics entry when it has exercises -- confirm intended.
        knowledge_topics[slug] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

        if not knowledge_topics[slug]:
            sys.stderr.write("Removing topic from topic tree: no exercises. %s\n" % slug)
            del knowledge_topics[slug]
            # The slug may already be absent (see the branch above); an
            # unconditional `del` would raise KeyError in that case.
            if slug in map_topics:
                del map_topics[slug]
            node["in_knowledge_map"] = False
            for cached_node in node_cache["Topic"][slug]:
                cached_node["in_knowledge_map"] = False
    elif slug in map_topics:
        sys.stderr.write("Removing topic from topic tree; does not belong. '%s'\n" % slug)
        logging.warning("Removing from knowledge map: %s", slug)
        del map_topics[slug]

    # Only topic children can take part in the knowledge map.
    for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
        recurse_nodes_to_extract_knowledge_map(child, node_cache)
def validate_data(topictree, node_cache, slug2id_map, knowledge_map):
    """
    Cross-check the topic tree, node cache and knowledge map, writing one line
    to stderr for every inconsistency found.  Nothing is returned and nothing
    is modified.

    Each node_cache[kind] entry is read as a sequence of nodes, of which only
    the first is inspected.

    :param topictree: root node handed to topic_tools.get_topic_by_path.
    :param node_cache: mapping with "Exercise", "Video" and "Topic" entries.
    :param slug2id_map: mapping from video slug to the key used in node_cache["Video"].
    :param knowledge_map: mapping whose "topics" entry is keyed by topic slug.
    """
    def report(message):
        sys.stderr.write(message)

    # Exercises: the HTML file must exist and every related video must resolve.
    for cached_exercises in node_cache['Exercise'].values():
        exercise = cached_exercises[0]
        html_path = os.path.join(
            settings.PROJECT_PATH, "static", "js", "khan-exercises", "exercises",
            "%s.html" % exercise["slug"])
        if not os.path.exists(html_path):
            report("Could not find exercise HTML file: %s\n" % html_path)

        for video_slug in exercise.get("related_video_slugs", []):
            if video_slug in slug2id_map and slug2id_map[video_slug] in node_cache["Video"]:
                continue
            report("Could not find related video %s in node_cache (from exercise %s)\n"
                   % (video_slug, exercise["slug"]))

    # Videos: a related exercise, when set, must be present in the cache.
    for cached_videos in node_cache["Video"].values():
        video = cached_videos[0]
        related = video["related_exercise"]
        if not related:
            continue
        if related["slug"] not in node_cache["Exercise"]:
            report("Could not find related exercise %s in node_cache (from video %s)\n"
                   % (related["slug"], video["slug"]))

    # Topics: each one must have children in the topic tree.
    for cached_topics in node_cache["Topic"].values():
        topic = cached_topics[0]
        tree_node = topic_tools.get_topic_by_path(topic["path"], root_node=topictree)
        if not tree_node.get("children"):
            report("Could not find any children for topic %s\n" % (topic["path"]))

    # Knowledge map: each topic must be cached and have a topicdata file.
    for slug in knowledge_map["topics"]:
        if slug not in node_cache["Topic"]:
            report("Unknown topic in knowledge map: %s\n" % slug)

        data_file = os.path.join(
            settings.PROJECT_PATH + "/static/data/", "topicdata", "%s.json" % slug)
        if not os.path.exists(data_file):
            report("Could not find topic data in topicdata directory: '%s'\n" % slug)

    # Cached topics: the "in_knowledge_map" flag must agree with the map, and
    # flagged topics must have children and exercises.
    for cached_topics in node_cache["Topic"].values():
        topic = cached_topics[0]
        flagged = topic["in_knowledge_map"]
        listed = topic["slug"] in knowledge_map["topics"]

        if flagged and not listed:
            report("Topic '%-40s' not in knowledge map, but node_cache says it should be.\n"
                   % topic["slug"])
        elif listed and not flagged:
            report("Topic '%-40s' in knowledge map, but node_cache says it shouldn't be.\n"
                   % topic["slug"])
        elif flagged:
            if not topic_tools.get_topic_by_path(topic["path"], root_node=topictree).get("children"):
                report("Topic '%-40s' in knowledge map, but has no children.\n" % topic["slug"])
            elif not topic_tools.get_all_leaves(
                    topic_tools.get_topic_by_path(topic["path"], root_node=topictree),
                    leaf_type="Exercise"):
                report("Topic '%40s' in knowledge map, but has no exercises.\n" % topic["slug"])
def recurse_nodes_to_extract_knowledge_map(node, node_cache):
    """
    Internal function for recursing the topic tree and building the knowledge map.
    Requires rebranding of metadata done by recurse_nodes function.

    `knowledge_map` and `knowledge_topics` are not parameters; they are read
    and mutated through the surrounding scope.

    A topic flagged "in_knowledge_map" that is missing from
    knowledge_map["topics"] has its flag cleared (on the node and on all nodes
    cached under its slug).  Its exercise leaves are recorded in
    knowledge_topics[slug]; a topic without exercises is removed from
    knowledge_topics and the knowledge map and is unflagged.  A topic that is
    not flagged is removed from the knowledge map if present.  The function
    then recurses into every child of kind "Topic".

    :param node: topic node (dict) with at least "kind" and "slug" keys.
    :param node_cache: mapping whose "Topic" entry maps slug -> cached nodes.
    """
    assert node["kind"] == "Topic"

    topic_slug = node["slug"]
    topics_in_map = knowledge_map["topics"]

    if node.get("in_knowledge_map", None):
        # NOTE(review): node_cache["Topic"][slug] is iterated as a list of
        # nodes here, whereas validate_data(topictree, node_cache,
        # knowledge_map) in this file reads each cache entry as a single node
        # dict -- confirm which cache layout applies.
        if topic_slug not in topics_in_map:
            logging.debug("Not in knowledge map: %s", topic_slug)
            node["in_knowledge_map"] = False
            # Do not reuse the name `node` as the loop variable: that would
            # replace the parameter with the last cached node for the rest of
            # the call (leaf lookup and recursion included).
            for cached in node_cache["Topic"][topic_slug]:
                cached["in_knowledge_map"] = False

        # NOTE(review): topics unflagged just above are still recorded in
        # knowledge_topics when they have exercises -- confirm intended.
        knowledge_topics[topic_slug] = topic_tools.get_all_leaves(
            node, leaf_type="Exercise")

        if not knowledge_topics[topic_slug]:
            sys.stderr.write(
                "Removing topic from topic tree: no exercises. %s\n" % topic_slug)
            del knowledge_topics[topic_slug]
            # Guarded: the slug is absent when the branch above already found
            # it missing, and a bare `del` would then raise KeyError.
            if topic_slug in topics_in_map:
                del topics_in_map[topic_slug]
            node["in_knowledge_map"] = False
            for cached in node_cache["Topic"][topic_slug]:
                cached["in_knowledge_map"] = False
    elif topic_slug in topics_in_map:
        sys.stderr.write(
            "Removing topic from topic tree; does not belong. '%s'\n" % topic_slug)
        logging.warning("Removing from knowledge map: %s", topic_slug)
        del topics_in_map[topic_slug]

    # Descend into sub-topics only; leaves (exercises, videos) are skipped.
    for child in [
            n for n in node.get("children", []) if n["kind"] == "Topic"
    ]:
        recurse_nodes_to_extract_knowledge_map(child, node_cache)
def validate_data(topictree, node_cache, knowledge_map):
    """
    Cross-check the topic tree, node cache and knowledge map, writing one line
    to stderr per inconsistency.  Nothing is returned and nothing is modified.

    Each node_cache[kind] entry is read as a single node dict keyed by the
    identifier used for lookups (related videos are looked up by the values in
    "related_video_readable_ids").

    :param topictree: root node handed to topic_tools.get_topic_by_path.
    :param node_cache: mapping with "Exercise", "Video" and "Topic" entries.
    :param knowledge_map: mapping whose "topics" entry is keyed by topic slug.
    """
    def complain(text):
        sys.stderr.write(text)

    # Exercises: HTML file on disk, and all related videos known to the cache.
    for exercise in node_cache['Exercise'].values():
        html_file = os.path.join(
            settings.PROJECT_PATH, "static", "js", "khan-exercises", "exercises",
            "%s.html" % exercise["slug"])
        if not os.path.exists(html_file):
            complain("Could not find exercise HTML file: %s\n" % html_file)

        for readable_id in exercise.get("related_video_readable_ids", []):
            if readable_id in node_cache["Video"]:
                continue
            complain("Could not find related video %s in node_cache (from exercise %s)\n"
                     % (readable_id, exercise["slug"]))

    # Videos: a related exercise, when set, must be known to the cache.
    for video in node_cache["Video"].values():
        related = video["related_exercise"]
        if not related:
            continue
        if related["slug"] not in node_cache["Exercise"]:
            complain("Could not find related exercise %s in node_cache (from video %s)\n"
                     % (related["slug"], video["slug"]))

    # Topics: every cached topic needs children in the topic tree.
    for topic in node_cache["Topic"].values():
        tree_node = topic_tools.get_topic_by_path(topic["path"], root_node=topictree)
        if not tree_node.get("children"):
            complain("Could not find any children for topic %s\n" % (topic["path"]))

    # Knowledge map: every topic must be cached and have a topicdata file.
    for slug in knowledge_map["topics"]:
        if slug not in node_cache["Topic"]:
            complain("Unknown topic in knowledge map: %s\n" % slug)

        data_file = os.path.join(
            settings.PROJECT_PATH + "/static/data/", "topicdata", "%s.json" % slug)
        if not os.path.exists(data_file):
            complain("Could not find topic data in topicdata directory: '%s'\n" % slug)

    # Cached topics: the "in_knowledge_map" flag must match the map, and
    # flagged topics must have both children and exercises.
    for topic in node_cache["Topic"].values():
        wanted = topic["in_knowledge_map"]
        present = topic["slug"] in knowledge_map["topics"]

        if wanted and not present:
            complain("Topic '%-40s' not in knowledge map, but node_cache says it should be.\n"
                     % topic["slug"])
        elif present and not wanted:
            complain("Topic '%-40s' in knowledge map, but node_cache says it shouldn't be.\n"
                     % topic["slug"])
        elif wanted:
            if not topic_tools.get_topic_by_path(topic["path"], root_node=topictree).get("children"):
                complain("Topic '%-40s' in knowledge map, but has no children.\n" % topic["slug"])
            elif not topic_tools.get_all_leaves(
                    topic_tools.get_topic_by_path(topic["path"], root_node=topictree),
                    leaf_type="Exercise"):
                complain("Topic '%40s' in knowledge map, but has no exercises.\n" % topic["slug"])