Example #1
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any content caches in memory that need annotation
    with file availability
    """
    logging.info("Preloading content data.")
    topic_tools.get_content_cache(force=force, annotate=True)
    logging.info("Preloading topic tree data.")
    topic_tools.get_topic_tree(force=force, annotate=True)
Example #2
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any content caches in memory that need annotation
    with file availability
    """
    for lang in i18n.get_installed_language_packs(force=True).keys():
        logging.info("Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)
        logging.info("Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force, annotate=True, language=lang)
        logging.info("Preloading topic tree data for language {lang}.".format(lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
Example #3
def splat_handler(request, splat):
    slugs = filter(lambda x: x, splat.split("/"))
    current_node = topic_tools.get_topic_tree()
    while current_node:
        match = [ch for ch in (current_node.get('children') or []) if request.path.startswith(ch["path"])]
        if len(match) > 1:  # can only happen for leaf nodes (only when one node is blank?)
            match = [m for m in match if request.path == m["path"]]
        if not match:
            raise Http404
        current_node = match[0]
        if request.path == current_node["path"]:
            break

    if current_node["kind"] == "Topic":
        return topic_handler(request, cached_nodes={"topic": current_node})
    elif current_node["kind"] == "Video":
        prev, next = topic_tools.get_neighbor_nodes(current_node, neighbor_kind=current_node["kind"])
        return video_handler(request, cached_nodes={"video": current_node, "prev": prev, "next": next})
    elif current_node["kind"] == "Exercise":
        cached_nodes = topic_tools.get_related_videos(current_node, limit_to_available=False)
        cached_nodes["exercise"] = current_node
        cached_nodes["prev"], cached_nodes["next"] = topic_tools.get_neighbor_nodes(current_node, neighbor_kind=current_node['kind'])
        return exercise_handler(request, cached_nodes=cached_nodes)
    else:
        raise Http404
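To make the matching loop above concrete, here is a minimal, self-contained sketch of the same walk on an invented tree; the node paths and kinds are assumptions for illustration and do not come from the real topic tree.

# Invented toy tree; paths and kinds are assumptions for illustration only.
tree = {
    "path": "/math/", "kind": "Topic", "children": [
        {"path": "/math/arithmetic/", "kind": "Topic", "children": [
            {"path": "/math/arithmetic/basic-addition/", "kind": "Video", "children": []},
        ]},
    ],
}

def resolve(path, current_node):
    # Same descent as splat_handler: follow the child whose path prefixes the
    # request path until an exact match is found.
    while current_node:
        match = [ch for ch in (current_node.get("children") or []) if path.startswith(ch["path"])]
        if not match:
            return None  # splat_handler raises Http404 here
        current_node = match[0]
        if path == current_node["path"]:
            return current_node

print(resolve("/math/arithmetic/basic-addition/", tree)["kind"])  # -> Video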
Example #4
def get_annotated_topic_tree(request, lang_code=None):
    call_command("videoscan")  # Could potentially be very slow, blocking request... but at least it's via an API request!

    lang_code = lang_code or request.language      # Get annotations for the current language.
    statusdict = dict(VideoFile.objects.values_list("youtube_id", "percent_complete"))

    return JsonResponse(annotate_topic_tree(get_topic_tree(language=lang_code), statusdict=statusdict, lang_code=lang_code))
Example #5
    def setUp(self):
        super(UpdatesTestCase, self).setUp()

        # Set up the topic tree
        stamp_availability_on_topic(get_topic_tree(),
                                    force=True,
                                    stamp_urls=True)
Example #6
def initialize_content_caches(force=False):
    """
    Catch-all function to regenerate any content caches in memory that need annotation
    with file availability
    """
    for lang in i18n.get_installed_language_packs(force=True).keys():
        logging.info(
            "Preloading exercise data for language {lang}.".format(lang=lang))
        topic_tools.get_exercise_cache(force=force, language=lang)
        logging.info(
            "Preloading content data for language {lang}.".format(lang=lang))
        topic_tools.get_content_cache(force=force,
                                      annotate=True,
                                      language=lang)
        logging.info("Preloading topic tree data for language {lang}.".format(
            lang=lang))
        topic_tools.get_topic_tree(force=force, annotate=True, language=lang)
Example #7
def regenerate_all_pages_related_to_videos(video_ids):
    """Regenerate all webpages related to a specific list of videos.  This is good for increasing new server performance."""
    paths_to_regenerate = set()  # unique set
    for video_id in video_ids:

        for video_path in topic_tools.get_video_page_paths(video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=video_path, base_path=topic_tools.get_topic_tree()["path"])
            )  # start at the root
        for exercise_path in topic_tools.get_exercise_page_paths(video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=exercise_path, base_path=topic_tools.get_topic_tree()["path"])
            )  # start at the root

    # Now, regenerate every collected page.
    for path in paths_to_regenerate:
        create_cache_entry(path=path, force=True)

    return paths_to_regenerate
Example #8
def get_annotated_topic_tree(request, lang_code=None):
    call_command(
        "videoscan"
    )  # Could potentially be very slow, blocking request... but at least it's via an API request!

    lang_code = lang_code or request.language  # Get annotations for the current language.
    statusdict = dict(
        VideoFile.objects.values_list("youtube_id", "percent_complete"))

    return JsonResponse(
        annotate_topic_tree(get_topic_tree(),
                            statusdict=statusdict,
                            lang_code=lang_code))
Example #9
def regenerate_all_pages_related_to_videos(video_ids):
    """Regenerate all webpages related to a specific list of videos.  This is good for increasing new server performance."""
    paths_to_regenerate = set()  # unique set
    for video_id in video_ids:

        for video_path in topic_tools.get_video_page_paths(video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=video_path,
                                   base_path=topic_tools.get_topic_tree()['path']))  # start at the root
        for exercise_path in topic_tools.get_exercise_page_paths(
                video_id=video_id):
            paths_to_regenerate = paths_to_regenerate.union(
                generate_all_paths(path=exercise_path,
                                   base_path=topic_tools.get_topic_tree()['path']))  # start at the root

    # Now, regenerate every collected page.
    for path in paths_to_regenerate:
        create_cache_entry(path=path, force=True)

    return paths_to_regenerate
Example #10
def invalidate_all_pages_related_to_video(video_id=None):
    """Given a video file, recurse backwards up the hierarchy and invalidate all pages.
    Also include video pages and related exercise pages.
    """

    # Expire all video files and related paths
    video_paths = topic_tools.get_video_page_paths(video_id=video_id)
    exercise_paths = topic_tools.get_exercise_page_paths(video_id=video_id)
    leaf_paths = set(video_paths).union(set(exercise_paths))

    for leaf_path in leaf_paths:
        all_paths = generate_all_paths(path=leaf_path, base_path=topic_tools.get_topic_tree()["path"])
        for path in filter(has_cache_key, all_paths):  # start at the root
            expire_page(path=path)
Example #11
def invalidate_all_pages_related_to_video(video_id=None):
    """Given a video file, recurse backwards up the hierarchy and invalidate all pages.
    Also include video pages and related exercise pages.
    """

    # Expire all video files and related paths
    video_paths = topic_tools.get_video_page_paths(video_id=video_id)
    exercise_paths = topic_tools.get_exercise_page_paths(video_id=video_id)
    leaf_paths = set(video_paths).union(set(exercise_paths))

    for leaf_path in leaf_paths:
        all_paths = generate_all_paths(
            path=leaf_path, base_path=topic_tools.get_topic_tree()['path'])
        for path in filter(has_cache_key, all_paths):  # start at the root
            expire_page(path=path)
Example #12
    def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, force=False, *args, **kwargs):
        """
        Centralized logic for how to refresh the topic cache, for each type of object.

        When the object is desired to be used, this code runs to refresh data,
        balancing between correctness and efficiency.
        """
        if not cached_nodes:
            cached_nodes = {"topics": topic_tools.get_topic_tree()}

        def has_computed_urls(node):
            return "subtitles" in node.get("availability", {}).get("en", {})

        for node in cached_nodes.values():
            if not node:
                continue
            has_children = bool(node.get("children"))

            # Properties not yet marked
            if node["kind"] == "Video":
                if force or not has_computed_urls(node):
                    recount_videos_and_invalidate_parents(topic_tools.get_parent(node), force=True, stamp_urls=True)

            elif node["kind"] == "Exercise":
                for video in topic_tools.get_related_videos(exercise=node).values():
                    if not has_computed_urls(video):
                        stamp_availability_on_video(video, force=True)  # will be done by force below

            elif node["kind"] == "Topic":
                bottom_layer_topic = "Topic" not in node["contains"]
                # always run do_video_counts_need_update_question_mark(), to make sure the (internal) counts stay up to date.
                force = do_video_counts_need_update_question_mark() or force or bottom_layer_topic
                recount_videos_and_invalidate_parents(
                    node,
                    force=force,
                    stamp_urls=bottom_layer_topic,
                )

        kwargs.update(cached_nodes)
        return handler(request, *args, **kwargs)
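Only the inner wrapper appears in Example #12. A sketch of how such a wrapper is presumably attached follows; the decorator name refresh_topic_cache and the decorated view are assumptions, since the enclosing function is not shown above.

# Presumed enclosing decorator (name is an assumption; only the wrapper above
# is shown in the source). It injects refreshed nodes into the wrapped view.
def refresh_topic_cache(handler):
    def refresh_topic_cache_wrapper_fn(request, cached_nodes={}, force=False, *args, **kwargs):
        pass  # body as in Example #12, ending in handler(request, *args, **kwargs)
    return refresh_topic_cache_wrapper_fn

@refresh_topic_cache
def topic_handler(request, topic=None, *args, **kwargs):
    pass  # receives the refreshed "topic" node via kwargs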
Example #13
def get_neighbors_at_dist_1(topic_index, subtopic_index, topic):
    """Return a list of the neighbors at distance 1 from the specified subtopic."""

    neighbors = []  # neighbor list to be returned

    tree = get_topic_tree(parent="root")

    # indices of the previous and next subtopic
    prev = subtopic_index - 1
    next = subtopic_index + 1

    # if there is a previous subtopic (neighbor to the left)
    if prev > -1:
        neighbors.append(topic['children'][prev] + ' 1')  # neighbor on the left side

    # else check if there is a neighboring topic (left)
    elif (topic_index - 1) > -1:
        neighbor_length = len(tree[topic_index - 1]['children'])
        neighbors.append(tree[topic_index - 1]['children'][neighbor_length - 1] + ' 4')
    else:
        neighbors.append(' ')  # no neighbor to the left

    # if there is a neighbor to the right
    if next < len(topic['children']):
        neighbors.append(topic['children'][next] + ' 1')  # neighbor on the right side

    # else check if there is a neighboring topic (right)
    elif (topic_index + 1) < len(tree):
        # the 4 denotes the number of nodes on the path to this other node; it is always 4
        neighbors.append(tree[topic_index + 1]['children'][0] + ' 4')
    else:
        neighbors.append(' ')  # no neighbor on the right side

    return neighbors
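As a concrete reading of the "<subtopic_id> <distance>" strings this returns, consider an invented two-topic tree. Assuming get_topic_tree(parent="root") returned the stub below, the call for "subtraction" (topic_index=0, subtopic_index=1) would yield one in-topic neighbor at distance 1 and one cross-topic neighbor at distance 4; the ids are made up for illustration.

# Invented stub tree; ids are assumptions, not real topic ids.
tree = [
    {"id": "arithmetic", "children": ["addition", "subtraction"]},
    {"id": "algebra", "children": ["equations", "inequalities"]},
]
topic_index, subtopic_index = 0, 1  # "subtraction"
topic = tree[topic_index]

# Left neighbor exists within the same topic -> distance 1.
left = topic["children"][subtopic_index - 1] + " 1"
# No right neighbor within the topic, so the first child of the next topic is
# used instead -> distance 4 (four nodes on the path between them).
right = tree[topic_index + 1]["children"][0] + " 4"

assert [left, right] == ["addition 1", "equations 4"]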
Example #14
def get_exercise_parents_lookup_table():
    """Return a dictionary mapping each exercise id to its parent subtopic and topic ids."""

    global exercise_parents_lookup_table

    if exercise_parents_lookup_table:
        return exercise_parents_lookup_table

    # topic tree for traversal
    tree = get_topic_tree(parent="root")

    # 3 possible layers
    for topic in tree:
        for subtopic_id in topic['children']:
            exercises = get_topic_exercises(subtopic_id)

            for ex in exercises:
                if ex['id'] not in exercise_parents_lookup_table:
                    exercise_parents_lookup_table[ex['id']] = {
                        "subtopic_id": subtopic_id,
                        "topic_id": topic['id'],
                    }

    return exercise_parents_lookup_table
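A hedged usage sketch of the returned mapping; the exercise and topic ids below are invented for illustration.

# Hypothetical lookup; "dividing_fractions" and the parent ids are invented.
parents = get_exercise_parents_lookup_table()
entry = parents.get("dividing_fractions")
# entry would look like:
#   {"subtopic_id": "arithmetic-division", "topic_id": "arithmetic"}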
Example #15
def preload_global_data():
    logging.info("Preloading topic data.")
    stamp_availability_on_topic(get_topic_tree(), force=True, stamp_urls=True)
Example #16
def topic_tree(request, channel):
    parent = request.GET.get("parent")
    return JsonResponse(
        get_topic_tree(channel=channel,
                       language=request.language,
                       parent=parent))
Example #17
    def setUp(self):
        super(UpdatesTestCase, self).setUp()

        # Set up the topic tree
        stamp_availability_on_topic(get_topic_tree(), force=True, stamp_urls=True)
Example #18
def topic_tree(request, channel):
    parent = request.GET.get("parent")
    return JsonResponse(get_topic_tree(channel=channel, language=request.language, parent=parent))
Example #19
def watch_home(request):
    """Dummy wrapper function for topic_handler with url=/"""
    return topic_handler(request, cached_nodes={"topic": topic_tools.get_topic_tree()})
Example #20
def topic_tree(request, channel):
    return JsonResponse(get_topic_tree(channel=channel))
Example #21
def preload_global_data():
    logging.info("Preloading topic data.")
    stamp_availability_on_topic(get_topic_tree(),
                                force=True,
                                stamp_urls=True)
Example #22
def generate_recommendation_data():
    """Traverse the topic tree to generate a dictionary with related subtopics per subtopic."""

    global recommendation_data
    if recommendation_data:
        return recommendation_data

    # populate data by exploiting the structure of the topic tree
    tree = get_topic_tree(parent="root")

    ######## DYNAMIC ALG #########

    ##
    # ITERATION 1 - grab all immediate neighbors of each subtopic
    ##

    # list indices of the current topic and subtopic
    topic_index = 0
    subtopic_index = 0

    # for each topic
    for topic in tree:

        subtopic_index = 0

        # for each subtopic, add the neighbors at distance 0 and 1 (distance 1 contributes 2 each)
        for subtopic_id in topic['children']:

            neighbors_dist_1 = get_neighbors_at_dist_1(topic_index, subtopic_index, topic)

            # add to recommendation_data - distance 0 (itself) + distance 1
            recommendation_data[subtopic_id] = {'related_subtopics': ([subtopic_id + ' 0'] + neighbors_dist_1)}
            subtopic_index += 1

        topic_index += 1

    ##
    # ITERATION 2 - grab all subsequent neighbors of each subtopic via
    # breadth-first search (BFS)
    ##

    # loop through all subtopics currently in the recommendation_data dict
    for subtopic in recommendation_data:
        related = recommendation_data[subtopic]['related_subtopics']  # so far only the distance-0 and distance-1 entries
        other_neighbors = get_subsequent_neighbors(related, recommendation_data, subtopic)
        recommendation_data[subtopic]['related_subtopics'] += other_neighbors  # append new neighbors

    ##
    # ITERATION 2.5 - sort all results by increasing distance and strip the
    # distance values from the final result (only 3 values are possible: 0, 1, 4)
    ##

    # for each item in recommendation_data
    for subtopic in recommendation_data:
        at_dist_4 = []     # subtopic ids of recommendations at distance 4
        at_dist_lt_4 = []  # subtopic ids of recommendations at distance 0 or 1

        # for this item, loop through all recommendations
        for recc in recommendation_data[subtopic]['related_subtopics']:
            if recc.split(" ")[1] == '4':  # at distance 4: add to that list
                at_dist_4.append(recc.split(" ")[0])
            else:
                at_dist_lt_4.append(recc.split(" ")[0])

        sorted_related = at_dist_lt_4 + at_dist_4  # later items go after earlier ones
        recommendation_data[subtopic]['related_subtopics'] = sorted_related

    return recommendation_data
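To make the final sorting-and-stripping pass (Iteration 2.5) concrete, here is a minimal standalone rendering with invented subtopic ids: entries at distance 0 or 1 keep their relative order and come first, distance-4 entries go last, and the distance suffixes are dropped.

# Standalone illustration of Iteration 2.5; the ids are invented.
related = ["fractions 0", "decimals 1", "geometry-intro 4", "percents 1"]
at_dist_4 = [r.split(" ")[0] for r in related if r.split(" ")[1] == "4"]
at_dist_lt_4 = [r.split(" ")[0] for r in related if r.split(" ")[1] != "4"]
assert at_dist_lt_4 + at_dist_4 == ["fractions", "decimals", "percents", "geometry-intro"]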
Example #23
def student_view_context(request, xaxis="pct_mastery", yaxis="ex:attempts"):
    """
    Context done separately, to be importable for similar pages.
    """
    user = get_user_from_request(request=request)
    if not user:
        raise Http404("User not found.")

    node_cache = get_node_cache()
    topic_ids = get_knowledgemap_topics()
    topic_ids = topic_ids + [ch["id"] for node in get_topic_tree()["children"] for ch in node["children"] if node["id"] != "math"]
    topics = [node_cache["Topic"][id][0] for id in topic_ids]

    user_id = user.id
    exercise_logs = list(ExerciseLog.objects \
        .filter(user=user) \
        .values("exercise_id", "complete", "points", "attempts", "streak_progress", "struggling", "completion_timestamp"))
    video_logs = list(VideoLog.objects \
        .filter(user=user) \
        .values("video_id", "complete", "total_seconds_watched", "points", "completion_timestamp"))

    exercise_sparklines = dict()
    stats = dict()
    topic_exercises = dict()
    topic_videos = dict()
    exercises_by_topic = dict()
    videos_by_topic = dict()

    # Categorize every exercise log into a "midlevel" exercise
    for elog in exercise_logs:
        if not elog["exercise_id"] in node_cache["Exercise"]:
            # Sometimes KA updates their topic tree and eliminates exercises;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown exercise log for %s/%s" % (user_id, elog["exercise_id"]))
            continue

        parent_ids = [topic for ex in node_cache["Exercise"][elog["exercise_id"]] for topic in ex["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for exercise %s (parents=%s)" % (elog["exercise_id"], parent_ids))
            continue
        topic = topic.pop()
        if topic not in topic_exercises:
            topic_exercises[topic] = get_topic_exercises(path=node_cache["Topic"][topic][0]["path"])
        exercises_by_topic[topic] = exercises_by_topic.get(topic, []) + [elog]

    # Categorize every video log into a "midlevel" exercise.
    for vlog in video_logs:
        if not vlog["video_id"] in node_cache["Video"]:
            # Sometimes KA updates their topic tree and eliminates videos;
            #   we also want to support 3rd party switching of trees arbitrarily.
            logging.debug("Skip unknown video log for %s/%s" % (user_id, vlog["video_id"]))
            continue

        parent_ids = [topic for vid in node_cache["Video"][vlog["video_id"]] for topic in vid["ancestor_ids"]]
        topic = set(parent_ids).intersection(set(topic_ids))
        if not topic:
            logging.error("Could not find a topic for video %s (parents=%s)" % (vlog["video_id"], parent_ids))
            continue
        topic = topic.pop()
        if topic not in topic_videos:
            topic_videos[topic] = get_topic_videos(path=node_cache["Topic"][topic][0]["path"])
        videos_by_topic[topic] = videos_by_topic.get(topic, []) + [vlog]


    # Now compute stats
    for id in topic_ids:  # alternatively: set(topic_exercises.keys()).union(set(topic_videos.keys()))
        n_exercises = len(topic_exercises.get(id, []))
        n_videos = len(topic_videos.get(id, []))

        exercises = exercises_by_topic.get(id, [])
        videos = videos_by_topic.get(id, [])
        n_exercises_touched = len(exercises)
        n_videos_touched = len(videos)

        exercise_sparklines[id] = [el["completion_timestamp"] for el in filter(lambda n: n["complete"], exercises)]

        # total streak currently a pct, but expressed in max 100; convert to
        # proportion (like other percentages here)
        stats[id] = {
            "ex:pct_mastery":      0 if not n_exercises_touched else sum([el["complete"] for el in exercises]) / float(n_exercises),
            "ex:pct_started":      0 if not n_exercises_touched else n_exercises_touched / float(n_exercises),
            "ex:average_points":   0 if not n_exercises_touched else sum([el["points"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_attempts": 0 if not n_exercises_touched else sum([el["attempts"] for el in exercises]) / float(n_exercises_touched),
            "ex:average_streak":   0 if not n_exercises_touched else sum([el["streak_progress"] for el in exercises]) / float(n_exercises_touched) / 100.,
            "ex:total_struggling": 0 if not n_exercises_touched else sum([el["struggling"] for el in exercises]),
            "ex:last_completed": None if not n_exercises_touched else max_none([el["completion_timestamp"] or None for el in exercises]),

            "vid:pct_started":      0 if not n_videos_touched else n_videos_touched / float(n_videos),
            "vid:pct_completed":    0 if not n_videos_touched else sum([vl["complete"] for vl in videos]) / float(n_videos),
            "vid:total_minutes":      0 if not n_videos_touched else sum([vl["total_seconds_watched"] for vl in videos]) / 60.,
            "vid:average_points":   0. if not n_videos_touched else float(sum([vl["points"] for vl in videos]) / float(n_videos_touched)),
            "vid:last_completed": None if not n_videos_touched else max_none([vl["completion_timestamp"] or None for vl in videos]),
        }

    context = plotting_metadata_context(request)

    return {
        "form": context["form"],
        "groups": context["groups"],
        "facilities": context["facilities"],
        "student": user,
        "topics": topics,
        "exercises": topic_exercises,
        "exercise_logs": exercises_by_topic,
        "video_logs": videos_by_topic,
        "exercise_sparklines": exercise_sparklines,
        "no_data": not exercise_logs and not video_logs,
        "stats": stats,
        "stat_defs": [  # this order determines the order of display
            {"key": "ex:pct_mastery",      "title": _("% Mastery"),        "type": "pct"},
            {"key": "ex:pct_started",      "title": _("% Started"),        "type": "pct"},
            {"key": "ex:average_points",   "title": _("Average Points"),   "type": "float"},
            {"key": "ex:average_attempts", "title": _("Average Attempts"), "type": "float"},
            {"key": "ex:average_streak",   "title": _("Average Streak"),   "type": "pct"},
            {"key": "ex:total_struggling", "title": _("Struggling"),       "type": "int"},
            {"key": "ex:last_completed",   "title": _("Last Completed"),   "type": "date"},
            {"key": "vid:pct_completed",   "title": _("% Completed"),      "type": "pct"},
            {"key": "vid:pct_started",     "title": _("% Started"),        "type": "pct"},
            {"key": "vid:total_minutes",   "title": _("Average Minutes Watched"),"type": "float"},
            {"key": "vid:average_points",  "title": _("Average Points"),   "type": "float"},
            {"key": "vid:last_completed",  "title": _("Last Completed"),   "type": "date"},
        ]
    }
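As a sanity check on the stat formulas above, a toy computation with invented logs: a topic with three exercises, two of them touched and one complete.

# Toy illustration of the "ex:pct_mastery" and "ex:pct_started" formulas;
# all numbers are invented.
n_exercises = 3
exercises = [{"complete": True}, {"complete": False}]  # two touched logs
n_exercises_touched = len(exercises)

pct_mastery = sum([el["complete"] for el in exercises]) / float(n_exercises)
pct_started = n_exercises_touched / float(n_exercises)

assert abs(pct_mastery - 1 / 3.0) < 1e-9
assert abs(pct_started - 2 / 3.0) < 1e-9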
Example #24
def topic_tree(request, channel):
    return JsonResponse(
        get_topic_tree(channel=channel, language=request.language))