def get_recommendation_tree(data):
    """Build a mapping from each subtopic to the exercise ids of its related subtopics.

    Args:
        data -- a dictionary with each subtopic and its related_subtopics
            (from generate_recommendation_data())
    """
    recommendation_tree = {}
    for subtopic in data:
        key = str(subtopic)
        recommendation_tree[key] = []
        # Skip falsy ids (empty string) defensively; shouldn't happen in practice.
        for rel_subtopic in data[subtopic]['related_subtopics']:
            if not rel_subtopic:
                continue
            exercises = get_topic_contents(topic_id=rel_subtopic, kinds=["Exercise"])
            recommendation_tree[key].extend(ex['id'] for ex in exercises)
    return recommendation_tree
def get_exercise_parents_lookup_table():
    """Return a dictionary with exercise ids as keys and topic_ids as values."""
    global exercise_parents_lookup_table
    # Memoized: the module-level table is built only on the first call.
    if not exercise_parents_lookup_table:
        # Topic tree for traversal (3 possible layers).
        tree = get_topic_nodes_with_children(parent="root")
        for topic in tree:
            topic_id = topic['id']
            for subtopic_id in topic['children']:
                for exercise in get_topic_contents(topic_id=subtopic_id, kinds=["Exercise"]):
                    # First parent wins; later duplicates are ignored.
                    exercise_parents_lookup_table.setdefault(exercise['id'], {
                        "subtopic_id": subtopic_id,
                        "topic_id": topic_id,
                    })
    return exercise_parents_lookup_table
def get_recommendation_tree(data):
    """Returns a dictionary of related exercises for each subtopic.

    Args:
        data -- a dictionary with each subtopic and its related_subtopics
            (from generate_recommendation_data())
    """
    recommendation_tree = {}
    for subtopic in data:
        exercise_ids = []
        related_subtopics = data[subtopic]['related_subtopics']
        for rel_subtopic in related_subtopics:
            # Guard against empty-string ids (shouldn't happen, but be safe).
            if not rel_subtopic:
                continue
            for ex in get_topic_contents(topic_id=rel_subtopic, kinds=["Exercise"]):
                exercise_ids.append(ex['id'])
        recommendation_tree[str(subtopic)] = exercise_ids
    return recommendation_tree
def get_exercise_parents_lookup_table():
    """Return a dictionary with exercise ids as keys and topic_ids as values."""
    global exercise_parents_lookup_table
    if exercise_parents_lookup_table:
        # Already built on a previous call; reuse the cached table.
        return exercise_parents_lookup_table
    # Walk the topic tree (3 possible layers).
    for topic in get_topic_nodes_with_children(parent="root"):
        for subtopic_id in topic['children']:
            exercises = get_topic_contents(topic_id=subtopic_id, kinds=["Exercise"])
            if exercises is None:
                raise RuntimeError("Caught exception, tried to find topic contents for {}".format(subtopic_id))
            for ex in exercises:
                ex_id = ex['id']
                if ex_id not in exercise_parents_lookup_table:
                    exercise_parents_lookup_table[ex_id] = {
                        "subtopic_id": subtopic_id,
                        "topic_id": topic['id'],
                    }
    return exercise_parents_lookup_table
def return_log_type_details(log_type, topic_ids=None, language=None):
    """Look up the log model and query metadata for a log type.

    Args:
        log_type -- one of "exercise", "video", or "content".
        topic_ids -- optional topic ids whose contents should be collected.
        language -- optional language filter passed through to get_topic_contents.

    Returns:
        A (LogModel, fields, id_field, obj_ids, objects) tuple, or None for
        an unrecognized log_type.
    """
    fields = [
        "user",
        "points",
        "complete",
        "completion_timestamp",
        "completion_counter",
        "latest_activity_timestamp",
    ]
    if log_type == "exercise":
        LogModel = ExerciseLog
        fields += ["exercise_id", "attempts", "struggling", "streak_progress", "attempts_before_completion"]
        obj_id_field = "exercise_id__in"
    elif log_type == "video":
        LogModel = VideoLog
        fields += ["video_id", "total_seconds_watched"]
        obj_id_field = "video_id__in"
    elif log_type == "content":
        LogModel = ContentLog
        fields += ["content_id", "progress"]
        obj_id_field = "content_id__in"
    else:
        return None

    # "exercise_id__in" -> "exercise_id"
    id_field, _, _ = obj_id_field.partition("__")

    objects = []
    obj_ids = {}
    if topic_ids:
        for topic_id in topic_ids:
            objects.extend(get_topic_contents(topic_id=topic_id, kinds=[log_type.title()], language=language))
        obj_ids[obj_id_field] = [obj.get("id") for obj in objects]
    return LogModel, fields, id_field, obj_ids, objects
def get_playlist_entry_ids(cls, playlist):
    """Return a tuple of the playlist's video ids and exercise ids as sets"""
    entries = get_topic_contents(topic_id=playlist.get("id"))
    # Partition entry ids by kind using set comprehensions.
    video_ids = {entry.get("id") for entry in entries if entry.get("kind") == "Video"}
    exercise_ids = {entry.get("id") for entry in entries if entry.get("kind") == "Exercise"}
    return (video_ids, exercise_ids)
def get_exercises_from_topics(topicId_list):
    """Return an ordered list of the first 5 exercise ids under a given subtopic/topic."""
    exercise_ids = []
    for topic_id in topicId_list:
        # Skip falsy (empty) topic ids.
        if not topic_id:
            continue
        # Cap at 5 exercises per topic; widen the slice to return more.
        for exercise in get_topic_contents(topic_id=topic_id, kinds=["Exercise"])[:5]:
            exercise_ids.append(exercise['id'])  # only keep the id
    return exercise_ids
def get_exercises_from_topics(topicId_list):
    """Return an ordered list of the first 5 exercise ids under a given subtopic/topic."""
    result = []
    for topic in topicId_list:
        if topic:
            # Take at most 5 per topic; adjust the slice to allow more.
            top_five = get_topic_contents(topic_id=topic, kinds=["Exercise"])[:5]
            result.extend(entry['id'] for entry in top_five)  # only keep the ids
    return result
def return_log_type_details(log_type, topic_ids=None):
    """Map a log type name to its model and query metadata.

    Args:
        log_type -- one of "exercise", "video", or "content".
        topic_ids -- optional topic ids whose contents should be collected.

    Returns:
        A (LogModel, fields, id_field, obj_ids, objects) tuple, or None for
        an unrecognized log_type.
    """
    fields = ["user", "points", "complete", "completion_timestamp", "completion_counter"]
    if log_type == "exercise":
        LogModel = ExerciseLog
        fields += ["exercise_id", "attempts", "struggling", "streak_progress", "attempts_before_completion"]
        obj_id_field = "exercise_id__in"
    elif log_type == "video":
        LogModel = VideoLog
        fields += ["video_id", "total_seconds_watched"]
        obj_id_field = "video_id__in"
    elif log_type == "content":
        LogModel = ContentLog
        fields += ["content_id", "progress"]
        obj_id_field = "content_id__in"
    else:
        return None

    # "video_id__in" -> "video_id"
    id_field = obj_id_field.split("__", 1)[0]

    if topic_ids:
        objects = []
        for topic_id in topic_ids:
            objects += get_topic_contents(topic_id=topic_id, kinds=[log_type.title()])
        obj_ids = {obj_id_field: [obj.get("id") for obj in objects]}
    else:
        objects, obj_ids = [], {}
    return LogModel, fields, id_field, obj_ids, objects
def generate_fake_video_logs(facility_user=None, topics=topics, start_date=None):
    """Add video logs for the given topics, for each of the given users.

    If no users are given, they are created. If no topics exist, they are
    taken from the list at the top of this file.

    Args:
        facility_user -- a single user, a list of users, or None (in which
            case fake users are generated first).
        topics -- topic ids to generate logs for.
        start_date -- earliest activity timestamp; defaults to roughly six
            months before the call.

    Returns:
        A list of VideoLog objects (nested lists in the recursive list case).
    """
    # BUG FIX: the default was previously written in the signature as
    # `datetime.datetime.now() - datetime.timedelta(days=30 * 6)`, which is
    # evaluated ONCE at import time.  Compute it per call instead.
    if start_date is None:
        start_date = datetime.datetime.now() - datetime.timedelta(days=30 * 6)

    date_diff = datetime.datetime.now() - start_date
    video_logs = []

    # It's not a user: probably a list.  Recursive case.
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()
        for topic in topics:
            for user in facility_user:
                video_logs.append(generate_fake_video_logs(facility_user=user, topics=[topic], start_date=start_date))

    # Actually generate!
    else:
        # First, make videos for the associated logs.
        # Then make some unassociated videos, to simulate both exploration
        # and watching videos without finishing.

        # Get (or create) user type.  Was a bare `except:`; narrowed so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            user_settings = json.loads(facility_user.notes)
        except Exception:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            try:
                facility_user.save()
            except Exception as e:
                logging.error("Error saving facility user: %s" % e)

        # When this user started in the program, relative to NOW.
        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])

        for topic in topics:
            videos = get_topic_contents(topic_id=topic, kinds=["Video"])
            exercises = get_topic_contents(topic_id=topic, kinds=["Exercise"])
            exercise_ids = [ex["id"] if "id" in ex else ex['name'] for ex in exercises]
            exercise_logs = ExerciseLog.objects.filter(user=facility_user, id__in=exercise_ids)

            # Probability of watching a video, irrespective of the context.
            p_video_outer = probability_of("video", user_settings=user_settings)
            logging.debug("# videos: %d; p(videos)=%4.3f, user settings: %s\n" % (len(videos), p_video_outer, json.dumps(user_settings)))

            for video in videos:
                p_completed = probability_of("completed", user_settings=user_settings)

                # If we're just doing random videos, fine.
                # If these videos relate to exercises, then suppress
                # non-exercise-related videos for this user.
                p_video = p_video_outer  # start with the context-free value
                did_exercise = False
                if exercise_logs.count() > 0:
                    # 5x less likely to watch a video if you haven't done the exercise,
                    if "related_exercise" not in video:
                        p_video /= 5  # suppress
                    # 5x more likely to watch a video if they've done the exercise;
                    # 2x more likely to have finished it.
                    else:
                        exercise_log = ExerciseLog.objects.filter(user=facility_user, id=video["related_exercise"]["id"])
                        did_exercise = exercise_log.count() != 0
                        if did_exercise:
                            p_video *= 5
                            p_completed *= 2

                # Do the sampling.
                if p_video < random.random():
                    continue  # didn't watch it
                elif p_completed > random.random():
                    pct_completed = 100.
                else:
                    # Slower students will use videos more.  Effort also important.
                    pct_completed = 100. * min(1., sqrt(random.random() * sqrt(user_settings["effort_level"] * user_settings["time_in_program"] / sqrt(user_settings["speed_of_learning"]))))

                # Get the video duration on the video.
                video_id = video.get("id", "")
                video_duration = video.get("duration", 0)

                # Compute quantities based on sample.
                total_seconds_watched = int(video_duration * pct_completed / 100.)
                points = int(750 * pct_completed / 100.)

                # Compute the latest possible start time, then sample a start
                # time between their start time and the latest possible one.
                if did_exercise:
                    # More jitter if you learn fast, less jitter if you try harder (more diligent).
                    date_jitter = datetime.timedelta(days=max(0, random.gauss(1, user_settings["speed_of_learning"] / user_settings["effort_level"])))
                    date_completed = exercise_log[0].completion_timestamp - date_jitter
                else:
                    # NOTE: an unused local (`rate_of_videos`, a videos-per-day
                    # estimate) was removed here; it was never read.
                    time_for_watching = total_seconds_watched
                    time_delta_completed = datetime.timedelta(seconds=random.randint(int(time_for_watching), int(datediff(date_diff_started, units="seconds"))))
                    date_completed = datetime.datetime.now() - time_delta_completed

                try:
                    vlog = VideoLog.objects.get(user=facility_user, video_id=video_id)
                except VideoLog.DoesNotExist:
                    logging.info("Creating video log: %-12s: %-45s (%4.1f%% watched, %d points)%s" % (
                        facility_user.first_name,
                        video["title"],
                        pct_completed,
                        points,
                        " COMPLETE on %s!" % date_completed if pct_completed == 100 else "",
                    ))
                    youtube_id = video.get("youtube_id", video_id)
                    vlog = VideoLog(
                        user=facility_user,
                        video_id=video_id,
                        youtube_id=youtube_id,
                        total_seconds_watched=total_seconds_watched,
                        points=points,
                        complete=(pct_completed == 100.),
                        completion_timestamp=date_completed,
                        latest_activity_timestamp=date_completed,
                    )
                    try:
                        vlog.save()  # avoid userlog issues
                    except Exception as e:
                        logging.error("Error saving video log: %s" % e)
                        continue

                video_logs.append(vlog)

    return video_logs
def generate_fake_exercise_logs(facility_user=None, topics=topics, start_date=None):
    """Add exercise logs for the given topics, for each of the given users.

    If no users are given, they are created. If no topics exist, they are
    taken from the list at the top of this file. By default, users start
    learning randomly between 6 months ago and now.

    Args:
        facility_user -- a single user, a list of users, or None (in which
            case fake users are generated first).
        topics -- topic ids to generate logs for.
        start_date -- earliest activity timestamp; defaults to roughly six
            months before the call.

    Returns:
        A (exercise_logs, user_logs) tuple of lists.
    """
    # BUG FIX: the default start_date used to be computed in the signature,
    # i.e. once at import time.  Compute it per call instead.
    if start_date is None:
        start_date = datetime.datetime.now() - datetime.timedelta(days=30 * 6)

    date_diff = datetime.datetime.now() - start_date
    exercise_logs = []
    user_logs = []

    # It's not a user: probably a list.  Recursive case.
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()
        for topic in topics:
            for user in facility_user:
                (elogs, ulogs) = generate_fake_exercise_logs(facility_user=user, topics=[topic], start_date=start_date)
                exercise_logs.append(elogs)
                user_logs.append(ulogs)

    # Actually generate!
    else:
        # Get (or create) user type.  Was a bare `except:`; narrowed so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            user_settings = json.loads(facility_user.notes)
        except Exception:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        # When this user started in the program, relative to NOW.
        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])

        for topic in topics:
            # Get all exercises related to the topic.
            exercises = get_topic_contents(topic_id=topic, kinds=["Exercise"])

            # Problem:
            #   Not realistic for students to have lots of unfinished exercises.
            #   If they start them, they tend to get stuck, right?
            # So, need to make it more probable that they will finish an
            # exercise, and less probable that they start one.
            # What we need is P(streak|started), not P(streak).

            # Probability of doing any particular exercise.
            p_exercise = probability_of(qty="exercise", user_settings=user_settings)
            logging.info("# exercises: %d; p(exercise)=%4.3f, user settings: %s\n" % (len(exercises), p_exercise, json.dumps(user_settings)))

            for j, exercise in enumerate(exercises):
                if random.random() > p_exercise:
                    continue

                # Probability of completing this exercise, and proportion of attempts.
                p_attempts = probability_of(qty="attempts", user_settings=user_settings)
                attempts = int(random.random() * p_attempts * 30 + 10)  # always enough to have completed

                elog, created = ExerciseLog.objects.get_or_create(user=facility_user, exercise_id=exercise["id"])
                alogs = []
                for i in range(0, attempts):
                    # Spread attempt timestamps evenly across the activity window.
                    alog = AttemptLog.objects.create(user=facility_user, exercise_id=exercise["id"], timestamp=start_date + date_diff * i / attempts)
                    alogs.append(alog)
                    if random.random() < user_settings["speed_of_learning"]:
                        alog.correct = True
                        alog.points = 10
                    # Persist every attempt, correct or not.
                    alog.save()

                elog.attempts = attempts
                elog.latest_activity_timestamp = start_date + date_diff
                # Streak/points are derived from the last 10 attempts.
                elog.streak_progress = sum([log.correct for log in alogs][-10:]) * 10
                elog.points = sum([log.points for log in alogs][-10:])
                elog.save()
                exercise_logs.append(elog)

            ulog = UserLog(
                user=facility_user,
                activity_type=1,
                start_datetime=start_date,
                end_datetime=start_date + date_diff,
                last_active_datetime=start_date + date_diff,
            )
            ulog.save()
            user_logs.append(ulog)

    return (exercise_logs, user_logs)
def generate_fake_exercise_logs(facility_user=None, topics=topics, start_date=None):
    """Add exercise logs for the given topics, for each of the given users.

    If no users are given, they are created. If no topics exist, they are
    taken from the list at the top of this file. By default, users start
    learning randomly between 6 months ago and now.

    Args:
        facility_user -- a single user, a list of users, or None (in which
            case fake users are generated first).
        topics -- topic ids to generate logs for.
        start_date -- earliest activity timestamp; defaults to roughly six
            months before the call.

    Returns:
        A (exercise_logs, user_logs) tuple of lists.
    """
    # BUG FIX: the default start_date used to be computed in the signature,
    # i.e. once at import time.  Compute it per call instead.
    if start_date is None:
        start_date = datetime.datetime.now() - datetime.timedelta(days=30 * 6)

    date_diff = datetime.datetime.now() - start_date
    exercise_logs = []
    user_logs = []

    # It's not a user: probably a list.  Recursive case.
    if not hasattr(facility_user, "username"):
        # It's NONE :-/ generate the users first!
        if not facility_user:
            (facility_user, _, _) = generate_fake_facility_users()
        for topic in topics:
            for user in facility_user:
                (elogs, ulogs) = generate_fake_exercise_logs(facility_user=user, topics=[topic], start_date=start_date)
                exercise_logs.append(elogs)
                user_logs.append(ulogs)

    # Actually generate!
    else:
        # Get (or create) user type.  Was a bare `except:`; narrowed so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            user_settings = json.loads(facility_user.notes)
        except Exception:
            user_settings = sample_user_settings()
            facility_user.notes = json.dumps(user_settings)
            facility_user.save()

        # When this user started in the program, relative to NOW.
        date_diff_started = datetime.timedelta(seconds=datediff(date_diff, units="seconds") * user_settings["time_in_program"])

        for topic in topics:
            # Get all exercises related to the topic.
            exercises = get_topic_contents(topic_id=topic, kinds=["Exercise"])

            # Problem:
            #   Not realistic for students to have lots of unfinished exercises.
            #   If they start them, they tend to get stuck, right?
            # So, need to make it more probable that they will finish an
            # exercise, and less probable that they start one.
            # What we need is P(streak|started), not P(streak).

            # Probability of doing any particular exercise.
            p_exercise = probability_of(qty="exercise", user_settings=user_settings)
            logging.info("# exercises: %d; p(exercise)=%4.3f, user settings: %s\n" % (len(exercises), p_exercise, json.dumps(user_settings)))

            for j, exercise in enumerate(exercises):
                if random.random() > p_exercise:
                    continue

                # Probability of completing this exercise, and proportion of attempts.
                p_attempts = probability_of(qty="attempts", user_settings=user_settings)
                attempts = int(random.random() * p_attempts * 30 + 10)  # always enough to have completed

                elog, created = ExerciseLog.objects.get_or_create(user=facility_user, exercise_id=exercise["id"])
                alogs = []
                for i in range(0, attempts):
                    # Spread attempt timestamps evenly across the activity window.
                    alog = AttemptLog.objects.create(user=facility_user, exercise_id=exercise["id"], timestamp=start_date + date_diff * i / attempts)
                    alogs.append(alog)
                    if random.random() < user_settings["speed_of_learning"]:
                        alog.correct = True
                        alog.points = 10
                    # Persist every attempt, correct or not.
                    alog.save()

                elog.attempts = attempts
                elog.latest_activity_timestamp = start_date + date_diff
                # Streak/points are derived from the last 10 attempts.
                elog.streak_progress = sum([log.correct for log in alogs][-10:]) * 10
                elog.points = sum([log.points for log in alogs][-10:])
                elog.save()
                exercise_logs.append(elog)

            # Generate a user log regarding exercises done.
            duration = random.randint(10 * 60, 120 * 60)  # 10 - 120 minutes in seconds
            exercise_start = start_date + timedelta(seconds=random.randint(0, int(date_diff.total_seconds() - duration)))
            exercise_end = exercise_start + timedelta(seconds=duration)
            ulog = UserLog(
                user=facility_user,
                activity_type=UserLog.get_activity_int("login"),
                start_datetime=exercise_start,
                end_datetime=exercise_end,
                last_active_datetime=exercise_end,
            )
            ulog.save()
            user_logs.append(ulog)

    return (exercise_logs, user_logs)
def get_playlist_entry_ids(cls, playlist):
    """Return a tuple of the playlist's video ids and exercise ids as sets"""
    pl_video_ids = set()
    pl_exercise_ids = set()
    # Single pass: bucket each item by its kind.
    for item in get_topic_contents(topic_id=playlist.get("id")):
        kind = item.get("kind")
        if kind == "Video":
            pl_video_ids.add(item.get("id"))
        elif kind == "Exercise":
            pl_exercise_ids.add(item.get("id"))
    return (pl_video_ids, pl_exercise_ids)