def handle(self, *args, **options):
    """
    Refresh the stored table of video sizes from the local video files.

    Starts from the table loaded from REMOTE_VIDEO_SIZE_FILEPATH, folds in
    the size of every "*.mp4" file directly under settings.CONTENT_ROOT
    (keyed by the file name without its extension, keeping the larger of the
    stored and on-disk values), and writes the table back to the same path
    as JSON, sorted by key.

    Raises CommandError when settings.CENTRAL_SERVER is set.
    """
    if settings.CENTRAL_SERVER:
        raise CommandError("Run this command on the distributed server only.")

    # Start from whatever sizes are already on record.
    size_by_id = softload_json(REMOTE_VIDEO_SIZE_FILEPATH, logger=logging.debug)

    # Find the video files currently on disk.
    mp4_pattern = os.path.join(settings.CONTENT_ROOT, "*.mp4")
    local_videos = glob.glob(mp4_pattern)
    logging.info("Querying sizes for %d video(s)." % len(local_videos))

    for mp4_path in local_videos:
        file_name = os.path.basename(mp4_path)
        video_id, _extension = os.path.splitext(file_name)
        recorded_size = size_by_id.get(video_id, 0)
        on_disk_size = os.path.getsize(mp4_path)
        # Keep the larger value, so that a locally compressed copy can
        # never lower a size that is already on record.
        size_by_id[video_id] = max(recorded_size, on_disk_size)

    # Re-key in sorted order so the saved file is stable between runs.
    ordered_sizes = OrderedDict()
    for video_id in sorted(size_by_id.keys()):
        ordered_sizes[video_id] = size_by_id[video_id]

    logging.info("Saving results to disk.")
    target_dir = os.path.dirname(REMOTE_VIDEO_SIZE_FILEPATH)
    ensure_dir(target_dir)
    with open(REMOTE_VIDEO_SIZE_FILEPATH, "w") as out_file:
        json.dump(ordered_sizes, out_file, indent=2)
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics
    selected by "where".

    who: list of users (each must expose an ``id`` attribute)
    where: topic path prefix, or a list/tuple of prefixes; a node is selected
        when its "path" starts with any of them
    data_types: a single type name, or a list of them.  The list passed in
        is never modified.  Recognised names:
            pct_mastery
            effort                 (derived from ex:attempts and
                                    vid:total_seconds_watched, which are
                                    computed on demand)
            vid:<VideoLog field>
            ex:<ExerciseLog field>
            user:<UserLog field>            (only when UserLog is enabled)
            usersum:<UserLogSummary field>  (only when UserLog is enabled)

    Returns a dict with the keys:
        "data":      OrderedDict user id -> {data type -> value}, in the
                     order of "who"
        "topics":    always None (topics are not queried here)
        "exercises": list of selected exercise ids ([] when none)
        "videos":    list of selected video ids ([] when none)

    Raises Exception when a requested data type is not recognised.
    """
    # Work on a private queue.  This accepts a bare string and makes sure the
    # dependencies appended for "effort" neither leak into the caller's list
    # nor fail on a string (str += list raises TypeError).
    if hasattr(data_types, "lower"):
        pending_types = [data_types]
    else:
        pending_types = list(data_types)

    # str.startswith accepts a tuple of prefixes.  A bare string has to be
    # wrapped, otherwise tuple() would split it into single characters.
    if hasattr(where, "lower"):
        path_prefixes = (where,)
    else:
        path_prefixes = tuple(where)

    def _ids_under_where(kind):
        """Return the ids of cached `kind` nodes having any path under `where`."""
        # Each cache value is a list of nodes sharing an id (one per path).
        return [
            nodes[0]["id"]
            for nodes in get_node_cache(kind).values()
            if any(node["path"].startswith(path_prefixes) for node in nodes)
        ]

    # Topics are never queried by this function; the key is kept in the
    # result so its shape stays unchanged.
    topics = None
    exercises = None
    videos = None

    # One empty container per user; "data" keeps the order of the users.
    data = OrderedDict((w.id, dict()) for w in who)
    vid_logs = dict((user, []) for user in data)
    ex_logs = dict((user, []) for user in data)
    user_log_enabled = UserLog.is_enabled()
    if user_log_enabled:
        activity_logs = dict((user, []) for user in data)

    # No users, don't bother.
    if data:
        # Query out all exercises, videos, exercise logs, and video logs
        # before looping to limit requests.
        # Note: user activity is queried inside the loop, so exercise and
        # video reports do not pay for it.
        exercises = _ids_under_where('Exercise')
        videos = _ids_under_where('Video')

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)
        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        # Every user gets the same set of keys, so the first user's dict
        # tells us which data types have been computed already.
        first_user_data = data[next(iter(data))]

        while pending_types:
            data_type = pending_types.pop(0)
            if data_type in first_user_data:
                continue  # already computed; no need to calc again.

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":
                for user in data:
                    if not ex_logs[user]:
                        data[user][data_type] = 0
                    else:
                        completed = sum(el['complete'] for el in ex_logs[user])
                        data[user][data_type] = 100. * completed / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in first_user_data and "vid:total_seconds_watched" in first_user_data:
                    # Dependencies are available: blend them half and half,
                    # scaling attempts by 10 and seconds watched by 750.
                    for user in data:
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    # Compute the dependencies first, then come back to "effort".
                    pending_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #
            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:
                field = data_type[4:]
                for user in data:
                    data[user][data_type] = OrderedDict([(v['video_id'], v[field]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:
                field = data_type[3:]
                for user in data:
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[field]) for el in ex_logs[user]])

            # User Log Queries
            # NOTE(review): the condition of this branch was unreadable in the
            # reviewed copy of the file; it is reconstructed here to mirror
            # the "usersum:" branch below -- confirm against version control.
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and user_log_enabled:
                field = data_type[5:]
                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)
                for user in data:
                    data[user][data_type] = [log[field] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and user_log_enabled:
                field = data_type[8:]
                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)
                for user in data:
                    data[user][data_type] = sum([log[field] for log in activity_logs[user]])

            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }