def _get_installed_language_packs():
    """
    On-disk method to show currently installed languages and meta data.
    """
    # There's always English...
    installed_language_packs = [{
        'code': 'en',
        'software_version': SHORTVERSION,
        'language_pack_version': 0,
        'percent_translated': 100,
        'subtitle_count': 0,
        'name': 'English',
        'native_name': 'English',
    }]

    # Loop through locale folders
    for locale_dir in settings.LOCALE_PATHS:
        if not os.path.exists(locale_dir):
            continue

        # Loop through folders in each locale dir
        # This is idiotic, it just assumes that every directory / file is
        # a valid language code
        for django_disk_code in os.listdir(locale_dir):
            # Skip if it's a file
            if not os.path.isdir(os.path.join(locale_dir, django_disk_code)):
                continue

            # Inside each folder, read from the JSON file - language name, % UI trans, version number
            try:
                # Get the metadata
                metadata_filepath = os.path.join(locale_dir, django_disk_code, "%s_metadata.json" % lcode_to_ietf(django_disk_code))
                lang_meta = softload_json(metadata_filepath, raises=True)
                logging.debug("Found language pack %s" % (django_disk_code))
            except IOError as e:
                if e.errno == errno.ENOENT:
                    logging.info("Ignoring non-language pack %s in %s" % (django_disk_code, locale_dir))
                else:
                    logging.error("Error reading %s metadata (%s): %s" % (django_disk_code, metadata_filepath, e))
                continue

            installed_language_packs.append(lang_meta)

    sorted_list = sorted(installed_language_packs, key=lambda m: m['name'].lower())
    return OrderedDict([(lcode_to_ietf(val["code"]), val) for val in sorted_list])
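# A minimal, self-contained sketch of the final step above: packs are sorted case-insensitively
# by display name, then keyed by language code in an OrderedDict so iteration order matches
# display order. The lcode_to_ietf() conversion is omitted and the packs below are sample data.
from collections import OrderedDict

packs = [
    {'code': 'pt-BR', 'name': 'Portuguese (Brazil)'},
    {'code': 'en', 'name': 'English'},
    {'code': 'de', 'name': 'German'},
]
by_code = OrderedDict((p['code'], p) for p in sorted(packs, key=lambda p: p['name'].lower()))
assert list(by_code.keys()) == ['en', 'de', 'pt-BR']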
def handle(self, *args, **options):
    if settings.CENTRAL_SERVER:
        raise CommandError("Run this command on the distributed server only.")

    # Load videos
    video_sizes = softload_json(REMOTE_VIDEO_SIZE_FILEPATH, logger=logging.debug)

    # Query current files
    all_video_filepaths = glob.glob(os.path.join(settings.CONTENT_ROOT, "*.mp4"))
    logging.info("Querying sizes for %d video(s)." % len(all_video_filepaths))

    # Get all current sizes
    for video_filepath in all_video_filepaths:
        youtube_id = os.path.splitext(os.path.basename(video_filepath))[0]
        # Set to max, so that local compressed videos will not affect things.
        video_sizes[youtube_id] = max(video_sizes.get(youtube_id, 0), os.path.getsize(video_filepath))

    # Sort results
    video_sizes = OrderedDict([(key, video_sizes[key]) for key in sorted(video_sizes.keys())])

    logging.info("Saving results to disk.")
    ensure_dir(os.path.dirname(REMOTE_VIDEO_SIZE_FILEPATH))
    with open(REMOTE_VIDEO_SIZE_FILEPATH, "w") as fp:
        json.dump(video_sizes, fp, indent=2)
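# The size bookkeeping above keeps the larger of the previously recorded size and the size
# currently on disk, so a locally re-compressed (smaller) file never lowers the published
# number. A reduced illustration with invented byte counts:
video_sizes = {"abc123": 9000000}
on_disk = {"abc123": 4500000, "def456": 7000000}
for youtube_id, size in on_disk.items():
    video_sizes[youtube_id] = max(video_sizes.get(youtube_id, 0), size)
assert video_sizes == {"abc123": 9000000, "def456": 7000000}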
def get_organizations(self):
    """
    Return all organizations that this user manages.
    If this user is a super-user, then the headless org will be appended at the end.
    """
    orgs = OrderedDict()  # no dictionary comprehensions, so have to loop
    for org in self.user.organization_set.all().order_by("name"):  # add in order queried (alphabetical)
        orgs[org.pk] = org

    # Add a headless organization for superusers, containing any headless zones.
    # Make sure this is at the END of the list, so it is clearly special.
    if self.user.is_superuser:
        headless_org = Organization.get_or_create_headless_organization(refresh_zones=True)
        orgs[headless_org.pk] = headless_org

    return orgs
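# Why an OrderedDict above: entries keep insertion order, so the "headless" organization
# appended after the alphabetical loop reliably comes out last when iterating (plain dicts
# give no such guarantee in the Python 2 era this code targets). Keys/values are illustrative.
from collections import OrderedDict

orgs = OrderedDict()
for pk, name in sorted([(3, 'Alpha Org'), (1, 'Beta Org')], key=lambda t: t[1]):
    orgs[pk] = name
orgs['headless'] = 'Headless organization'   # appended last on purpose
assert list(orgs.keys()) == [3, 1, 'headless']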
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.
    """
    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_node_cache('Exercise'))
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group
        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # login_stats["total_seconds__sum"] or 0)/3600.
        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []
        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    for group in list(groups) + [None] * (group_id == None or _(group_id) == _("Ungrouped")):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _("Ungrouped"))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
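# The group rollup above folds each user's pct_mastery into a running mean:
# new_mean = (old_mean * (n - 1) + x) / n, where n is the just-incremented total_users.
# A tiny check with made-up values that this equals the plain arithmetic average:
values = [0.2, 0.5, 0.8]
mean, n = 0.0, 0
for x in values:
    n += 1
    mean = (mean * (n - 1) + x) / n
assert abs(mean - sum(values) / len(values)) < 1e-9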
def show_deployment_cms(request):
    """
    This does 3 queries:
    * Facilities, organized by organization.
    * Devices, organized by organization.
    * Organizations, organized by organization.

    It then combines results from these 3 queries to create a list of:
    * All Users that have facilities, have devices but no facilities, and have no devices.
    """

    # Query 1: Organizations
    deployment_data = OrderedDict([(org["id"], {
        "org_name": org["name"],
        "owner": org["owner__username"],
        "total_users": 0,
        "sync_sessions": 0,
        "models_synced": 0,
    }) for org in list(Organization.objects.values("id", "name", "owner__username"))])

    # Query 2: Organizations with users
    for org in list(Organization.objects.values("id", "users__username", "users__first_name", "users__last_name")):
        org_id = org["id"]
        deployment_data[org_id]["users"] = deployment_data[org_id].get("users", {})
        deployment_data[org_id]["users"][org["users__username"]] = {
            "first_name": org["users__first_name"],
            "last_name": org["users__last_name"],
            "email": org["users__username"],
        }

    # Query 3: Organizations with devices
    device_data = DeviceZone.objects \
        .annotate( \
            n_sessions=Count("device__client_sessions"), \
            n_models=Sum("device__client_sessions__models_uploaded")) \
        .values("n_sessions", "n_models", "device__id", "device__name", "zone__id", "zone__name", "zone__organization__id") \
        .order_by("zone__name", "-n_models", "-n_sessions")

    for devzone in list(device_data):
        org_id = devzone["zone__organization__id"]
        if not org_id:
            continue

        deployment_data[org_id]["devices"] = deployment_data[org_id].get("devices", {})
        deployment_data[org_id]["devices"][devzone["device__id"]] = {
            "id": devzone["device__id"],
            "name": devzone["device__name"],
            "zone_name": devzone["zone__name"],
            "zone_id": devzone["zone__id"],
            "models_synced": devzone["n_models"],
            "sync_sessions": devzone["n_sessions"],
        }
        deployment_data[org_id]["models_synced"] += devzone["n_models"] or 0
        deployment_data[org_id]["sync_sessions"] += devzone["n_sessions"] or 0

    # Query 4: Organizations with facilities
    facilities_by_org = list(Facility.objects \
        .filter(signed_by__devicemetadata__is_demo_device=False) \
        .annotate( \
            n_actual_users=Count("facilityuser")) \
        .values( \
            "n_actual_users", \
            "name", "address", \
            "latitude", "longitude", \
            "contact_email", "contact_name", \
            "user_count", \
            "zone_fallback__organization__id", \
            "signed_by__devicezone__zone__organization__id",) \
        .order_by("-n_actual_users"))

    for fac in list(facilities_by_org):
        org_id = fac["signed_by__devicezone__zone__organization__id"] or fac["zone_fallback__organization__id"]
        deployment_data[org_id]["facilities"] = deployment_data[org_id].get("facilities", {})
        deployment_data[org_id]["facilities"][fac["name"]] = fac
        deployment_data[org_id]["total_users"] += fac["n_actual_users"] or 0

    # Combine all data into a single data store.
    sort_fn = lambda dep: (dep["total_users"], dep["models_synced"], dep["sync_sessions"])
    paged_data, page_urls = paginate_data(request, sorted(deployment_data.values(), key=sort_fn, reverse=True), page=int(request.GET.get("cur_page", 1)), per_page=int(request.GET.get("per_page", 25)))

    return {
        "pages": paged_data,
        "page_urls": page_urls,
        "title": _("Deployments CMS"),
    }
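# The sort_fn above orders deployments by a tuple, so ties on total_users fall back to
# models_synced, then sync_sessions, all descending. A reduced example with made-up rows:
rows = [
    {"org_name": "A", "total_users": 10, "models_synced": 5, "sync_sessions": 1},
    {"org_name": "B", "total_users": 10, "models_synced": 9, "sync_sessions": 2},
    {"org_name": "C", "total_users": 3,  "models_synced": 0, "sync_sessions": 0},
]
sort_fn = lambda dep: (dep["total_users"], dep["models_synced"], dep["sync_sessions"])
assert [r["org_name"] for r in sorted(rows, key=sort_fn, reverse=True)] == ["B", "A", "C"]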
def zone_management(request, zone_id="None"): context = control_panel_context(request, zone_id=zone_id) own_device = Device.get_own_device() if not context["zone"] and (zone_id != "None" or own_device.get_zone() or settings.CENTRAL_SERVER): raise Http404() # on distributed server, we can make due if they're not registered. # Denote the zone as headless or not if context["zone"]: is_headless_zone = re.search(r"Zone for public key ", context["zone"].name) else: is_headless_zone = False # Accumulate device data device_data = OrderedDict() if context["zone"]: devices = Device.objects.filter(devicezone__zone=context["zone"]) else: devices = Device.objects.filter(devicemetadata__is_own_device=True) for device in list(devices.order_by("devicemetadata__is_demo_device", "name")): user_activity = UserLogSummary.objects.filter(device=device) sync_sessions = SyncSession.objects.filter(client_device=device) if ( not settings.CENTRAL_SERVER and device.id != own_device.id ): # Non-local sync sessions unavailable on distributed server sync_sessions = None exercise_activity = ExerciseLog.objects.filter(signed_by=device) device_data[device.id] = { "name": device.name or device.id, "num_times_synced": sync_sessions.count() if sync_sessions is not None else None, "last_time_synced": sync_sessions.aggregate(Max("timestamp"))["timestamp__max"] if sync_sessions is not None else None, "is_demo_device": device.get_metadata().is_demo_device, "is_own_device": device.get_metadata().is_own_device and not settings.CENTRAL_SERVER, "last_time_used": exercise_activity.order_by("-completion_timestamp")[0:1] if user_activity.count() == 0 else user_activity.order_by("-last_activity_datetime", "-end_datetime")[0], "counter": device.get_counter_position(), "is_registered": device.is_registered(), } # Accumulate facility data facility_data = OrderedDict() if context["zone"]: facilities = Facility.objects.by_zone(context["zone"]) else: facilities = Facility.objects.all() for facility in list(facilities.order_by("name")): user_activity = UserLogSummary.objects.filter(user__facility=facility) exercise_activity = ExerciseLog.objects.filter(user__facility=facility) facility_data[facility.id] = { "name": facility.name, "num_users": FacilityUser.objects.filter(facility=facility).count(), "num_groups": FacilityGroup.objects.filter(facility=facility).count(), "id": facility.id, "meta_data_in_need": check_meta_data(facility), "last_time_used": exercise_activity.order_by("-completion_timestamp")[0:1] if user_activity.count() == 0 else user_activity.order_by("-last_activity_datetime", "-end_datetime")[0], } context.update( { "is_headless_zone": is_headless_zone, "facilities": facility_data, "missing_meta": any([facility["meta_data_in_need"] for facility in facility_data.values()]), "devices": device_data, "upload_form": UploadFileForm(), "own_device_is_trusted": Device.get_own_device().get_metadata().is_trusted, } ) if not settings.CENTRAL_SERVER: context["base_template"] = "distributed/base_manage.html" return context
def compute_data(data_types, who, where):
    """
    Compute the data in "data_types" for each user in "who", for the topics selected by "where"

    who: list of users
    where: topic_path
    data_types can include:
        pct_mastery
        effort
        attempts
    """

    # None indicates that the data hasn't been queried yet.
    # We'll query it on demand, for efficiency
    topics = None
    exercises = None
    videos = None

    # Initialize an empty dictionary of data, video logs, exercise logs, for each user
    data = OrderedDict(zip([w.id for w in who], [dict() for i in range(len(who))]))  # maintain the order of the users
    vid_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    ex_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))
    if UserLog.is_enabled():
        activity_logs = dict(zip([w.id for w in who], [[] for i in range(len(who))]))

    # Set up queries (but don't run them), so we have really easy aliases.
    # Only do them if they haven't been done yet (tell this by passing in a value to the lambda function)
    # Topics: topics.
    # Exercises: names (ids for ExerciseLog objects)
    # Videos: video_id (ids for VideoLog objects)

    # This lambda partial creates a function to return all items with a particular path from the NODE_CACHE.
    search_fun_single_path = partial(lambda t, p: t["path"].startswith(p), p=tuple(where))
    # This lambda partial creates a function to return all items with paths matching a list of paths from NODE_CACHE.
    search_fun_multi_path = partial(lambda ts, p: any([t["path"].startswith(p) for t in ts]), p=tuple(where))
    # Functions that use the functions defined above to return topics, exercises, and videos based on paths.
    query_topics = partial(lambda t, sf: t if t is not None else [t[0]["id"] for t in filter(sf, get_node_cache('Topic').values())], sf=search_fun_single_path)
    query_exercises = partial(lambda e, sf: e if e is not None else [ex[0]["id"] for ex in filter(sf, get_node_cache('Exercise').values())], sf=search_fun_multi_path)
    query_videos = partial(lambda v, sf: v if v is not None else [vid[0]["id"] for vid in filter(sf, get_node_cache('Video').values())], sf=search_fun_multi_path)

    # No users, don't bother.
    if len(who) > 0:
        # Query out all exercises, videos, exercise logs, and video logs before looping to limit requests.
        # This means we could pull data for n-dimensional coach report displays with the same number of requests!
        # Note: User activity is polled inside the loop, to prevent possible slowdown for exercise and video reports.
        exercises = query_exercises(exercises)
        videos = query_videos(videos)

        if exercises:
            ex_logs = query_logs(data.keys(), exercises, "exercise", ex_logs)
        if videos:
            vid_logs = query_logs(data.keys(), videos, "video", vid_logs)

        for data_type in (data_types if not hasattr(data_types, "lower") else [data_types]):  # convert to list from string, if necessary
            if data_type in data[data.keys()[0]]:  # if the first user has it, then all do; no need to calc again.
                continue

            #
            # These are summary stats: you only get one per user
            #
            if data_type == "pct_mastery":
                # Efficient query out, spread out to dict
                for user in data.keys():
                    data[user][data_type] = 0 if not ex_logs[user] else 100. * sum([el['complete'] for el in ex_logs[user]]) / float(len(exercises))

            elif data_type == "effort":
                if "ex:attempts" in data[data.keys()[0]] and "vid:total_seconds_watched" in data[data.keys()[0]]:
                    # exercises and videos would be initialized already
                    for user in data.keys():
                        avg_attempts = 0 if len(exercises) == 0 else sum(data[user]["ex:attempts"].values()) / float(len(exercises))
                        avg_video_points = 0 if len(videos) == 0 else sum(data[user]["vid:total_seconds_watched"].values()) / float(len(videos))
                        data[user][data_type] = 100. * (0.5 * avg_attempts / 10. + 0.5 * avg_video_points / 750.)
                else:
                    data_types += ["ex:attempts", "vid:total_seconds_watched", "effort"]

            #
            # These are detail stats: you get many per user
            #

            # Just querying out data directly: Video
            elif data_type.startswith("vid:") and data_type[4:] in [f.name for f in VideoLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(v['video_id'], v[data_type[4:]]) for v in vid_logs[user]])

            # Just querying out data directly: Exercise
            elif data_type.startswith("ex:") and data_type[3:] in [f.name for f in ExerciseLog._meta.fields]:
                for user in data.keys():
                    data[user][data_type] = OrderedDict([(el['exercise_id'], el[data_type[3:]]) for el in ex_logs[user]])

            # User Log Queries
            # (This branch's condition and query_logs() call were redacted in the source;
            #  reconstructed here by analogy with the "usersum:" branch below.)
            elif data_type.startswith("user:") and data_type[5:] in [f.name for f in UserLog._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "activity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = [log[data_type[5:]] for log in activity_logs[user]]

            # User Summary Queries
            elif data_type.startswith("usersum:") and data_type[8:] in [f.name for f in UserLogSummary._meta.fields] and UserLog.is_enabled():
                activity_logs = query_logs(data.keys(), "", "summaryactivity", activity_logs)
                for user in data.keys():
                    data[user][data_type] = sum([log[data_type[8:]] for log in activity_logs[user]])

            # Unknown requested quantity
            else:
                raise Exception("Unknown type: '%s' not in %s" % (data_type, str([f.name for f in ExerciseLog._meta.fields])))

    # Returning empty list instead of None allows javascript on client
    # side to read 'length' property without error.
    exercises = exercises or []
    videos = videos or []

    return {
        "data": data,
        "topics": topics,
        "exercises": exercises,
        "videos": videos,
    }
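# Worked example of the "effort" metric above: it averages attempts per exercise
# (normalized against 10) and video points per video (normalized against 750), weights
# each half equally, and scales to a percentage. The numbers below are invented.
attempts_per_exercise = 4.0        # avg_attempts
video_points_per_video = 300.0     # avg_video_points
effort = 100. * (0.5 * attempts_per_exercise / 10. + 0.5 * video_points_per_video / 750.)
assert abs(effort - 40.0) < 1e-9   # 100 * (0.20 + 0.20)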
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.
    """
    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_exercise_cache())
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users, total_seconds_watched__gt=0)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    login_logs = login_logs.filter(total_seconds__gt=0)
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
    if period_end:
        # MUST: Fix the midnight bug where period end covers up to the prior day only because
        # period end is datetime(year, month, day, hour=0, minute=0), meaning midnight of previous day.
        # Example:
        #     If period_end == '2014-12-01', we cannot include the records dated '2014-12-01 09:30'.
        #     So to fix this, we change it to '2014-12-01 23:59.999999'.
        period_end = dateutil.parser.parse(period_end)
        period_end = period_end + dateutil.relativedelta.relativedelta(days=+1, microseconds=-1)
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
    if period_start and period_end:
        exercise_logs = exercise_logs.filter(Q(completion_timestamp__gte=period_start) & Q(completion_timestamp__lte=period_end))
        q1 = Q(completion_timestamp__isnull=False) & \
            Q(completion_timestamp__gte=period_start) & \
            Q(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(q1)
        login_q1 = Q(start_datetime__gte=period_start) & Q(start_datetime__lte=period_end) & \
            Q(end_datetime__gte=period_start) & Q(end_datetime__lte=period_end)
        login_logs = login_logs.filter(login_q1)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group
        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # login_stats["total_seconds__sum"] or 0)/3600.
        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []
        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    for group in list(groups) + [None] * (group_id == None or group_id == UNGROUPED):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _(UNGROUPED))
        group_title = getattr(group, "title", _(UNGROUPED))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "title": group_title,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group UNGROUPED
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" % (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
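# Sketch of the "midnight bug" fix above: a bare date string parses to midnight (00:00),
# so records later that same day would fail the __lte comparison. Adding one day minus one
# microsecond moves the bound to the end of the requested day (needs python-dateutil).
import dateutil.parser
import dateutil.relativedelta

period_end = dateutil.parser.parse("2014-12-01")
period_end = period_end + dateutil.relativedelta.relativedelta(days=+1, microseconds=-1)
assert str(period_end) == "2014-12-01 23:59:59.999999"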
def retrieve_API_data(channel=None):
    # TODO(jamalex): See how much of what we do here can be replaced by KA's new projection-based API
    # http://www.khanacademy.org/api/v2/topics/topictree?projection={"topics":[{"slug":1,"childData":[{"id":1}]}]}
    khan = Khan()

    logging.info("Fetching Khan topic tree")
    topic_tree = khan.get_topic_tree()

    logging.info("Fetching Khan exercises")
    exercises = khan.get_exercises()
    exercises_dummy = khan.get_exercises()

    logging.info("Fetching Khan videos")
    content = khan.get_videos()

    # Hack to hardcode the mp4 format flag on Videos.
    for con in content:
        con["format"] = "mp4"

    # Compute and save file sizes
    logging.info("Checking remote content file sizes...")
    try:
        with open(REMOTE_VIDEO_SIZE_FILEPATH, "r") as fp:
            old_sizes = json.load(fp)
    except:
        old_sizes = {}
    blacklist = [key for key, val in old_sizes.items() if val > 0]  # exclude any we already know about
    sizes_by_id, sizes = query_remote_content_file_sizes(content, blacklist=blacklist)
    ensure_dir(os.path.dirname(REMOTE_VIDEO_SIZE_FILEPATH))
    old_sizes.update(sizes_by_id)
    sizes = OrderedDict(sorted(old_sizes.items()))
    with open(REMOTE_VIDEO_SIZE_FILEPATH, "w") as fp:
        json.dump(sizes, fp, indent=2)
    logging.info("Finished checking remote content file sizes...")

    assessment_items = []

    # Limit number of simultaneous requests
    semaphore = threading.BoundedSemaphore(100)

    def fetch_assessment_data(exercise):
        logging.info("Fetching Assessment Item Data for {exercise}".format(exercise=exercise.display_name))
        for assessment_item in exercise.all_assessment_items:
            counter = 0
            wait = 5
            while wait:
                try:
                    semaphore.acquire()
                    logging.info("Fetching assessment item {assessment}".format(assessment=assessment_item["id"]))
                    assessment_data = khan.get_assessment_item(assessment_item["id"])
                    semaphore.release()
                    if assessment_data.get("item_data"):
                        wait = 0
                        assessment_items.append(assessment_data)
                    else:
                        logging.info("Fetching assessment item {assessment} failed, retrying in {wait}".format(assessment=assessment_item["id"], wait=wait))
                        time.sleep(wait)
                        wait = wait * 2
                        counter += 1
                except (requests.ConnectionError, requests.Timeout):
                    semaphore.release()
                    time.sleep(wait)
                    wait = wait * 2
                    counter += 1
                if counter > 5:
                    logging.info("Fetching assessment item {assessment} failed more than 5 times, aborting".format(assessment=assessment_item["id"]))
                    break

    threads = [threading.Thread(target=fetch_assessment_data, args=(exercise,)) for exercise in exercises_dummy]

    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    return topic_tree, exercises, assessment_items, content
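# The fetch loop above bounds concurrency with a threading.BoundedSemaphore and retries failed
# fetches with an exponentially growing wait. A stripped-down, dependency-free sketch of the
# same pattern; flaky_fetch is an invented stand-in, not part of the Khan API wrapper.
import threading
import time

semaphore = threading.BoundedSemaphore(4)        # at most 4 fetches in flight

def flaky_fetch(item):
    return {"id": item}                          # stand-in for a network call

def fetch_with_backoff(item, attempts=5, base_wait=0.01):
    wait = base_wait
    for _ in range(attempts):
        with semaphore:                          # released even if the call raises
            try:
                return flaky_fetch(item)
            except IOError:
                pass
        time.sleep(wait)                         # back off outside the semaphore
        wait *= 2
    return None

results = []
threads = [threading.Thread(target=lambda i=i: results.append(fetch_with_backoff(i))) for i in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert len(results) == 8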
def test_detail_view(request, test_id):
    """View details of student performance on specific exams"""

    facility, group_id, context = coach_nav_context(request, "test")

    # get users in this facility and group
    users = get_user_queryset(request, facility, group_id)

    # Get test object
    test_resource = TestResource()
    test_obj = test_resource._read_test(test_id=test_id)

    # get all of the test logs for this specific test object and generated by these specific users
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id, test=test_id)
        # Narrow all by ungroup facility user
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            if facility:
                test_logs = TestLog.objects.filter(user__group__isnull=True)
            else:
                test_logs = TestLog.objects.filter(facility=facility, user__group__isnull=True)
    else:
        # covers the all groups case
        test_logs = TestLog.objects.filter(user__facility=facility, test=test_id)

    results_table, scores_dict = OrderedDict(), OrderedDict()
    # build this up now to use in summary stats section
    ex_ids = set(literal_eval(test_obj.ids))
    for ex in ex_ids:
        scores_dict[ex] = []

    for s in users:
        s.name = s.get_name()
        user_attempts = AttemptLog.objects.filter(user=s, context_type='test', context_id=test_id)
        results_table[s] = []
        attempts_count_total, attempts_count_correct_total = 0, 0
        for ex in ex_ids:
            attempts = [attempt for attempt in user_attempts if attempt.exercise_id == ex]
            attempts_count = len(attempts)
            attempts_count_correct = len([attempt for attempt in attempts if attempt.correct])
            attempts_count_total += attempts_count
            attempts_count_correct_total += attempts_count_correct
            if attempts_count:
                score = round(100 * float(attempts_count_correct) / float(attempts_count), 1)
                scores_dict[ex].append(score)
                display_score = "%d%%" % score
            else:
                score = ''
                display_score = ''
            results_table[s].append({
                'display_score': display_score,
                'raw_score': score,
            })

        # Calc overall score
        if attempts_count_total:
            score = round(100 * float(attempts_count_correct_total) / float(attempts_count_total), 1)
            display_score = "%d%%" % score
            fraction_correct = "(%(correct)d/%(attempts)d)" % ({'correct': attempts_count_correct_total, 'attempts': attempts_count_total})
        else:
            score = ''
            display_score = ''
            fraction_correct = ''
        results_table[s].append({
            'display_score': display_score,
            'raw_score': score,
            'title': fraction_correct,
        })

    # This retrieves stats for individual exercises
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for ex in ex_ids:
            scores_list = scores_dict[ex]
            if scores_list:
                stats_dict[stat].append("%d%%" % return_list_stat(scores_list, stat))
            else:
                stats_dict[stat].append('')

    # replace the exercise ids with their full names
    exercises = get_exercise_cache()
    ex_titles = []
    for ex in ex_ids:
        ex_titles.append(exercises[ex]['title'])

    # provide a list of test options to view for this group/facility combo
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
    else:
        # covers the all/no groups case
        test_logs = TestLog.objects.filter(user__facility=facility)

    test_objects = test_resource._read_tests()
    unique_test_ids = set([test_log.test for test_log in test_logs])
    test_options = [{
        'id': obj.test_id,
        'url': reverse('test_detail_view', kwargs={'test_id': obj.test_id}),
        'title': obj.title,
    } for obj in test_objects if obj.test_id in unique_test_ids]

    context = plotting_metadata_context(request, facility=facility)
    context.update({
        "test_obj": test_obj,
        "ex_cols": ex_titles,
        "results_table": results_table,
        "stats_dict": stats_dict,
        "test_options": test_options,
    })

    return context
def test_view(request):
    """Test view gets data server-side and displays exam results"""

    facility, group_id, context = coach_nav_context(request, "test")

    # Get students
    users = get_user_queryset(request, facility, group_id)

    # Get the TestLog objects generated by this group of students
    # TODO(cpauya): what about queryset for ungrouped students?
    test_logs = None
    if group_id:
        test_logs = TestLog.objects.filter(user__group=group_id)
        # Narrow all by ungrouped facility user
        if group_id == control_panel_api_resources.UNGROUPED_KEY:
            if facility:
                test_logs = TestLog.objects.filter(user__facility=facility, user__group__isnull=True)
            else:
                test_logs = TestLog.objects.filter(user__group__isnull=True)
    elif facility:
        test_logs = TestLog.objects.filter(user__facility=facility)
    else:
        # filter by all facilities and groups for the user
        (groups, facilities, ungrouped_available) = get_accessible_objects_from_logged_in_user(request, facility=facility)
        if facilities:
            facility_ids = facilities.values_list("id", flat=True)
            test_logs = TestLog.objects.filter(user__facility__id__in=facility_ids)

    # Get list of all test objects
    test_resource = TestResource()
    tests_list = test_resource._read_tests()

    # Get completed test objects (used as columns)
    completed_test_ids = set([item.test for item in test_logs])
    test_objects = [test for test in tests_list if test.test_id in completed_test_ids]

    # Create the table
    results_table = OrderedDict()
    for s in users:
        s.name = s.get_name()
        user_test_logs = [log for log in test_logs if log.user == s]
        results_table[s] = []
        for t in test_objects:
            log_object = next((log for log in user_test_logs if log.test == t.test_id), '')
            # The template expects a status and a score to display
            if log_object:
                test_object = log_object.get_test_object()
                score = round(100 * float(log_object.total_correct) / float(test_object.total_questions), 1)
                display_score = "%(score)d%% (%(correct)d/%(total_questions)d)" % {'score': score, 'correct': log_object.total_correct, 'total_questions': test_object.total_questions}
                if log_object.complete:
                    # Case: completed => we show % score
                    if score >= 80:
                        status = _("pass")
                    elif score >= 60:
                        status = _("borderline")
                    else:
                        status = _("fail")
                    results_table[s].append({
                        "status": status,
                        "cell_display": display_score,
                        "title": status.title(),
                    })
                else:
                    # Case: has started, but has not finished => we display % score & # remaining in title
                    n_remaining = test_object.total_questions - log_object.index
                    status = _("incomplete")
                    results_table[s].append({
                        "status": status,
                        "cell_display": display_score,
                        "title": status.title() + ": " + ungettext("%(n_remaining)d problem remaining", "%(n_remaining)d problems remaining", n_remaining) % {'n_remaining': n_remaining},
                    })
            else:
                # Case: has not started
                status = _("not started")
                results_table[s].append({
                    "status": status,
                    "cell_display": "",
                    "title": status.title(),
                })

        # This retrieves stats for students
        score_list = [round(100 * float(result.total_correct) / float(result.get_test_object().total_questions), 1) for result in user_test_logs]
        for stat in SUMMARY_STATS:
            if score_list:
                results_table[s].append({
                    "status": "statistic",
                    "cell_display": "%d%%" % return_list_stat(score_list, stat),
                })
            else:
                results_table[s].append({
                    "status": "statistic",
                    "cell_display": "",
                })

    # This retrieves stats for tests
    stats_dict = OrderedDict()
    for stat in SUMMARY_STATS:
        stats_dict[stat] = []
        for test_obj in test_objects:
            # get the logs for this test across all users and then add summary stats
            log_scores = [round(100 * float(test_log.total_correct) / float(test_log.get_test_object().total_questions), 1) for test_log in test_logs if test_log.test == test_obj.test_id]
            stats_dict[stat].append("%d%%" % return_list_stat(log_scores, stat))

    context.update(plotting_metadata_context(request, facility=facility))
    context.update({
        "results_table": results_table,
        "test_columns": test_objects,
        "summary_stats": SUMMARY_STATS,
        "stats_dict": stats_dict,
    })

    return context
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.
    """
    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_node_cache('Exercise'))
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
        video_logs = video_logs.filter(completion_timestamp__gte=period_start)
        login_logs = login_logs.filter(start_datetime__gte=period_start)
    if period_end:
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
        video_logs = video_logs.filter(completion_timestamp__lte=period_end)
        login_logs = login_logs.filter(end_datetime__lte=period_end)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group
        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # login_stats["total_seconds__sum"] or 0)/3600.
        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []
        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    for group in list(groups) + [None] * (group_id == None or group_id == "Ungrouped"):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _("Ungrouped"))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" % (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)
def recent_syncing(request, org_id=None, max_zones=20, chunk_size=100, ndays=None):
    ndays = ndays or int(request.GET.get("days", 7))

    ss = SyncSession.objects \
        .annotate( \
            ndevices=Count("client_device__devicezone__zone__id", distinct=True), \
        ) \
        .filter(models_uploaded__gt=0, timestamp__gt=F("timestamp") - timedelta(days=ndays)) \
        .order_by("-timestamp") \
        .values(
            "client_device__devicezone__zone__name", "client_device__devicezone__zone__id", "client_device__devicezone__zone__organization__id",
            "ndevices", "timestamp", "models_uploaded", "client_device__name", "client_device__id", "client_version", "client_os", "client_device__devicemetadata__is_demo_device",
        )

    # Apparently I can't group by zone. So, will have to do manually
    zones = OrderedDict()
    cur_chunk = 0
    while len(zones) < max_zones and cur_chunk < ss.count():
        for session in ss[cur_chunk:cur_chunk + chunk_size]:
            if len(zones) >= max_zones:
                break
            zone_id = session["client_device__devicezone__zone__id"]
            if zone_id in zones:
                zones[zone_id]["nsessions"] += 1
                zones[zone_id]["nuploaded"] += session["models_uploaded"]
                zones[zone_id]["device"]["is_demo_device"] = zones[zone_id]["device"]["is_demo_device"] or session["client_device__devicemetadata__is_demo_device"]
            else:
                zones[zone_id] = {
                    "nsessions": 1,
                    "last_synced": session["timestamp"],
                    "nuploaded": session["models_uploaded"],
                    "name": session["client_device__devicezone__zone__name"],
                    "id": session["client_device__devicezone__zone__id"],
                    "organization": {
                        "id": session["client_device__devicezone__zone__organization__id"],
                    },
                    "device": {
                        "id": session["client_device__id"] or "ben",
                        "name": session["client_device__name"],
                        "os": session["client_os"],
                        "version": session["client_version"],
                        "is_demo_device": session["client_device__devicemetadata__is_demo_device"],
                    },
                }
        cur_chunk += chunk_size

    return {
        "days": ndays,
        "zones": zones,
    }
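# The while loop above walks the session queryset in slices of chunk_size, stopping once
# max_zones distinct zones have been collected. The same control flow over a plain list,
# with invented zone ids:
sessions = [{"zone": z % 5, "uploaded": 1} for z in range(100)]
zones, max_zones, chunk_size, cur_chunk = {}, 3, 10, 0
while len(zones) < max_zones and cur_chunk < len(sessions):
    for s in sessions[cur_chunk:cur_chunk + chunk_size]:
        if len(zones) >= max_zones:
            break
        zones.setdefault(s["zone"], 0)
        zones[s["zone"]] += s["uploaded"]
    cur_chunk += chunk_size
assert len(zones) == 3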
def sorted_dict(d, key=lambda t: t[1], reverse=True, **kwargs): return OrderedDict(sorted(d.items(), key=key, reverse=reverse, **kwargs))
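# Usage sketch for sorted_dict() above: by default entries are ordered by value, largest
# first; pass a different key (and reverse=False) to sort by dictionary key instead.
counts = {"videos": 3, "exercises": 7, "logins": 5}
assert list(sorted_dict(counts).items()) == [("exercises", 7), ("logins", 5), ("videos", 3)]
assert list(sorted_dict(counts, key=lambda t: t[0], reverse=False).keys()) == ["exercises", "logins", "videos"]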
def zone_management(request, zone_id="None"): context = control_panel_context(request, zone_id=zone_id) own_device = Device.get_own_device() if not context["zone"] and (zone_id != "None" or own_device.get_zone() or settings.CENTRAL_SERVER): raise Http404( ) # on distributed server, we can make due if they're not registered. # Denote the zone as headless or not if context["zone"]: is_headless_zone = re.search(r'Zone for public key ', context["zone"].name) else: is_headless_zone = False # Accumulate device data device_data = OrderedDict() if context["zone"]: devices = Device.objects.filter(devicezone__zone=context["zone"]) else: devices = Device.objects.filter(devicemetadata__is_own_device=True) for device in list( devices.order_by("devicemetadata__is_demo_device", "name")): user_activity = UserLogSummary.objects.filter(device=device) sync_sessions = SyncSession.objects.filter(client_device=device) if not settings.CENTRAL_SERVER and device.id != own_device.id: # Non-local sync sessions unavailable on distributed server sync_sessions = None exercise_activity = ExerciseLog.objects.filter(signed_by=device) device_data[device.id] = { "name": device.name or device.id, "num_times_synced": sync_sessions.count() if sync_sessions is not None else None, "last_time_synced": sync_sessions.aggregate(Max("timestamp"))["timestamp__max"] if sync_sessions is not None else None, "is_demo_device": device.get_metadata().is_demo_device, "is_own_device": device.get_metadata().is_own_device and not settings.CENTRAL_SERVER, "last_time_used": exercise_activity.order_by("-completion_timestamp")[0:1] if user_activity.count() == 0 else user_activity.order_by( "-last_activity_datetime", "-end_datetime")[0], "counter": device.get_counter_position(), "is_registered": device.is_registered(), } # Accumulate facility data facility_data = OrderedDict() if context["zone"]: facilities = Facility.objects.by_zone(context["zone"]) else: facilities = Facility.objects.all() for facility in list(facilities.order_by("name")): user_activity = UserLogSummary.objects.filter(user__facility=facility) exercise_activity = ExerciseLog.objects.filter(user__facility=facility) facility_data[facility.id] = { "name": facility.name, "num_users": FacilityUser.objects.filter(facility=facility).count(), "num_groups": FacilityGroup.objects.filter(facility=facility).count(), "id": facility.id, "last_time_used": exercise_activity.order_by("-completion_timestamp")[0:1] if user_activity.count() == 0 else user_activity.order_by( "-last_activity_datetime", "-end_datetime")[0], "is_deletable": facility.is_deletable(), } context.update({ "is_headless_zone": is_headless_zone, "facilities": facility_data, "devices": device_data, "upload_form": UploadFileForm(), "own_device_is_trusted": Device.get_own_device().get_metadata().is_trusted, }) context.update(set_clock_context(request)) return context
def show_deployment_cms(request):
    """
    This does 3 queries:
    * Facilities, organized by organization.
    * Devices, organized by organization.
    * Organizations, organized by organization.

    It then combines results from these 3 queries to create a list of:
    * All Users that have facilities, have devices but no facilities, and have no devices.
    """

    # Query 1: Organizations
    deployment_data = OrderedDict([(org["id"], {
        "org_name": org["name"],
        "owner": org["owner__username"],
        "total_users": 0,
        "sync_sessions": 0,
        "models_synced": 0,
    }) for org in list(Organization.objects.values("id", "name", "owner__username"))])

    # Query 2: Organizations with users
    for org in list(Organization.objects.values("id", "users__username", "users__first_name", "users__last_name")):
        org_id = org["id"]
        deployment_data[org_id]["users"] = deployment_data[org_id].get("users", {})
        deployment_data[org_id]["users"][org["users__username"]] = {
            "first_name": org["users__first_name"],
            "last_name": org["users__last_name"],
            "email": org["users__username"],
        }

    # Query 3: Organizations with devices
    device_data = DeviceZone.objects \
        .annotate( \
            n_sessions=Count("device__client_sessions"), \
            n_models=Sum("device__client_sessions__models_uploaded")) \
        .values("n_sessions", "n_models", "device__id", "device__name", "zone__id", "zone__name", "zone__organization__id") \
        .order_by("zone__name", "-n_models", "-n_sessions")

    for devzone in list(device_data):
        org_id = devzone["zone__organization__id"]
        if not org_id:
            continue

        deployment_data[org_id]["devices"] = deployment_data[org_id].get("devices", {})
        deployment_data[org_id]["devices"][devzone["device__id"]] = {
            "id": devzone["device__id"],
            "name": devzone["device__name"],
            "zone_name": devzone["zone__name"],
            "zone_id": devzone["zone__id"],
            "models_synced": devzone["n_models"],
            "sync_sessions": devzone["n_sessions"],
        }
        deployment_data[org_id]["models_synced"] += devzone["n_models"] or 0
        deployment_data[org_id]["sync_sessions"] += devzone["n_sessions"] or 0

    # Query 4: Organizations with facilities
    facilities_by_org = list(Facility.objects \
        .filter(signed_by__devicemetadata__is_demo_device=False) \
        .annotate( \
            n_actual_users=Count("facilityuser")) \
        .values( \
            "n_actual_users", \
            "name", "address", \
            "latitude", "longitude", \
            "contact_email", "contact_name", \
            "user_count", \
            "zone_fallback__organization__id", \
            "signed_by__devicezone__zone__organization__id",) \
        .order_by("-n_actual_users"))

    for fac in list(facilities_by_org):
        org_id = fac["signed_by__devicezone__zone__organization__id"] or fac["zone_fallback__organization__id"]
        deployment_data[org_id]["facilities"] = deployment_data[org_id].get("facilities", {})
        deployment_data[org_id]["facilities"][fac["name"]] = fac
        deployment_data[org_id]["total_users"] += fac["n_actual_users"] or 0

    # Combine all data into a single data store.
    sort_fn = lambda dep: (dep["total_users"], dep["models_synced"], dep["sync_sessions"])
    paged_data, page_urls = paginate_data(request, sorted(deployment_data.values(), key=sort_fn, reverse=True), page=int(request.GET.get("cur_page", 1)), per_page=int(request.GET.get("per_page", 25)))

    return {
        "pages": paged_data,
        "page_urls": page_urls,
        "title": _("Deployments CMS"),
    }
def _get_user_usage_data(users, groups=None, period_start=None, period_end=None, group_id=None):
    """
    Returns facility user data, within the given date range.
    """
    groups = groups or set([user.group for user in users])

    # compute period start and end
    # Now compute stats, based on queried data
    num_exercises = len(get_node_cache('Exercise'))
    user_data = OrderedDict()
    group_data = OrderedDict()

    # Make queries efficiently
    exercise_logs = ExerciseLog.objects.filter(user__in=users, complete=True)
    video_logs = VideoLog.objects.filter(user__in=users, total_seconds_watched__gt=0)
    login_logs = UserLogSummary.objects.filter(user__in=users)

    # filter results
    login_logs = login_logs.filter(total_seconds__gt=0)
    if period_start:
        exercise_logs = exercise_logs.filter(completion_timestamp__gte=period_start)
    if period_end:
        # MUST: Fix the midnight bug where period end covers up to the prior day only because
        # period end is datetime(year, month, day, hour=0, minute=0), meaning midnight of previous day.
        # Example:
        #     If period_end == '2014-12-01', we cannot include the records dated '2014-12-01 09:30'.
        #     So to fix this, we change it to '2014-12-01 23:59.999999'.
        period_end = dateutil.parser.parse(period_end)
        period_end = period_end + dateutil.relativedelta.relativedelta(days=+1, microseconds=-1)
        exercise_logs = exercise_logs.filter(completion_timestamp__lte=period_end)
    if period_start and period_end:
        exercise_logs = exercise_logs.filter(Q(completion_timestamp__gte=period_start) & Q(completion_timestamp__lte=period_end))
        q1 = Q(completion_timestamp__isnull=False) & \
            Q(completion_timestamp__gte=period_start) & \
            Q(completion_timestamp__lte=period_end)
        q2 = Q(completion_timestamp__isnull=True)
        video_logs = video_logs.filter(q1 | q2)
        login_q1 = Q(start_datetime__gte=period_start) & Q(start_datetime__lte=period_end) & \
            Q(end_datetime__gte=period_start) & Q(end_datetime__lte=period_end)
        login_logs = login_logs.filter(login_q1)

    # Force results in a single query
    exercise_logs = list(exercise_logs.values("exercise_id", "user__pk"))
    video_logs = list(video_logs.values("video_id", "user__pk"))
    login_logs = list(login_logs.values("activity_type", "total_seconds", "user__pk"))

    for user in users:
        user_data[user.pk] = OrderedDict()
        user_data[user.pk]["id"] = user.pk
        user_data[user.pk]["first_name"] = user.first_name
        user_data[user.pk]["last_name"] = user.last_name
        user_data[user.pk]["username"] = user.username
        user_data[user.pk]["group"] = user.group
        user_data[user.pk]["total_report_views"] = 0  # report_stats["count__sum"] or 0
        user_data[user.pk]["total_logins"] = 0  # login_stats["count__sum"] or 0
        user_data[user.pk]["total_hours"] = 0  # login_stats["total_seconds__sum"] or 0)/3600.
        user_data[user.pk]["total_exercises"] = 0
        user_data[user.pk]["pct_mastery"] = 0.
        user_data[user.pk]["exercises_mastered"] = []
        user_data[user.pk]["total_videos"] = 0
        user_data[user.pk]["videos_watched"] = []

    for elog in exercise_logs:
        user_data[elog["user__pk"]]["total_exercises"] += 1
        user_data[elog["user__pk"]]["pct_mastery"] += 1. / num_exercises
        user_data[elog["user__pk"]]["exercises_mastered"].append(elog["exercise_id"])

    for vlog in video_logs:
        user_data[vlog["user__pk"]]["total_videos"] += 1
        user_data[vlog["user__pk"]]["videos_watched"].append(vlog["video_id"])

    for llog in login_logs:
        if llog["activity_type"] == UserLog.get_activity_int("coachreport"):
            user_data[llog["user__pk"]]["total_report_views"] += 1
        elif llog["activity_type"] == UserLog.get_activity_int("login"):
            user_data[llog["user__pk"]]["total_hours"] += (llog["total_seconds"]) / 3600.
            user_data[llog["user__pk"]]["total_logins"] += 1

    for group in list(groups) + [None] * (group_id == None or group_id == "Ungrouped"):  # None for ungrouped, if no group_id passed.
        group_pk = getattr(group, "pk", None)
        group_name = getattr(group, "name", _("Ungrouped"))
        group_data[group_pk] = {
            "id": group_pk,
            "name": group_name,
            "total_logins": 0,
            "total_hours": 0,
            "total_users": 0,
            "total_videos": 0,
            "total_exercises": 0,
            "pct_mastery": 0,
        }

    # Add group data.  Allow a fake group "Ungrouped"
    for user in users:
        group_pk = getattr(user.group, "pk", None)
        if group_pk not in group_data:
            logging.error("User %s still in nonexistent group %s!" % (user.id, group_pk))
            continue
        group_data[group_pk]["total_users"] += 1
        group_data[group_pk]["total_logins"] += user_data[user.pk]["total_logins"]
        group_data[group_pk]["total_hours"] += user_data[user.pk]["total_hours"]
        group_data[group_pk]["total_videos"] += user_data[user.pk]["total_videos"]
        group_data[group_pk]["total_exercises"] += user_data[user.pk]["total_exercises"]
        total_mastery_so_far = (group_data[group_pk]["pct_mastery"] * (group_data[group_pk]["total_users"] - 1) + user_data[user.pk]["pct_mastery"])
        group_data[group_pk]["pct_mastery"] = total_mastery_so_far / group_data[group_pk]["total_users"]

    if len(group_data) == 1 and group_data.has_key(None):
        if not group_data[None]["total_users"]:
            del group_data[None]

    return (user_data, group_data)