def add_syncing_models(models):
    """When sync is run, these models will be sync'd"""
    for model in models:
        if model in _syncing_models:
            logging.warn("We are already syncing model %s" % str(model))
        else:
            _syncing_models.append(model)
def download_kmap_icons(knowledge_map):
    for key, value in knowledge_map["topics"].items():
        # Note: id here is retrieved from knowledge_map, so we're OK
        # that we blew away ID in the topic tree earlier.
        if "icon_url" not in value:
            logging.warn("No icon URL for %s" % key)

        value["icon_url"] = iconfilepath + value["id"] + iconextension
        knowledge_map["topics"][key] = value

        out_path = data_path + "../" + value["icon_url"]
        if os.path.exists(out_path) and not force_icons:
            continue

        icon_khan_url = "http://www.khanacademy.org" + value["icon_url"]
        sys.stdout.write("Downloading icon %s from %s..." % (value["id"], icon_khan_url))
        sys.stdout.flush()
        try:
            icon = requests.get(icon_khan_url)
        except Exception as e:
            sys.stdout.write("\n")  # complete the "downloading" output
            sys.stderr.write("Failed to download %-80s: %s\n" % (icon_khan_url, e))
            continue

        if icon.status_code == 200:
            # Write the image bytes in binary mode; the deprecated Python 2
            # file(..., "w") call risked corrupting binary data.
            with open(out_path, "wb") as iconfile:
                iconfile.write(icon.content)
        else:
            sys.stdout.write(" [NOT FOUND]")
            value["icon_url"] = iconfilepath + defaulticon + iconextension
        sys.stdout.write(" done.\n")  # complete the "downloading" output
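The download loop above calls requests.get with no timeout, so one stalled connection can hang the whole icon crawl. A minimal hardened variant of the fetch step, as a sketch (fetch_icon is a hypothetical helper, not in the source; the 10-second timeout is an arbitrary choice):

import requests


def fetch_icon(url, timeout=10):
    """Download one icon, or return None on any network/HTTP failure.

    Bounds the wait with a timeout and treats non-2xx responses as
    failures via raise_for_status().
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx into exceptions
        return response.content
    except requests.RequestException:
        return None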
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs"""
    lang_codes = lang_codes or listdir(LOCALE_ROOT)
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))
    ensure_dir(settings.LANGUAGE_PACK_ROOT)

    for lang in lang_codes:
        lang_locale_path = os.path.join(LOCALE_ROOT, lang)
        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang)
            continue  # actually skip, as the message says
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists: %s" % lang)
            continue

        # Create a zipfile for this language
        zip_path = os.path.join(settings.LANGUAGE_PACK_ROOT, version.VERSION)
        ensure_dir(zip_path)
        z = zipfile.ZipFile(os.path.join(zip_path, "%s.zip" % convert_language_code_format(lang)), 'w')

        # Get every single file in the directory and zip it up
        for metadata_file in glob.glob('%s/*.json' % lang_locale_path):
            z.write(os.path.join(lang_locale_path, metadata_file), arcname=os.path.basename(metadata_file))
        for mo_file in glob.glob('%s/LC_MESSAGES/*.mo' % lang_locale_path):
            z.write(os.path.join(lang_locale_path, mo_file), arcname=os.path.join("LC_MESSAGES", os.path.basename(mo_file)))
        for srt_file in glob.glob('%s/subtitles/*.srt' % lang_locale_path):
            z.write(os.path.join(lang_locale_path, srt_file), arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        z.close()

    logging.info("Done.")
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.
    So instead, we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove. Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
def verify_inner_zip(self, zip_file):
    """
    Extract contents of outer zip, verify the inner zip
    """
    zip = ZipFile(zip_file, "r")
    nfiles = len(zip.namelist())
    for fi, afile in enumerate(zip.namelist()):
        zip.extract(afile, path=self.working_dir)

    self.signature_file = os.path.join(self.working_dir, Command.signature_filename)
    self.inner_zip_file = os.path.join(self.working_dir, Command.inner_zip_filename)

    central_server = Device.get_central_server()
    lines = open(self.signature_file, "r").read().split("\n")
    chunk_size = int(lines.pop(0))
    if not central_server:
        logging.warn("No central server device object found; trusting zip file because you asked me to...")
    elif central_server.key.verify_large_file(self.inner_zip_file, signature=lines, chunk_size=chunk_size):
        logging.info("Verified file!")
    else:
        raise Exception("Failed to verify inner zip file.")
    return self.inner_zip_file
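verify_large_file is a project-specific signing API; the first line of the signature file carries a chunk size, which suggests the inner zip is verified in fixed-size chunks. As a rough sketch of that chunked traversal only (plain SHA-256 digests stand in here for the project's per-chunk signature check):

import hashlib


def chunked_digests(path, chunk_size):
    """Process a file in fixed-size chunks, returning one hex digest per chunk.

    Sketches the chunking pattern only; the real verification checks a
    cryptographic signature per chunk, not a bare hash.
    """
    digests = []
    with open(path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            digests.append(hashlib.sha256(chunk).hexdigest())
    return digests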
def recurse_nodes_to_extract_knowledge_map(node, node_cache):
    """
    Internal function for recursing the topic tree and building the knowledge map.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    assert node["kind"] == "Topic"

    if node.get("in_knowledge_map", None):
        if node["slug"] not in knowledge_map["topics"]:
            logging.debug("Not in knowledge map: %s" % node["slug"])
            node["in_knowledge_map"] = False
            for cached_node in node_cache["Topic"][node["slug"]]:  # don't shadow `node`
                cached_node["in_knowledge_map"] = False

        knowledge_topics[node["slug"]] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

        if not knowledge_topics[node["slug"]]:
            sys.stderr.write("Removing topic from topic tree: no exercises. %s" % node["slug"])
            del knowledge_topics[node["slug"]]
            del knowledge_map["topics"][node["slug"]]
            node["in_knowledge_map"] = False
            for cached_node in node_cache["Topic"][node["slug"]]:
                cached_node["in_knowledge_map"] = False
    else:
        if node["slug"] in knowledge_map["topics"]:
            sys.stderr.write("Removing topic from topic tree; does not belong. '%s'" % node["slug"])
            logging.warn("Removing from knowledge map: %s" % node["slug"])
            del knowledge_map["topics"][node["slug"]]

    for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
        recurse_nodes_to_extract_knowledge_map(child, node_cache)
def recurse_nodes_to_clean_related_videos(node):
    """
    Internal function for recursing the topic tree and marking related exercises.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    def get_video_node(video_slug, node):
        if node["kind"] == "Topic":
            for child in node.get("children", []):
                video_node = get_video_node(video_slug, child)
                if video_node:
                    return video_node
        elif node["kind"] == "Video" and node["slug"] == video_slug:
            return node
        return None

    if node["kind"] == "Exercise":
        videos_to_delete = []
        for vi, video_slug in enumerate(node["related_video_readable_ids"]):
            if not get_video_node(video_slug, topictree):
                videos_to_delete.append(vi)
        for vi in reversed(videos_to_delete):
            logging.warn("Deleting unknown video %s" % node["related_video_readable_ids"][vi])
            del node["related_video_readable_ids"][vi]

    for child in node.get("children", []):
        recurse_nodes_to_clean_related_videos(child)
def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
    """Helper function to update an existing user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not update_datetime:  # must be done outside the function header (else becomes static)
        update_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # How could you start after you updated??
        if cur_log.start_datetime > update_datetime:
            raise ValidationError("Update time must always be later than the login time.")
    else:
        # No unstopped starts. Start should have been called first!
        logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
        cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)

    logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
    cur_log.last_active_datetime = update_datetime
    cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
    if not suppress_save:
        cur_log.save()
    return cur_log
def generate_zipped_srts(lang_codes_to_update, download_path=DOWNLOAD_PATH):

    # Create media directory if it doesn't yet exist
    ensure_dir(settings.MEDIA_ROOT)
    zip_path = settings.MEDIA_ROOT + "subtitles/"
    ensure_dir(zip_path)

    lang_codes_to_update = lang_codes_to_update or os.listdir(download_path)
    for lang_code in lang_codes_to_update:
        srt_dir = os.path.join(download_path, lang_code, "subtitles")
        zip_file = os.path.join(zip_path, "%s_subtitles.zip" % lang_code)

        # Remove any old version (as we may not re-create)
        if os.path.exists(zip_file):
            os.remove(zip_file)

        if not os.path.exists(srt_dir):
            logging.warn("No srt directory for %s; skipping." % lang_code)
            continue

        srts = glob.glob(os.path.join(srt_dir, "*.srt"))
        if len(srts) == 0:
            logging.warn("No srts for %s; skipping." % lang_code)
            continue

        logging.info("Zipping up a new pack for language code: %s" % lang_code)
        zf = zipfile.ZipFile(zip_file, 'w')
        for f in srts:
            zf.write(f, arcname=os.path.basename(f))
        zf.close()
def recurse_nodes_to_clean_related_videos(node):
    """
    Internal function for recursing the topic tree and marking related exercises.
    Requires rebranding of metadata done by recurse_nodes function.
    """
    def get_video_node(video_slug, node):
        if node["kind"] == "Topic":
            for child in node.get("children", []):
                video_node = get_video_node(video_slug, child)
                if video_node:
                    return video_node
        elif node["kind"] == "Video" and node["slug"] == video_slug:
            return node
        return None

    if node["kind"] == "Exercise":
        videos_to_delete = []
        for vi, video_slug in enumerate(node["related_video_slugs"]):
            if not get_video_node(video_slug, topictree):
                videos_to_delete.append(vi)
        for vi in reversed(videos_to_delete):
            logging.warn("Deleting unknown video %s" % node["related_video_slugs"][vi])
            del node["related_video_slugs"][vi]

    for child in node.get("children", []):
        recurse_nodes_to_clean_related_videos(child)
def begin_user_activity(cls, user, activity_type="login", start_datetime=None):
    """Helper function to create a user activity log entry."""

    # Do nothing if the max # of records is zero or None
    # (i.e. this functionality is disabled)
    if not settings.USER_LOG_MAX_RECORDS:
        return

    assert user is not None, "A valid user must always be specified."
    if not start_datetime:  # must be done outside the function header (else becomes static)
        start_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_user_log_entry = get_object_or_None(cls, user=user, end_datetime=None)

    logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))

    # Seems we're logging in without logging out of the previous.
    # Best thing to do is simulate a login
    # at the previous last update time.
    #
    # Note: this can be a recursive call
    if cur_user_log_entry:
        logging.warn("%s: END activity on a begin @ %s" % (user.username, start_datetime))
        cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_user_log_entry.last_active_datetime)

    # Create a new entry
    cur_user_log_entry = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime)
    cur_user_log_entry.save()

    return cur_user_log_entry
def begin_user_activity(cls, user, activity_type="login", start_datetime=None, language=None, suppress_save=False):
    """Helper function to create a user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not start_datetime:  # must be done outside the function header (else becomes static)
        start_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # Seems we're logging in without logging out of the previous.
        # Best thing to do is simulate a login
        # at the previous last update time.
        #
        # Note: this can be a recursive call
        logging.warn("%s: had to END activity on a begin(%d) @ %s" % (user.username, activity_type, start_datetime))
        # Don't mark current language when closing an old one
        cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_log.last_active_datetime)  # can't suppress save
        cur_log = None

    # Create a new entry
    logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))
    cur_log = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime, language=language)
    if not suppress_save:
        cur_log.save()

    return cur_log
def end_user_activity(cls, user, activity_type="login", end_datetime=None, suppress_save=False):  # don't accept language--we're just closing previous activity.
    """Helper function to complete an existing user activity log entry."""

    # Do nothing if the max # of records is zero
    # (i.e. this functionality is disabled)
    if not cls.is_enabled():
        return

    if not user:
        raise ValidationError("A valid user must always be specified.")
    if not end_datetime:  # must be done outside the function header (else becomes static)
        end_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
    if cur_log:
        # How could you start after you ended??
        if cur_log.start_datetime > end_datetime:
            raise ValidationError("Update time must always be later than the login time.")
    else:
        # No unstopped starts. Start should have been called first!
        logging.warn("%s: Had to BEGIN a user log entry, but ENDING(%d)! @ %s" % (user.username, activity_type, end_datetime))
        cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime, suppress_save=True)

    logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
    cur_log.end_datetime = end_datetime
    if not suppress_save:
        cur_log.save()  # total-seconds will be computed here.
    return cur_log
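Taken together, begin_user_activity, update_user_activity, and end_user_activity maintain one invariant: at most one open log per user and activity type, and each call repairs a missing begin or end before proceeding. A toy in-memory sketch of that invariant, independent of the Django models (all names here are hypothetical):

from datetime import datetime


class ToyActivityLog(object):
    """Minimal stand-in for the begin/update/end invariant."""

    open_logs = {}  # user -> {"start": ..., "last_active": ...}

    @classmethod
    def begin(cls, user, when=None):
        when = when or datetime.now()
        if user in cls.open_logs:
            # Simulate a logout at the previous last-active time.
            cls.end(user, when=cls.open_logs[user]["last_active"])
        cls.open_logs[user] = {"start": when, "last_active": when}

    @classmethod
    def update(cls, user, when=None):
        when = when or datetime.now()
        if user not in cls.open_logs:
            cls.begin(user, when=when)  # repair a missing begin
        cls.open_logs[user]["last_active"] = when

    @classmethod
    def end(cls, user, when=None):
        when = when or datetime.now()
        if user not in cls.open_logs:
            cls.begin(user, when=when)  # repair a missing begin
        entry = cls.open_logs.pop(user)
        entry["end"] = when
        return entry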
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs

    converts all into ietf
    """
    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)
    lang_codes = [lcode_to_ietf(lc) for lc in lang_codes]
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    for lang_code_ietf in lang_codes:
        lang_code_django = lcode_to_django_dir(lang_code_ietf)
        lang_locale_path = os.path.join(LOCALE_ROOT, lang_code_django)

        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang_code_django)
            continue  # actually skip, as the message says
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists where a directory was expected: %s" % lang_code_django)
            continue

        # Create a zipfile for this language
        zip_filepath = get_language_pack_filepath(lang_code_ietf)
        ensure_dir(os.path.dirname(zip_filepath))
        logging.info("Creating zip file in %s" % zip_filepath)
        z = zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED)

        # Get every single file in the directory and zip it up
        for metadata_file in glob.glob('%s/*.json' % lang_locale_path):
            z.write(os.path.join(lang_locale_path, metadata_file), arcname=os.path.basename(metadata_file))

        srt_dirpath = get_srt_path(lang_code_django)
        for srt_file in glob.glob(os.path.join(srt_dirpath, "*.srt")):
            z.write(srt_file, arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        z.close()

    logging.info("Done.")
def get_cache_key(path=None, url_name=None, cache=None, failure_ok=False):
    """Call into Django to retrieve a cache key for the given url, or given url name

    NOTE: ONLY RETURNS CACHE_KEY IF THE CACHE_ITEM HAS BEEN CREATED ELSEWHERE!!!"""

    assert (path or url_name) and not (path and url_name), "Must have path or url_name parameter, but not both"

    if not cache:
        cache = get_web_cache()

    request = HttpRequest()
    request.path = path or reverse(url_name)
    request.session = {settings.LANGUAGE_COOKIE_NAME: translation.get_language()}

    cache_key = django_get_cache_key(request, cache=cache)  # use the cache resolved above; the parameter was previously ignored
    if not cache_key and not failure_ok:
        logging.warn("The cache item does not exist, and so could not be retrieved (path=%s)." % request.path)

    return cache_key
def move_exercises(lang_code):
    lang_pack_location = os.path.join(LOCALE_ROOT, lang_code)
    src_exercise_dir = os.path.join(lang_pack_location, "exercises")
    dest_exercise_dir = get_localized_exercise_dirpath(lang_code, is_central_server=False)

    if not os.path.exists(src_exercise_dir):
        logging.warn("Could not find downloaded exercises; skipping: %s" % src_exercise_dir)
    else:
        # Move over one at a time, to combine with any other resources that were there before.
        ensure_dir(dest_exercise_dir)
        all_exercise_files = glob.glob(os.path.join(src_exercise_dir, "*.html"))
        logging.info("Moving %d downloaded exercises to %s" % (len(all_exercise_files), dest_exercise_dir))

        for exercise_file in all_exercise_files:
            shutil.move(exercise_file, os.path.join(dest_exercise_dir, os.path.basename(exercise_file)))

        logging.debug("Removing empty directory")
        try:
            shutil.rmtree(src_exercise_dir)
        except Exception as e:
            logging.error("Error removing exercise directory (%s): %s" % (src_exercise_dir, e))
def generate_zipped_srts(lang_codes_to_update, download_path):

    # Create media directory if it doesn't yet exist
    ensure_dir(settings.MEDIA_ROOT)
    zip_path = settings.MEDIA_ROOT + "subtitles/"
    ensure_dir(zip_path)

    lang_codes_to_update = lang_codes_to_update or os.listdir(download_path)
    for lang_code in lang_codes_to_update:
        srt_dir = os.path.join(download_path, lang_code, "subtitles")
        zip_file = os.path.join(zip_path, "%s_subtitles.zip" % lang_code)

        # Remove any old version (as we may not re-create)
        if os.path.exists(zip_file):
            os.remove(zip_file)

        if not os.path.exists(srt_dir):
            logging.warn("No srt directory for %s; skipping." % lang_code)
            continue

        srts = glob.glob(os.path.join(srt_dir, "*.srt"))
        if len(srts) == 0:
            logging.warn("No srts for %s; skipping." % lang_code)
            continue

        logging.info("Zipping up a new pack for language code: %s" % lang_code)
        zf = zipfile.ZipFile(zip_file, 'w')
        for f in srts:
            zf.write(f, arcname=os.path.basename(f))
        zf.close()
def end_user_activity(cls, user, activity_type="login", end_datetime=None):
    """Helper function to complete an existing user activity log entry."""

    # Do nothing if the max # of records is zero or None
    # (i.e. this functionality is disabled)
    if not settings.USER_LOG_MAX_RECORDS:
        return

    assert user is not None, "A valid user must always be specified."
    if not end_datetime:  # must be done outside the function header (else becomes static)
        end_datetime = datetime.now()
    activity_type = cls.get_activity_int(activity_type)

    cur_user_log_entry = get_object_or_None(cls, user=user, end_datetime=None)

    # No unstopped starts. Start should have been called first!
    if not cur_user_log_entry:
        logging.warn("%s: Had to create a user log entry, but STOPPING('%d')! @ %s" % (user.username, activity_type, end_datetime))
        cur_user_log_entry = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime)

    logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
    cur_user_log_entry.end_datetime = end_datetime
    cur_user_log_entry.save()  # total-seconds will be computed here.
def add_syncing_models(models):
    """When sync is run, these models will be sync'd"""

    get_foreign_key_classes = lambda m: set([field.rel.to for field in m._meta.fields if isinstance(field, ForeignKey)])

    for model in models:
        if model in _syncing_models:
            logging.warn("We are already syncing model %s" % unicode(model))
            continue

        # When we add models to be synced, we need to make sure
        # that models that depend on other models are synced AFTER
        # the model it depends on has been synced.

        # Get the dependencies of the new model
        foreign_key_classes = get_foreign_key_classes(model)

        # Find all the existing models that this new model refers to.
        class_indices = [_syncing_models.index(cls) for cls in foreign_key_classes if cls in _syncing_models]

        # Insert just after the last dependency found,
        # or at the front if no dependencies
        insert_after_idx = 1 + (max(class_indices) if class_indices else -1)

        # Before inserting, make sure that any models referencing *THIS* model
        # appear after this model.
        if [True for synmod in _syncing_models[0:insert_after_idx - 1] if model in get_foreign_key_classes(synmod)]:
            raise Exception("Dependency loop detected in syncing models; cannot proceed.")

        # Now we're ready to insert.
        _syncing_models.insert(insert_after_idx + 1, model)
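The insertion logic above is a small incremental topological sort: each model is placed after the last model it references, and a placement that would land a dependent before its dependency raises. A self-contained toy version of the same idea, with dependencies as a plain dict (all names here are hypothetical):

def insert_dependency_ordered(ordered, deps, item):
    """Insert `item` into `ordered` just after the last of its
    dependencies; deps[item] is the set of items it depends on."""
    if item in ordered:
        return
    indices = [ordered.index(d) for d in deps.get(item, set()) if d in ordered]
    insert_at = 1 + (max(indices) if indices else -1)
    # Anything already placed before the insert point must not depend on `item`.
    if any(item in deps.get(earlier, set()) for earlier in ordered[:insert_at]):
        raise Exception("Dependency loop detected; cannot proceed.")
    ordered.insert(insert_at, item)


# Example: B depends on A, C depends on B -> final order A, B, C,
# regardless of the order items are added.
order = []
deps = {"B": {"A"}, "C": {"B"}}
for m in ["C", "B", "A"]:
    insert_dependency_ordered(order, deps, m)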
def compute_one_way(cls, zone, from_device, to_device):
    """ """
    assert from_device.is_trusted() or from_device.get_zone() == zone

    # Trace back from this device to the zone-trusted device.
    chain = [{"device": from_device}]
    devices_in_chain = set([])
    for i in range(cls.MAX_CHAIN_LENGTH):  # max chain size: 1000 (avoids infinite loops)
        # We're going to traverse the chain backwards, until we get to
        # the zone_owner (to_device), or a trusted device.
        cur_link = chain[-1]

        # Get a devicezone and/or zone invitation for the current device.
        cur_link["zone_invitation"] = get_object_or_None(ZoneInvitation, used_by=cur_link["device"].signed_by, revoked=False)
        if cur_link["zone_invitation"]:
            cur_link["zone_invitation"].verify()  # make sure it's a valid invitation
        cur_link["device_zone"] = get_object_or_None(DeviceZone, device=cur_link["device"].signed_by, revoked=False)

        # Determine the next step. Three terminal steps, one continuing step
        if not cur_link["zone_invitation"] and not cur_link["device_zone"]:
            # A break in the chain. No connection between the device and the zone.
            break

        elif cur_link["device"] == to_device or cur_link["device"].is_trusted():
            logging.debug("Found end of chain!")
            break

        next_device = getattr(cur_link["zone_invitation"], "invited_by", None)
        next_device = next_device or getattr(cur_link["device_zone"], "signed_by")
        if next_device in devices_in_chain:
            logging.warn("loop detected.")
            break
        else:
            # So far, we're OK--keep looking for the (valid) end of the chain
            assert next_device.is_trusted() or next_device.get_zone() == zone
            devices_in_chain.add(next_device)
            chain.append({"device": next_device})

    # Validate the chain of trust to the zone zone_owner
    terminal_link = chain[-1]
    terminal_device = terminal_link["device"]
    obj = terminal_link["zone_invitation"] or terminal_link["device_zone"]
    if obj and not (terminal_device.is_creator(obj) or terminal_device.is_trusted()):
        logging.warn("Could not verify chain of trust.")

    return chain

# No device data gets "synced" through the same sync mechanism as data--it is only synced
# through the special hand-shaking mechanism
def update_all_distributed_callback(request):
    """ """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})
def recurse_nodes(node, path=""):
    """
    Internal function for recursing over the topic tree, marking relevant metadata,
    and removing undesired attributes and children.
    """

    kind = node["kind"]

    # Only keep key data we can use
    for key in node.keys():
        if key not in attribute_whitelists[kind]:
            del node[key]

    # Fix up data
    if slug_key[kind] not in node:
        logging.warn("Could not find expected slug key (%s) on node: %s" % (slug_key[kind], node))
        node[slug_key[kind]] = node["id"]  # put it SOMEWHERE.
    node["slug"] = node[slug_key[kind]] if node[slug_key[kind]] != "root" else ""
    node["id"] = node["slug"]  # these used to be the same; now not. Easier if they stay the same (issue #233)
    node["path"] = path + topic_tools.kind_slugs[kind] + node["slug"] + "/"
    node["title"] = node[title_key[kind]]

    kinds = set([kind])

    # For each exercise, need to get related videos
    if kind == "Exercise":
        related_video_readable_ids = [vid["readable_id"] for vid in download_khan_data("http://www.khanacademy.org/api/v1/exercises/%s/videos" % node["name"], node["name"] + ".json")]
        node["related_video_readable_ids"] = related_video_readable_ids
        exercise = {
            "slug": node[slug_key[kind]],
            "title": node[title_key[kind]],
            "path": node["path"],
        }
        for video_id in node.get("related_video_readable_ids", []):
            related_exercise[video_id] = exercise

    # Recurse through children, remove any blacklisted items
    children_to_delete = []
    for i, child in enumerate(node.get("children", [])):
        child_kind = child.get("kind", None)
        if child_kind in kind_blacklist:
            children_to_delete.append(i)
            continue
        if child[slug_key[child_kind]] in slug_blacklist:
            children_to_delete.append(i)
            continue
        kinds = kinds.union(recurse_nodes(child, node["path"]))
    for i in reversed(children_to_delete):
        del node["children"][i]

    # Mark on topics whether they contain Videos, Exercises, or both
    if kind == "Topic":
        node["contains"] = list(kinds)

    return kinds
def get_shell_script(self, cmd_glob, location=None):
    if not location:
        location = self.working_dir + '/kalite'
    cmd_glob += system_script_extension()

    # Find the command
    cmd = glob.glob(location + "/" + cmd_glob)
    if len(cmd) > 1:
        raise CommandError("Multiple commands found (%s)? Should choose based on platform, but ... how to do in Python? Contact us to implement this!" % cmd_glob)
    elif len(cmd) == 1:
        cmd = cmd[0]
    else:
        cmd = None
        logging.warn("No command found: (%s in %s)" % (cmd_glob, location))
    return cmd
def get_file2lang_map(force=False):
    """Map from youtube_id to language code"""
    global YT2LANG_MAP
    if YT2LANG_MAP is None or force:
        YT2LANG_MAP = {}
        for lang_code, dic in get_dubbed_video_map().iteritems():
            for dubbed_youtube_id in dic.values():
                if dubbed_youtube_id in YT2LANG_MAP:
                    # Sanity check, but must be failsafe, since we don't control these data
                    if YT2LANG_MAP[dubbed_youtube_id] == lang_code:
                        logging.warn("Duplicate entry found in %s language map for dubbed video %s" % (lang_code, dubbed_youtube_id))
                    else:
                        logging.error("Conflicting entry found in language map for video %s; overwriting previous entry of %s to %s." % (dubbed_youtube_id, YT2LANG_MAP[dubbed_youtube_id], lang_code))
                YT2LANG_MAP[dubbed_youtube_id] = lang_code
    return YT2LANG_MAP
def clean_orphaned_polylines(knowledge_map):
    """
    We remove some topics (without leaves); need to remove polylines associated with these topics.
    """
    all_topic_points = [(km["x"], km["y"]) for km in knowledge_map["topics"].values()]

    polylines_to_delete = []
    for li, polyline in enumerate(knowledge_map["polylines"]):
        # Flag any polyline touching a point that no longer maps to a topic.
        if any((pt["x"], pt["y"]) not in all_topic_points for pt in polyline["path"]):
            polylines_to_delete.append(li)

    logging.warn("Removing %s of %s polylines in top-level knowledge map" % (len(polylines_to_delete), len(knowledge_map["polylines"])))
    for i in reversed(polylines_to_delete):
        del knowledge_map["polylines"][i]

    return knowledge_map
def create_cache(path=None, url_name=None, cache=None, force=False):
    """Create a cache entry"""

    assert (path or url_name) and not (path and url_name), "Must have path or url_name parameter, but not both"

    if not cache:
        cache = get_web_cache()
    if not path:
        path = reverse(url_name)

    if force and has_cache_key(path=path, cache=cache):
        expire_page(path=path)
        assert not has_cache_key(path=path, cache=cache)
    if not has_cache_key(path=path, cache=cache):
        Client().get(path)
    if not has_cache_key(path=path, cache=cache):
        logging.warn("Did not create cache entry for %s" % path)
def recurse_nodes_to_delete_exercise(node):
    """
    Internal function for recursing the topic tree and removing new exercises.
    Requires rebranding of metadata done by recurse_nodes function.

    Returns a list of exercise slugs for the exercises that were deleted.
    """
    # Stop recursing when we hit leaves
    if node["kind"] != "Topic":
        return []

    slugs_deleted = []

    children_to_delete = []
    for ci, child in enumerate(node.get("children", [])):
        # Mark all unrecognized exercises for deletion
        if child["kind"] == "Exercise":
            if not os.path.exists(exercise_path % child["slug"]):
                children_to_delete.append(ci)

        # Recurse over children to delete
        elif child.get("children", None):
            slugs_deleted += recurse_nodes_to_delete_exercise(child)

            if not child.get("children", None):
                # Delete children without children (all their children were removed)
                logging.warn("Removing now-childless topic node '%s'" % child["slug"])
                children_to_delete.append(ci)
            elif not any([ch["kind"] == "Exercise" or "Exercise" in ch.get("contains", []) for ch in child["children"]]):
                # If there are no longer exercises, be honest about it
                child["contains"] = list(set(child["contains"]) - set(["Exercise"]))

    # Do the actual deletion
    for i in reversed(children_to_delete):
        logging.warn("Deleting unknown exercise %s" % node["children"][i]["slug"])
        del node["children"][i]

    return slugs_deleted
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores a key per language. Value is a dictionary between video_id and (dubbed) youtube_id
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    if settings.CENTRAL_SERVER:
                        # Never call commands that could fail from the distributed server.
                        # Always create a central server API to abstract things (see below)
                        logging.debug("Generating dubbed video mappings.")
                        call_command("generate_dubbed_video_mappings", force=force)
                    else:
                        # Generate from the spreadsheet
                        response = requests.get("http://%s/api/i18n/videos/dubbed_video_map" % (settings.CENTRAL_SERVER_HOST))
                        response.raise_for_status()
                        with open(DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                            fp.write(response.content.decode('utf-8'))  # wait until content has been confirmed before opening file.
                except Exception as e:
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            logging.debug("Adding dubbed video map entry for %s (name=%s)" % (get_langcode_map(lang_name), lang_name))
            DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
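get_dubbed_video_map and get_file2lang_map share the same lazy module-global cache pattern: build on first access (or on force=True), and cache even a failed build so later calls don't re-trigger the expensive work. A stripped-down sketch of the pattern (names hypothetical):

_EXPENSIVE_MAP = None


def _build_map():
    # Stand-in for the real work (network fetch, JSON load, ...).
    return {"video123": "es"}


def get_expensive_map(force=False):
    """Build the map on first access, then serve the cached copy.

    force=True rebuilds; a failed rebuild caches an empty dict so
    later calls don't re-run the expensive work every time.
    """
    global _EXPENSIVE_MAP
    if _EXPENSIVE_MAP is None or force:
        try:
            _EXPENSIVE_MAP = _build_map()
        except Exception:
            _EXPENSIVE_MAP = {}
    return _EXPENSIVE_MAP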
def recurse_nodes_to_remove_childless_nodes(node):
    """
    When we remove exercises, we remove dead-end topics.
    Khan just sends us dead-end topics, too.
    Let's remove those too.
    """
    children_to_delete = []
    for ci, child in enumerate(node.get("children", [])):
        # Only topics can be dead ends; skip leaves.
        if child["kind"] != "Topic":
            continue

        recurse_nodes_to_remove_childless_nodes(child)

        if not child.get("children"):
            children_to_delete.append(ci)
            logging.warn("Removing KA childless topic: %s" % child["slug"])

    for ci in reversed(children_to_delete):
        del node["children"][ci]
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while running this command.

    All srt languages must exist in the language map; missing languages will cause errors
    during command running (which can be long). This function avoids that problem by doing
    the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (LANG_LOOKUP_FILEPATH, missing_langs))
def get_new_counts(data_path, download_path, language_code):
    """Write a new dictionary of srt file counts in respective download folders"""
    language_subtitle_count = {}
    subtitles_path = "%s%s/subtitles/" % (download_path, language_code)
    lang_name = get_language_name(language_code)

    try:
        count = len(glob.glob("%s/*.srt" % subtitles_path))
        logging.info("%4d subtitles for %-20s" % (count, lang_name))
        language_subtitle_count[lang_name] = {}
        language_subtitle_count[lang_name]["count"] = count
        language_subtitle_count[lang_name]["code"] = language_code
    except LanguageNameDoesNotExist as ldne:
        logging.warn(ldne)
    except:
        logging.info("%-4s subtitles for %-20s" % ("No", lang_name))

    write_new_json(language_subtitle_count, data_path)
    update_language_list(language_subtitle_count, data_path)
def scrub_knowledge_map(knowledge_map, node_cache):
    """
    Some topics in the knowledge map, we don't keep in our topic tree / node cache.
    Eliminate them from the knowledge map here.
    """
    for slug in knowledge_map["topics"].keys():
        nodecache_node = node_cache["Topic"].get(slug)
        topictree_node = topic_tools.get_topic_by_path(node_cache["Topic"][slug]["path"], root_node=topictree)

        if not nodecache_node or not topictree_node:
            logging.warn("Removing unrecognized knowledge_map topic '%s'" % slug)
        elif not topictree_node.get("children"):
            logging.warn("Removing knowledge_map topic '%s' with no children." % slug)
        elif "Exercise" not in topictree_node.get("contains"):
            logging.warn("Removing knowledge_map topic '%s' with no exercises." % slug)
        else:
            continue

        del knowledge_map["topics"][slug]
        nodecache_node["in_knowledge_map"] = False
        topictree_node["in_knowledge_map"] = False
def get_new_counts(language_code, data_path=settings.SUBTITLES_DATA_ROOT, locale_root=LOCALE_ROOT):
    """Write a new dictionary of srt file counts in respective download folders"""
    language_subtitle_count = {}
    subtitles_path = get_srt_path(language_code)
    lang_name = get_language_name(language_code)

    try:
        count = len(glob.glob("%s/*.srt" % subtitles_path))
        logging.info("%4d subtitles for %-20s" % (count, lang_name))
        language_subtitle_count[lang_name] = {}
        language_subtitle_count[lang_name]["count"] = count
        language_subtitle_count[lang_name]["code"] = language_code
    except LanguageNameDoesNotExist as ldne:
        logging.warn(ldne)
    except:
        logging.info("%-4s subtitles for %-20s" % ("No", lang_name))

    write_new_json(language_subtitle_count, data_path)
    return language_subtitle_count[lang_name].get("count")
def recurse_nodes(node, path=""):
    """
    Internal function for recursing over the topic tree, marking relevant metadata,
    and removing undesired attributes and children.
    """

    kind = node["kind"]

    # Only keep key data we can use
    for key in node.keys():
        if key not in attribute_whitelists[kind]:
            del node[key]

    # Fix up data
    if slug_key[kind] not in node:
        logging.warn("Could not find expected slug key (%s) on node: %s" % (slug_key[kind], node))
        node[slug_key[kind]] = node["id"]  # put it SOMEWHERE.
    node["slug"] = node[slug_key[kind]] if node[slug_key[kind]] != "root" else ""
    node["id"] = node["slug"]  # these used to be the same; now not. Easier if they stay the same (issue #233)
    node["path"] = path + topic_tools.kind_slugs[kind] + node["slug"] + "/"
    node["title"] = node[title_key[kind]]

    kinds = set([kind])

    # For each exercise, need to get related videos
    # and compute base points
    if kind == "Exercise":
        # Compute base points, and paste them onto the exercise
        node["basepoints"] = ceil(7 * log(node["seconds_per_fast_problem"]))

        # Related videos
        related_video_readable_ids = [vid["readable_id"] for vid in download_khan_data("http://www.khanacademy.org/api/v1/exercises/%s/videos" % node["name"], node["name"] + ".json")]
        node["related_video_readable_ids"] = related_video_readable_ids
        exercise = {
            "slug": node[slug_key[kind]],
            "title": node[title_key[kind]],
            "path": node["path"],
        }
        for video_id in node.get("related_video_readable_ids", []):
            related_exercise[video_id] = exercise

    # Recurse through children, remove any blacklisted items
    children_to_delete = []
    for i, child in enumerate(node.get("children", [])):
        child_kind = child.get("kind", None)
        if child_kind in kind_blacklist:
            children_to_delete.append(i)
            continue
        if child[slug_key[child_kind]] in slug_blacklist:
            children_to_delete.append(i)
            continue
        if child_kind == "Video" and set(["mp4", "png"]) - set(child.get("download_urls", {}).keys()):
            # for now, since we expect the missing videos to be filled in soon,
            # we won't remove these nodes
            sys.stderr.write("WARNING: No download link for video: %s: authors='%s'\n" % (child["youtube_id"], child["author_names"]))
            # children_to_delete.append(i)
            # continue
        kinds = kinds.union(recurse_nodes(child, node["path"]))
    for i in reversed(children_to_delete):
        del node["children"][i]

    # Mark on topics whether they contain Videos, Exercises, or both
    if kind == "Topic":
        node["contains"] = list(kinds)

    return kinds
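The basepoints formula above grows logarithmically with the expected solve time: basepoints = ceil(7 * ln(seconds_per_fast_problem)). A quick check of what it yields for a few plausible timings (the sample values are illustrative, not from the source data):

from math import ceil, log

# basepoints = ceil(7 * ln(seconds_per_fast_problem))
for seconds in [4, 10, 30, 60]:
    print("%s -> %s" % (seconds, int(ceil(7 * log(seconds)))))
# 4 -> 10, 10 -> 17, 30 -> 24, 60 -> 29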
def update_all_distributed_callback(request):
    """ """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])

    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if youtube_id not in ID2SLUG_MAP:
            logging.warn("Skipping unknown video %s" % youtube_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, youtube_id=video["youtube_id"])
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (youtube_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in NODE_CACHE['Exercise']:
            logging.warn("Skipping unknown exercise %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponse({"error": error_message}, status=500)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})