Code example #1
def move_srts(lang_code):
    """
    Srt files live in the locale directory, but that directory isn't exposed at
    any URL.  So instead, we move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
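Several of these snippets call an ensure_dir() helper before writing into a directory. Its definition isn't shown on this page; a minimal sketch of the assumed behavior:

import os

def ensure_dir(path):
    # Assumed behavior: create the directory (and any parents) if missing.
    if not os.path.exists(path):
        os.makedirs(path)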
Code example #2
    def recurse_nodes_to_clean_related_videos(node):
        """
        Internal function for recursing the topic tree and cleaning each exercise's related-video list.
        Requires rebranding of metadata done by recurse_nodes function.
        """
        def get_video_node(video_slug, node):
            if node["kind"] == "Topic":
                for child in node.get("children", []):
                    video_node = get_video_node(video_slug, child)
                    if video_node:
                        return video_node
            elif node["kind"] == "Video" and node["slug"] == video_slug:
                return node

            return None

        if node["kind"] == "Exercise":
            videos_to_delete = []
            for vi, video_slug in enumerate(node["related_video_slugs"]):
                if not get_video_node(video_slug, topic_tree):
                    videos_to_delete.append(vi)
            for vi in reversed(videos_to_delete):
                logging.warn("Deleting unknown video %s" % node["related_video_slugs"][vi])
                del node["related_video_slugs"][vi]
        for child in node.get("children", []):
            recurse_nodes_to_clean_related_videos(child)
Code example #3
File: models.py Project: aronasorman/ka-lite-central
    def end_user_activity(cls, user, activity_type="login", end_datetime=None, suppress_save=False):  # don't accept language--we're just closing previous activity.
        """Helper function to complete an existing user activity log entry."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not end_datetime:  # must be set here, not in the signature (default args are evaluated only once)
            end_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)

        if cur_log:
            # How could you start after you ended??
            if cur_log.start_datetime > end_datetime:
                raise ValidationError("Update time must always be later than the login time.")
        else:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to BEGIN a user log entry, but ENDING(%d)! @ %s" % (user.username, activity_type, end_datetime))
            cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=end_datetime, suppress_save=True)

        logging.debug("%s: Logging LOGOUT activity @ %s" % (user.username, end_datetime))
        cur_log.end_datetime = end_datetime
        if not suppress_save:
            cur_log.save()  # total-seconds will be computed here.
        return cur_log
Code example #4
File: models.py Project: aronasorman/ka-lite-central
    def update_user_activity(cls, user, activity_type="login", update_datetime=None, language=None, suppress_save=False):
        """Helper function to update an existing user activity log entry."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not update_datetime:  # must be set here, not in the signature (default args are evaluated only once)
            update_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if cur_log:
            # How could you start after you updated??
            if cur_log.start_datetime > update_datetime:
                raise ValidationError("Update time must always be later than the login time.")
        else:
            # No unstopped starts.  Start should have been called first!
            logging.warn("%s: Had to create a user log entry on an UPDATE(%d)! @ %s" % (user.username, activity_type, update_datetime))
            cur_log = cls.begin_user_activity(user=user, activity_type=activity_type, start_datetime=update_datetime, suppress_save=True)

        logging.debug("%s: UPDATE activity (%d) @ %s" % (user.username, activity_type, update_datetime))
        cur_log.last_active_datetime = update_datetime
        cur_log.language = language or cur_log.language  # set the language to the current language, if there is one.
        if not suppress_save:
            cur_log.save()
        return cur_log
Code example #5
File: models.py Project: aronasorman/ka-lite-central
    def begin_user_activity(cls, user, activity_type="login", start_datetime=None, language=None, suppress_save=False):
        """Helper function to create a user activity log entry."""

        # Do nothing if the max # of records is zero
        # (i.e. this functionality is disabled)
        if not cls.is_enabled():
            return

        if not user:
            raise ValidationError("A valid user must always be specified.")
        if not start_datetime:  # must be set here, not in the signature (default args are evaluated only once)
            start_datetime = datetime.now()
        activity_type = cls.get_activity_int(activity_type)

        cur_log = cls.get_latest_open_log_or_None(user=user, activity_type=activity_type)
        if cur_log:
            # Seems we're logging in without having logged out of the previous session.
            #   Best thing to do is simulate a logout
            #   at the previous last update time.
            #
            # Note: this can be a recursive call
            logging.warn("%s: had to END activity on a begin(%d) @ %s" % (user.username, activity_type, start_datetime))
            # Don't mark current language when closing an old one
            cls.end_user_activity(user=user, activity_type=activity_type, end_datetime=cur_log.last_active_datetime)  # can't suppress save
            cur_log = None

        # Create a new entry
        logging.debug("%s: BEGIN activity(%d) @ %s" % (user.username, activity_type, start_datetime))
        cur_log = cls(user=user, activity_type=activity_type, start_datetime=start_datetime, last_active_datetime=start_datetime, language=language)
        if not suppress_save:
            cur_log.save()

        return cur_log
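Taken together, examples #3-#5 form a begin/update/end lifecycle for user activity logging. A hedged sketch of the intended calling pattern, where UserLog and some_user are assumed names standing in for the actual model class and a user instance:

# Hypothetical driver code; UserLog stands in for the real model class.
log = UserLog.begin_user_activity(user=some_user, activity_type="login")
UserLog.update_user_activity(user=some_user)  # heartbeat, e.g. once per request
UserLog.end_user_activity(user=some_user)     # total seconds computed on save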
Code example #6
    def recurse_nodes_to_extract_knowledge_map(node, node_cache):
        """
        Internal function for recursing the topic tree and building the knowledge map.
        Requires rebranding of metadata done by recurse_nodes function.
        """
        assert node["kind"] == "Topic"

        if node.get("in_knowledge_map", None):
            if node["slug"] not in knowledge_map["topics"]:
                logging.debug("Not in knowledge map: %s" % node["slug"])
                node["in_knowledge_map"] = False
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False

            knowledge_topics[node["slug"]] = topic_tools.get_all_leaves(node, leaf_type="Exercise")

            if not knowledge_topics[node["slug"]]:
                sys.stderr.write("Removing topic from topic tree: no exercises. %s" % node["slug"])
                del knowledge_topics[node["slug"]]
                del knowledge_map["topics"][node["slug"]]
                node["in_knowledge_map"] = False
                for cached_node in node_cache["Topic"][node["slug"]]:
                    cached_node["in_knowledge_map"] = False
        else:
            if node["slug"] in knowledge_map["topics"]:
                sys.stderr.write("Removing topic from topic tree; does not belong. '%s'" % node["slug"])
                logging.warn("Removing from knowledge map: %s" % node["slug"])
                del knowledge_map["topics"][node["slug"]]

        for child in [n for n in node.get("children", []) if n["kind"] == "Topic"]:
            recurse_nodes_to_extract_knowledge_map(child, node_cache)
Code example #7
    def download_kmap_icons(knowledge_map):
        for key, value in knowledge_map["topics"].items():
            # Note: id here is retrieved from knowledge_map, so we're OK
            #   that we blew away ID in the topic tree earlier.
            if "icon_url" not in value:
                logging.warn("No icon URL for %s" % key)

            value["icon_url"] = iconfilepath + value["id"] + iconextension
            knowledge_map["topics"][key] = value

            out_path = data_path + "../" + value["icon_url"]
            if os.path.exists(out_path) and not force_icons:
                continue

            icon_khan_url = "http://www.khanacademy.org" + value["icon_url"]
            sys.stdout.write("Downloading icon %s from %s..." % (value["id"], icon_khan_url))
            sys.stdout.flush()
            try:
                icon = requests.get(icon_khan_url)
            except Exception as e:
                sys.stdout.write("\n")  # complete the "downloading" output
                sys.stderr.write("Failed to download %-80s: %s\n" % (icon_khan_url, e))
                continue
            if icon.status_code == 200:
                with open(out_path, "wb") as iconfile:  # binary mode, and close the file when done
                    iconfile.write(icon.content)
            else:
                sys.stdout.write(" [NOT FOUND]")
                value["icon_url"] = iconfilepath + defaulticon + iconextension
            sys.stdout.write(" done.\n")  # complete the "downloading" output
Code example #8
def update_all_distributed_callback(request):
    """
    """

    if request.method != "POST":
        raise PermissionDenied("Only POST allowed to this URL endpoint.")

    videos = json.loads(request.POST["video_logs"])
    exercises = json.loads(request.POST["exercise_logs"])
    user = FacilityUser.objects.get(id=request.POST["user_id"])
    node_cache = get_node_cache()
    # Save videos
    n_videos_uploaded = 0
    for video in videos:
        video_id = video['video_id']
        youtube_id = video['youtube_id']

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            (vl, _) = VideoLog.get_or_initialize(user=user, video_id=video_id, youtube_id=youtube_id)
            for key, val in video.iteritems():
                setattr(vl, key, val)
            logging.debug("Saving video log for %s: %s" % (video_id, vl))
            vl.save()
            n_videos_uploaded += 1
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)
        except Exception as e:
            error_message = "Unexpected error importing videos: %s" % e
            return JsonResponseMessageError(error_message)

    # Save exercises
    n_exercises_uploaded = 0
    for exercise in exercises:
        # Only save exercise logs for exercises that we recognize.
        if exercise['exercise_id'] not in node_cache['Exercise']:
            logging.warn("Skipping unknown video %s" % exercise['exercise_id'])
            continue

        try:
            (el, _) = ExerciseLog.get_or_initialize(user=user, exercise_id=exercise["exercise_id"])
            for key, val in exercise.iteritems():
                setattr(el, key, val)
            logging.debug("Saving exercise log for %s: %s" % (exercise['exercise_id'], el))
            el.save()
            n_exercises_uploaded += 1
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)
        except Exception as e:
            error_message = "Unexpected error importing exercises: %s" % e
            return JsonResponseMessageError(error_message)

    return JsonResponse({"success": "Uploaded %d exercises and %d videos" % (n_exercises_uploaded, n_videos_uploaded)})
Code example #9
def get_file2lang_map(force=False):
    """Map from youtube_id to language code"""
    global YT2LANG_MAP
    if YT2LANG_MAP is None or force:
        YT2LANG_MAP = {}
        for lang_code, dic in get_dubbed_video_map().iteritems():
            for dubbed_youtube_id in dic.values():
                if dubbed_youtube_id in YT2LANG_MAP:
                    # Sanity check, but must be failsafe, since we don't control these data
                    if YT2LANG_MAP[dubbed_youtube_id] == lang_code:
                        logging.warn("Duplicate entry found in %s language map for dubbed video %s" % (lang_code, dubbed_youtube_id))
                    else:
                        logging.error("Conflicting entry found in language map for video %s; overwriting previous entry of %s to %s." % (dubbed_youtube_id, YT2LANG_MAP[dubbed_youtube_id], lang_code))
                YT2LANG_MAP[dubbed_youtube_id] = lang_code
    return YT2LANG_MAP
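A hedged usage sketch: once built, the map is a plain dict from youtube_id to language code (the id below is hypothetical):

yt2lang = get_file2lang_map()
lang = yt2lang.get("hy7sXTg8f5Q")  # hypothetical id; returns e.g. "pt-BR", or None if unknown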
Code example #10
File: update.py Project: aronasorman/ka-lite-central
    def get_shell_script(self, cmd_glob, location=None):
        if not location:
            location = self.working_dir + '/kalite'
        cmd_glob += system_script_extension()

        # Find the command
        cmd = glob.glob(location + "/" + cmd_glob)
        if len(cmd) > 1:
            raise CommandError("Multiple commands found (%s)?  Should choose based on platform, but ... how to do in Python?  Contact us to implement this!" % cmd_glob)
        elif len(cmd) == 1:
            cmd = cmd[0]
        else:
            cmd = None
            logging.warn("No command found: (%s in %s)" % (cmd_glob, location))
        return cmd
Code example #11
    def clean_orphaned_polylines(knowledge_map):
        """
        We remove some topics (without leaves); need to remove polylines associated with these topics.
        """
        all_topic_points = [(km["x"],km["y"]) for km in knowledge_map["topics"].values()]

        polylines_to_delete = []
        for li, polyline in enumerate(knowledge_map["polylines"]):
            if any(["x" for pt in polyline["path"] if (pt["x"], pt["y"]) not in all_topic_points]):
                polylines_to_delete.append(li)

        logging.warn("Removing %s of %s polylines in top-level knowledge map" % (len(polylines_to_delete), len(knowledge_map["polylines"])))
        for i in reversed(polylines_to_delete):
            del knowledge_map["polylines"][i]

        return knowledge_map
Code example #12
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores one key per language.  The value is a dictionary mapping video_id to the (dubbed) youtube_id.
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    if settings.CENTRAL_SERVER:
                        # Never call commands that could fail from the distributed server.
                        #   Always create a central server API to abstract things (see below)
                        logging.debug("Generating dubbed video mappings.")
                        call_command("generate_dubbed_video_mappings", force=force)
                    else:
                        # On the distributed server, fetch the pre-generated map from the central server API.
                        response = requests.get("http://%s/api/i18n/videos/dubbed_video_map" % (settings.CENTRAL_SERVER_HOST))
                        response.raise_for_status()
                        with open(DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                            fp.write(response.content)  # wait until content has been confirmed before opening the file.
                except Exception as e:
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--let downstream code catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            logging.debug("Adding dubbed video map entry for %s (name=%s)" % (get_langcode_map(lang_name), lang_name))
            DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
Code example #13
    def recurse_nodes_to_remove_childless_nodes(node):
        """
        When we remove exercises, we remove dead-end topics.
        Khan just sends us dead-end topics, too.
        Let's remove those too.
        """
        children_to_delete = []
        for ci, child in enumerate(node.get("children", [])):
            # Only recurse into topics; skip leaf nodes.
            if child["kind"] != "Topic":
                continue

            recurse_nodes_to_remove_childless_nodes(child)

            if not child.get("children"):
                children_to_delete.append(ci)
                logging.warn("Removing KA childless topic: %s" % child["slug"])

        for ci in reversed(children_to_delete):
            del node["children"][ci]
Code example #14
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            LANG_LOOKUP_FILEPATH, missing_langs,
        ))
Code example #15
def move_exercises(lang_code):
    lang_pack_location = os.path.join(LOCALE_ROOT, lang_code)
    src_exercise_dir = os.path.join(lang_pack_location, "exercises")
    dest_exercise_dir = get_localized_exercise_dirpath(lang_code, is_central_server=False)

    if not os.path.exists(src_exercise_dir):
        logging.warn("Could not find downloaded exercises; skipping: %s" % src_exercise_dir)
    else:
        # Move over one at a time, to combine with any other resources that were there before.
        ensure_dir(dest_exercise_dir)
        all_exercise_files = glob.glob(os.path.join(src_exercise_dir, "*.html"))
        logging.info("Moving %d downloaded exercises to %s" % (len(all_exercise_files), dest_exercise_dir))

        for exercise_file in all_exercise_files:
            shutil.move(exercise_file, os.path.join(dest_exercise_dir, os.path.basename(exercise_file)))

        logging.debug("Removing emtpy directory")
        try:
            shutil.rmtree(src_exercise_dir)
        except Exception as e:
            logging.error("Error removing dubbed video directory (%s): %s" % (src_exercise_dir, e))
Code example #16
File: update.py Project: aronasorman/ka-lite-central
    def verify_inner_zip(self, zip_file):
        """
        Extract contents of outer zip, verify the inner zip
        """
        zf = ZipFile(zip_file, "r")  # avoid shadowing the builtin zip()
        for afile in zf.namelist():
            zf.extract(afile, path=self.working_dir)

        self.signature_file = os.path.join(self.working_dir, Command.signature_filename)
        self.inner_zip_file = os.path.join(self.working_dir, Command.inner_zip_filename)

        central_server = Device.get_central_server()
        with open(self.signature_file, "r") as f:
            lines = f.read().split("\n")
        chunk_size = int(lines.pop(0))
        if not central_server:
            logging.warn("No central server device object found; trusting zip file because you asked me to...")
        elif central_server.key.verify_large_file(self.inner_zip_file, signature=lines, chunk_size=chunk_size):
            logging.info("Verified file!")
        else:
            raise Exception("Failed to verify inner zip file.")
        return self.inner_zip_file
Code example #17
    def recurse_nodes_to_delete_exercise(node):
        """
        Internal function for recursing the topic tree and removing exercises we have no local file for.
        Requires rebranding of metadata done by recurse_nodes function.
        Returns a list of exercise slugs for the exercises that were deleted.
        """
        # Stop recursing when we hit leaves
        if node["kind"] != "Topic":
            return []

        slugs_deleted = []

        children_to_delete = []
        for ci, child in enumerate(node.get("children", [])):
            # Mark all unrecognized exercises for deletion
            if child["kind"] == "Exercise":
                if not os.path.exists(exercise_path % child["slug"]):
                    children_to_delete.append(ci)

            # Recurse over children to delete
            elif child.get("children", None):
                slugs_deleted += recurse_nodes_to_delete_exercise(child)

                if not child.get("children", None):
                    # Delete children without children (all their children were removed)
                    logging.warn("Removing now-childless topic node '%s'" % child["slug"])
                    children_to_delete.append(ci)
                elif not any([ch["kind"] == "Exercise" or "Exercise" in ch.get("contains", []) for ch in child["children"]]):
                    # If there are no longer exercises, be honest about it
                    child["contains"] = list(set(child["contains"]) - set(["Exercise"]))

        # Do the actual deletion
        for i in reversed(children_to_delete):
            logging.warn("Deleting unknown exercise %s" % node["children"][i]["slug"])
            del node["children"][i]

        return slugs_deleted
Code example #18
    def scrub_knowledge_map(knowledge_map, node_cache):
        """
        Some topics in the knowledge map are not kept in our topic tree / node cache.
        Eliminate them from the knowledge map here.
        """
        for slug in knowledge_map["topics"].keys():
            nodecache_node = node_cache["Topic"].get(slug, [{}])[0]
            topictree_node = topic_tools.get_topic_by_path(nodecache_node.get("path"), root_node=topic_tree)

            if not nodecache_node or not topictree_node:
                logging.warn("Removing unrecognized knowledge_map topic '%s'" % slug)
            elif not topictree_node.get("children"):
                logging.warn("Removing knowledge_map topic '%s' with no children." % slug)
            elif not "Exercise" in topictree_node.get("contains"):
                logging.warn("Removing knowledge_map topic '%s' with no exercises." % slug)
            else:
                continue

            del knowledge_map["topics"][slug]
            topictree_node["in_knowledge_map"] = False
Code example #19
def update_all_central_callback(request):
    """
    Callback after authentication.

    Parses out the request token verification.
    Then finishes the request by getting an auth token.
    """
    if not "ACCESS_TOKEN" in request.session:
        finish_auth(request)

    exercises = get_api_resource(request, "/api/v1/user/exercises")
    videos = get_api_resource(request, "/api/v1/user/videos")
    node_cache = get_node_cache()

    # Collate videos
    video_logs = []
    for video in videos:
        # Assume that KA videos are all english-language, not dubbed (for now)
        video_id = youtube_id = video.get('video', {}).get('youtube_id', "")

        # Only save videos with progress
        if not video.get('seconds_watched', None):
            continue

        # Only save video logs for videos that we recognize.
        if video_id not in node_cache["Video"]:
            logging.warn("Skipping unknown video %s" % video_id)
            continue

        try:
            video_logs.append({
                "video_id": video_id,
                "youtube_id": youtube_id,
                "total_seconds_watched": video['seconds_watched'],
                "points": VideoLog.calc_points(video['seconds_watched'], video['duration']),
                "complete": video['completed'],
                "completion_timestamp": convert_ka_date(video['last_watched']) if video['completed'] else None,
            })
            logging.debug("Got video log for %s: %s" % (video_id, video_logs[-1]))
        except KeyError:
            logging.error("Could not save video log for data with missing values: %s" % video)

    # Collate exercises
    exercise_logs = []
    for exercise in exercises:
        # Only save exercises that have any progress.
        if not exercise.get('last_done', None):
            continue

        # Only save exercise logs for exercises that we recognize.
        slug = exercise.get('exercise', "")
        if slug not in node_cache['Exercise']:
            logging.warn("Skipping unknown exercise %s" % slug)
            continue

        try:
            completed = exercise['streak'] >= 10
            basepoints = node_cache['Exercise'][slug][0]['basepoints']
            exercise_logs.append({
                "exercise_id": slug,
                "streak_progress": min(100, 100 * exercise['streak']/10),  # duplicates logic elsewhere
                "attempts": exercise['total_done'],
                "points": ExerciseLog.calc_points(basepoints, ncorrect=exercise['streak'], add_randomness=False),  # no randomness when importing from KA
                "complete": completed,
                "attempts_before_completion": exercise['total_done'] if not exercise['practiced'] else None,  #can't figure this out if they practiced after mastery.
                "completion_timestamp": convert_ka_date(exercise['proficient_date']) if completed else None,
            })
            logging.debug("Got exercise log for %s: %s" % (slug, exercise_logs[-1]))
        except KeyError:
            logging.error("Could not save exercise log for data with missing values: %s" % exercise)

    # POST the data back to the distributed server
    try:
        dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
        logging.debug("POST'ing to %s" % request.session["distributed_callback_url"])
        response = requests.post(
            request.session["distributed_callback_url"],
            cookies={ "csrftoken": request.session["distributed_csrf_token"] },
            data = {
                "csrfmiddlewaretoken": request.session["distributed_csrf_token"],
                "video_logs": json.dumps(video_logs, default=dthandler),
                "exercise_logs": json.dumps(exercise_logs, default=dthandler),
                "user_id": request.session["distributed_user_id"],
            }
        )
        logging.debug("Response (%d): %s" % (response.status_code, response.content))
    except requests.exceptions.ConnectionError as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Could not connect to your KA Lite installation to share Khan Academy data."),
            "message_id": "id_khanload",
        }))
    except Exception as e:
        return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
            "message_type": "error",
            "message": _("Failure to send data to your KA Lite installation: %s") % e,
            "message_id": "id_khanload",
        }))


    try:
        json_response = json.loads(response.content)
        if not isinstance(json_response, dict) or len(json_response) != 1:
            # Could not validate the message is a single key-value pair
            raise Exception(_("Unexpected response format from your KA Lite installation."))
        message_type = json_response.keys()[0]
        message = json_response.values()[0]
    except ValueError as e:
        message_type = "error"
        message = unicode(e)
    except Exception as e:
        message_type = "error"
        message = _("Loading json object: %s") % e

    # If something broke on the distributed server, we are SCREWED.
    #   For now, just show the error to users.
    #
    # Ultimately, we have a message we would like to share with the distributed server.
#    if response.status_code != 200:
#        return HttpResponseServerError(response.content)

    return HttpResponseRedirect(set_query_params(request.session["distributed_redirect_url"], {
        "message_type": message_type,
        "message": message,
        "message_id": "id_khanload",
    }))
Code example #20
def generate_dubbed_video_mappings(download_url=None, csv_data=None):
    """
    Function to do the heavy lifting in getting the dubbed videos map.

    Could be moved into utils.
    """
    if not download_url:
        download_url = SPREADSHEET_BASE_URL
        params = {'key': SPREADSHEET_ID, 'gid': SPREADSHEET_GID, 'output': SPREADSHEET_EXPORT_FORMAT}
    else:
        params = {}

    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url, params=params)
        if response.status_code != 200:
            raise CommandError("Failed to download dubbed video CSV data: status=%s" % response.status_code)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key: language name
    #   Second key: english youtube ID
    #   Value: corresponding youtube ID in the new language.
    video_map = {}

    row_num = -1
    try:
        # Loop through each row in the spreadsheet.
        while True:
            row_num += 1
            row = reader.next()

            if row_num < 4:
                # Rows 1-4 are junk; skip them.
                continue

            elif row_num == 4:
                # Row 5 is the header row.
                header_row = [v.lower() for v in row]  # lcase all header row values (including language names)
                slug_idx = header_row.index("titled id")
                english_idx = header_row.index("english")
                assert slug_idx != -1, "Video slug column header should be found."
                assert english_idx != -1, "English video column header should be found."

            else:
                # Rows 6 and beyond are data.
                assert len(row) == len(header_row), "Values line length equals headers line length"

                # Grab the slug and english video ID.
                video_slug = row[slug_idx]
                english_video_id = row[english_idx]
                assert english_video_id, "English Video ID should not be empty"
                assert video_slug, "Slug should not be empty"

                # English video is the first video ID column,
                #   and following columns (until the end) are other languages.
                # Loop through those columns and, if a video exists,
                #   add it to the dictionary.
                for idx in range(english_idx, len(row)):
                    if not row[idx]:  # make sure there's a dubbed video
                        continue

                    lang = header_row[idx]
                    if lang not in video_map:  # add the first level if it doesn't exist
                        video_map[lang] = {}
                    dubbed_youtube_id = row[idx]
                    if english_video_id == dubbed_youtube_id and lang != "english":
                        logging.error("Removing entry for (%s, %s): dubbed and english youtube ID are the same." % (lang, english_video_id))
                    #elif dubbed_youtube_id in video_map[lang].values():
                        # Talked to Bilal, and this is actually supposed to be OK.  Would throw us for a loop!
                        #    For now, just keep one.
                        #for key in video_map[lang].keys():
                        #    if video_map[lang][key] == dubbed_youtube_id:
                        #        del video_map[lang][key]
                        #        break
                        #logging.error("Removing entry for (%s, %s): the same dubbed video ID is used in two places, and we can only keep one in our current system." % (lang, english_video_id))
                    else:
                        video_map[lang][english_video_id] = row[idx]  # add the corresponding video id for the video, in this language.

    except StopIteration:
        # The loop ends when the CSV file hits the end and throws a StopIteration
        pass

    # Now, validate the mappings with our topic data
    known_videos = get_node_cache("Video").keys()
    missing_videos = set(known_videos) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(known_videos)
    if missing_videos:
        logging.warn("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
        logging.warn("Adding missing English videos to English dubbed video map")
        for video in missing_videos:
            video_map["english"][video] = video
    if extra_videos:
        logging.warn("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return (video_map, csv_data)
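A hedged usage sketch of the returned two-level structure (the ids and language below are hypothetical):

video_map, csv_data = generate_dubbed_video_mappings()
portuguese = video_map.get("portuguese", {})  # english youtube_id -> dubbed youtube_id
dubbed_id = portuguese.get("ENGLISH_YT_ID")   # hypothetical key; None if no dubbed version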
Code example #21
def zip_language_packs(lang_codes=None, version=VERSION):
    """Zip up and expose all language packs

    converts all into ietf
    """
    sizes = {}
    lang_codes = lang_codes or os.listdir(LANGUAGE_PACK_BUILD_DIR)
    lang_codes = [lcode_to_ietf(lc) for lc in lang_codes]
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    for lang_code_ietf in lang_codes:
        lang_code_map = get_supported_language_map(lang_code_ietf)

        # Initialize values
        sizes[lang_code_ietf] = { "package_size": 0, "zip_size": 0}

        # Skip any language without a usable build directory.
        lang_locale_path = get_lp_build_dir(lang_code_ietf, version=version)
        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang_code_ietf)
            continue
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists where a directory was expected: %s" % lang_code_ietf)
            continue

        # Create a zipfile for this language
        zip_filepath = get_language_pack_filepath(lang_code_ietf, version=version)
        ensure_dir(os.path.dirname(zip_filepath))
        logging.info("Creating zip file in %s" % zip_filepath)
        z = zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED)

        # Get metadata from the versioned directory
        for metadata_file in glob.glob('%s/*.json' % get_lp_build_dir(lang_code_ietf, version=version)):
            # Get every single file in the directory and zip it up
            filepath = os.path.join(lang_locale_path, metadata_file)
            z.write(filepath, arcname=os.path.basename(metadata_file))
            sizes[lang_code_ietf]["package_size"] += os.path.getsize(filepath)

        # Get mo files from the directory
        lang_code_crowdin = lang_code_map["crowdin"]
        mo_files = glob.glob('%s/*.mo' % get_lp_build_dir(lcode_to_ietf(lang_code_crowdin), version=version)) if lang_code_crowdin else []
        for mo_file in mo_files:
            # Get every single compiled language file
            filepath = os.path.join(lang_locale_path, mo_file)
            z.write(filepath, arcname=os.path.join("LC_MESSAGES", os.path.basename(mo_file)))
            sizes[lang_code_ietf]["package_size"] += os.path.getsize(filepath)

        # include video file sizes
        remote_video_size_list = get_all_remote_video_sizes()
        z.writestr('video_file_sizes.json', json.dumps(remote_video_size_list))  # str() would not produce valid JSON

        srt_dirpath = get_srt_path(lcode_to_django_dir(lang_code_map["amara"]))
        for srt_file in glob.glob(os.path.join(srt_dirpath, "*.srt")):
            z.write(srt_file, arcname=os.path.join("subtitles", os.path.basename(srt_file)))
            sizes[lang_code_ietf]["package_size"] += os.path.getsize(srt_file)

        if version_diff(version, "0.10.3") > 0:  # since these are globally available, need to check version.
            exercises_dirpath = get_localized_exercise_dirpath(lang_code_map["exercises"])
            for exercise_file in glob.glob(os.path.join(exercises_dirpath, "*.html")):
                # Get every single compiled language file
                filepath = os.path.join(exercises_dirpath, exercise_file)
                z.write(filepath, arcname=os.path.join("exercises", os.path.basename(exercise_file)))
                sizes[lang_code_ietf]["package_size"] += os.path.getsize(filepath)

        # Add dubbed video map
        z.write(DUBBED_VIDEOS_MAPPING_FILEPATH, arcname=os.path.join("dubbed_videos", os.path.basename(DUBBED_VIDEOS_MAPPING_FILEPATH)))
        sizes[lang_code_ietf]["package_size"] += os.path.getsize(DUBBED_VIDEOS_MAPPING_FILEPATH)

        z.close()
        sizes[lang_code_ietf]["zip_size"]= os.path.getsize(zip_filepath)

    logging.info("Done.")
    return sizes
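A hedged usage sketch (the language codes are hypothetical):

sizes = zip_language_packs(lang_codes=["pt-BR", "es-ES"])
for lc, s in sizes.items():
    print("%s: %d bytes packaged, %d bytes zipped" % (lc, s["package_size"], s["zip_size"]))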
Code example #22
    def recurse_nodes(node, path="", ancestor_ids=[]):
        """
        Internal function for recursing over the topic tree, marking relevant metadata,
        and removing undesired attributes and children.
        """

        kind = node["kind"]

        # Only keep key data we can use
        for key in node.keys():
            if key not in attribute_whitelists[kind]:
                del node[key]

        # Fix up data
        if slug_key[kind] not in node:
            logging.warn("Could not find expected slug key (%s) on node: %s" % (slug_key[kind], node))
            node[slug_key[kind]] = node["id"]  # put it SOMEWHERE.
        node["slug"] = node[slug_key[kind]] if node[slug_key[kind]] != "root" else ""
        node["id"] = node[id_key[kind]]  # these used to be the same; now not. Easier if they stay the same (issue #233)

        node["path"] = path + khanload.kind_slugs[kind] + node["slug"] + "/"
        node["title"] = node[title_key[kind]].strip()

        # Add some attributes that should have been there to start with.
        node["parent_id"] = ancestor_ids[-1] if ancestor_ids else None
        node["ancestor_ids"] = ancestor_ids

        if kind == "Exercise":
            # For each exercise, need to set the exercise_id
            #   get related videos
            #   and compute base points
            node["exercise_id"] = node["slug"]

            # Compute base points from the expected solve time, and paste them onto the exercise.
            node["basepoints"] = ceil(7 * log(node["seconds_per_fast_problem"]))

            # Related videos
            related_video_slugs = [vid["readable_id"] for vid in download_khan_data("http://www.khanacademy.org/api/v1/exercises/%s/videos" % node["name"], node["name"] + ".json")]
            node["related_video_slugs"] = related_video_slugs

            related_exercise_metadata = {
                "id": node["id"],
                "slug": node["slug"],
                "title": node["title"],
                "path": node["path"],
            }
            for video_slug in node.get("related_video_slugs", []):
                related_exercise[video_slug] = related_exercise_metadata


        # Recurse through children, remove any blacklisted items
        children_to_delete = []
        child_kinds = set()
        for i, child in enumerate(node.get("children", [])):
            child_kind = child.get("kind", None)

            # Blacklisted--remove
            if child_kind in kind_blacklist:
                children_to_delete.append(i)
                continue
            elif child[slug_key[child_kind]] in slug_blacklist:
                children_to_delete.append(i)
                continue
            elif not child.get("live", True) and remove_disabled_topics:  # node is not live
                logging.debug("Remvong non-live child: %s" % child[slug_key[child_kind]])
                children_to_delete.append(i)
                continue
            elif child.get("hide", False) and remove_disabled_topics:  # node is hidden. Note that root is hidden, and we're implicitly skipping that.
                children_to_delete.append(i)
                logging.debug("Remvong hidden child: %s" % child[slug_key[child_kind]])
                continue
            elif child_kind == "Video" and set(["mp4", "png"]) - set(child.get("download_urls", {}).keys()):
                # for now, since we expect the missing videos to be filled in soon,
                #   we won't remove these nodes
                sys.stderr.write("WARNING: No download link for video: %s: authors='%s'\n" % (child["youtube_id"], child["author_names"]))
                children_to_delete.append(i)
                continue

            child_kinds = child_kinds.union(set([child_kind]))
            child_kinds = child_kinds.union(recurse_nodes(child, path=node["path"], ancestor_ids=ancestor_ids + [node["id"]]))

        # Delete those marked for deletion
        for i in reversed(children_to_delete):
            del node["children"][i]

        # Mark on topics whether they contain Videos, Exercises, or both
        if kind == "Topic":
            node["contains"] = list(child_kinds)

        return child_kinds