Example #1
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" %
                 (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn(
            "%s is not empty; will not remove.  Please check that all subtitles were moved."
            % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
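
A hedged usage sketch (not part of the example above): move_srts would typically be called once per installed language pack after new subtitle files land in the locale directory. The language codes are placeholders, and the helpers are assumed importable from the project's i18n/update modules shown in these examples.

# Minimal sketch, assuming move_srts and get_srt_path are importable from the
# project (as in the example above); the language codes are illustrative only.
for lang_code in ["es", "pt-BR"]:
    move_srts(lang_code)
    # After the call, the .srt files are expected under the directory returned
    # by get_srt_path(lang_code), i.e. /static/subtitles/<lang dir>/.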
Example #2
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.USER_STATIC_FILES, "subtitles")
    src_dir = os.path.join(settings.USER_WRITABLE_LOCALE_DIR, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
Example #3
def delete_language(lang_code):

    langpack_resource_paths = [get_localized_exercise_dirpath(lang_code), get_srt_path(lang_code), get_locale_path(lang_code)]

    for langpack_resource_path in langpack_resource_paths:
        try:
            shutil.rmtree(langpack_resource_path)
            logging.info("Deleted language pack resource path: %s" % langpack_resource_path)
        except OSError as e:
            if e.errno != 2:    # Only ignore error: No Such File or Directory
                raise
            else:
                logging.debug("Not deleting missing language pack resource path: %s" % langpack_resource_path)

    invalidate_web_cache()
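
A brief, hedged sketch of a call site for delete_language; the language code below is a placeholder. Because errno 2 (ENOENT) is swallowed inside the function, calling it for a language pack that was never installed is safe.

# Illustrative only: remove all on-disk resources for one language pack.
# delete_language() already calls invalidate_web_cache() internally.
delete_language("pt-BR")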
Example #5
def stamp_availability_on_video(video, format="mp4", force=False, stamp_urls=True, videos_path=None):
    """
    Stamp all relevant urls and availability onto a video object (if necessary), including:
    * whether the video is available (on disk or online)
    """
    videos_path = videos_path or settings.CONTENT_ROOT

    def compute_video_availability(youtube_id, format, videos_path=videos_path):
        return {"on_disk": is_video_on_disk(youtube_id, format, videos_path=videos_path)}

    def compute_video_metadata(youtube_id, format):
        return {"stream_type": "video/%s" % format}

    def compute_video_urls(youtube_id, format, lang_code, on_disk=None, thumb_formats=["png", "jpg"], videos_path=videos_path):
        if on_disk is None:
            on_disk = is_video_on_disk(youtube_id, format, videos_path=videos_path)

        if on_disk:
            video_base_url = settings.CONTENT_URL + youtube_id
            stream_url = video_base_url + ".%s" % format
            thumbnail_url = None  # default to None now, so we know when no thumbnail is available.

            for thumb_format in thumb_formats:  # find the thumbnail on disk
                thumb_filename = '%s.%s' % (youtube_id, thumb_format)
                thumb_filepath = os.path.join(videos_path, thumb_filename)
                if os.path.exists(thumb_filepath):
                    thumbnail_url = video_base_url + "." + thumb_format  # default
                    break

        elif settings.BACKUP_VIDEO_SOURCE and lang_code == "en":
            dict_vals = {"youtube_id": youtube_id, "video_format": format, "thumb_format": thumb_formats[0] }
            stream_url = settings.BACKUP_VIDEO_SOURCE % dict_vals
            thumbnail_url = settings.BACKUP_THUMBNAIL_SOURCE % dict_vals if settings.BACKUP_THUMBNAIL_SOURCE else None

        else:
            return {}  # no URLs
        return {"stream": stream_url, "thumbnail": thumbnail_url}

    video_availability = video.get("availability", {}) if not force else {}
    en_youtube_id = get_youtube_id(video["id"], lang_code=None)  # get base ID
    video_map = get_id2oklang_map(video["id"]) or {}

    if "on_disk" not in video_availability:
        for lang_code in video_map.keys():
            youtube_id = video_map[lang_code].encode('utf-8')
            video_availability[lang_code] = compute_video_availability(youtube_id, format=format, videos_path=videos_path)
        video_availability["en"] = video_availability.get("en", {"on_disk": False})  # en should always be defined

        # Summarize status
        any_on_disk = any([lang_avail["on_disk"] for lang_avail in video_availability.values()])
        any_available = any_on_disk or bool(settings.BACKUP_VIDEO_SOURCE)

    if stamp_urls:
        # Loop over all known dubbed videos
        for lang_code, youtube_id in video_map.iteritems():
            urls = compute_video_urls(youtube_id, format, lang_code, on_disk=video_availability[lang_code]["on_disk"], videos_path=videos_path)
            if urls:
                # Only add properties if anything is available.
                video_availability[lang_code].update(urls)
                video_availability[lang_code].update(compute_video_metadata(youtube_id, format))

        # Get the (english) subtitle urls
        subtitle_lang_codes = get_langs_with_subtitle(en_youtube_id)
        subtitles_tuple = [(lc, get_srt_url(en_youtube_id, lc)) for lc in subtitle_lang_codes if os.path.exists(get_srt_path(lc, en_youtube_id))]
        subtitles_urls = dict(subtitles_tuple)
        video_availability["en"]["subtitles"] = subtitles_urls

    # now scrub any values that don't actually exist
    for lang_code in video_availability.keys():
        if not video_availability[lang_code]["on_disk"] and len(video_availability[lang_code]) == 1:
            del video_availability[lang_code]

    # Now summarize some availability onto the video itself
    video["availability"] = video_availability
    video["on_disk"]   = any_on_disk
    video["available"] = any_available

    return video
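
A hedged sketch of applying stamp_availability_on_video to a single video node. The dict below is a minimal placeholder, not the real topic-tree schema, and it assumes the helpers used by the example (get_youtube_id, get_id2oklang_map, etc.) are importable alongside it.

# Sketch only: "addition_1" is a placeholder id/slug.
video_node = {"id": "addition_1"}
video_node = stamp_availability_on_video(video_node, format="mp4", stamp_urls=True)
print("available: {}, on_disk: {}".format(video_node["available"], video_node["on_disk"]))
print(sorted(video_node["availability"].keys()))  # language codes with usable URLs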
Example #6
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                        tmp_dir
                    )
                )

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json'))
            )
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json'))
            )

        topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir, video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info("Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                    logger.error("Command was: {}".format(" ".join(cmd)))
                                    raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(
                        node["path"],
                        video_file_name
                    )
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src, output_format="png")
                        if fp is None:
                            logger.error("Failed to create thumbnail for {}".format(video_file_src))
                        else:
                            logger.info("Successfully created thumbnail for {}".format(video_file_src))
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.png'
                        )
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(
                        subtitle_src_dir,
                        node['id'] + '.srt'
                    )
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(
                            node_dir,
                            node['id'] + '.vtt'
                        )
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                        else:
                            logger.info("Subtitle converted from SRT to VTT: {}".format(subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"],
                                node['id'] + '.vtt'
                            )

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get("children", [])
                unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children
        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
            about_html = render_to_string("kalite_zim/about.html", template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--description", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format((_stderr_data or "") + (stdout_data or "")))

        logger.info(
            "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
                h=duration // 3600,
                m=(duration % 3600) // 60,
                s=duration % 60,
            )
        )
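
A hedged sketch of driving this management command programmatically via Django's call_command. The command name comes from the log messages above; the keyword option names are inferred from the options.get(...) calls in the handler and may not match the actual argument parser exactly.

# Assumption-heavy sketch: option names mirror the keys read by handle() above.
from django.core.management import call_command

call_command(
    "export2zim",
    "/tmp/khan-academy_en.zim",  # positional destination file (placeholder path)
    language="en",
    clear=True,            # wipe a dirty tmp_dir instead of aborting
    download=False,        # only package videos already on disk
    transcode2webm=False,  # keep the original mp4 files
)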
Example #7
def get_content_cache(force=False, annotate=False, language=None):

    if not language:
        language = django_settings.LANGUAGE_CODE

    global CONTENT

    if CONTENT is None:
        CONTENT = {}

    if CONTENT.get(language) is None:
        content = None
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
        if content:
            CONTENT[language] = content
            return CONTENT[language]
        else:
            if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
                call_command("create_content_db")
                content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
            else:
                content = softload_json(settings.CONTENT_FILEPATH, logger=logging.debug, raises=False)
            CONTENT[language] = content
            annotate = True

    if annotate:

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        try:
            contents_folder = os.listdir(django_settings.CONTENT_ROOT)
        except OSError:
            contents_folder = []

        subtitle_langs = {}

        if os.path.exists(i18n.get_srt_path()):
            for (dirpath, dirnames, filenames) in os.walk(i18n.get_srt_path()):
                # Only bother looking at files that are inside a 'subtitles' directory
                if os.path.basename(dirpath) == "subtitles":
                    lc = os.path.basename(os.path.dirname(dirpath))
                    for filename in filenames:
                        if filename in subtitle_langs:
                            subtitle_langs[filename].append(lc)
                        else:
                            subtitle_langs[filename] = [lc]

        for key, content in CONTENT[language].iteritems():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if (dubbed_id + "." + format) in contents_folder:
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": django_settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    elif django_settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = subtitle_langs.get("{id}.srt".format(id=content.get("id")), [])

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": django_settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(language):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description")) if content.get("description") else ""

            CONTENT[language][key] = content

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                CONTENT[language].commit()
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
Example #8
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [lc for lc in os.listdir(i18n.get_srt_path()) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
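
A short, hedged sketch of consuming the annotated cache built by get_content_cache; the field names ("available", "title", "content_urls") follow what the example above writes onto each content dict, and "en" is a placeholder language code.

# Sketch only: assumes the module containing get_content_cache is importable.
content_cache = get_content_cache(language="en", annotate=True)
available_items = [c for c in content_cache.values() if c.get("available")]
for item in available_items[:5]:
    print("{} -> {}".format(item["title"], item["content_urls"]["stream"]))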
Example #9
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(),
                                   'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info(
                    "Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                    .format(tmp_dir))

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning(
                "FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm."
            )

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError(
                    "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
                )

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get(
                'khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json')))
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json')))

        topic_tree = softload_json(topic_tree_json_path,
                                   logger=logger.debug,
                                   raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning(
                        "Found a duplicate ID for {}, re-downloading".format(
                            node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(
                        video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir,
                                                       video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info(
                                "Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i",
                                video_file_src,
                                "-codec:v",
                                "libvpx",
                                "-quality",
                                "best",
                                "-cpu-used",
                                "0",
                                "-b:v",
                                "300k",
                                "-qmin",
                                "10",  # 10=lowest value
                                "-qmax",
                                "35",  # 42=highest value
                                "-maxrate",
                                "300k",
                                "-bufsize",
                                "600k",
                                "-threads",
                                "8",
                                # "-vf", "scale=-1",
                                "-codec:a",
                                "libvorbis",
                                # "-b:a", "128k",
                                "-aq",
                                "5",
                                "-f",
                                "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass",
                                "1",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass",
                                "2",
                                "-y",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(
                                    cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error(
                                        "Error invoking ffmpeg: {}".format(
                                            (_stderr_data or "") +
                                            (stdout_data or "")))
                                    logger.error("Command was: {}".format(
                                        " ".join(cmd)))
                                    raise CommandError(
                                        "Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(node["path"],
                                                     video_file_name)
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(
                        copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src,
                                              output_format="png")
                        if fp is None:
                            logger.error(
                                "Failed to create thumbnail for {}".format(
                                    video_file_src))
                        else:
                            logger.info(
                                "Successfully created thumbnail for {}".format(
                                    video_file_src))
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"], node['id'] + '.png')
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(subtitle_src_dir,
                                                node['id'] + '.srt')
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(node_dir,
                                                    node['id'] + '.vtt')
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(
                                subtitle_srt))
                        else:
                            logger.info(
                                "Subtitle converted from SRT to VTT: {}".format(
                                    subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"], node['id'] + '.vtt')

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(
                            video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(
                    node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get(
                    "children", [])
                unavailable_video = child["kind"] == "Video" and not child.get(
                    "content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children

        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)

        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html",
                                            template_context)
            about_html = render_to_string("kalite_zim/about.html",
                                          template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'),
                        os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(
            copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(
            render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome",
            "welcome.html",
            "--favicon",
            "static/img/ka_leaf.png",
            "--publisher",
            publisher,
            "--creator",
            "KhanAcademy.org",
            "--description",
            "Khan Academy ({})".format(language),
            "--description",
            "Videos from Khan Academy",
            "--language",
            language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format(
                (_stderr_data or "") + (stdout_data or "")))

        logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        ))
Example #11
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    elif settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [lc for lc in os.listdir(i18n.get_srt_path()) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
Example #12
def get_content_cache(force=False, annotate=False):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = softload_json(CONTENT_FILEPATH,
                                logger=logging.debug,
                                raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + ".cache",
                                    logger=logging.debug,
                                    raises=False)
            if content:
                CONTENT = content
                return CONTENT

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT.values():
            languages = []
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            for lang, dubbed_id in dubmap.items():
                format = content.get("format", "")
                if is_content_on_disk(dubbed_id, format):
                    languages.append(lang)
                    thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                    content["lang_data_" + lang] = {
                        "stream": settings.CONTENT_URL + dubmap.get(lang) + "." + format,
                        "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                        "thumbnail": thumbnail,
                    }
            content["languages"] = languages

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [
                lc for lc in os.listdir(i18n.get_srt_path())
                if os.path.exists(i18n.get_srt_path(lc, content.get("id")))
            ]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc),
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls,
                                              key=lambda x: x.get("code", ""))

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + ".cache", "w") as f:
                    json.dump(CONTENT, f)
            except IOError as e:
                logging.warn(
                    "Annotated content cache file failed in saving with error {e}"
                    .format(e=e))

    return CONTENT