Example 1
        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
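
Note that render_topic_pages keeps its page count in a function attribute and reads topic_tree, language and tmp_dir from the enclosing command, so the caller seeds the counter and starts the recursion at the annotated root (the full handle() further below does exactly this). A minimal sketch of that call pattern:

    # Minimal sketch of how the recursive renderer is driven, as in the full
    # management command further below: seed the function-attribute counter,
    # then start from the root of the annotated topic tree.
    render_topic_pages.pages_rendered = 0
    render_topic_pages(topic_tree)
    logger.info("Topic pages rendered: {}".format(render_topic_pages.pages_rendered))
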
Example 2
        def recurse_nodes(node):

            child_availability = []

            # Do the recursion
            for child in node.get("children", []):
                recurse_nodes(child)
                child_availability.append(child.get("available", False))

            # If the node has children, derive availability from them; otherwise determine it directly below
            if child_availability:
                node["available"] = any(child_availability)
            else:
                # By default this is very charitable, assuming if something has not been annotated
                # it is available.
                if node.get("kind") == "Exercise":
                    cache_node = exercise_cache.get(node.get("id"), {})
                else:
                    cache_node = content_cache.get(node.get("id"), {})
                node["available"] = cache_node.get("available", True)

            # Translate everything for good measure
            with i18n.translate_block(language):
                node["title"] = _(node.get("title", ""))
                node["description"] = _(node.get("description", "")) if node.get("description") else ""
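
The rule implemented here: a node with children is available when any child is available, while a leaf falls back to its cache entry and defaults to available. A stripped-down, self-contained sketch of the same propagation on a toy tree, with a hypothetical in-memory cache standing in for the real content/exercise caches (translation step omitted):

    # Simplified, self-contained sketch of the availability propagation only;
    # the cache dict below is a hypothetical stand-in.
    content_cache = {"v1": {"available": True}, "v2": {"available": False}}

    def mark_available(node):
        child_availability = [mark_available(c) for c in node.get("children", [])]
        if child_availability:
            node["available"] = any(child_availability)
        else:
            node["available"] = content_cache.get(node.get("id"), {}).get("available", True)
        return node["available"]

    tree = {"id": "root", "children": [{"id": "v1"}, {"id": "v2"}]}
    mark_available(tree)
    assert tree["available"] is True  # v1 is available, so the parent is too
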
Example 3
        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)
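
Besides attaching cache data, annotate_tree rewrites the tree in place: children are filtered down to Video and Topic kinds, and every node gains url, parent and depth fields. A reduced, self-contained sketch of just that structural annotation (caches and i18n omitted):

    # Reduced sketch of the structural part of annotate_tree: keep only
    # Video/Topic children and attach url/parent/depth to every node.
    def annotate(topic, depth=0, parent=None):
        topic["children"] = [c for c in topic.get("children", [])
                             if c.get("kind") in ("Video", "Topic")]
        for child in topic["children"]:
            annotate(child, depth=depth + 1, parent=topic)
        topic["url"] = topic["id"] + ".html"
        topic["parent"] = parent
        topic["depth"] = depth

    root = {"id": "root", "kind": "Topic", "children": [
        {"id": "intro-video", "kind": "Video"},
        {"id": "quiz-1", "kind": "Exercise"},  # dropped: not Video/Topic
    ]}
    annotate(root)
    assert [c["id"] for c in root["children"]] == ["intro-video"]
    assert root["children"][0]["depth"] == 1
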
Example 4
        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
Example 5
def generate_flat_topic_tree(node_cache=None, lang_code=settings.LANGUAGE_CODE, alldata=False):
    with i18n.translate_block(lang_code):

        categories = node_cache or get_node_cache(language=i18n.lcode_to_django_lang(lang_code))
        result = dict()
        # make sure that we only get the slug of child of a topic
        # to avoid redundancy
        for category_name, category in categories.iteritems():
            result[category_name] = {}
            for node_name, node in category.iteritems():
                if alldata:
                    relevant_data = node
                else:
                    relevant_data = {
                        'title': _(node['title']),
                        'path': node['path'],
                        'kind': node['kind'],
                        'available': node.get('available', True),
                        'keywords': node.get('keywords', []),
                    }
                result[category_name][node_name] = relevant_data

    return result
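
The function flattens the two-level node cache into plain dictionaries keyed first by category and then by slug, keeping only a few fields unless alldata is set. A self-contained sketch of that flattening step on a hypothetical cache (the real function additionally wraps this in i18n.translate_block and translates the titles):

    # Self-contained sketch of the flattening step on a hypothetical node cache.
    node_cache = {
        "Topic": {
            "algebra": {"title": "Algebra", "path": "/math/algebra/",
                        "kind": "Topic", "available": True, "keywords": []},
        },
    }

    flat = {}
    for category_name, category in node_cache.items():
        flat[category_name] = {}
        for node_name, node in category.items():
            flat[category_name][node_name] = {
                "title": node["title"],
                "path": node["path"],
                "kind": node["kind"],
                "available": node.get("available", True),
                "keywords": node.get("keywords", []),
            }

    assert flat["Topic"]["algebra"]["path"] == "/math/algebra/"
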
Example 6
        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)
Example 7
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                        tmp_dir
                    )
                )

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json'))
            )
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json'))
            )

        topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir, video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info("Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                    logger.error("Command was: {}".format(" ".join(cmd)))
                                    raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(
                        node["path"],
                        video_file_name
                    )
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src, output_format="png")
                        if fp is None:
                            logger.error("Failed to create thumbnail for {}".format(video_file_src))
                        else:
                            logger.info("Successfully created thumbnail for {}".format(video_file_src))
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.png'
                        )
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(
                        subtitle_src_dir,
                        node['id'] + '.srt'
                    )
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(
                            node_dir,
                            node['id'] + '.vtt'
                        )
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                        else:
                            logger.info("Subtitle converted from SRT to VTT: {}".format(subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"],
                                node['id'] + '.vtt'
                            )

                else:
                    if options['download']:
                        logger.error("File not found and not downloaded: {}".format(video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get("children", [])
                unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children
        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
            about_html = render_to_string("kalite_zim/about.html", template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--title", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format((_stderr_data or "") + (stdout_data or "")))

        logger.info(
            "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
                h=duration // 3600,
                m=(duration % 3600) // 60,
                s=duration % 60,
            )
        )
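
handle() is the body of a Django management command, and the option names it reads (language, tmp_dir, clear, resume, download, publisher, transcode2webm, zimwriterfs, test) are the dests supplied by its argument parser. A hedged invocation sketch, assuming the command is registered as export2zim inside a configured KA Lite/Django environment:

    # Hypothetical invocation sketch; assumes a configured KA Lite/Django
    # environment and that the command is registered as "export2zim".
    # The keyword names below mirror the dests read via options.get()/options[].
    from django.core.management import call_command

    call_command(
        "export2zim",
        "/tmp/khan-academy_en.zim",  # positional argument: destination .zim file
        language="en",
        clear=True,                  # wipe a non-empty tmp dir instead of failing
        download=True,               # fetch missing videos before packaging
        publisher="example.org",     # hypothetical publisher string
    )
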
Example 8
def get_content_cache(force=False, annotate=False, language=None):

    if not language:
        language = django_settings.LANGUAGE_CODE

    global CONTENT

    if CONTENT is None:
        CONTENT = {}

    if CONTENT.get(language) is None:
        content = None
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
        if content:
            CONTENT[language] = content
            return CONTENT[language]
        else:
            if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
                call_command("create_content_db")
                content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
            else:
                content = softload_json(settings.CONTENT_FILEPATH, logger=logging.debug, raises=False)
            CONTENT[language] = content
            annotate = True

    if annotate:

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        try:
            contents_folder = os.listdir(django_settings.CONTENT_ROOT)
        except OSError:
            contents_folder = []

        subtitle_langs = {}

        if os.path.exists(i18n.get_srt_path()):
            for (dirpath, dirnames, filenames) in os.walk(i18n.get_srt_path()):
                # Only bother looking at files that are inside a 'subtitles' directory
                if os.path.basename(dirpath) == "subtitles":
                    lc = os.path.basename(os.path.dirname(dirpath))
                    for filename in filenames:
                        if filename in subtitle_langs:
                            subtitle_langs[filename].append(lc)
                        else:
                            subtitle_langs[filename] = [lc]

        for key, content in CONTENT[language].iteritems():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if (dubbed_id + "." + format) in contents_folder:
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": django_settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    elif django_settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = subtitle_langs.get("{id}.srt".format(id=content.get("id")), [])

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": django_settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(language):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description")) if content.get("description") else ""

            CONTENT[language][key] = content

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                CONTENT[language].commit()
            except IOError as e:
                logging.warn("Annotated content cache file failed to save with error {e}".format(e=e))

    return CONTENT[language]
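
The subtitle scan above builds a map from each .srt file name to the language codes whose subtitles/ directory contains it; those codes later become the per-item subtitle_urls. A small standalone sketch of the same walk, with srt_root as a hypothetical stand-in for i18n.get_srt_path():

    # Standalone sketch of the subtitle scan: map "<id>.srt" -> [language codes]
    # for every file found under <srt_root>/<lang>/subtitles/.
    import os

    def scan_subtitles(srt_root):
        subtitle_langs = {}
        for dirpath, _dirnames, filenames in os.walk(srt_root):
            if os.path.basename(dirpath) == "subtitles":
                lc = os.path.basename(os.path.dirname(dirpath))
                for filename in filenames:
                    subtitle_langs.setdefault(filename, []).append(lc)
        return subtitle_langs

    # e.g. {'abc123.srt': ['es', 'pt']} if both languages ship that subtitle file
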
Example 9
def get_exercise_cache(force=False, language=None):

    if not language:
        language = django_settings.LANGUAGE_CODE

    global EXERCISES
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(
                cache_file_path("exercises_{0}.json".format(language)),
                logger=logging.debug,
                raises=False
            )
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(settings.EXERCISES_FILEPATH, logger=logging.debug, raises=False)

        # English-language exercises live in application space, translations in user space
        if language == "en":
            exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        else:
            exercise_root = i18n.get_localized_exercise_dirpath(language)
        if os.path.exists(exercise_root):
            try:
                exercise_templates = os.listdir(exercise_root)
            except OSError:
                exercise_templates = []
        else:
            exercise_templates = []

        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

            # The central server doesn't have an assessment item database
            if django_settings.CENTRAL_SERVER:
                available = False
            elif exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items", []):
                    item = json.loads(item)
                    if get_assessment_item_data(request=None, assessment_item_id=item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = exercise_template in exercise_templates

                # Get the language codes for exercise templates that exist
                # Try to minimize the number of os.path.exists calls (since they're a bottleneck) by using the same
                # precedence rules in i18n.select_best_available_languages
                available_langs = set(["en"] + [language] * available)
                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)

            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(cache_file_path("exercises_{0}.json".format(language)), "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warn("Annotated exercise cache file failed to save with error {e}".format(e=e))

    return EXERCISES[language]
Example 10
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [lc for lc in os.listdir(i18n.get_srt_path()) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warn("Annotated content cache file failed to save with error {e}".format(e=e))

    return CONTENT[language]
Example 11
def get_exercise_cache(force=False, language=settings.LANGUAGE_CODE):
    global EXERCISES, EXERCISES_FILEPATH
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(EXERCISES_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(EXERCISES_FILEPATH, logger=logging.debug, raises=False)
        exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        if os.path.exists(exercise_root):
            exercise_templates = os.listdir(exercise_root)
        else:
            exercise_templates = []
        assessmentitems = get_assessment_item_cache()
        TEMPLATE_FILE_PATH = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises", "%s")
        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

            if exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items", []):
                    item = json.loads(item)
                    if assessmentitems.get(item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = os.path.isfile(TEMPLATE_FILE_PATH % exercise_template)

                # Get the language codes for exercise templates that exist
                available_langs = set(["en"] + [lang_code for lang_code in exercise_templates if os.path.exists(os.path.join(exercise_root, lang_code, exercise_file))])

                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)


            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(EXERCISES_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warn("Annotated exercise cache file failed to save with error {e}".format(e=e))

    return EXERCISES[language]
Example 12
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(),
                                   'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info(
                    "Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                    .format(tmp_dir))

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning(
                "FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm."
            )

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError(
                    "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
                )

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get(
                'khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json')))
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json')))

        topic_tree = softload_json(topic_tree_json_path,
                                   logger=logger.debug,
                                   raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content!?, id is: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning(
                        "Found a duplicate ID for {}, re-downloading".format(
                            node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(
                        video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir,
                                                       video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info(
                                "Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i",
                                video_file_src,
                                "-codec:v",
                                "libvpx",
                                "-quality",
                                "best",
                                "-cpu-used",
                                "0",
                                "-b:v",
                                "300k",
                                "-qmin",
                                "10",  # 10=lowest value
                                "-qmax",
                                "35",  # 42=highest value
                                "-maxrate",
                                "300k",
                                "-bufsize",
                                "600k",
                                "-threads",
                                "8",
                                # "-vf", "scale=-1",
                                "-codec:a",
                                "libvorbis",
                                # "-b:a", "128k",
                                "-aq",
                                "5",
                                "-f",
                                "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass",
                                "1",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass",
                                "2",
                                "-y",
                                "-passlogfile",
                                ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(
                                    cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate(
                                )
                                if process.returncode != 0:
                                    logger.error(
                                        "Error invoking ffmpeg: {}".format(
                                            (_stderr_data or "") +
                                            (stdout_data or "")))
                                    logger.error("Command was: {}".format(
                                        " ".join(cmd)))
                                    raise CommandError(
                                        "Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(node["path"],
                                                     video_file_name)
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(
                        copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src,
                                              output_format="png")
                        if fp is None:
                            logger.error(
                                "Failed to create thumbnail for {}".format(
                                    video_file_src))
                        else:
                            logger.info(
                                "Successfully created thumbnail for {}".format(
                                    video_file_src))
                            file(thumb_file_src, 'wb').write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"], node['id'] + '.png')
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(subtitle_src_dir,
                                                node['id'] + '.srt')
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(node_dir,
                                                    node['id'] + '.vtt')
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(
                                subtitle_srt))
                        else:
                            logger.info(
                                "Subtitle converted from SRT to VTT: {}".format(
                                    subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"], node['id'] + '.vtt')

                else:
                    if options['download']:
                        logger.error("File not found and not downloaded: {}".format(
                            video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(
                    node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get(
                    "children", [])
                unavailable_video = child["kind"] == "Video" and not child.get(
                    "content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children

        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)

        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html",
                                            template_context)
            about_html = render_to_string("kalite_zim/about.html",
                                          template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome.html file
        open(os.path.join(tmp_dir, 'welcome.html'), 'w').write(welcome_html)
        open(os.path.join(tmp_dir, 'about.html'), 'w').write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'),
                        os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(
            copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(
            render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome",
            "welcome.html",
            "--favicon",
            "static/img/ka_leaf.png",
            "--publisher",
            publisher,
            "--creator",
            "KhanAcademy.org",
            "--title",
            "Khan Academy ({})".format(language),
            "--description",
            "Videos from Khan Academy",
            "--language",
            language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}".format(
                stdout_data))

        logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        ))
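In the listing above only stdout is piped, so anything zimwriterfs writes to stderr goes straight to the terminal and is missing from the logged error. A minimal sketch of a stricter invocation that captures both streams; run_zimwriterfs is a hypothetical helper, not part of the original command:

import logging
import subprocess

logger = logging.getLogger(__name__)


def run_zimwriterfs(zimwriterfs_args):
    # Capture stdout and stderr so a failure can be logged in full.
    process = subprocess.Popen(zimwriterfs_args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout_data, stderr_data = process.communicate()
    if process.returncode != 0:
        logger.error("Error invoking zimwriterfs: {}".format(
            stderr_data + stdout_data))
    return process.returncode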
Example 15
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            content_lang = ""
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
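                    # No local copy of the dubbed video: fall back to a
                    # configured remote/backup source, otherwise mark the
                    # item unavailable.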
                    elif settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [lc for lc in os.listdir(i18n.get_srt_path()) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warning("Failed to save annotated content cache file: {e}".format(e=e))

    return CONTENT[language]
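A short usage sketch for the function above, assuming it is imported from its KA Lite module with settings, i18n and the JSON caches already in place; the language code and counts are illustrative only. When settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP is set, force=True skips the per-language .cache file so availability is recomputed from what is actually on disk:

content_cache = get_content_cache(force=True, annotate=True, language="en")
available = [c for c in content_cache.values() if c.get("available")]
print("{} of {} content items are available locally".format(
    len(available), len(content_cache)))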
Example 16
def get_exercise_cache(force=False, language=settings.LANGUAGE_CODE):
    global EXERCISES, EXERCISES_FILEPATH
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(EXERCISES_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(EXERCISES_FILEPATH, logger=logging.debug, raises=False)
        exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        if os.path.exists(exercise_root):
            exercise_templates = os.listdir(exercise_root)
        else:
            exercise_templates = []
        assessmentitems = get_assessment_item_cache()
        TEMPLATE_FILE_PATH = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises", "%s")
        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

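            # Newer exercises are assembled from Khan Academy assessment items;
            # such an exercise counts as available only if at least one of its
            # items is present in the local assessment item cache. Older
            # exercises fall back to khan-exercises HTML templates below.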
            if exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items", []):
                    item = json.loads(item)
                    if assessmentitems.get(item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = os.path.isfile(TEMPLATE_FILE_PATH % exercise_template)

                # Get the language codes for exercise templates that exist
                available_langs = set(["en"] + [lang_code for lang_code in exercise_templates if os.path.exists(os.path.join(exercise_root, lang_code, exercise_file))])

                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)

            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(EXERCISES_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warning("Failed to save annotated exercise cache file: {e}".format(e=e))

    return EXERCISES[language]
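As above, a brief hedged usage sketch; it assumes the module is importable and that settings.KHAN_EXERCISES_DIRPATH points at a populated khan-exercises checkout (the "es" language code is just an example):

exercise_cache = get_exercise_cache(force=True, language="es")
available = [e for e in exercise_cache.values() if e.get("available")]
localized = [e for e in available if e.get("lang") == "es"]
print("{} exercises available, {} with localized templates".format(
    len(available), len(localized)))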