Example 1
def update_metadata(package_metadata, version=VERSION):
    """
    We've zipped the packages, and now have unzipped & zipped sizes.
    Update this info in the local metadata (but not inside the zip)
    """
    master_filepath = get_language_pack_availability_filepath(version=version)
    master_metadata = softload_json(master_filepath, logger=logging.warn, errmsg="Error opening master language pack metadata")

    for lc, updated_meta in package_metadata.iteritems():
        lang_code_ietf = lcode_to_ietf(lc)

        # Gather existing metadata
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf, version=version)
        stored_meta = softload_json(metadata_filepath, logger=logging.warn, errmsg="Error opening %s language pack metadata" % lc)

        stored_meta.update(updated_meta)

        # Write locally (this is used on download by distributed server to update its database)
        with open(metadata_filepath, 'w') as output:
            json.dump(stored_meta, output)

        # Update master (this is used for central server to handle API requests for data)
        master_metadata[lang_code_ietf] = stored_meta

    # Save updated master
    ensure_dir(os.path.dirname(master_filepath))
    with open(master_filepath, 'w') as output:
        json.dump(master_metadata, output)
    logging.info("Local record of translations updated")
def update_metadata(package_metadata, version=SHORTVERSION):
    """
    We've zipped the packages, and now have unzipped & zipped sizes.
    Update this info in the local metadata (but not inside the zip)
    """
    master_filepath = get_language_pack_availability_filepath(version=version)
    master_metadata = softload_json(master_filepath, logger=logging.warn, errmsg="Error opening master language pack metadata")

    for lc, updated_meta in package_metadata.iteritems():
        lang_code_ietf = lcode_to_ietf(lc)

        # Gather existing metadata
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf, version=version)
        stored_meta = softload_json(metadata_filepath, logger=logging.warn, errmsg="Error opening %s language pack metadata" % lc)

        stored_meta.update(updated_meta)

        # Write locally (this is used on download by distributed server to update its database)
        with open(metadata_filepath, 'w') as output:
            json.dump(stored_meta, output)

        # Update master (this is used for central server to handle API requests for data)
        master_metadata[lang_code_ietf] = stored_meta

    # Save updated master
    ensure_dir(os.path.dirname(master_filepath))
    with open(master_filepath, 'w') as output:
        json.dump(master_metadata, output)
    logging.info("Local record of translations updated")
Example 3
def get_topic_tree(force=False, annotate=False, channel=settings.CHANNEL, language=settings.LANGUAGE_CODE):
    global TOPICS, TOPICS_FILEPATHS
    if not TOPICS:
        TOPICS = {}
    if TOPICS.get(channel) is None:
        TOPICS[channel] = {}
    if TOPICS.get(channel, {}).get(language) is None:
        TOPICS[channel][language] = softload_json(TOPICS_FILEPATHS.get(channel), logger=logging.debug, raises=False)

        # Just loaded from disk, so have to restamp.
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            topics = softload_json(TOPICS_FILEPATHS.get(channel) + "_" + language + ".cache", logger=logging.debug, raises=False)
            if topics:
                TOPICS[channel][language] = topics
                return TOPICS[channel][language]

        # Loop through all the nodes in the topic tree
        # and cross reference with the content_cache to check availability.
        content_cache = get_content_cache(language=language)
        exercise_cache = get_exercise_cache(language=language)
        def recurse_nodes(node):

            child_availability = []

            # Do the recursion
            for child in node.get("children", []):
                recurse_nodes(child)
                child_availability.append(child.get("available", False))

            # If the node has children, derive availability from them; otherwise fall back to the cache below
            if child_availability:
                node["available"] = any(child_availability)
            else:
                # By default this is very charitable, assuming if something has not been annotated
                # it is available.
                if node.get("kind") == "Exercise":
                    cache_node = exercise_cache.get(node.get("id"), {})
                else:
                    cache_node = content_cache.get(node.get("id"), {})
                node["available"] = cache_node.get("available", True)

            # Translate everything for good measure
            with i18n.translate_block(language):
                node["title"] = _(node.get("title", ""))
                node["description"] = _(node.get("description", "")) if node.get("description") else ""

        recurse_nodes(TOPICS[channel][language])
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(TOPICS_FILEPATHS.get(channel) + "_" + language + ".cache", "w") as f:
                    json.dump(TOPICS[channel][language], f)
            except IOError as e:
                logging.warn("Annotated topic cache file failed in saving with error {e}".format(e=e))

    return TOPICS[channel][language]
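In this variant the module-level TOPICS cache is a two-level dictionary keyed by channel and then by language. A hypothetical call sequence (the channel and language values are illustrative, not taken from the examples) would be:

# Illustrative usage; "khan" / "es" are assumed values.
tree = get_topic_tree(channel="khan", language="es")   # first call: softload_json from disk, then annotate
tree = get_topic_tree(channel="khan", language="es")   # later calls: served from TOPICS["khan"]["es"]
root_available = tree.get("available")                 # stamped by recurse_nodes() during annotation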
Example 5
def get_topic_tree(force=False, annotate=False, channel=settings.CHANNEL):
    global TOPICS, TOPICS_FILEPATHS
    if not TOPICS:
        TOPICS = {}
    if TOPICS.get(channel) is None:
        TOPICS[channel] = softload_json(TOPICS_FILEPATHS.get(channel),
                                        logger=logging.debug,
                                        raises=False)
        validate_ancestor_ids(
            TOPICS[channel])  # make sure ancestor_ids are set properly

        # Just loaded from disk, so have to restamp.
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            topics = softload_json(TOPICS_FILEPATHS.get(channel) + ".cache",
                                   logger=logging.debug,
                                   raises=False)
            if topics:
                TOPICS[channel] = topics
                return TOPICS[channel]

        # Loop through all the nodes in the topic tree
        # and cross reference with the content_cache to check availability.
        content_cache = get_content_cache()

        def recurse_nodes(node):

            child_availability = []

            # Do the recursion
            for child in node.get("children", []):
                recurse_nodes(child)
                child_availability.append(child.get("available", False))

            # If the node has children, derive availability from them; otherwise fall back to the cache below
            if child_availability:
                node["available"] = any(child_availability)
            else:
                # By default this is very charitable, assuming if something has not been annotated
                # it is available - needs to be updated for exercises.
                if content_cache.get(node.get("id"),
                                     {}).get("languages", True):
                    node["available"] = True

        recurse_nodes(TOPICS[channel])
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(TOPICS_FILEPATHS.get(channel) + ".cache", "w") as f:
                    json.dump(TOPICS[channel], f)
            except IOError as e:
                logging.warn("Failed to save annotated topic cache file: {e}".format(e=e))

    return TOPICS[channel]
Example 6
def get_annotated_topic_tree(request, lang_code=None):
    call_command("videoscan")  # Could potentially be very slow, blocking request... but at least it's via an API request!

    lang_code = lang_code or request.language      # Get annotations for the current language.
    statusdict = dict(VideoFile.objects.values_list("youtube_id", "percent_complete"))

    return JsonResponse(annotate_topic_tree(softload_json(TOPICS_FILEPATHS.get(settings.CHANNEL), logger=logging.debug, raises=False), statusdict=statusdict, lang_code=lang_code))
Example 7
def update_json(youtube_id, lang_code, downloaded, api_response,
                time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    language_srt_map = softload_json(filepath, logger=logging.error)
    if not language_srt_map:
        return False

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()
    logging.debug("File updated.")

    return True
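A hypothetical invocation of update_json, with purely illustrative values (the date format matches the '%Y-%m-%d' parsing in create_all_mappings at the end of this section). Note that the youtube_id must already be a key in the language's srt map, or the lookup raises KeyError:

# All values below are illustrative.
ok = update_json(
    "NSSoRzTTLhk",              # hypothetical youtube_id already present in the map
    "en-us",                    # lower-cased IETF code, per the docstring
    downloaded=True,
    api_response="success",
    time_of_attempt="2015-06-01",
)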
Example 8
    def test_json_path(self):
        khan_path = os.path.realpath(os.path.join(PROJECT_PATH, "data", "khan"))
        msg = 'Testing access to and format of json files at "%s"...' % khan_path
        self._log(msg)
        fail_count = 0
        for json_file in self.JSON_FILES:
            json_path = os.path.realpath(os.path.join(khan_path, json_file))
            msg = '\n...checking access to "%s"...' % json_path
            if not self.check_path(json_path, os.R_OK, msg=msg, raise_fail=False, end_chars=""):
                fail_count += 1
            else:
                # Attempt to load .json file to validate format.
                # TODO(cpauya): Check if .json file is large to prevent delays.
                self._log("\n......loading json file...")
                json_content = softload_json(json_path, default=None)
                if json_content is None:
                    msg = "file has invalid json format or is empty..."
                    self._fail(msg, raise_fail=False, end_chars="")
                    fail_count += 1
                else:
                    self._pass(end_chars="")
        if fail_count > 0:
            self._fail("\n...Result: %s json file/s failed test..." % fail_count)
        else:
            self._pass("\n...Result: all json file/s are ok...")
Example 11
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {
                u'downloaded': False,
                u'last_success': u'',
                u'last_attempt': u'',
                u'api_response': u''
            }
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
Example 12
def get_exercise_cache(force=False):
    global EXERCISES, EXERCISES_FILEPATH
    if EXERCISES is None or force:
        EXERCISES = softload_json(EXERCISES_FILEPATH,
                                  logger=logging.debug,
                                  raises=False)

    return EXERCISES
Example 13
def get_assessment_item_cache(force=False):
    global ASSESSMENT_ITEMS, ASSESSMENT_ITEMS_FILEPATH
    if ASSESSMENT_ITEMS is None or force:
        ASSESSMENT_ITEMS = softload_json(ASSESSMENT_ITEMS_FILEPATH,
                                         logger=logging.debug,
                                         raises=False)

    return ASSESSMENT_ITEMS
Example 15
def get_language_names(lang_code=None):
    """
    Returns dictionary of names (English name, "Native" name)
    for a given language code.
    """
    global LANG_NAMES_MAP
    lang_code = lcode_to_ietf(lang_code)
    if not LANG_NAMES_MAP:
        LANG_NAMES_MAP = softload_json(settings.LANG_LOOKUP_FILEPATH)
    return LANG_NAMES_MAP.get(lang_code) if lang_code else LANG_NAMES_MAP
Example 16
def write_count_to_json(subtitle_counts, data_path):
    """Write JSON to file in static/data/subtitles/"""
    current_counts = softload_json(SUBTITLE_COUNTS_FILEPATH,
                                   logger=logging.error)
    current_counts.update(subtitle_counts)

    logging.debug("Writing fresh srt counts to %s" % SUBTITLE_COUNTS_FILEPATH)
    with open(SUBTITLE_COUNTS_FILEPATH, 'wb') as fp:
        # sort here, so we don't have to sort later when serving to clients
        json.dump(current_counts, fp, sort_keys=True)
Example 17
def _get_installed_language_packs():
    """
    On-disk method to show currently installed languages and meta data.
    """

    # There's always English, but without contents...
    installed_language_packs = [{
        'code': 'en',
        'software_version': SHORTVERSION,
        'language_pack_version': 0,  # Set to '0', overwritten by ACTUAL content pack
        'percent_translated': 100,
        'subtitle_count': 0,
        'name': 'English',
        'native_name': 'English',
    }]

    # Loop through locale folders
    for locale_dir in settings.LOCALE_PATHS:
        if not os.path.exists(locale_dir):
            continue

        # Loop through folders in each locale dir
        # This is idiotic, it just assumes that every directory / file is
        # a valid language code
        for django_disk_code in os.listdir(locale_dir):

            # Skip if it's a file
            if not os.path.isdir(os.path.join(locale_dir, django_disk_code)):
                continue

            # Inside each folder, read from the JSON file - language name, % UI trans, version number
            try:
                # Get the metadata
                metadata_filepath = os.path.join(
                    locale_dir, django_disk_code,
                    "%s_metadata.json" % lcode_to_ietf(django_disk_code))
                lang_meta = softload_json(metadata_filepath, raises=True)

                logging.debug("Found language pack %s" % (django_disk_code))
            except IOError as e:
                if e.errno == errno.ENOENT:
                    logging.info("Ignoring non-language pack %s in %s" %
                                 (django_disk_code, locale_dir))
                else:
                    logging.error("Error reading %s metadata (%s): %s" %
                                  (django_disk_code, metadata_filepath, e))
                continue

            installed_language_packs.append(lang_meta)

    sorted_list = sorted(installed_language_packs,
                         key=lambda m: m['name'].lower())
    return OrderedDict([(lcode_to_ietf(val["code"]), val)
                        for val in sorted_list])
Example 18
def get_code2lang_map(lang_code=None, force=False):
    """Given a language code, returns metadata about that language."""
    global CODE2LANG_MAP

    if force or not CODE2LANG_MAP:
        lmap = softload_json(settings.LANG_LOOKUP_FILEPATH, logger=logging.debug)

        CODE2LANG_MAP = {}
        for lc, entry in lmap.iteritems():
            CODE2LANG_MAP[lcode_to_ietf(lc)] = entry  # key entries by IETF format

    return CODE2LANG_MAP.get(lcode_to_ietf(lang_code)) if lang_code else CODE2LANG_MAP
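lcode_to_ietf is used throughout these examples but never shown. Based on the codes that appear ("pt-BR", and the django-style "en_US" mentioned in clear_subtitles_cache), a rough sketch under those assumptions could be the following; the real helper may well differ:

def lcode_to_ietf(lang_code):
    # Assumed behaviour: normalize "en_US" / "en-us" style codes to IETF form "en-US".
    if not lang_code:
        return lang_code
    parts = lang_code.replace("_", "-").split("-")
    parts[0] = parts[0].lower()
    parts[1:] = [p.upper() if len(p) == 2 else p.title() for p in parts[1:]]
    return "-".join(parts)

# e.g. lcode_to_ietf("en_US") -> "en-US", lcode_to_ietf("pt-br") -> "pt-BR"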
Example 20
def _get_installed_language_packs():
    """
    On-disk method to show currently installed languages and meta data.
    """

    # There's always English...
    installed_language_packs = [
        {
            "code": "en",
            "software_version": VERSION,
            "language_pack_version": 0,
            "percent_translated": 100,
            "subtitle_count": 0,
            "name": "English",
            "native_name": "English",
        }
    ]

    # Loop through locale folders
    for locale_dir in settings.LOCALE_PATHS:
        if not os.path.exists(locale_dir):
            continue

        # Loop through folders in each locale dir
        # This is idiotic, it just assumes that every directory / file is
        # a valid language code
        for django_disk_code in os.listdir(locale_dir):

            # Skip if it's a file
            if not os.path.isdir(os.path.join(locale_dir, django_disk_code)):
                continue

            # Inside each folder, read from the JSON file - language name, % UI trans, version number
            try:
                # Get the metadata
                metadata_filepath = os.path.join(
                    locale_dir, django_disk_code, "%s_metadata.json" % lcode_to_ietf(django_disk_code)
                )
                lang_meta = softload_json(metadata_filepath, raises=True)

                logging.debug("Found language pack %s" % (django_disk_code))
            except IOError as e:
                if e.errno == errno.ENOENT:
                    logging.info("Ignoring non-language pack %s in %s" % (django_disk_code, locale_dir))
                else:
                    logging.error("Error reading %s metadata (%s): %s" % (django_disk_code, metadata_filepath, e))
                continue

            installed_language_packs.append(lang_meta)

    sorted_list = sorted(installed_language_packs, key=lambda m: m["name"].lower())
    return OrderedDict([(lcode_to_ietf(val["code"]), val) for val in sorted_list])
Example 21
def get_code2lang_map(lang_code=None, force=False):
    """
    """
    global CODE2LANG_MAP

    if force or not CODE2LANG_MAP:
        lmap = softload_json(settings.LANG_LOOKUP_FILEPATH, logger=logging.debug)

        CODE2LANG_MAP = {}
        for lc, entry in lmap.iteritems():
            CODE2LANG_MAP[lcode_to_ietf(lc)] = entry

    return CODE2LANG_MAP.get(lang_code) if lang_code else CODE2LANG_MAP
Example 22
    def handle(self, **options):
        logging.info("fetching assessment items")

        channel = options.get("channel")

        json_path = os.path.join(django_settings.CONTENT_DATA_PATH, channel,
                                 'assessmentitems.json')

        # load the assessmentitems
        assessment_items = json.load(open(json_path))

        # delete assessment items that aren't referenced in the exercises list (likely due to blacklisting)
        dangling_ids = set(assessment_items.keys())
        exercises = softload_json(topic_tools_settings.EXERCISES_FILEPATH)
        for ex in exercises.values():
            for item in ex.get("all_assessment_items", []):
                item = json.loads(item)
                if item.get("id") in dangling_ids:
                    dangling_ids.remove(item.get("id"))
        for item_id in dangling_ids:
            del assessment_items[item_id]

        image_urls = find_all_image_urls(assessment_items)
        graphie_urls = find_all_graphie_urls(assessment_items)
        local_urls = find_all_local_urls(assessment_items)

        logging.info("rewriting urls")
        new_assessment_items = localize_all_image_urls(assessment_items)
        new_assessment_items = localize_all_content_links(new_assessment_items)
        new_assessment_items = localize_all_graphie_urls(new_assessment_items)
        new_assessment_items = localize_all_local_urls(new_assessment_items,
                                                       channel=channel)

        # TODO(jamalex): We should migrate this away from direct-to-zip so that we can re-run it
        # without redownloading all files. Not possible currently because ZipFile has no `delete`.
        logging.info("downloading images")
        with open(ZIP_FILE_PATH.format(channel=channel), "w") as f:
            zf = zipfile.ZipFile(
                f, "w"
            )  # zipfile.ZipFile isn't a context manager yet for python 2.6
            write_assessment_item_db_to_zip(zf, new_assessment_items)
            download_urls_to_zip(zf, image_urls)
            download_urls_to_zip(zf, graphie_urls)
            copy_local_files_to_zip(zf, local_urls)
            write_assessment_item_version_to_zip(zf)
            if channel:
                write_channel_info_to_zip(zf, channel=channel)
            zf.close()

        logging.info("Zip File with images placed in %s" %
                     ZIP_FILE_PATH.format(channel=channel))
Example 23
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores a key per language.  Value is a dictionary between video_id and (dubbed) youtube_id
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(settings.DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    # Never call commands that could fail from the distributed server.
                    #   Always create a central server API to abstract things
                    response = requests.get(
                        "%s://%s/api/i18n/videos/dubbed_video_map"
                        % (settings.SECURESYNC_PROTOCOL, settings.CENTRAL_SERVER_HOST)
                    )
                    response.raise_for_status()
                    with open(settings.DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                        fp.write(
                            response.content.decode("utf-8")
                        )  # wait until content has been confirmed before opening file.
                except Exception as e:
                    if not os.path.exists(settings.DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(settings.DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            if lang_name:
                logging.debug(
                    "Adding dubbed video map entry for %s (name=%s)" % (get_langcode_map(lang_name), lang_name)
                )
                DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    # Hardcode the Brazilian Portuguese mapping that only the central server knows about
    # TODO(jamalex): BURN IT ALL DOWN!
    if lang_code == "pt-BR":
        lang_code = "pt"

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
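The map returned above is keyed by language code, with each value mapping video_id to the dubbed youtube_id. Hypothetical lookups (the codes and ids are illustrative):

# Illustrative lookups only.
es_map = get_dubbed_video_map("es")        # {} if Spanish has no dubbed videos
full_map = get_dubbed_video_map()          # the whole mapping, keyed by language code
dubbed_id = es_map.get("NSSoRzTTLhk")      # dubbed youtube_id for a video_id, if any
pt_map = get_dubbed_video_map("pt-BR")     # falls through to "pt" via the hardcoded mapping above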
Example 24
def get_dubbed_video_map(lang_code=None, reload=None, force=False):
    """
    Stores a key per language.  Value is a dictionary between video_id and (dubbed) youtube_id
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    reload = (reload is None and force) or reload  # default of reload is force

    if DUBBED_VIDEO_MAP is None or reload:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    # Generate from the spreadsheet
                    logging.debug("Generating dubbed video mappings.")
                    call_command("generate_dubbed_video_mappings", force=force)
                except Exception as e:
                    logging.debug(
                        "Error generating dubbed video mappings: %s" % e)
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(
                DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings (%s); defaulting to empty." % e)
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        # Remove any empty items, as they break things
        if "" in DUBBED_VIDEO_MAP_RAW:
            del DUBBED_VIDEO_MAP_RAW[""]

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            if lang_name:
                logging.debug(
                    "Adding dubbed video map entry for %s (name=%s)" %
                    (get_langcode_map(lang_name), lang_name))
                DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
Example 25
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores a key per language.  Value is a dictionary between video_id and (dubbed) youtube_id
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    # Never call commands that could fail from the distributed server.
                    #   Always create a central server API to abstract things
                    response = requests.get(
                        "%s://%s/api/i18n/videos/dubbed_video_map" %
                        (settings.SECURESYNC_PROTOCOL,
                         settings.CENTRAL_SERVER_HOST))
                    response.raise_for_status()
                    with open(DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                        fp.write(
                            response.content.decode('utf-8')
                        )  # wait until content has been confirmed before opening file.
                except Exception as e:
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(
                DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            if lang_name:
                logging.debug(
                    "Adding dubbed video map entry for %s (name=%s)" %
                    (get_langcode_map(lang_name), lang_name))
                DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
Example 26
def get_annotated_topic_tree(request, lang_code=None):
    call_command(
        "videoscan"
    )  # Could potentially be very slow, blocking request... but at least it's via an API request!

    lang_code = lang_code or request.language  # Get annotations for the current language.
    statusdict = dict(
        VideoFile.objects.values_list("youtube_id", "percent_complete"))

    return JsonResponse(
        annotate_topic_tree(softload_json(TOPICS_FILEPATHS.get(CHANNEL),
                                          logger=logging.debug,
                                          raises=False),
                            statusdict=statusdict,
                            lang_code=lang_code))
Example 27
def _get_installed_language_packs():
    """
    On-disk method to show currently installed languages and meta data.
    """

    # There's always English...
    installed_language_packs = [{
        'code': 'en',
        'software_version': VERSION,
        'language_pack_version': 0,
        'percent_translated': 100,
        'subtitle_count': 0,
        'name': 'English',
        'native_name': 'English',
    }]

    # Loop through locale folders
    for locale_dir in settings.LOCALE_PATHS:
        if not os.path.exists(locale_dir):
            continue

        # Loop through folders in each locale dir
        for django_disk_code in os.listdir(locale_dir):

            # Inside each folder, read from the JSON file - language name, % UI trans, version number
            try:
                # Get the metadata
                metadata_filepath = os.path.join(
                    locale_dir, django_disk_code,
                    "%s_metadata.json" % lcode_to_ietf(django_disk_code))
                lang_meta = softload_json(metadata_filepath, raises=True)

                logging.debug("Found language pack %s" % (django_disk_code))
            except Exception as e:
                if isinstance(e, IOError) and e.errno == 2:
                    logging.info("Ignoring non-language pack %s in %s" %
                                 (django_disk_code, locale_dir))
                else:
                    logging.error("Error reading %s metadata (%s): %s" %
                                  (django_disk_code, metadata_filepath, e))
                continue

            installed_language_packs.append(lang_meta)

    sorted_list = sorted(installed_language_packs,
                         key=lambda m: m['name'].lower())
    return OrderedDict([(lcode_to_ietf(val["code"]), val)
                        for val in sorted_list])
Example 28
def get_topic_tree(force=False, props=None):
    global TOPICS, TOPICS_FILEPATH
    if TOPICS is None or force:
        TOPICS = softload_json(TOPICS_FILEPATH, logger=logging.debug, raises=True)
        validate_ancestor_ids(TOPICS)  # make sure ancestor_ids are set properly

        # Limit the memory footprint by unloading particular values
        if props:
            node_cache = get_node_cache()
            for kind, list_by_kind in node_cache.iteritems():
                for node_list in list_by_kind.values():
                    for node in node_list:
                        for att in node.keys():
                            if att not in props:
                                del node[att]
    return TOPICS
Example 32
def get_dubbed_video_map(lang_code=None, force=False):
    """
    Stores a key per language.  Value is a dictionary between video_id and (dubbed) youtube_id
    """
    global DUBBED_VIDEO_MAP, DUBBED_VIDEO_MAP_RAW, DUBBED_VIDEOS_MAPPING_FILEPATH

    if DUBBED_VIDEO_MAP is None or force:
        try:
            if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH) or force:
                try:
                    if settings.CENTRAL_SERVER:
                        # Generate from the spreadsheet
                        logging.debug("Generating dubbed video mappings.")
                        call_command("generate_dubbed_video_mappings", force=force)
                    else:
                        # Never call commands that could fail from the distributed server.
                        #   Always create a central server API to abstract things (see below)
                        response = requests.get("http://%s/api/i18n/videos/dubbed_video_map" % (settings.CENTRAL_SERVER_HOST))
                        response.raise_for_status()
                        with open(DUBBED_VIDEOS_MAPPING_FILEPATH, "wb") as fp:
                            fp.write(response.content.decode('utf-8'))  # wait until content has been confirmed before opening file.
                except Exception as e:
                    if not os.path.exists(DUBBED_VIDEOS_MAPPING_FILEPATH):
                        # Unrecoverable error, so raise
                        raise
                    elif DUBBED_VIDEO_MAP:
                        # No need to recover--allow the downstream dude to catch the error.
                        raise
                    else:
                        # We can recover by NOT forcing reload.
                        logging.warn("%s" % e)

            DUBBED_VIDEO_MAP_RAW = softload_json(DUBBED_VIDEOS_MAPPING_FILEPATH, raises=True)
        except Exception as e:
            logging.info("Failed to get dubbed video mappings; defaulting to empty.")
            DUBBED_VIDEO_MAP_RAW = {}  # setting this will avoid triggering reload on every call

        DUBBED_VIDEO_MAP = {}
        for lang_name, video_map in DUBBED_VIDEO_MAP_RAW.iteritems():
            logging.debug("Adding dubbed video map entry for %s (name=%s)" % (get_langcode_map(lang_name), lang_name))
            DUBBED_VIDEO_MAP[get_langcode_map(lang_name)] = video_map

    return DUBBED_VIDEO_MAP.get(lang_code, {}) if lang_code else DUBBED_VIDEO_MAP
Example 33
def _get_installed_language_packs():
    """
    On-disk method to show currently installed languages and meta data.
    """

    # There's always English...
    installed_language_packs = [{
        'code': 'en',
        'software_version': VERSION,
        'language_pack_version': 0,
        'percent_translated': 100,
        'subtitle_count': 0,
        'name': 'English',
        'native_name': 'English',
    }]

    # Loop through locale folders
    for locale_dir in settings.LOCALE_PATHS:
        if not os.path.exists(locale_dir):
            continue

        # Loop through folders in each locale dir
        for django_disk_code in os.listdir(locale_dir):

            # Inside each folder, read from the JSON file - language name, % UI trans, version number
            try:
                # Get the metadata
                metadata_filepath = os.path.join(locale_dir, django_disk_code, "%s_metadata.json" % lcode_to_ietf(django_disk_code))
                lang_meta = softload_json(metadata_filepath, raises=True)

                logging.debug("Found language pack %s" % (django_disk_code))
            except Exception as e:
                if isinstance(e, IOError) and e.errno == 2:
                    logging.info("Ignoring non-language pack %s in %s" % (django_disk_code, locale_dir))
                else:
                    logging.error("Error reading %s metadata (%s): %s" % (django_disk_code, metadata_filepath, e))
                continue

            installed_language_packs.append(lang_meta)

    sorted_list = sorted(installed_language_packs, key=lambda m: m['name'].lower())
    return OrderedDict([(lcode_to_ietf(val["code"]), val) for val in sorted_list])
Example 35
def download_subtitle(youtube_id, lang_code, format="srt"):
    """
    Return subtitles for YouTube ID in language specified. Return False if they do not exist. Update local JSON accordingly.

    Note: srt map deals with amara, so uses lower-cased ietf codes (e.g. en-us)
    """
    assert format == "srt", "We only support srt download at the moment."

    # srt map deals with amara, so uses ietf codes (e.g. en-us)
    api_info_map = softload_json(SRTS_JSON_FILEPATH, raises=True)

    # get amara id
    amara_code = api_info_map.get(youtube_id, {}).get("amara_code")

    # make request
    # Please see http://amara.readthedocs.org/en/latest/api.html
    base_url = "https://amara.org/api2/partners/videos"

    resp = make_request(
        AMARA_HEADERS, "%s/%s/languages/%s/subtitles/?format=srt" % (
            base_url,
            amara_code,
            lang_code.lower(),
        ))
    if isinstance(resp, basestring):
        return resp
    else:
        # return the subtitle text, replacing empty subtitle lines with
        # spaces to make the FLV player happy
        try:
            resp.encoding = "UTF-8"
            response = (resp.text or u"") \
                .replace("\n\n\n", "\n   \n\n") \
                .replace("\r\n\r\n\r\n", "\r\n   \r\n\r\n")
        except Exception as e:
            logging.error(e)
            response = "client-error"
        return response
Example 36
    def handle(self, assessment_items_filepath, database_path, bulk_create, *args, **kwargs):

        database_alias = "assessment_items"

        # temporarily swap out the database path for the desired target
        database_path = database_path or connections.databases[database_alias]['NAME']
        temp_db_path, connections.databases[database_alias]['NAME'] = connections.databases[database_alias]['NAME'], database_path

        if bulk_create and os.path.isfile(database_path):
            os.remove(database_path)

        call_command("syncdb", interactive=False, database=database_alias)

        raw_items = softload_json(assessment_items_filepath, logger=logging.debug, raises=False)
        if bulk_create:
            items = [AssessmentItem(id=k, item_data=v["item_data"], author_names=v["author_names"]) for k, v in raw_items.items()]
            AssessmentItem.objects.bulk_create(items)
        else:
            for k, v in raw_items.iteritems():
                AssessmentItem.objects.using(database_alias).get_or_create(id=k, defaults={"item_data": v["item_data"], "author_names": v["author_names"]})

        # revert the database path to the original path
        connections.databases[database_alias]['NAME'] = temp_db_path
Example 38
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("Run this command on the distributed server only.")

        # Load videos
        video_sizes = softload_json(REMOTE_VIDEO_SIZE_FILEPATH, logger=logging.debug)

        # Query current files
        all_video_filepaths = glob.glob(os.path.join(settings.CONTENT_ROOT, "*.mp4"))
        logging.info("Querying sizes for %d video(s)." % len(all_video_filepaths))

        # Get all current sizes
        for video_filepath in all_video_filepaths:
            youtube_id = os.path.splitext(os.path.basename(video_filepath))[0]
            # Set to max, so that local compressed videos will not affect things.
            video_sizes[youtube_id] = max(video_sizes.get(youtube_id, 0), os.path.getsize(video_filepath))

        # Sort results
        video_sizes = OrderedDict([(key, video_sizes[key]) for key in sorted(video_sizes.keys())])

        logging.info("Saving results to disk.")
        ensure_dir(os.path.dirname(REMOTE_VIDEO_SIZE_FILEPATH))
        with open(REMOTE_VIDEO_SIZE_FILEPATH, "w") as fp:
            json.dump(video_sizes, fp, indent=2)
Example 40
def get_all_remote_video_sizes():
    global REMOTE_VIDEO_SIZES
    if REMOTE_VIDEO_SIZES is None:
        REMOTE_VIDEO_SIZES = softload_json(REMOTE_VIDEO_SIZE_FILEPATH, logger=logging.debug)
    return REMOTE_VIDEO_SIZES
Example 41
def get_remote_video_size(youtube_id, default=AVERAGE_VIDEO_SIZE, force=False):
    global REMOTE_VIDEO_SIZES
    if REMOTE_VIDEO_SIZES is None:
        REMOTE_VIDEO_SIZES = softload_json(REMOTE_VIDEO_SIZE_FILEPATH, logger=logging.debug)
    return REMOTE_VIDEO_SIZES.get(youtube_id, default)
Example 42
def create_all_mappings(force=False,
                        frequency_to_save=100,
                        response_to_check=None,
                        date_to_check=None,
                        map_file=SRTS_JSON_FILEPATH):
    """
    Write or update JSON file that maps from YouTube ID to Amara code and languages available.

    This command updates the json file that records what languages videos have been subtitled in.
    It loops through all video ids, records a list of which languages Amara says it has been subtitled in
    and meta data about the request (e.g. date, response code).

    See the schema in the docstring for fcn update_video_entry.
    """
    youtube_ids = get_slug2id_map().values()

    # Initialize the data
    if not os.path.exists(map_file):
        ensure_dir(os.path.dirname(map_file))
        if not settings.DEBUG:
            raise CommandError(
                "TRUE central server's srts dict should never be empty; where is your %s?"
                % map_file)
        else:
            # Pull it from the central server
            try:
                logging.debug(
                    "Fetching central server's srt availability file.")
                resp = requests.get(
                    "http://kalite.learningequality.org:7007/media/testing/%s"
                    % (os.path.basename(map_file)))
                resp.raise_for_status()
                with open(map_file, "w") as fp:
                    fp.write(resp.content)
                srts_dict = json.loads(resp.content)
            except Exception as e:
                logging.error(
                    "Failed to download TRUE central server's srts availability file: %s"
                    % e)
                srts_dict = {}

    else:
        # Open the file, read, and clean out old videos.
        #   only handle the error if force=True.
        #   Otherwise, these data are too valuable to lose, so just assume a temp problem.
        srts_dict = softload_json(map_file,
                                  raises=not force,
                                  logger=logging.error)
        if srts_dict:
            logging.info("Loaded %d mappings." % (len(srts_dict)))

        # Set of videos no longer used by KA Lite
        removed_videos = set(srts_dict.keys()) - set(youtube_ids)
        if removed_videos:
            logging.info(
                "Removing subtitle information for %d videos (no longer used)."
                % len(removed_videos))
            for vid in removed_videos:
                del srts_dict[vid]
    logging.info("Querying %d mappings." %
                 (len(youtube_ids) -
                  (0 if (force or date_to_check) else len(srts_dict))))

    # Once we have the current mapping, proceed through logic to update the mapping
    n_refreshed = 0  # keep track to avoid writing if nothing's been refreshed.
    n_new_entries = 0  # keep track for reporting
    n_failures = 0  # keep track for reporting
    for youtube_id in youtube_ids:

        # Decide whether or not to update this video based on the arguments provided at the command line
        cached = youtube_id in srts_dict
        if not force and cached:

            # First, check against date
            flag_for_refresh = True  # not (response_code or last_attempt)
            last_attempt = srts_dict[youtube_id].get("last_attempt")
            last_attempt = None if not last_attempt else datetime.datetime.strptime(
                last_attempt, '%Y-%m-%d')
            flag_for_refresh = flag_for_refresh and (
                not date_to_check or date_to_check > last_attempt)
            if not flag_for_refresh:
                logging.debug("Skipping %s for date-check" % youtube_id)
                continue

            # Second, check against response code
            response_code = srts_dict[youtube_id].get("api_response")
            flag_for_refresh = flag_for_refresh and (
                not response_to_check or response_to_check == "all"
                or response_to_check == response_code)
            if not (flag_for_refresh):
                logging.debug("Skipping %s for response-code" % youtube_id)
                continue
            if not response_to_check and not date_to_check and cached:  # no flags specified and already cached - skip
                logging.debug(
                    "Skipping %s for already-cached and no flags specified" %
                    youtube_id)
                continue

        # We're gonna check; just report the reason why.
        if force and not cached:
            logging.debug(
                "Updating %s because force flag (-f) given and video not cached."
                % youtube_id)
        elif force and cached:
            logging.debug(
                "Updating %s because force flag (-f) given. Video was previously cached."
                % youtube_id)
        else:
            logging.debug(
                "Updating %s because video subtitles metadata not yet cached."
                % youtube_id)

        # If it makes it to here without hitting a continue, then update the entry

        try:
            srts_dict[youtube_id] = update_video_entry(youtube_id,
                                                       entry=srts_dict.get(
                                                           youtube_id, {}))
            n_refreshed += 1
        except Exception as e:
            logging.warn("Error updating video %s: %s" % (youtube_id, e))
            n_failures += 1
            continue

        if not cached:
            n_new_entries += 1  # only count videos we had never seen before

        # Periodically dump progress so a crash doesn't lose everything
        if n_refreshed % frequency_to_save == 0:
            logging.info("On loop %d dumping dictionary into %s" %
                         (n_refreshed, map_file))
            with open(map_file, 'wb') as fp:
                json.dump(srts_dict, fp)

    # Finished the loop: save and report
    if n_refreshed > 0:
        with open(map_file, 'wb') as fp:
            json.dump(srts_dict, fp)
    if n_failures == 0:
        logging.info(
            "Great success! Added %d entries, updated %d entries, of %d total."
            % (n_new_entries, n_refreshed, len(srts_dict)))
    else:
        logging.warn(
            "Stored %d new entries, refreshed %d entries, but with %s failures, of %d total."
            % (n_new_entries, n_refreshed, n_failures, len(srts_dict)))

    return n_refreshed != 0
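The entry schema is documented in update_video_entry, which is not reproduced here; from the fields this function reads back (last_attempt, parsed with '%Y-%m-%d', and api_response), one entry plausibly looks like the following hypothetical sketch:

# Hypothetical srts_dict entry, inferred from the reads above; the real
# schema lives in update_video_entry and may contain more fields.
srts_dict = {
    "y2_uaPiyoxc": {
        "last_attempt": "2015-03-01",  # date of the last Amara query
        "api_response": "success",     # compared against --response_to_check
    },
}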
Example no. 43
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(),
                                   'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info(
                    "Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory."
                    .format(tmp_dir))

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning(
                "FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm."
            )

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError(
                    "Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path"
                )

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get(
                'khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json')))
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json')))

        topic_tree = softload_json(topic_tree_json_path,
                                   logger=logger.debug,
                                   raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content found for id: {}'.format(
                        topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get(
                    "description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning(
                        "Found a duplicate ID for {}, re-downloading".format(
                            node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(
                        video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir,
                                                       video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info(
                                "Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error(
                                        "Error invoking ffmpeg: {}".format(
                                            (_stderr_data or "") +
                                            (stdout_data or "")))
                                    logger.error("Command was: {}".format(
                                        " ".join(cmd)))
                                    raise CommandError(
                                        "Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(node["path"],
                                                     video_file_name)
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(
                        copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src,
                                              output_format="png")
                        if fp is None:
                            logger.error(
                                "Failed to create thumbnail for {}".format(
                                    video_file_src))
                        else:
                            logger.info(
                                "Successfully created thumbnail for {}".format(
                                    video_file_src))
                            with open(thumb_file_src, 'wb') as f:
                                f.write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"], node['id'] + '.png')
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(subtitle_src_dir,
                                                node['id'] + '.srt')
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(node_dir,
                                                    node['id'] + '.vtt')
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(
                                subtitle_srt))
                        else:
                            logger.info(
                                "Subtitle convert from SRT to VTT: {}".format(
                                    subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"], node['id'] + '.vtt')

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(
                            video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(
                    node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get(
                    "children", [])
                unavailable_video = child["kind"] == "Video" and not child.get(
                    "content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children

        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html",
                                              template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)

        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html",
                                            template_context)
            about_html = render_to_string("kalite_zim/about.html",
                                          template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome and about pages
        with open(os.path.join(tmp_dir, 'welcome.html'), 'w') as f:
            f.write(welcome_html)
        with open(os.path.join(tmp_dir, 'about.html'), 'w') as f:
            f.write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'),
                        os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(
            copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(
            render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome",
            "welcome.html",
            "--favicon",
            "static/img/ka_leaf.png",
            "--publisher",
            publisher,
            "--creator",
            "KhanAcademy.org",
            "--description",
            "Khan Academy ({})".format(language),
            "--description",
            "Videos from Khan Academy",
            "--language",
            language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}").format(
                _stderr_data + stdout_data)

        logger.info("Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
            h=duration // 3600,
            m=(duration % 3600) // 60,
            s=duration % 60,
        ))
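For reference, an invocation of this export2zim management command might look like the comment block below; the long-flag spellings are inferred from the option names read above (the option definitions themselves are not shown), so treat them as assumptions:

# Hypothetical invocation, assuming each option name maps to a flag:
#
#   python manage.py export2zim --language=en \
#       --zimwriterfs=/usr/local/bin/zimwriterfs \
#       --publisher="Learning Equality" \
#       --transcode2webm --download \
#       /path/to/khan-academy-en.zim
#
# A non-empty tmp directory aborts unless -c (clear) or -r (resume) is given.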
Example no. 44
def get_exercise_cache(force=False, language=settings.LANGUAGE_CODE):
    global EXERCISES, EXERCISES_FILEPATH
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(EXERCISES_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(EXERCISES_FILEPATH, logger=logging.debug, raises=False)
        exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        if os.path.exists(exercise_root):
            exercise_templates = os.listdir(exercise_root)
        else:
            exercise_templates = []
        assessmentitems = get_assessment_item_cache()
        TEMPLATE_FILE_PATH = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises", "%s")
        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

            if exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items","[]"):
                    item = json.loads(item)
                    if assessmentitems.get(item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = os.path.isfile(TEMPLATE_FILE_PATH % exercise_template)

                # Get the language codes for exercise templates that exist
                available_langs = set(["en"] + [lang_code for lang_code in exercise_templates if os.path.exists(os.path.join(exercise_root, lang_code, exercise_file))])

                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)


            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(EXERCISES_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warn("Annotated exercise cache file failed in saving with error {e}".format(e=e))

    return EXERCISES[language]
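get_exercise_cache, get_content_cache, and get_topic_tree all follow the same three-tier pattern: an in-process global, then a per-language annotated ".cache" JSON file (when DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP is set), then a rebuild from the base JSON. A stripped-down sketch of the pattern, with hypothetical names, reusing the softload_json sketch from earlier:

import json
import logging

_CACHE = {}  # tier 1: in-process memo, keyed by language

def load_annotated(base_filepath, language, annotate_fn, use_disk_cache=True):
    # Hypothetical distillation of the caching pattern used above.
    if language in _CACHE:
        return _CACHE[language]  # tier 1 hit
    cache_path = base_filepath + "_" + language + ".cache"
    data = None
    if use_disk_cache:
        data = softload_json(cache_path, logger=logging.debug, raises=False)  # tier 2
    if not data:
        data = softload_json(base_filepath, logger=logging.debug, raises=False)
        annotate_fn(data)  # tier 3: rebuild the annotations from scratch
        if use_disk_cache:
            try:
                with open(cache_path, "w") as f:
                    json.dump(data, f)
            except IOError as e:
                logging.warn("Failed to save annotated cache file: %s" % e)
    _CACHE[language] = data
    return data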
Example no. 45
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    elif settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = [] if not os.path.exists(i18n.get_srt_path()) else [lc for lc in os.listdir(i18n.get_srt_path()) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
Example no. 46
def get_all_remote_video_sizes():
    global REMOTE_VIDEO_SIZES
    if REMOTE_VIDEO_SIZES is None:
        REMOTE_VIDEO_SIZES = softload_json(REMOTE_VIDEO_SIZE_FILEPATH,
                                           logger=logging.debug)
    return REMOTE_VIDEO_SIZES
Example no. 47
    def handle(self, *args, **options):
        if len(args) != 1:
            raise CommandError("Takes exactly 1 argument")

        dest_file = os.path.abspath(args[0])

        logger.info("Starting up KA Lite export2zim command")
        beginning = datetime.now()
        logger.info("Begin: {}".format(beginning))

        language = options.get('language')
        if not language:
            raise CommandError("Must specify a language!")

        if not options.get('tmp_dir'):
            tmp_dir = os.path.join(tempfile.gettempdir(), 'ka-lite-zim_{}'.format(language))
        else:
            tmp_dir = options.get('tmp_dir')

        tmp_dir = os.path.abspath(tmp_dir)

        if os.path.exists(tmp_dir) and os.listdir(tmp_dir):
            if options['clear']:
                logger.info("Clearing directory {}".format(tmp_dir))
                shutil.rmtree(tmp_dir)
            elif options['resume']:
                logger.info("Resuming in dirty tmp directory {}".format(tmp_dir))
            else:
                raise CommandError(
                    "{} not empty, use the -c option to clean it, -r to resume, or use an empty destination directory.".format(
                        tmp_dir
                    )
                )

        zimwriterfs = options.get("zimwriterfs", None)
        publisher = options.get("publisher")
        transcode2webm = options.get("transcode2webm")
        ffmpeg = find_executable("ffmpeg")

        if not ffmpeg:
            logger.warning("FFMpeg not found in your path, you won't be able to create missing thumbnails or transcode to webm.")

        if not zimwriterfs:
            zimwriterfs = find_executable("zimwriterfs")
            if not zimwriterfs:
                raise CommandError("Could not find zimwriterfs in your path, try specifying --zimwriterfs=/path")

        if not os.path.exists(zimwriterfs):
            raise CommandError("Invalid --zimwriterfs")

        from kalite_zim import __name__ as base_path
        base_path = os.path.abspath(base_path)
        data_path = os.path.join(base_path, 'data')

        # Where subtitles are found in KA Lite
        subtitle_src_dir = i18n.get_srt_path(language)

        logger.info("Will export videos for language: {}".format(language))
        logger.info("Preparing KA Lite topic tree...")

        # Use live data
        if not options.get('test'):
            # This way of doing things will be deprecated in KA Lite 0.16
            topic_tree_json_path = topic_tools_settings.TOPICS_FILEPATHS.get('khan')
            content_cache = get_content_cache(language=language, annotate=True)
            exercise_cache = get_exercise_cache(language=language)
        # Use test data
        else:
            topic_tree_json_path = os.path.join(data_path, 'test_topics.json')
            content_cache = json.load(
                open(os.path.join(data_path, 'test_content.json'))
            )
            exercise_cache = json.load(
                open(os.path.join(data_path, 'test_exercise.json'))
            )

        topic_tree = softload_json(topic_tree_json_path, logger=logger.debug, raises=False)

        content_json_output = {}
        exercise_json_output = {}

        def annotate_tree(topic, depth=0, parent=None):
            """
            We need to recurse into the tree in order to annotate elements
            with topic data and exercise data
            """
            children = topic.get('children', [])
            new_children = []
            for child_topic in children:
                if child_topic.get("kind") in ("Video", "Topic"):
                    annotate_tree(child_topic, depth=depth + 1, parent=topic)
                    new_children.append(child_topic)
            topic["children"] = new_children
            if topic.get("kind") == "Exercise":
                topic['exercise'] = exercise_cache.get(topic.get("id"), {})
                exercise_json_output[topic.get("id")] = topic['exercise']
            elif topic.get("kind") == "Topic":
                pass
            else:
                topic['exercise'] = None
                topic['content'] = content_cache.get(topic.get("id"), {})
                content_json_output[topic.get("id")] = topic['content']
                if not topic['content']:
                    logger.error('No content found for id: {}'.format(topic.get('id')))

            # Translate everything for good measure
            with i18n.translate_block(language):
                topic["title"] = _(topic.get("title", ""))
                topic["description"] = _(topic.get("description", "")) if topic.get("description") else ""

            topic["url"] = topic["id"] + ".html"
            topic["parent"] = parent
            topic["depth"] = depth
            for key in ("child_data", "keywords", "hide", "contains"):
                topic.pop(key, None)

        # 1. Annotate a topic tree
        annotate_tree(topic_tree)

        # 2. Now go through the tree and copy each element into the destination
        # zim file system

        def copy_media(node):
            if node['kind'] == 'Topic':
                # Don't do anything if it's a topic
                pass
            elif node['kind'] == 'Exercise':
                # Exercises cannot be displayed
                node["content"]["available"] = False
            elif node['kind'] == 'Video':

                if node['content']['format'] == "webm":
                    logger.warning("Found a duplicate ID for {}, re-downloading".format(node['id']))
                    node['content']['format'] = "mp4"

                # Available is False by default until we locate the file
                node["content"]["available"] = False
                node_dir = os.path.join(tmp_dir, node["path"])
                if not os.path.exists(node_dir):
                    os.makedirs(node_dir)
                video_file_name = node['id'] + '.' + node['content']['format']
                thumb_file_name = node['id'] + '.png'
                video_file_src = os.path.join(CONTENT_ROOT, video_file_name)
                video_file_dest = os.path.join(node_dir, video_file_name)
                thumb_file_src = os.path.join(CONTENT_ROOT, thumb_file_name)
                thumb_file_dest = os.path.join(node_dir, thumb_file_name)

                if options['download'] and not os.path.exists(video_file_src):
                    logger.info("Video file being downloaded to: {}".format(video_file_src))
                    download_video(
                        node['content']['youtube_id'],
                        node['content']['format'],
                        CONTENT_ROOT,
                    )

                if os.path.exists(video_file_src):
                    if transcode2webm:
                        ffmpeg_pass_log = "/tmp/logfile_vp8.fpf"
                        if os.path.isfile(ffmpeg_pass_log):
                            os.unlink(ffmpeg_pass_log)
                        video_file_name = node['id'] + '.webm'
                        video_file_dest = os.path.join(node_dir, video_file_name)
                        if os.path.isfile(video_file_dest):
                            logger.info("Already encoded: {}".format(video_file_dest))
                        else:
                            ffmpeg_base_args = [
                                ffmpeg,
                                "-i", video_file_src,
                                "-codec:v", "libvpx",
                                "-quality", "best",
                                "-cpu-used", "0",
                                "-b:v", "300k",
                                "-qmin", "10",  # 10=lowest value
                                "-qmax", "35",  # 42=highest value
                                "-maxrate", "300k",
                                "-bufsize", "600k",
                                "-threads", "8",
                                # "-vf", "scale=-1",
                                "-codec:a", "libvorbis",
                                # "-b:a", "128k",
                                "-aq", "5",
                                "-f", "webm",
                            ]
                            ffmpeg_pass1 = ffmpeg_base_args + [
                                "-an",  # Disables audio, no effect first pass
                                "-pass", "1",
                                "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            ffmpeg_pass2 = ffmpeg_base_args + [
                                "-pass", "2",
                                "-y", "-passlogfile", ffmpeg_pass_log,
                                video_file_dest,
                            ]
                            for cmd in (ffmpeg_pass1, ffmpeg_pass2):
                                process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                                stdout_data, _stderr_data = process.communicate()
                                if process.returncode != 0:
                                    logger.error("Error invoking ffmpeg: {}".format((_stderr_data or "") + (stdout_data or "")))
                                    logger.error("Command was: {}".format(" ".join(cmd)))
                                    raise CommandError("Could not complete transcoding")
                        node['content']['format'] = "webm"
                    else:
                        # If not transcoding, just link the original file
                        os.link(video_file_src, video_file_dest)
                    node["video_url"] = os.path.join(
                        node["path"],
                        video_file_name
                    )
                    copy_media.videos_found += 1
                    logger.info("Videos processed: {}".format(copy_media.videos_found))
                    node["content"]["available"] = True

                    # Create thumbnail if it wasn't downloaded
                    if not os.path.exists(thumb_file_src):
                        fp = create_thumbnail(video_file_src, output_format="png")
                        if fp is None:
                            logger.error("Failed to create thumbnail for {}".format(video_file_src))
                        else:
                            logger.info("Successfully created thumbnail for {}".format(video_file_src))
                            with open(thumb_file_src, 'wb') as f:
                                f.write(fp.read())

                    # Handle thumbnail
                    if os.path.exists(thumb_file_src):
                        node["thumbnail_url"] = os.path.join(
                            node["path"],
                            node['id'] + '.png'
                        )
                        if not os.path.exists(thumb_file_dest):
                            os.link(thumb_file_src, thumb_file_dest)
                    else:
                        node["thumbnail_url"] = None

                    subtitle_srt = os.path.join(
                        subtitle_src_dir,
                        node['id'] + '.srt'
                    )
                    if os.path.isfile(subtitle_srt):
                        subtitle_vtt = os.path.join(
                            node_dir,
                            node['id'] + '.vtt'
                        )
                        # Convert to .vtt because this format is understood
                        # by latest video.js and the old ones that read
                        # .srt don't work with newer jquery etc.
                        submarine_parser(subtitle_srt, subtitle_vtt)
                        if not os.path.exists(subtitle_vtt):
                            logger.warning("Subtitle not converted: {}".format(subtitle_srt))
                        else:
                            logger.info("Subtitle convert from SRT to VTT: {}".format(subtitle_vtt))
                            node["subtitle_url"] = os.path.join(
                                node["path"],
                                node['id'] + '.vtt'
                            )

                else:
                    if options['download']:
                        logger.error("File not found or downloaded: {}".format(video_file_src))
            else:
                logger.error("Invalid node, kind: {}".format(node.get("kind", None)))
                # Exercises cannot be displayed
                node["content"] = {"available": False}

            new_children = []
            for child in node.get('children', []):
                copy_media(child)
                empty_topic = child["kind"] == "Topic" and not child.get("children", [])
                unavailable_video = child["kind"] == "Video" and not child.get("content", {}).get("available", False)
                if not (empty_topic or unavailable_video):
                    new_children.append(child)
            node['children'] = new_children
        copy_media.videos_found = 0

        def render_topic_pages(node):

            parents = [node] if node.get("children") else []
            parent = node["parent"]
            while parent:
                parents.append(parent)
                parent = parent["parent"]

            # Finally, render templates into the destination
            template_context = {
                "topic_tree": topic_tree,
                "topic": node,
                "parents": parents
            }
            with i18n.translate_block(language):
                topic_html = render_to_string("kalite_zim/topic.html", template_context)
            # Replace absolute references to '/static' with relative
            topic_html = topic_html.replace("/static", "static")

            dest_html = os.path.join(tmp_dir, node["id"] + ".html")
            logger.info("Rendering {}".format(dest_html))

            open(dest_html, "w").write(topic_html)

            render_topic_pages.pages_rendered += 1

            for child in node.get('children', []):
                render_topic_pages(child)
        render_topic_pages.pages_rendered = 0

        logger.info("Hard linking video files from KA Lite...")
        copy_media(topic_tree)

        sys.stderr.write("\n")
        logger.info("Done!")

        # Configure django-compressor
        compressor_init(os.path.join(base_path, 'static'))

        # Finally, render templates into the destination
        template_context = {
            "topic_tree": topic_tree,
            "welcome": True,
        }

        with i18n.translate_block(language):
            welcome_html = render_to_string("kalite_zim/welcome.html", template_context)
            about_html = render_to_string("kalite_zim/about.html", template_context)
        # Replace absolute references to '/static' with relative
        welcome_html = welcome_html.replace("/static", "static")
        about_html = about_html.replace("/static", "static")

        # Write the welcome and about pages
        with open(os.path.join(tmp_dir, 'welcome.html'), 'w') as f:
            f.write(welcome_html)
        with open(os.path.join(tmp_dir, 'about.html'), 'w') as f:
            f.write(about_html)

        # Render all topic html files
        render_topic_pages(topic_tree)

        # Copy in static data after it's been handled by django compressor
        # (this happens during template rendering)

        shutil.copytree(os.path.join(base_path, 'static'), os.path.join(tmp_dir, 'static'))

        ending = datetime.now()
        duration = int((ending - beginning).total_seconds())
        logger.info("Total number of videos found: {}".format(copy_media.videos_found))
        logger.info("Total number of topic pages created: {}".format(render_topic_pages.pages_rendered))

        logger.info("Invoking zimwriterfs, writing to: {}".format(dest_file))

        zimwriterfs_args = (
            zimwriterfs,
            "--welcome", "welcome.html",
            "--favicon", "static/img/ka_leaf.png",
            "--publisher", publisher,
            "--creator", "KhanAcademy.org",
            "--description", "Khan Academy ({})".format(language),
            "--description", "Videos from Khan Academy",
            "--language", language,
            tmp_dir,
            dest_file,
        )

        process = subprocess.Popen(zimwriterfs_args, stdout=subprocess.PIPE)
        stdout_data, _stderr_data = process.communicate()

        if process.returncode != 0:
            logger.error("Error invoking zimwriterfs: {}").format(_stderr_data + stdout_data)

        logger.info(
            "Duration: {h:} hours, {m:} minutes, {s:} seconds".format(
                h=duration // 3600,
                m=(duration % 3600) // 60,
                s=duration % 60,
            )
        )
Example no. 48
def generate_metadata(package_metadata=None, version=VERSION, force_version_update=False):
    """Loop through locale folder, create or update language specific meta
    and create or update master file, skipping broken languages
    """
    logging.info("Generating new language pack metadata")

    lang_codes = package_metadata.keys() if package_metadata else os.listdir(LOCALE_ROOT)
    broken_langs = [lc for lc, md in package_metadata.iteritems() if md.get("broken")] if package_metadata else []

    master_filepath = get_language_pack_availability_filepath(version=version)
    master_metadata = softload_json(master_filepath, logger=logging.warn, errmsg="Error opening master language pack metadata")

    # loop through all languages in locale, update master file
    crowdin_meta_dict = download_crowdin_metadata()

    for lc in lang_codes:
        lang_code_django = lcode_to_django_dir(lc)
        lang_code_ietf = lcode_to_ietf(lc)
        lang_name = get_language_name(lang_code_ietf)
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf, version=version)
        ensure_dir(os.path.dirname(metadata_filepath))

        if broken_langs and lang_code_django in broken_langs:  # broken_langs is django format
            logging.info("Skipping directory %s because it did not compile." % lang_code_django)
            continue

        # Gather existing metadata
        crowdin_meta = next((meta for meta in crowdin_meta_dict if meta["code"] == lang_code_ietf), {})
        stored_meta = softload_json(metadata_filepath, logger=logging.info, errmsg="Could not open %s language pack metadata" % lc)

        updated_meta = package_metadata.get(lang_code_ietf) or {}
        updated_meta.update({
            "code": lang_code_ietf,  # user-facing code
            "name": lang_name,
            "software_version": version,
        })

        try:
            # Augment the metadata
            updated_meta.update(get_code2lang_map(lang_code_django))
        except LanguageNotFoundError:
            logging.warning("Unrecognized language; unable to add extra naming metadata %s" % lang_code_django)
            continue

        if force_version_update:
            language_pack_version = 1 + stored_meta.get("language_pack_version", 0)  # defaults to 1 when no version was stored
        else:
            language_pack_version = increment_language_pack_version(stored_meta, updated_meta)

        updated_meta["language_pack_version"] = language_pack_version
        stored_meta.update(updated_meta)

        # Write locally (this is used on download by distributed server to update it's database)
        with open(metadata_filepath, 'w') as output:
            json.dump(stored_meta, output)

        # Update master (this is used for central server to handle API requests for data)
        master_metadata[lang_code_ietf] = stored_meta

    # Save updated master
    ensure_dir(os.path.dirname(master_filepath))
    with open(master_filepath, 'w') as fp:
        json.dump(master_metadata, fp)
    logging.info("Local record of translations updated")
Example no. 49
def get_remote_video_size(youtube_id, default=AVERAGE_VIDEO_SIZE, force=False):
    global REMOTE_VIDEO_SIZES
    if REMOTE_VIDEO_SIZES is None:
        REMOTE_VIDEO_SIZES = softload_json(REMOTE_VIDEO_SIZE_FILEPATH,
                                           logger=logging.debug)
    return REMOTE_VIDEO_SIZES.get(youtube_id, default)
Example no. 50
def get_content_cache(force=False, annotate=False, language=None):

    if not language:
        language = django_settings.LANGUAGE_CODE

    global CONTENT

    if CONTENT is None:
        CONTENT = {}

    if CONTENT.get(language) is None:
        content = None
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
        if content:
            CONTENT[language] = content
            return CONTENT[language]
        else:
            if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
                call_command("create_content_db")
                content = softload_sqlite_cache(settings.CONTENT_CACHE_FILEPATH)
            else:
                content = softload_json(settings.CONTENT_FILEPATH, logger=logging.debug, raises=False)
            CONTENT[language] = content
            annotate = True

    if annotate:

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        try:
            contents_folder = os.listdir(django_settings.CONTENT_ROOT)
        except OSError:
            contents_folder = []

        subtitle_langs = {}

        if os.path.exists(i18n.get_srt_path()):
            for (dirpath, dirnames, filenames) in os.walk(i18n.get_srt_path()):
                # Only bother looking at files that are inside a 'subtitles' directory
                if os.path.basename(dirpath) == "subtitles":
                    lc = os.path.basename(os.path.dirname(dirpath))
                    for filename in filenames:
                        if filename in subtitle_langs:
                            subtitle_langs[filename].append(lc)
                        else:
                            subtitle_langs[filename] = [lc]

        for key, content in CONTENT[language].iteritems():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            content_lang = ""  # ensure this is defined for translate_block below, even when dubmap is empty
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if (dubbed_id + "." + format) in contents_folder:
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": django_settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    elif django_settings.BACKUP_VIDEO_SOURCE:
                        content["available"] = True
                        content["content_urls"] = {
                            "stream": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format=format),
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": django_settings.BACKUP_VIDEO_SOURCE.format(youtube_id=dubbed_id, video_format="png"),
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available
            subtitle_lang_codes = subtitle_langs.get("{id}.srt".format(id=content.get("id")), [])

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": django_settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(language):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description")) if content.get("description") else ""

            CONTENT[language][key] = content

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                CONTENT[language].commit()
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
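The os.walk pass above builds an index from each .srt filename to every language directory that contains it, so availability checks later become dictionary lookups instead of per-file os.path.exists calls. A toy illustration, assuming the layout <srt_root>/<lang>/subtitles/<id>.srt:

# Toy version of the subtitle index: the same filename found under two
# language directories maps to both language codes.
paths = [
    "srt/es/subtitles/abc123.srt",
    "srt/pt/subtitles/abc123.srt",
    "srt/es/subtitles/def456.srt",
]
subtitle_langs = {}
for p in paths:
    parts = p.split("/")
    lang, filename = parts[1], parts[3]
    subtitle_langs.setdefault(filename, []).append(lang)
assert subtitle_langs["abc123.srt"] == ["es", "pt"]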
Example no. 51
def get_assessment_item_cache(force=False):
    global ASSESSMENT_ITEMS, ASSESSMENT_ITEMS_FILEPATH
    if ASSESSMENT_ITEMS is None or force:
        ASSESSMENT_ITEMS = softload_json(ASSESSMENT_ITEMS_FILEPATH, logger=logging.debug, raises=False)

    return ASSESSMENT_ITEMS
Example no. 52
def get_topic_tree(force=False, annotate=False, channel=None, language=None, parent=None):

    # Hardcode the Brazilian Portuguese mapping that only the central server knows about
    # TODO(jamalex): BURN IT ALL DOWN!
    if language == "pt-BR":
        language = "pt"

    if not channel:
        channel = settings.CHANNEL

    if not language:
        language = django_settings.LANGUAGE_CODE

    global TOPICS
    if not TOPICS:
        TOPICS = {}
    if TOPICS.get(channel) is None:
        TOPICS[channel] = {}

    if annotate or TOPICS.get(channel, {}).get(language) is None:
        cached_topics = None
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            cached_topics = softload_json(
                cache_file_path("topic_{0}_{1}.json".format(channel, language)),
                logger=logging.debug,
                raises=False
            )
        if cached_topics:
            TOPICS[channel][language] = cached_topics
            annotate = False
        else:
            topics = softload_json(settings.TOPICS_FILEPATHS.get(channel), logger=logging.debug, raises=False)
            # Just loaded from disk, so have to restamp.
            annotate = True

    if annotate:
        flat_topic_tree = []

        # Loop through all the nodes in the topic tree
        # and cross reference with the content_cache to check availability.
        content_cache = get_content_cache(language=language)
        exercise_cache = get_exercise_cache(language=language)

        def recurse_nodes(node, parent=""):

            node["parent"] = parent

            node.pop("child_data", None)

            child_availability = []

            child_ids = [child.get("id") for child in node.get("children", [])]

            # Do the recursion
            for child in node.get("children", []):
                recurse_nodes(child, node.get("id"))
                child_availability.append(child.get("available", False))

            if child_ids:
                node["children"] = child_ids

            # If the node has children, it is available when any of them is available
            if child_availability:
                node["available"] = any(child_availability)
            else:
                # No children: fall back to the cache entry. The default is charitable,
                # assuming anything that has not been annotated is available.
                if node.get("kind") == "Exercise":
                    cache_node = exercise_cache.get(node.get("id"), {})
                else:
                    cache_node = content_cache.get(node.get("id"), {})
                node["available"] = cache_node.get("available", True)

            # Translate everything for good measure
            with i18n.translate_block(language):
                node["title"] = _(node.get("title", ""))
                node["description"] = _(node.get("description", "")) if node.get("description") else ""

            flat_topic_tree.append(node)

        recurse_nodes(topics)

        TOPICS[channel][language] = flat_topic_tree

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(cache_file_path("topic_{0}_{1}.json".format(channel, language)), "w") as f:
                    json.dump(TOPICS[channel][language], f)
            except IOError as e:
                logging.warn("Annotated topic cache file failed in saving with error {e}".format(e=e))

    if parent:
        return filter(lambda x: x.get("parent") == parent, TOPICS[channel][language])
    else:
        return TOPICS[channel][language]
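A usage sketch of the flat tree and the parent filter (the channel and node ids are hypothetical; on Python 2, filter() returns a plain list):

# Whole flattened tree for one channel/language
nodes = get_topic_tree(channel="khan", language="es")

# Direct children of a single topic node, via the parent filter
children = get_topic_tree(channel="khan", language="es", parent="addition-subtraction")
child_titles = [node.get("title") for node in children]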
Example no. 53
def get_exercise_cache(force=False, language=settings.LANGUAGE_CODE):
    global EXERCISES, EXERCISES_FILEPATH
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(EXERCISES_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(EXERCISES_FILEPATH, logger=logging.debug, raises=False)
        exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        if os.path.exists(exercise_root):
            exercise_templates = os.listdir(exercise_root)
        else:
            exercise_templates = []
        assessmentitems = get_assessment_item_cache()
        TEMPLATE_FILE_PATH = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises", "%s")
        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

            if exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items", []):
                    # each entry is stored as a JSON-encoded string; decode it before use
                    item = json.loads(item)
                    if assessmentitems.get(item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = os.path.isfile(TEMPLATE_FILE_PATH % exercise_template)

                # Get the language codes for exercise templates that exist
                available_langs = set(["en"] + [lang_code for lang_code in exercise_templates if os.path.exists(os.path.join(exercise_root, lang_code, exercise_file))])

                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)


            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(EXERCISES_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warn("Annotated exercise cache file failed in saving with error {e}".format(e=e))

    return EXERCISES[language]
Example no. 54
def get_exercise_cache(force=False, language=None):

    if not language:
        language = django_settings.LANGUAGE_CODE

    global EXERCISES
    if EXERCISES is None:
        EXERCISES = {}
    if EXERCISES.get(language) is None:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            exercises = softload_json(
                cache_file_path("exercises_{0}.json".format(language)),
                logger=logging.debug,
                raises=False
            )
            if exercises:
                EXERCISES[language] = exercises
                return EXERCISES[language]
        EXERCISES[language] = softload_json(settings.EXERCISES_FILEPATH, logger=logging.debug, raises=False)

        # English-language exercises live in application space, translations in user space
        if language == "en":
            exercise_root = os.path.join(settings.KHAN_EXERCISES_DIRPATH, "exercises")
        else:
            exercise_root = i18n.get_localized_exercise_dirpath(language)
        if os.path.exists(exercise_root):
            try:
                exercise_templates = os.listdir(exercise_root)
            except OSError:
                exercise_templates = []
        else:
            exercise_templates = []

        for exercise in EXERCISES[language].values():
            exercise_file = exercise["name"] + ".html"
            exercise_template = exercise_file
            exercise_lang = "en"

            # The central server doesn't have an assessment item database
            if django_settings.CENTRAL_SERVER:
                available = False
            elif exercise.get("uses_assessment_items", False):
                available = False
                items = []
                for item in exercise.get("all_assessment_items", []):
                    item = json.loads(item)
                    if get_assessment_item_data(request=None, assessment_item_id=item.get("id")):
                        items.append(item)
                        available = True
                exercise["all_assessment_items"] = items
            else:
                available = exercise_template in exercise_templates

                # Get the language codes for exercise templates that exist.
                # Try to minimize the number of os.path.exists calls (since they're a bottleneck)
                # by using the same precedence rules as i18n.select_best_available_language.
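                # ([language] * available) appends `language` only when `available` is True,
                # so the set is either {"en"} or {"en", language} with no extra filesystem call.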
                available_langs = set(["en"] + [language] * available)
                # Return the best available exercise template
                exercise_lang = i18n.select_best_available_language(language, available_codes=available_langs)

            if exercise_lang == "en":
                exercise_template = exercise_file
            else:
                exercise_template = os.path.join(exercise_lang, exercise_file)

            with i18n.translate_block(language):
                exercise["available"] = available
                exercise["lang"] = exercise_lang
                exercise["template"] = exercise_template
                exercise["title"] = _(exercise.get("title", ""))
                exercise["description"] = _(exercise.get("description", "")) if exercise.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(cache_file_path("exercises_{0}.json".format(language)), "w") as f:
                    json.dump(EXERCISES[language], f)
            except IOError as e:
                logging.warn("Annotated exercise cache file failed in saving with error {e}".format(e=e))

    return EXERCISES[language]
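A usage sketch (the language code is illustrative). Because of the module-level EXERCISES dict, repeated calls for the same language reuse the annotated result:

exercises = get_exercise_cache(language="pt")
available_names = sorted(ex["name"] for ex in exercises.values() if ex.get("available"))

# force=True skips the on-disk .cache file; this only matters before the
# in-memory entry for this language has been populated
exercises = get_exercise_cache(force=True, language="pt")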
Example no. 55
def get_content_cache(force=False, annotate=False, language=settings.LANGUAGE_CODE):
    global CONTENT, CONTENT_FILEPATH

    if CONTENT is None:
        CONTENT = {}
    if CONTENT.get(language) is None:
        CONTENT[language] = softload_json(CONTENT_FILEPATH, logger=logging.debug, raises=False)
        annotate = True

    if annotate:
        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP and not force:
            content = softload_json(CONTENT_FILEPATH + "_" + language + ".cache", logger=logging.debug, raises=False)
            if content:
                CONTENT[language] = content
                return CONTENT[language]

        # Loop through all content items and put thumbnail urls, content urls,
        # and subtitle urls on the content dictionary, and list all languages
        # that the content is available in.
        for content in CONTENT[language].values():
            default_thumbnail = create_thumbnail_url(content.get("id"))
            dubmap = i18n.get_id2oklang_map(content.get("id"))
            content_lang = ""  # defined up front so the translate_block below never sees an unbound name
            if dubmap:
                content_lang = i18n.select_best_available_language(language, available_codes=dubmap.keys()) or ""
                if content_lang:
                    dubbed_id = dubmap.get(content_lang)
                    format = content.get("format", "")
                    if is_content_on_disk(dubbed_id, format):
                        content["available"] = True
                        thumbnail = create_thumbnail_url(dubbed_id) or default_thumbnail
                        content["content_urls"] = {
                            "stream": settings.CONTENT_URL + dubmap.get(content_lang) + "." + format,
                            "stream_type": "{kind}/{format}".format(kind=content.get("kind", "").lower(), format=format),
                            "thumbnail": thumbnail,
                        }
                    else:
                        content["available"] = False
                else:
                    content["available"] = False
            else:
                content["available"] = False

            # Get list of subtitle language codes currently available on disk
            srt_root = i18n.get_srt_path()
            if os.path.exists(srt_root):
                subtitle_lang_codes = [lc for lc in os.listdir(srt_root) if os.path.exists(i18n.get_srt_path(lc, content.get("id")))]
            else:
                subtitle_lang_codes = []

            # Generate subtitle URLs for any subtitles that do exist for this content item
            subtitle_urls = [{
                "code": lc,
                "url": settings.STATIC_URL + "srt/{code}/subtitles/{id}.srt".format(code=lc, id=content.get("id")),
                "name": i18n.get_language_name(lc)
                } for lc in subtitle_lang_codes]

            # Sort all subtitle URLs by language code
            content["subtitle_urls"] = sorted(subtitle_urls, key=lambda x: x.get("code", ""))

            with i18n.translate_block(content_lang):
                content["selected_language"] = content_lang
                content["title"] = _(content["title"])
                content["description"] = _(content.get("description", "")) if content.get("description") else ""

        if settings.DO_NOT_RELOAD_CONTENT_CACHE_AT_STARTUP:
            try:
                with open(CONTENT_FILEPATH + "_" + language + ".cache", "w") as f:
                    json.dump(CONTENT[language], f)
            except IOError as e:
                logging.warn("Annotated content cache file failed in saving with error {e}".format(e=e))

    return CONTENT[language]
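A usage sketch of the annotated content cache (the language code is illustrative):

content = get_content_cache(language="es", annotate=True)
playable = [c for c in content.values() if c.get("available")]
for item in playable:
    urls = item.get("content_urls", {})
    # urls.get("stream"), urls.get("thumbnail") and item["subtitle_urls"] are now populated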
def generate_metadata(package_metadata=None, version=SHORTVERSION, force_version_update=False):
    """Loop through locale folder, create or update language specific meta
    and create or update master file, skipping broken languages
    """
    logging.info("Generating new language pack metadata")

    lang_codes = package_metadata.keys() if package_metadata else os.listdir(LOCALE_ROOT)
    broken_langs = [lc for lc, md in package_metadata.iteritems() if md.get("broken")] if package_metadata else []

    master_filepath = get_language_pack_availability_filepath(version=version)
    master_metadata = softload_json(master_filepath, logger=logging.warn, errmsg="Error opening master language pack metadata")

    # loop through all languages in locale, update master file
    crowdin_meta_dict = download_crowdin_metadata()

    for lc in lang_codes:
        lang_code_django = lcode_to_django_dir(lc)
        lang_code_ietf = lcode_to_ietf(lc)
        lang_name = get_language_name(lang_code_ietf)
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf, version=version)
        ensure_dir(os.path.dirname(metadata_filepath))

        if broken_langs and lang_code_django in broken_langs:  # broken_langs is django format
            logging.info("Skipping directory %s because it did not compile." % lang_code_django)
            continue

        # Gather existing metadata
        crowdin_meta = next((meta for meta in crowdin_meta_dict if meta["code"] == lang_code_ietf), {})
        stored_meta = softload_json(metadata_filepath, logger=logging.info, errmsg="Could not open %s language pack metadata" % lc)

        updated_meta = package_metadata.get(lang_code_ietf) or {}
        updated_meta.update({
            "code": lang_code_ietf,  # user-facing code
            "name": lang_name,
            "software_version": version,
        })

        try:
            # Augment the metadata
            updated_meta.update(get_code2lang_map(lang_code_django))
        except LanguageNotFoundError:
            logging.warning("Unrecognized language; unable to add extra naming metadata %s" % lang_code_django)
            continue

        if force_version_update:
            language_pack_version = 1 + stored_meta.get("language_pack_version", 0)  # bumps to 1 when no version was stored
        else:
            language_pack_version = increment_language_pack_version(stored_meta, updated_meta)

        updated_meta["language_pack_version"] = language_pack_version
        stored_meta.update(updated_meta)

        # Write locally (read on download by the distributed server to update its database)
        with open(metadata_filepath, 'w') as output:
            json.dump(stored_meta, output)

        # Update master (used by the central server to answer API requests for data)
        master_metadata[lang_code_ietf] = stored_meta

    # Save updated master
    ensure_dir(os.path.dirname(master_filepath))
    with open(master_filepath, 'w') as fp:
        json.dump(master_metadata, fp)
    logging.info("Local record of translations updated")