Example #1
0
def divide_videos_by_language(youtube_ids):
    """Group youtube ids into per-language buckets.

    Each id is looked up with ``get_video_language`` (per the original
    docstring, this reflects the current dubbed video map) and appended
    to the list for that language, preserving input order within a bucket.

    Args:
        youtube_ids: iterable of youtube ids; may be empty.

    Returns:
        A ``defaultdict(list)`` mapping language -> list of youtube ids.
        Note that, being a defaultdict, reading a missing language key
        creates (and returns) an empty list for it.
    """
    # `list` is the idiomatic factory; unlike a lambda it also keeps the
    # returned mapping picklable.
    buckets_by_lang = defaultdict(list)
    for y_id in youtube_ids:
        buckets_by_lang[get_video_language(y_id)].append(y_id)
    return buckets_by_lang
Example #2
0
def divide_videos_by_language(youtube_ids):
    """Separate a list of youtube ids into lists keyed by video language.

    The language of every id is obtained from ``get_video_language``
    (per the original docstring, as determined by the current dubbed
    video map). Ids keep their input order inside each language list.

    Args:
        youtube_ids: iterable of youtube ids; an empty iterable yields an
            empty mapping.

    Returns:
        A ``defaultdict(list)`` of language -> list of youtube ids.
        Looking up a language that has no videos inserts an empty list.
    """
    # Use the builtin `list` as the default factory rather than a lambda:
    # it is the conventional spelling and leaves the result picklable.
    videos_by_language = defaultdict(list)
    for youtube_id in youtube_ids:
        language = get_video_language(youtube_id)
        videos_by_language[language].append(youtube_id)
    return videos_by_language
Example #3
0
def get_supported_language_map(lang_code=None):
    """Return the supported-language map, or the entry for one language.

    The map is read from the JSON file at ``SUPPORTED_LANGUAGES_FILEPATH``
    and kept in the module-level ``SUPPORTED_LANGUAGE_MAP``; the file is
    read whenever that global is still falsy.

    Args:
        lang_code: language code, normalized through ``lcode_to_ietf``
            before use. When the normalized code is falsy the whole map
            is returned.

    Returns:
        The full map when no language is requested; otherwise a
        ``defaultdict`` holding that language's entry (empty if the map
        has no truthy entry for it) whose missing keys fall back to the
        normalized language code itself.
    """
    global SUPPORTED_LANGUAGE_MAP

    ietf_code = lcode_to_ietf(lang_code)

    # Populate the module-level cache on first use.
    if not SUPPORTED_LANGUAGE_MAP:
        with open(SUPPORTED_LANGUAGES_FILEPATH) as map_file:
            SUPPORTED_LANGUAGE_MAP = json.load(map_file)

    if not ietf_code:
        return SUPPORTED_LANGUAGE_MAP

    # Any key the entry does not define resolves to the language code.
    known_codes = SUPPORTED_LANGUAGE_MAP.get(ietf_code) or {}
    return defaultdict(lambda: ietf_code, known_codes)
Example #4
0
def get_supported_language_map(lang_code=None):
    """Look up the supported-language map, optionally for one language.

    On first use (while the module-level ``SUPPORTED_LANGUAGE_MAP`` is
    falsy) the map is parsed from the JSON file at
    ``SUPPORTED_LANGUAGES_FILEPATH`` and stored in that global.

    Args:
        lang_code: language code; it is passed through ``lcode_to_ietf``
            first. A falsy normalized code selects the whole map.

    Returns:
        The complete map, or - for a specific language - a
        ``defaultdict`` seeded with that language's entry (nothing if the
        entry is absent or falsy) that returns the normalized language
        code for any key the entry lacks.
    """
    global SUPPORTED_LANGUAGE_MAP

    normalized = lcode_to_ietf(lang_code)

    # Lazily fill the module-level cache from disk.
    if not SUPPORTED_LANGUAGE_MAP:
        with open(SUPPORTED_LANGUAGES_FILEPATH) as fp:
            SUPPORTED_LANGUAGE_MAP = json.load(fp)

    if normalized:
        entry = SUPPORTED_LANGUAGE_MAP.get(normalized) or {}
        # Unknown keys default to the language code itself.
        return defaultdict(lambda: normalized, entry)

    return SUPPORTED_LANGUAGE_MAP
def update_translations(lang_codes=None,
                        download_kalite_translations=True,
                        download_ka_translations=True,
                        zip_file=None,
                        ka_zip_file=None,
                        use_local=False,
                        version=SHORTVERSION):
    """
    Gather per-language translation metadata, downloading po files if needed.

    With ``use_local`` the metadata is read from the already-built po file
    of each language (``get_po_build_path``); ``lang_codes`` must then be
    iterable. Otherwise the KA Lite and Khan Academy translations are
    fetched via ``download_latest_translations`` (each step can be switched
    off with the ``download_*`` flags, and Khan Academy translations are
    never downloaded for "en") and phrase/translation counts plus a
    ``percent_translated`` figure are computed.

    Returns a dict mapping the IETF language code to its metadata mapping.
    Raises ``SkipTranslations`` as soon as a language has no 'crowdin' code
    in the supported language map.
    """
    package_metadata = {}

    if use_local:
        # Only read the counts out of the po files that were built earlier.
        for requested_code in lang_codes:
            ietf_code = lcode_to_ietf(requested_code)
            built_po_file = get_po_build_path(ietf_code, version=version)
            built_metadata = get_po_metadata(built_po_file)
            package_metadata[ietf_code] = {
                "approved_translations": built_metadata["approved_translations"],
                "phrases": built_metadata["phrases"],
            }
        return package_metadata

    logging.info("Downloading %s language(s)" % lang_codes)

    # Download latest UI translations from CrowdIn
    for requested_code in (lang_codes or [None]):
        ietf_code = lcode_to_ietf(requested_code)
        crowdin_code = get_supported_language_map(ietf_code)['crowdin']
        if not crowdin_code:
            logging.warning('Interface translations for %s are disabled for now' % ietf_code)
            raise SkipTranslations

        # A defaultdict, so any count that is never filled in reads as 0.
        # NOTE: these values will likely be wrong when
        # download_kalite_translations == False.
        stats = defaultdict(
            lambda: 0,
            {
                'approved_translations': 0,
                'phrases': 0,
                'kalite_ntranslations': 0,
                'kalite_nphrases': 0,
            })
        package_metadata[ietf_code] = stats

        if download_kalite_translations:
            logging.info("Downloading KA Lite translations...")
            # Outside DEBUG no cache path is supplied unless the caller gave one.
            kalite_zip = zip_file or (
                os.path.join(CROWDIN_CACHE_DIR, "kalite-%s.zip" % crowdin_code)
                if settings.DEBUG else None
            )
            kalite_po_file = download_latest_translations(
                lang_code=crowdin_code,
                project_id=settings.CROWDIN_PROJECT_ID,
                project_key=settings.CROWDIN_PROJECT_KEY,
                zip_file=kalite_zip,
            )
        else:
            logging.info("Skipping KA Lite translations")
            kalite_po_file = None

        # Record the KA Lite counts (the po file is None when skipped).
        kalite_metadata = get_po_metadata(kalite_po_file)
        stats["approved_translations"] = kalite_metadata["approved_translations"]
        stats["phrases"] = kalite_metadata["phrases"]
        stats["kalite_ntranslations"] = kalite_metadata["approved_translations"]
        stats["kalite_nphrases"] = kalite_metadata["phrases"]

        # Download Khan Academy translations too
        # (don't download po files for English, since original text is already in English,
        # and KA has English po files in their crowdin repo, but they're full of non-English text)
        if download_ka_translations and lang_code_is_not_english(ietf_code):
            logging.info("Downloading Khan Academy translations...")
            ka_zip = ka_zip_file or (
                os.path.join(CROWDIN_CACHE_DIR, "ka-%s.zip" % crowdin_code)
                if settings.DEBUG else None
            )
            combined_po_file = download_latest_translations(
                lang_code=crowdin_code,
                project_id=settings.KA_CROWDIN_PROJECT_ID,
                project_key=settings.KA_CROWDIN_PROJECT_KEY,
                zip_file=ka_zip,
                combine_with_po_file=kalite_po_file,
                rebuild=False,  # just to be friendly to KA--we shouldn't force a rebuild
                download_type="ka",
            )
        else:
            logging.info("Skipping KA translations")
            combined_po_file = None

        # The combined file's totals replace the KA Lite-only totals; the
        # KA share is whatever the combined counts add on top of KA Lite.
        ka_metadata = get_po_metadata(combined_po_file)
        stats["approved_translations"] = ka_metadata["approved_translations"]
        stats["phrases"] = ka_metadata["phrases"]
        stats["ka_ntranslations"] = ka_metadata["approved_translations"] - stats["kalite_ntranslations"]
        stats["ka_nphrases"] = ka_metadata["phrases"] - stats["kalite_nphrases"]

        # here we compute the percent translated
        if download_ka_translations or download_kalite_translations:
            phrase_total = stats['kalite_nphrases'] + stats['ka_nphrases']
            if phrase_total == 0:
                stats['percent_translated'] = 0
            else:
                translated_total = stats['kalite_ntranslations'] + stats['ka_ntranslations']
                stats["percent_translated"] = 100. * translated_total / float(phrase_total)

    return package_metadata


def lang_code_is_not_english(lang_code):
    """Tell whether *lang_code* is anything other than "en"."""
    return not (lang_code == "en")
Example #6
0
def update_translations(lang_codes=None,
                        download_kalite_translations=True,
                        download_ka_translations=True,
                        zip_file=None,
                        ka_zip_file=None,
                        use_local=False,
                        version=VERSION):
    """
    Collect per-language translation metadata, downloading po files if needed.

    Args:
        lang_codes: iterable of language codes. Required (must be iterable)
            when ``use_local`` is True; when downloading, a falsy value
            results in a single pass with ``None`` as the language code.
        download_kalite_translations: download the KA Lite po file;
            otherwise ``None`` is passed to ``get_po_metadata`` instead.
        download_ka_translations: download the Khan Academy po file,
            combined with the KA Lite one; otherwise ``None`` is passed to
            ``get_po_metadata`` instead.
        zip_file: zip path handed to the KA Lite download. When falsy, a
            path under ``CROWDIN_CACHE_DIR`` is used if ``settings.DEBUG``
            is set, else ``None``.
        ka_zip_file: same as ``zip_file``, for the Khan Academy download.
        use_local: skip downloading and read the counts from the po file
            located by ``get_po_build_path`` for each language.
        version: forwarded to ``get_po_build_path`` (local mode only).

    Returns:
        dict mapping IETF language code -> metadata mapping with
        ``approved_translations`` and ``phrases``; in download mode also
        the ``kalite_*`` / ``ka_*`` counts and, when something was
        downloaded or the language is "en", ``percent_translated``.

    Raises:
        SkipTranslations: when the supported language map has no
            'crowdin' code for a language (aborts the whole run).
    """
    package_metadata = {}

    if use_local:
        for lang_code in lang_codes:
            lang_code = lcode_to_ietf(lang_code)
            package_metadata[lang_code] = {}
            combined_po_file = get_po_build_path(lang_code, version=version)
            combined_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = combined_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"]               = combined_metadata["phrases"]

    else:
        # Lazy logging args: eager "%" formatting would raise TypeError when
        # lang_codes is a tuple whose length is not exactly one.
        logging.info("Downloading %s language(s)", lang_codes)

        # Download latest UI translations from CrowdIn
        for lang_code in (lang_codes or [None]):
            lang_code = lcode_to_ietf(lang_code)
            lang_code_crowdin = get_supported_language_map(lang_code)['crowdin']
            if not lang_code_crowdin:
                logging.warning('Interface translations for %s are disabled for now', lang_code)
                raise SkipTranslations

            # we make it a defaultdict so that if no value is present it's automatically 0
            # NOTE: these values will likely be wrong when
            # download_kalite_translations == False.
            pmlc = package_metadata[lang_code] = defaultdict(
                lambda: 0,
                {
                    'approved_translations': 0,
                    'phrases': 0,
                    'kalite_ntranslations': 0,
                    'kalite_nphrases': 0,
                })

            if not download_kalite_translations:
                logging.info("Skipping KA Lite translations")
                kalite_po_file = None
            else:
                logging.info("Downloading KA Lite translations...")
                kalite_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.CROWDIN_PROJECT_ID,
                    project_key=settings.CROWDIN_PROJECT_KEY,
                    zip_file=zip_file or (os.path.join(CROWDIN_CACHE_DIR, "kalite-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                )

            # We have the po file (or None when skipped), now get metadata.
            # NOTE(review): get_po_metadata presumably tolerates None - confirm.
            kalite_metadata = get_po_metadata(kalite_po_file)
            pmlc["approved_translations"] = kalite_metadata["approved_translations"]
            pmlc["phrases"]               = kalite_metadata["phrases"]
            pmlc["kalite_ntranslations"]  = kalite_metadata["approved_translations"]
            pmlc["kalite_nphrases"]       = kalite_metadata["phrases"]

            # Download Khan Academy translations too
            if not download_ka_translations:
                logging.info("Skipping KA translations")
                combined_po_file = None
            else:
                logging.info("Downloading Khan Academy translations...")
                combined_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.KA_CROWDIN_PROJECT_ID,
                    project_key=settings.KA_CROWDIN_PROJECT_KEY,
                    zip_file=ka_zip_file or (os.path.join(CROWDIN_CACHE_DIR, "ka-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                    combine_with_po_file=kalite_po_file,
                    rebuild=False,  # just to be friendly to KA--we shouldn't force a rebuild
                    download_type="ka",
                )

            # The combined file's totals replace the KA Lite-only totals; the
            # KA share is what the combined counts add on top of KA Lite.
            ka_metadata = get_po_metadata(combined_po_file)
            pmlc["approved_translations"] = ka_metadata["approved_translations"]
            pmlc["phrases"]               = ka_metadata["phrases"]
            pmlc["ka_ntranslations"]      = ka_metadata["approved_translations"] - pmlc["kalite_ntranslations"]
            pmlc["ka_nphrases"]           = ka_metadata["phrases"] - pmlc["kalite_nphrases"]

            # here we compute the percent translated
            if download_ka_translations or download_kalite_translations:
                # Guard on the actual denominator: the two counts can cancel
                # out (ka_nphrases may be negative) without both being zero.
                total_phrases = pmlc['kalite_nphrases'] + pmlc['ka_nphrases']
                if total_phrases == 0:
                    pmlc['percent_translated'] = 0
                else:
                    pmlc["percent_translated"] = 100. * (pmlc['kalite_ntranslations'] + pmlc['ka_ntranslations']) / float(total_phrases)

            # english is always 100% translated
            # (pmlc is bound at the top of every iteration, so this is safe
            # even when both download flags are off)
            if lang_code == 'en':
                pmlc['percent_translated'] = 100

    return package_metadata