def divide_videos_by_language(youtube_ids):
    """Group youtube ids by video language.

    Every id in ``youtube_ids`` is passed to ``get_video_language`` (which,
    per the original documentation, reflects the current dubbed video map)
    and appended to the list kept for the language it returns.

    Returns a ``defaultdict`` mapping language -> list of youtube ids, in the
    order the ids were supplied. Looking up a language that was never seen
    yields (and stores) an empty list.
    """
    ids_per_language = defaultdict(list)
    for youtube_id in youtube_ids:
        language = get_video_language(youtube_id)
        ids_per_language[language].append(youtube_id)
    return ids_per_language
def get_supported_language_map(lang_code=None):
    """Return the supported-language map, or the entry for one language.

    ``lang_code`` is first normalized with ``lcode_to_ietf``.

    The module-level ``SUPPORTED_LANGUAGE_MAP`` acts as a cache: while it is
    empty/falsy it is (re)loaded as JSON from ``SUPPORTED_LANGUAGES_FILEPATH``.

    Returns:
        * the whole cached map when the normalized ``lang_code`` is falsy;
        * otherwise a ``defaultdict`` pre-filled with that language's entry
          (nothing, if the entry is missing or falsy) whose default value for
          any absent key is the normalized ``lang_code`` itself.
    """
    global SUPPORTED_LANGUAGE_MAP

    lang_code = lcode_to_ietf(lang_code)

    # Lazily populate the module-level cache from disk.
    if not SUPPORTED_LANGUAGE_MAP:
        with open(SUPPORTED_LANGUAGES_FILEPATH) as languages_file:
            SUPPORTED_LANGUAGE_MAP = json.load(languages_file)

    if not lang_code:
        return SUPPORTED_LANGUAGE_MAP

    # Unknown keys fall back to the language code itself.
    per_language_map = defaultdict(lambda: lang_code)
    per_language_map.update(SUPPORTED_LANGUAGE_MAP.get(lang_code) or {})
    return per_language_map
def update_translations(lang_codes=None,
                        download_kalite_translations=True,
                        download_ka_translations=True,
                        zip_file=None,
                        ka_zip_file=None,
                        use_local=False,
                        version=SHORTVERSION):
    """
    Collect per-language translation metadata, downloading the po files
    from CrowdIn unless ``use_local`` is set.

    Parameters:
        lang_codes: iterable of language codes; each one is normalized with
            ``lcode_to_ietf``. When falsy, local mode processes nothing and
            download mode runs a single iteration with ``None``.
        download_kalite_translations: when false, the KA Lite download is
            skipped and the KA Lite po file is ``None``.
        download_ka_translations: when false (or when the language is
            ``"en"``), the Khan Academy download is skipped and the combined
            po file is ``None``.
        zip_file / ka_zip_file: forwarded as ``zip_file`` to
            ``download_latest_translations`` for the KA Lite / Khan Academy
            download. When falsy, a per-language path under
            ``CROWDIN_CACHE_DIR`` is used if ``settings.DEBUG`` is true,
            otherwise ``None``.
        use_local: read metadata from the po file located by
            ``get_po_build_path(lang_code, version=version)`` instead of
            downloading anything.
        version: only used in local mode, to locate the built po file.

    Returns:
        dict keyed by normalized language code. In local mode each value is a
        plain dict with ``approved_translations`` and ``phrases``. In download
        mode each value is a ``defaultdict`` (missing keys read as 0) with
        ``approved_translations``, ``phrases``, ``kalite_ntranslations``,
        ``kalite_nphrases``, ``ka_ntranslations``, ``ka_nphrases`` and, when at
        least one download flag is set, ``percent_translated``.

    Raises:
        SkipTranslations: when the supported-language map has a falsy
            ``'crowdin'`` entry for a language.
    """
    package_metadata = {}

    if use_local:
        # `or []`: with the default lang_codes=None there is simply nothing to
        # process, rather than a TypeError from iterating None.
        for lang_code in (lang_codes or []):
            lang_code = lcode_to_ietf(lang_code)
            package_metadata[lang_code] = {}
            combined_po_file = get_po_build_path(lang_code, version=version)
            combined_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = combined_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"] = combined_metadata["phrases"]

    else:
        # Lazy logging args: "%s" % lang_codes would raise TypeError if
        # lang_codes were a tuple with more than one element.
        logging.info("Downloading %s language(s)", lang_codes)

        # Download latest UI translations from CrowdIn
        for lang_code in (lang_codes or [None]):
            lang_code = lcode_to_ietf(lang_code)
            lang_code_crowdin = get_supported_language_map(lang_code)['crowdin']
            if not lang_code_crowdin:
                logging.warning('Interface translations for %s are disabled for now', lang_code)
                raise SkipTranslations

            # we make it a defaultdict so that if no value is present it's automatically 0
            # (these values will likely yield the wrong values when
            # download_kalite_translations == False.)
            package_metadata[lang_code] = defaultdict(
                lambda: 0,
                {
                    'approved_translations': 0,
                    'phrases': 0,
                    'kalite_ntranslations': 0,
                    'kalite_nphrases': 0,
                })

            if not download_kalite_translations:
                logging.info("Skipping KA Lite translations")
                kalite_po_file = None
            else:
                logging.info("Downloading KA Lite translations...")
                kalite_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.CROWDIN_PROJECT_ID,
                    project_key=settings.CROWDIN_PROJECT_KEY,
                    zip_file=zip_file or (os.path.join(CROWDIN_CACHE_DIR, "kalite-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                )

            # We have the po file, now get metadata.
            # NOTE(review): this runs even when the download was skipped
            # (kalite_po_file is None) -- presumably get_po_metadata tolerates
            # None; confirm.
            kalite_metadata = get_po_metadata(kalite_po_file)
            package_metadata[lang_code]["approved_translations"] = kalite_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"] = kalite_metadata["phrases"]
            package_metadata[lang_code]["kalite_ntranslations"] = kalite_metadata["approved_translations"]
            package_metadata[lang_code]["kalite_nphrases"] = kalite_metadata["phrases"]

            # Download Khan Academy translations too
            # (don't download po files for English, since original text is already in English,
            # and KA has English po files in their crowdin repo, but they're full of non-English text)
            if not download_ka_translations or lang_code == "en":
                logging.info("Skipping KA translations")
                combined_po_file = None
            else:
                logging.info("Downloading Khan Academy translations...")
                combined_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.KA_CROWDIN_PROJECT_ID,
                    project_key=settings.KA_CROWDIN_PROJECT_KEY,
                    zip_file=ka_zip_file or (os.path.join(CROWDIN_CACHE_DIR, "ka-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                    combine_with_po_file=kalite_po_file,
                    rebuild=False,  # just to be friendly to KA--we shouldn't force a rebuild
                    download_type="ka",
                )

            # We have the (combined) po file; the overall totals now come from
            # it, and the KA-only share is what remains after subtracting the
            # KA Lite counts.
            ka_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = ka_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"] = ka_metadata["phrases"]
            package_metadata[lang_code]["ka_ntranslations"] = ka_metadata["approved_translations"] - package_metadata[lang_code]["kalite_ntranslations"]
            package_metadata[lang_code]["ka_nphrases"] = ka_metadata["phrases"] - package_metadata[lang_code]["kalite_nphrases"]

            # here we compute the percent translated
            if download_ka_translations or download_kalite_translations:
                pmlc = package_metadata[lang_code]  # shorter name, less characters
                total_phrases = pmlc['kalite_nphrases'] + pmlc['ka_nphrases']
                if total_phrases == 0:
                    # Nothing to translate: report 0 instead of dividing by zero.
                    pmlc['percent_translated'] = 0
                else:
                    total_translated = pmlc['kalite_ntranslations'] + pmlc['ka_ntranslations']
                    pmlc["percent_translated"] = 100. * total_translated / float(total_phrases)

    return package_metadata
def update_translations(lang_codes=None,
                        download_kalite_translations=True,
                        download_ka_translations=True,
                        zip_file=None,
                        ka_zip_file=None,
                        use_local=False,
                        version=VERSION):
    """
    Collect per-language translation metadata, downloading the po files
    from CrowdIn unless ``use_local`` is set.

    Parameters:
        lang_codes: iterable of language codes; each one is normalized with
            ``lcode_to_ietf``. When falsy, local mode processes nothing and
            download mode runs a single iteration with ``None``.
        download_kalite_translations: when false, the KA Lite download is
            skipped and the KA Lite po file is ``None``.
        download_ka_translations: when false, the Khan Academy download is
            skipped and the combined po file is ``None``.
        zip_file / ka_zip_file: forwarded as ``zip_file`` to
            ``download_latest_translations`` for the KA Lite / Khan Academy
            download. When falsy, a per-language path under
            ``CROWDIN_CACHE_DIR`` is used if ``settings.DEBUG`` is true,
            otherwise ``None``.
        use_local: read metadata from the po file located by
            ``get_po_build_path(lang_code, version=version)`` instead of
            downloading anything.
        version: only used in local mode, to locate the built po file.

    Returns:
        dict keyed by normalized language code. In local mode each value is a
        plain dict with ``approved_translations`` and ``phrases``. In download
        mode each value is a ``defaultdict`` (missing keys read as 0) with
        ``approved_translations``, ``phrases``, ``kalite_ntranslations``,
        ``kalite_nphrases``, ``ka_ntranslations``, ``ka_nphrases`` and
        ``percent_translated`` (set when at least one download flag is set,
        and always forced to 100 for ``'en'``).

    Raises:
        SkipTranslations: when the supported-language map has a falsy
            ``'crowdin'`` entry for a language.
    """
    package_metadata = {}

    if use_local:
        # `or []`: with the default lang_codes=None there is simply nothing to
        # process, rather than a TypeError from iterating None.
        for lang_code in (lang_codes or []):
            lang_code = lcode_to_ietf(lang_code)
            package_metadata[lang_code] = {}
            combined_po_file = get_po_build_path(lang_code, version=version)
            combined_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = combined_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"] = combined_metadata["phrases"]

    else:
        # Lazy logging args: "%s" % lang_codes would raise TypeError if
        # lang_codes were a tuple with more than one element.
        logging.info("Downloading %s language(s)", lang_codes)

        # Download latest UI translations from CrowdIn
        for lang_code in (lang_codes or [None]):
            lang_code = lcode_to_ietf(lang_code)
            lang_code_crowdin = get_supported_language_map(lang_code)['crowdin']
            if not lang_code_crowdin:
                logging.warning('Interface translations for %s are disabled for now', lang_code)
                raise SkipTranslations

            # we make it a defaultdict so that if no value is present it's automatically 0
            # (these values will likely yield the wrong values when
            # download_kalite_translations == False.)
            package_metadata[lang_code] = defaultdict(
                lambda: 0,
                {
                    'approved_translations': 0,
                    'phrases': 0,
                    'kalite_ntranslations': 0,
                    'kalite_nphrases': 0,
                })
            # Bound on every iteration so the English override below always
            # writes to the current language's entry (never an unbound name or
            # a stale entry from a previous language).
            pmlc = package_metadata[lang_code]  # shorter name, less characters

            if not download_kalite_translations:
                logging.info("Skipping KA Lite translations")
                kalite_po_file = None
            else:
                logging.info("Downloading KA Lite translations...")
                kalite_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.CROWDIN_PROJECT_ID,
                    project_key=settings.CROWDIN_PROJECT_KEY,
                    zip_file=zip_file or (os.path.join(CROWDIN_CACHE_DIR, "kalite-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                )

            # We have the po file, now get metadata.
            # NOTE(review): this runs even when the download was skipped
            # (kalite_po_file is None) -- presumably get_po_metadata tolerates
            # None; confirm.
            kalite_metadata = get_po_metadata(kalite_po_file)
            pmlc["approved_translations"] = kalite_metadata["approved_translations"]
            pmlc["phrases"] = kalite_metadata["phrases"]
            pmlc["kalite_ntranslations"] = kalite_metadata["approved_translations"]
            pmlc["kalite_nphrases"] = kalite_metadata["phrases"]

            # Download Khan Academy translations too
            if not download_ka_translations:
                logging.info("Skipping KA translations")
                combined_po_file = None
            else:
                logging.info("Downloading Khan Academy translations...")
                combined_po_file = download_latest_translations(
                    lang_code=lang_code_crowdin,
                    project_id=settings.KA_CROWDIN_PROJECT_ID,
                    project_key=settings.KA_CROWDIN_PROJECT_KEY,
                    zip_file=ka_zip_file or (os.path.join(CROWDIN_CACHE_DIR, "ka-%s.zip" % lang_code_crowdin) if settings.DEBUG else None),
                    combine_with_po_file=kalite_po_file,
                    rebuild=False,  # just to be friendly to KA--we shouldn't force a rebuild
                    download_type="ka",
                )

            # We have the (combined) po file; the overall totals now come from
            # it, and the KA-only share is what remains after subtracting the
            # KA Lite counts.
            ka_metadata = get_po_metadata(combined_po_file)
            pmlc["approved_translations"] = ka_metadata["approved_translations"]
            pmlc["phrases"] = ka_metadata["phrases"]
            pmlc["ka_ntranslations"] = ka_metadata["approved_translations"] - pmlc["kalite_ntranslations"]
            pmlc["ka_nphrases"] = ka_metadata["phrases"] - pmlc["kalite_nphrases"]

            # here we compute the percent translated
            if download_ka_translations or download_kalite_translations:
                # Guard on the actual denominator: ka_nphrases is a difference
                # and can be the negative of kalite_nphrases, in which case the
                # sum is 0 even though neither term is.
                total_phrases = pmlc['kalite_nphrases'] + pmlc['ka_nphrases']
                if total_phrases == 0:
                    pmlc['percent_translated'] = 0
                else:
                    total_translated = pmlc['kalite_ntranslations'] + pmlc['ka_ntranslations']
                    pmlc["percent_translated"] = 100. * total_translated / float(total_phrases)

            # english is always 100% translated
            # NOTE(review): placed at loop level (applies even when both
            # downloads are skipped), matching the "always" in the comment
            # above -- confirm against the original indentation.
            if lang_code == 'en':
                pmlc['percent_translated'] = 100

    return package_metadata