def retrieve_language_resources(version: str, sublangargs: dict, no_subtitles: bool) -> LangpackResources:
    """
    Collect everything needed to build a language pack.

    Fetches the content node data, the subtitles for every video node, and
    the KA Lite and Khan Academy translation catalogs from CrowdIn.

    NOTE(review): another definition of retrieve_language_resources with a
    different signature appears later in this file; if both are really part
    of the same module, that later definition shadows this one.

    Args:
        version: KA Lite version string. Used to build the "*<version>*.po"
            pattern that selects which KA Lite po files are merged.
        sublangargs: dict that must contain the keys "content_lang",
            "subtitle_lang" (only read when subtitles are fetched) and
            "interface_lang".
        no_subtitles: when true, no subtitles are retrieved and an empty
            dict is used as the subtitle data.

    Returns:
        LangpackResources(node_data, subtitle_data, kalite_catalog, ka_catalog).

    Raises:
        KeyError: if a required key is missing from sublangargs, or if
            KALITE_CROWDIN_SECRET_KEY / KA_CROWDIN_SECRET_KEY is not set in
            the environment for a non-English interface language.
    """
    node_data = retrieve_kalite_data(lang=sublangargs["content_lang"], force=True)

    video_ids = [node.get("id") for node in node_data if node.get("kind") == "Video"]
    subtitle_data = retrieve_subtitles(video_ids, sublangargs["subtitle_lang"]) if not no_subtitles else {}

    interface_lang = sublangargs["interface_lang"]
    if interface_lang == "en":
        # English is the source language: nothing to download, use empty catalogs.
        kalite_catalog = Catalog()
        ka_catalog = Catalog()
    else:
        # retrieve KA Lite po files from CrowdIn
        crowdin_project_name = "ka-lite"
        crowdin_secret_key = os.environ["KALITE_CROWDIN_SECRET_KEY"]
        includes = "*{}*.po".format(version)
        kalite_catalog = retrieve_translations(
            crowdin_project_name,
            crowdin_secret_key,
            lang_code=interface_lang,
            includes=includes,
            force=True,
        )

        # retrieve Khan Academy po files from CrowdIn
        # (no `includes` is passed, so retrieve_translations uses its default pattern)
        crowdin_project_name = "khanacademy"
        crowdin_secret_key = os.environ["KA_CROWDIN_SECRET_KEY"]
        ka_catalog = retrieve_translations(
            crowdin_project_name,
            crowdin_secret_key,
            lang_code=interface_lang,
            force=True,
        )

    return LangpackResources(node_data, subtitle_data, kalite_catalog, ka_catalog)
def retrieve_language_resources(version: str, sublangargs: dict, ka_domain: str, no_subtitles: bool, no_dubbed_videos: bool) -> LangpackResources:
    """
    Collect the content node data and Khan Academy translations for a language pack.

    Args:
        version: not read by this function; kept in the signature for callers.
        sublangargs: dict that must contain the keys "content_lang" and
            "interface_lang".
        ka_domain: forwarded unchanged to retrieve_kalite_data.
        no_subtitles: not read by this function; subtitle data is always
            returned as an empty list.
        no_dubbed_videos: forwarded unchanged to retrieve_kalite_data.

    Returns:
        LangpackResources(node_data, subtitle_data, ka_catalog), where
        subtitle_data is always [].

    Raises:
        KeyError: if a required key is missing from sublangargs, or if
            KA_CROWDIN_SECRET_KEY is not set in the environment for a
            non-English interface language.
    """
    node_data = retrieve_kalite_data(
        lang=sublangargs["content_lang"],
        force=True,
        ka_domain=ka_domain,
        no_dubbed_videos=no_dubbed_videos,
    )

    # Subtitles are not retrieved here; an empty list is passed through.
    subtitle_data = []

    interface_lang = sublangargs["interface_lang"]
    if interface_lang == EN_LANG_CODE:
        # English is the source language: nothing to download, use an empty catalog.
        ka_catalog = Catalog()
    else:
        # retrieve Khan Academy po files from CrowdIn
        # (no `includes` is passed, so retrieve_translations uses its default pattern)
        crowdin_project_name = "khanacademy"
        crowdin_secret_key = os.environ["KA_CROWDIN_SECRET_KEY"]
        ka_catalog = retrieve_translations(
            crowdin_project_name,
            crowdin_secret_key,
            lang_code=interface_lang,
            force=True,
        )

    return LangpackResources(node_data, subtitle_data, ka_catalog)
def retrieve_translations(crowdin_project_name, crowdin_secret_key, lang_code=EN_LANG_CODE, force=False, includes="*.po") -> Catalog:
    """
    Download a CrowdIn project's translations for one language and merge them.

    First asks CrowdIn to rebuild the translations (best effort: a failed
    request is logged as a warning and otherwise ignored), then downloads the
    language zip through download_and_cache_file, extracts it into a
    temporary directory and merges every extracted file whose path matches
    `includes` into a single po file.

    Args:
        crowdin_project_name: CrowdIn project identifier placed in the URL.
        crowdin_secret_key: CrowdIn API key placed in the URL query string.
        lang_code: language code of the zip to download.
        force: passed as `ignorecache` to download_and_cache_file.
        includes: fnmatch-style pattern matched against the full path of
            each extracted file to select the po files to merge.

    Returns:
        Catalog built from the merged po file.
    """
    request_url_template = ("https://api.crowdin.com/api/"
                            "project/{project_id}/download/"
                            "{lang_code}.zip?key={key}")
    # NOTE(review): confirm this export path against the CrowdIn API docs.
    export_url_template = ("https://api.crowdin.com/api/"
                           "project/{project_id}/export/"
                           "{lang_code}.zip?key={key}")
    request_url = request_url_template.format(
        project_id=crowdin_project_name,
        lang_code=lang_code,
        key=crowdin_secret_key,
    )
    # Build the rebuild request from the export template; the original code
    # formatted the download template here, so no export was ever requested.
    export_url = export_url_template.format(
        project_id=crowdin_project_name,
        lang_code=lang_code,
        key=crowdin_secret_key,
    )

    logging.info("requesting CrowdIn to rebuild latest translations.")
    try:
        requests.get(export_url)
    except requests.exceptions.RequestException as e:
        # Best effort only: fall through and download whatever build exists.
        logging.warning(
            "Got exception when building CrowdIn translations: {}".format(e))

    # NOTE(review): request_url embeds the API key, so this debug line writes
    # the secret to the log. Consider redacting it.
    logging.debug("Retrieving translations from {}".format(request_url))
    zip_path = download_and_cache_file(request_url, ignorecache=force)
    zip_extraction_path = tempfile.mkdtemp()

    # Remove the extraction directory even if unzipping or merging fails.
    try:
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(zip_extraction_path)

        all_filenames = glob.iglob(
            os.path.join(zip_extraction_path, "**"),
            recursive=True,
        )
        filenames = fnmatch.filter(all_filenames, includes)

        # use the polib library, since it's much faster at concatenating
        # po files.  it doesn't have a dict interface though, so we'll
        # reread the file using babel.Catalog.
        with tempfile.NamedTemporaryFile() as f:
            main_pofile = polib.POFile(fpath=f.name)

            for filename in filenames:
                pofile = polib.pofile(filename)
                main_pofile.merge(pofile)

            # Clear the obsolete flag on every merged entry.
            for entry in main_pofile:
                entry.obsolete = False

            main_pofile.save()
    finally:
        shutil.rmtree(zip_extraction_path)

    msgid_mapping = Catalog(main_pofile)

    return msgid_mapping