def retrieve_language_resources(version: str, sublangargs: dict, no_subtitles: bool) -> LangpackResources:
    """Collect the raw inputs for a language pack.

    Args:
        version: Inserted into the ``*{version}*.po`` pattern that selects
            which KA Lite po files are merged.
        sublangargs: Mapping read for the keys ``"content_lang"``,
            ``"subtitle_lang"`` (only when subtitles are requested) and
            ``"interface_lang"``.
        no_subtitles: When true, subtitle retrieval is skipped and an empty
            dict is used in its place.

    Returns:
        ``LangpackResources(node_data, subtitle_data, kalite_catalog, ka_catalog)``.

    Raises:
        KeyError: If a required key is missing from ``sublangargs``, or if
            ``KALITE_CROWDIN_SECRET_KEY`` / ``KA_CROWDIN_SECRET_KEY`` is not
            set in the environment for a non-English interface language.
    """
    node_data = retrieve_kalite_data(lang=sublangargs["content_lang"], force=True)

    video_ids = [node.get("id") for node in node_data if node.get("kind") == "Video"]
    subtitle_data = {} if no_subtitles else retrieve_subtitles(video_ids, sublangargs["subtitle_lang"])

    interface_lang = sublangargs["interface_lang"]
    if interface_lang == "en":
        # English needs no CrowdIn download: both catalogs are created empty.
        kalite_catalog = Catalog()
        ka_catalog = Catalog()
    else:
        # retrieve KA Lite po files from CrowdIn, restricted to this version
        kalite_catalog = retrieve_translations(
            "ka-lite",
            os.environ["KALITE_CROWDIN_SECRET_KEY"],
            lang_code=interface_lang,
            includes="*{}*.po".format(version),
            force=True,
        )

        # retrieve Khan Academy po files from CrowdIn; no `includes` is
        # passed, so retrieve_translations applies its own default pattern.
        ka_catalog = retrieve_translations(
            "khanacademy",
            os.environ["KA_CROWDIN_SECRET_KEY"],
            lang_code=interface_lang,
            force=True,
        )

    return LangpackResources(node_data, subtitle_data, kalite_catalog, ka_catalog)
Exemple #2
0
def retrieve_language_resources(version: str, sublangargs: dict,
                                ka_domain: str, no_subtitles: bool,
                                no_dubbed_videos: bool) -> LangpackResources:
    """Collect the raw inputs for a language pack.

    Args:
        version: Accepted for interface compatibility; not read by this
            function.
        sublangargs: Mapping read for the keys ``"content_lang"`` and
            ``"interface_lang"``.
        ka_domain: Forwarded unchanged to ``retrieve_kalite_data``.
        no_subtitles: Accepted for interface compatibility; not read by this
            function (subtitle data is always an empty list here).
        no_dubbed_videos: Forwarded unchanged to ``retrieve_kalite_data``.

    Returns:
        ``LangpackResources(node_data, subtitle_data, ka_catalog)``.

    Raises:
        KeyError: If a required key is missing from ``sublangargs``, or if
            ``KA_CROWDIN_SECRET_KEY`` is not set in the environment for a
            non-English interface language.
    """
    node_data = retrieve_kalite_data(lang=sublangargs["content_lang"],
                                     force=True,
                                     ka_domain=ka_domain,
                                     no_dubbed_videos=no_dubbed_videos)

    # No subtitles are retrieved in this variant.
    subtitle_data = []

    interface_lang = sublangargs["interface_lang"]
    if interface_lang == EN_LANG_CODE:
        # English needs no CrowdIn download: the catalog is created empty.
        ka_catalog = Catalog()
    else:
        # retrieve Khan Academy po files from CrowdIn
        ka_catalog = retrieve_translations(
            "khanacademy",
            os.environ["KA_CROWDIN_SECRET_KEY"],
            lang_code=interface_lang,
            force=True)

    return LangpackResources(node_data, subtitle_data, ka_catalog)
Exemple #3
0
def retrieve_translations(crowdin_project_name,
                          crowdin_secret_key,
                          lang_code=EN_LANG_CODE,
                          force=False,
                          includes="*.po") -> Catalog:
    """Download a CrowdIn project's translations and merge them into one catalog.

    The function first asks CrowdIn to rebuild the translations (best effort:
    a failed request is only logged), then downloads the language zip,
    extracts it to a temporary directory, merges every extracted file whose
    path matches ``includes`` and wraps the result in a ``Catalog``.

    Args:
        crowdin_project_name: Project identifier inserted into the API URLs.
        crowdin_secret_key: API key inserted into the API URLs.
        lang_code: Language code of the zip to download.
        force: Passed to ``download_and_cache_file`` as ``ignorecache``.
        includes: ``fnmatch`` pattern applied to the full path of each
            extracted entry.

    Returns:
        ``Catalog`` built from the merged po file.
    """
    request_url_template = ("https://api.crowdin.com/api/"
                            "project/{project_id}/download/"
                            "{lang_code}.zip?key={key}")
    export_url_template = ("https://api.crowdin.com/api/"
                           "project/{project_id}/export/"
                           "{lang_code}.zip?key={key}")
    url_fields = {
        "project_id": crowdin_project_name,
        "lang_code": lang_code,
        "key": crowdin_secret_key,
    }
    request_url = request_url_template.format(**url_fields)
    # The rebuild request must target the export endpoint, not the download
    # one; previously this was formatted from request_url_template.
    export_url = export_url_template.format(**url_fields)

    logging.info("requesting CrowdIn to rebuild latest translations.")
    try:
        requests.get(export_url)
    except requests.exceptions.RequestException as e:
        # Best effort only: fall through and download whatever build exists.
        logging.warning(
            "Got exception when building CrowdIn translations: {}".format(e))

    # NOTE(review): request_url embeds the secret key, so this debug line
    # writes the key to the log — consider redacting.
    logging.debug("Retrieving translations from {}".format(request_url))
    zip_path = download_and_cache_file(request_url, ignorecache=force)
    zip_extraction_path = tempfile.mkdtemp()

    try:
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(zip_extraction_path)

        all_filenames = glob.iglob(os.path.join(zip_extraction_path, "**"),
                                   recursive=True)
        filenames = fnmatch.filter(all_filenames, includes)

        # use the polib library, since it's much faster at concatenating
        # po files.  it doesn't have a dict interface though, so we'll
        # reread the file using babel.Catalog.
        with tempfile.NamedTemporaryFile() as f:
            main_pofile = polib.POFile(fpath=f.name)

            for filename in filenames:
                main_pofile.merge(polib.pofile(filename))

            # Presumably undoes obsolete flags set by merge() — confirm
            # against polib's merge semantics.
            for entry in main_pofile:
                entry.obsolete = False

            main_pofile.save()
    finally:
        # Remove the extraction directory even when extraction or parsing
        # fails, so failed runs do not leave temp directories behind.
        shutil.rmtree(zip_extraction_path)

    return Catalog(main_pofile)