def command_gen_subset_fonts():
    """
    Creates custom fonts that attempt to contain all the glyphs and other font
    features that are used in user-facing text for the translation in each
    language.

    We make a separate subset font for common strings, which generally overlaps
    somewhat with the individual language subsets. This slightly increases how
    much the client needs to download on first request, but reduces Kolibri's
    distribution size by a couple megabytes.
    """
    logging.info("generating subset fonts...")

    _clean_up(SCOPE_COMMON)
    _clean_up(SCOPE_SUBSET)

    # the "common" subset: strings shared by all languages
    _subset_and_merge_fonts(
        text=" ".join(_get_common_strings()),
        default_font=NOTO_SANS_LATIN,
        subset_reg_path=_woff_font_path(SCOPE_COMMON, is_bold=False),
        subset_bold_path=_woff_font_path(SCOPE_COMMON, is_bold=True),
    )

    # per-language subsets, built from that language's translated strings
    languages = utils.supported_languages(include_in_context=True, include_english=True)
    for lang_info in languages:
        logging.info("gen subset for {}".format(lang_info[utils.KEY_ENG_NAME]))
        strings = []
        strings.extend(_get_lang_strings(utils.local_locale_path(lang_info)))
        strings.extend(_get_lang_strings(utils.local_perseus_locale_path(lang_info)))

        name = lang_info[utils.KEY_INTL_CODE]
        _subset_and_merge_fonts(
            text=" ".join(strings),
            default_font=lang_info[utils.KEY_DEFAULT_FONT],
            subset_reg_path=_woff_font_path(_scoped(SCOPE_SUBSET, name), is_bold=False),
            subset_bold_path=_woff_font_path(_scoped(SCOPE_SUBSET, name), is_bold=True),
        )

    # generate common subset file
    _generate_inline_font_css(name=SCOPE_COMMON, font_family=SCOPE_COMMON)

    # generate language-specific subset font files
    # (reuse the language list fetched above instead of querying it again)
    for lang in languages:
        _generate_inline_font_css(
            name=_scoped(SCOPE_SUBSET, lang[utils.KEY_INTL_CODE]),
            font_family=SCOPE_SUBSET,
        )

    logging.info("subsets created")
def pretranslate(branch, approve_all=False):
    """
    Apply pre-translation to the given branch on Crowdin.

    Args:
        branch: name of the Crowdin branch to pre-translate.
        approve_all: when True, also pre-approve the pre-translated matches.
    """
    checkApiKey()

    params = []
    files = [
        "{}/{}".format(branch, f) for f in crowdin_files(branch, get_crowdin_details())
    ]
    params.extend([("files[]", file) for file in files])
    codes = [lang[utils.KEY_CROWDIN_CODE] for lang in utils.supported_languages()]
    params.extend([("languages[]", code) for code in codes])

    msg = (
        "Crowdin: pre-translating and pre-approving untranslated matches in '{}'..."
        if approve_all
        else "Crowdin: pre-translating untranslated matches in '{}'..."
    )
    msg += "\n\tNote that this operation can take a long time and may time out."
    msg += "\n\tYou should see the results on Crowdin eventually..."
    logging.info(msg.format(branch))

    # Bug fix: the approve flag was inverted. approve_all=True must ENABLE the
    # approve option (1) — previously it disabled it, contradicting the log
    # message above.
    r = requests.post(
        PRETRANSLATE_URL.format(approve_option=1 if approve_all else 0),
        params=params,
    )
    r.raise_for_status()
    logging.info("Crowdin: succeeded!")
def main(title, message, link_text):
    """
    Generate JSON suitable for sending in nutrition facts notifications.

    Prints a mapping of intl language code -> i18n dict containing whichever
    of the title / message / link-text message IDs have a translation in that
    language's strings file.
    """
    output = {}
    langs = utils.supported_languages(include_in_context=False, include_english=True)
    for lang_object in langs:
        file_path = os.path.join(utils.local_locale_path(lang_object), FILE_NAME)
        with open(file_path) as f:
            input_data = json.load(f)

        # keep only the message IDs actually translated in this language
        wanted = (
            (I18N_TITLE, title),
            (I18N_MESSAGE, message),
            (I18N_LINK_TEXT, link_text),
        )
        i18n = {
            out_key: input_data[msg_id]
            for out_key, msg_id in wanted
            if msg_id in input_data
        }
        output[lang_object[utils.KEY_INTL_CODE]] = i18n

    # output JSON
    print(
        json.dumps(
            output, sort_keys=True, indent=2, separators=(",", ": "), ensure_ascii=False
        )
    )
def command_download(branch):
    """
    Downloads and updates the local translation files from the given branch
    on Crowdin
    """
    logging.info("Crowdin: downloading '{}'...".format(branch))

    # delete previous files
    _wipe_translations(utils.LOCALE_PATH)
    _wipe_translations(utils.PERSEUS_LOCALE_PATH)

    for lang_object in utils.supported_languages(include_in_context=True):
        code = lang_object[utils.KEY_CROWDIN_CODE]
        response = requests.get(DOWNLOAD_URL.format(language=code, branch=branch))
        response.raise_for_status()

        target = utils.local_locale_path(lang_object)
        logging.info("\tExtracting {} to {}".format(code, target))
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(target)

        # hack for perseus: its file arrives inside the main locale dir
        # and must be relocated to the perseus locale dir
        perseus_target = utils.local_perseus_locale_path(lang_object)
        if not os.path.exists(perseus_target):
            os.makedirs(perseus_target)
        shutil.move(
            os.path.join(target, PERSEUS_FILE),
            os.path.join(perseus_target, PERSEUS_FILE),
        )

    # clean them up to make git diffs more meaningful
    _format_json_files()

    logging.info("Crowdin: download succeeded!")
def _font_priorities(default_font):
    """
    Given a default font, return a list of all possible font names roughly in
    the order that we ought to look for glyphs in.

    Many fonts contain overlapping sets of glyphs. Without doing this: we risk
    loading a bunch of random font files just because they happen to contain
    one of the glyphs, and we also risk loading the 'wrong' version of the
    glyphs if they happen to differ.
    """
    # start with the default
    font_names = [default_font]

    # look in the latin set next
    # Bug fix: compare by value, not identity. `is not` on strings only works
    # when both happen to be the same interned object, so an equal-but-distinct
    # string would have produced a duplicate latin entry.
    if default_font != NOTO_SANS_LATIN:
        font_names.append(NOTO_SANS_LATIN)

    # then look at the rest of the supported languages' default fonts
    for lang_info in utils.supported_languages():
        name = lang_info[utils.KEY_DEFAULT_FONT]
        if name not in font_names:
            font_names.append(name)

    # finally look at the remaining languages
    font_names.extend(
        [fn for fn in noto_source.FONT_MANIFEST if fn not in font_names]
    )

    return font_names
def _csv_to_json():
    """
    Convert all CSV translation files to JSON and ensure consistent diffs with
    ordered keys.
    """
    for lang_object in utils.supported_languages(include_in_context=True):
        locale_path = utils.local_locale_path(lang_object)
        perseus_path = utils.local_perseus_locale_path(lang_object)

        csv_locale_dir_path = os.path.join(
            utils.local_locale_csv_path(), lang_object["crowdin_code"]
        )
        perseus_locale_dir_path = os.path.join(
            utils.local_perseus_locale_csv_path(), lang_object["crowdin_code"]
        )

        # Make sure that the Perseus directory for CSV_FILES/{lang_code} exists
        if not os.path.exists(perseus_locale_dir_path):
            os.makedirs(perseus_locale_dir_path)

        csv_dirs = os.listdir(csv_locale_dir_path) + os.listdir(perseus_locale_dir_path)

        for file_name in csv_dirs:
            if "csv" not in file_name:
                continue

            # Bug fix: compare strings with `==`, not `is`. Names returned by
            # os.listdir() are never the same object as the PERSEUS_CSV
            # constant, so the identity check was effectively always False and
            # the Perseus CSV was read from / written to the wrong directory.
            is_perseus = file_name == PERSEUS_CSV

            if is_perseus:
                csv_path = os.path.join(perseus_locale_dir_path, file_name)
            else:
                csv_path = os.path.join(csv_locale_dir_path, file_name)

            # Account for csv reading differences in Pythons 2 and 3
            try:
                newline = None if sys.version_info[0] < 3 else ""
                mode = "r+b" if sys.version_info[0] < 3 else "r"
                encoding = None if sys.version_info[0] < 3 else "utf-8"
                csv_file = io.open(
                    csv_path, mode=mode, encoding=encoding, newline=newline
                )
            except EnvironmentError:
                logging.info("Failed to find CSV file in: {}".format(csv_path))
                continue

            with csv_file as f:
                csv_data = list(row for row in csv.DictReader(f))

            data = _locale_data_from_csv(csv_data)

            if is_perseus:
                utils.json_dump_formatted(
                    data, perseus_path, file_name.replace("csv", "json")
                )
            else:
                utils.json_dump_formatted(
                    data, locale_path, file_name.replace("csv", "json")
                )
def upload_translations(branch):
    """
    Upload translations to the given branch
    """
    checkPerseus()
    checkApiKey()
    for lang_object in utils.supported_languages(
        include_in_context=False, include_english=False
    ):
        _upload_translation(branch, lang_object)
def loadLang():
    """
    Load the application and Qt translators matching the system locale.

    Returns:
        tuple: (app QTranslator, Qt QTranslator); both may be empty/unloaded
        if no supported language matched the system locale.
    """
    # from en_US to en: QLocale.system().name() returns e.g. "en_US";
    # slicing off the last 3 chars drops the "_US" region suffix.
    system_lang = QtCore.QLocale.system().name()[:-3]
    _kwipe_trans = QtCore.QTranslator()
    _qt_trans = QtCore.QTranslator()
    for lang in utils.supported_languages():
        # NOTE(review): assumes each entry looks like
        # "<prefix>_<code>_<region>.<ext>" so split('_')[1][:-3] yields the
        # bare language code — confirm against utils.supported_languages();
        # for a bare 2-letter middle segment, [:-3] would give "".
        if system_lang == lang.split('_')[1][:-3]:
            # load the app translation file from the bundled language dir
            _kwipe_trans.load(lang, path_to_files + '/language/')
            # also load Qt's own translations for the full locale name
            _qt_trans.load(
                'qt_' + QtCore.QLocale.system().name(),
                QtCore.QLibraryInfo.location(QtCore.QLibraryInfo.TranslationsPath))
    return _kwipe_trans, _qt_trans
def command_gen_full_fonts():
    """Generate the complete (non-subset) font files and their CSS."""
    logging.info("generating full fonts...")

    _clean_up(SCOPE_FULL)

    # write regular and bold variants for every font in the manifest
    for font_name in noto_source.FONT_MANIFEST:
        for bold in (False, True):
            _write_full_font(font_name, is_bold=bold)

    # emit both the modern and the basic CSS for each supported language
    for lang_info in utils.supported_languages(
        include_in_context=True, include_english=True
    ):
        _gen_full_css_modern(lang_info)
        _gen_full_css_basic(lang_info)

    logging.info("finished generating full fonts")
def _format_json_files():
    """
    re-print all json files to ensure consistent diffs with ordered keys
    """
    # collect every locale directory (main + perseus) for every language
    locale_paths = []
    for lang_object in utils.supported_languages(include_in_context=True):
        locale_paths.append(utils.local_locale_path(lang_object))
        locale_paths.append(utils.local_perseus_locale_path(lang_object))

    json_paths = (
        os.path.join(locale_path, file_name)
        for locale_path in locale_paths
        for file_name in os.listdir(locale_path)
        if file_name.endswith(".json")
    )
    for file_path in json_paths:
        with io.open(file_path, mode="r", encoding="utf-8") as f:
            data = json.load(f)
        utils.json_dump_formatted(data, file_path)
def main(title, message, link_text):
    """
    Generate JSON suitable for sending in nutrition facts notifications.

    Prints a mapping of intl language code -> i18n dict containing whichever
    of the title / message / link-text message IDs have a translation in that
    language's strings file. English strings are read from the extracted CSV
    rather than a JSON file.
    """
    supported_languages = utils.supported_languages(
        include_in_context=False, include_english=True
    )
    output = {}
    for lang_object in supported_languages:
        file_path = os.path.join(utils.local_locale_path(lang_object), FILE_NAME)
        i18n = {}
        # If the language code is "en", parse csv file instead of json file.
        # Note that `make i18n-extract-frontend` should have been run to
        # generate the csv file.
        if lang_object[utils.KEY_INTL_CODE] == "en":
            # Bug fix: str.replace("json", "csv") rewrites the FIRST "json"
            # found anywhere in the path (e.g. a directory component), not the
            # file extension. Swap only the extension instead.
            file_path = os.path.splitext(file_path)[0] + ".csv"
            with open(file_path) as f:
                csv_reader = csv.DictReader(f)
                for row in csv_reader:
                    identifier = row["Identifier"]
                    if title == identifier:
                        i18n[I18N_TITLE] = row["Source String"]
                    if message == identifier:
                        i18n[I18N_MESSAGE] = row["Source String"]
                    if link_text == identifier:
                        i18n[I18N_LINK_TEXT] = row["Source String"]
        else:
            with open(file_path) as f:
                input_data = json.load(f)
            if title in input_data:
                i18n[I18N_TITLE] = input_data[title]
            if message in input_data:
                i18n[I18N_MESSAGE] = input_data[message]
            if link_text in input_data:
                i18n[I18N_LINK_TEXT] = input_data[link_text]
        output[lang_object[utils.KEY_INTL_CODE]] = i18n

    # output JSON
    print(
        json.dumps(
            output, sort_keys=True, indent=2, separators=(",", ": "), ensure_ascii=False
        )
    )
def download_translations(branch):
    """
    Download translations from the given branch
    """
    checkPerseus()
    checkApiKey()
    logging.info("Crowdin: downloading '{}'...".format(branch))

    # delete previous files
    _wipe_translations(utils.LOCALE_PATH)
    _wipe_translations(utils.PERSEUS_LOCALE_PATH)

    for lang_object in utils.supported_languages(include_in_context=True):
        crowdin_code = lang_object[utils.KEY_CROWDIN_CODE]
        response = requests.get(
            DOWNLOAD_URL.format(language=crowdin_code, branch=branch)
        )
        response.raise_for_status()

        target = utils.local_locale_csv_path()
        logging.info("\tExtracting {} to {}".format(crowdin_code, target))
        zipfile.ZipFile(io.BytesIO(response.content)).extractall(target)

        # hack for perseus
        perseus_target = os.path.join(
            utils.local_perseus_locale_csv_path(), lang_object["crowdin_code"]
        )
        # TODO - Update this to work with perseus properly - likely to need to
        # update the kolibri-exercise-perseus-plugin repo directly to produce
        # a CSV for its translations.
        if not os.path.exists(perseus_target):
            os.makedirs(perseus_target)
        try:
            shutil.move(
                os.path.join(target, lang_object["crowdin_code"], PERSEUS_CSV),
                os.path.join(perseus_target, PERSEUS_CSV),
            )
        except Exception as e:
            # best-effort: the perseus CSV may be missing for some languages
            logging.error("Ignoring an exception")
            logging.error(e)

    # TODO Don't need to format here... going to do this in the new command.
    # clean them up to make git diffs more meaningful
    _csv_to_json()

    logging.info("Crowdin: download succeeded!")
def _format_json_files():
    """
    re-print all json files to ensure consistent diffs with ordered keys
    """
    for lang_object in utils.supported_languages(include_in_context=True):
        locale_path = utils.local_locale_path(lang_object)
        perseus_path = utils.local_perseus_locale_path(lang_object)
        csv_locale_dir_path = os.path.join(
            utils.local_locale_csv_path(), lang_object["crowdin_code"]
        )
        for file_name in os.listdir(csv_locale_dir_path):
            if file_name.endswith("json"):
                # Then it is a Perseus JSON file - just copy it.
                source = os.path.join(csv_locale_dir_path, file_name)
                target = os.path.join(perseus_path, file_name)
                # Bug fix: a bare `except: pass` around makedirs swallowed
                # every exception (including KeyboardInterrupt and real I/O
                # errors). Only skip creation when the directory exists.
                if not os.path.isdir(perseus_path):
                    os.makedirs(perseus_path)
                shutil.copyfile(source, target)
                continue
            elif not file_name.endswith("csv"):
                continue

            csv_path = os.path.join(csv_locale_dir_path, file_name)

            # Account for csv reading differences in Pythons 2 and 3
            if sys.version_info[0] < 3:
                csv_file = open(csv_path, "rb")
            else:
                csv_file = open(csv_path, "r", newline="")

            with csv_file as f:
                csv_data = list(row for row in csv.DictReader(f))

            data = _locale_data_from_csv(csv_data)
            utils.json_dump_formatted(
                data, locale_path, file_name.replace("csv", "json")
            )
def _get_common_strings():
    """
    Text useful for all languages: displaying the language switcher, Kolibri
    version numbers, symbols, and other un-translated text
    """
    # Special characters that are used directly in untranslated template strings.
    # Search the codebase with this regex to find new ones: [^\x00-\x7F©–—…‘’“”•→›]
    strings = [
        chr(0x0),  # null
        "©",
        "–",  # en dash
        "—",  # em dash
        "…",
        "‘",
        "’",
        "“",
        "”",
        "•",
        "●",
        "→",
        "›",
    ]

    # all the basic printable ascii characters
    strings.extend(chr(code_point) for code_point in range(32, 127))

    # text from language names, both lower- and upper-case
    languages = utils.supported_languages(include_in_context=True, include_english=True)
    for lang in languages:
        for key in (utils.KEY_LANG_NAME, utils.KEY_ENG_NAME):
            strings.append(lang[key])
            strings.append(lang[key].upper())

    return strings
def translation_stats(branch):
    """
    Print stats for the given branch

    Queries Crowdin per supported language, aggregates untranslated and
    needs-approval counts (words and strings), and logs three tables via
    STATS_TEMPLATE. Exits the process if the branch is not found on Crowdin.
    """
    checkApiKey()

    logging.info("Crowdin: getting details for '{}'...".format(branch))

    def _is_branch_node(node):
        # Crowdin's status payload mixes files and branches in one "files"
        # list; branches are the entries with node_type == "branch".
        return node["node_type"] == "branch" and node["name"] == branch

    needs_approval_table = []
    strings_total = 0
    untranslated_table = []
    words_total = 0

    # iterate languages in a stable, human-friendly order (by English name)
    sorted_languages = sorted(
        utils.supported_languages(), key=lambda x: x[utils.KEY_ENG_NAME]
    )
    for lang in sorted_languages:
        logging.info("Retrieving stats for {}...".format(lang[utils.KEY_ENG_NAME]))
        r = requests.post(
            LANG_STATUS_URL.format(language=lang[utils.KEY_CROWDIN_CODE])
        )
        r.raise_for_status()
        try:
            # find the node for our branch in this language's status payload
            branch_node = next(
                node for node in r.json()["files"] if _is_branch_node(node)
            )
        except StopIteration:
            logging.error("Branch '{}' not found on Crowdin".format(branch))
            sys.exit(1)

        # rows are (language, word-count, string-count)
        needs_approval_table.append((
            lang[utils.KEY_ENG_NAME],
            branch_node["words_translated"] - branch_node["words_approved"],
            branch_node["translated"] - branch_node["approved"],
        ))
        untranslated_table.append((
            lang[utils.KEY_ENG_NAME],
            branch_node["words"] - branch_node["words_translated"],
            branch_node["phrases"] - branch_node["translated"],
        ))

        strings_total = branch_node["phrases"]  # should be the same across languages
        words_total = branch_node["words"]  # should be the same across languages

    # column 2 of each row holds string counts, column 1 word counts
    total_untranslated_strings = sum([row[2] for row in untranslated_table])
    total_unapproved_strings = sum([row[2] for row in needs_approval_table])

    avg_untranslated_strings = round(total_untranslated_strings / len(untranslated_table))
    avg_unapproved_strings = round(total_unapproved_strings / len(needs_approval_table))

    total_untranslated_words = sum([row[1] for row in untranslated_table])
    total_unapproved_words = sum([row[1] for row in needs_approval_table])

    avg_untranslated_words = round(total_untranslated_words / len(untranslated_table))
    avg_unapproved_words = round(total_unapproved_words / len(needs_approval_table))

    summary_table_headers = ["", "Words", "Strings"]
    summary_table = [
        ("Avg. Untranslated", avg_untranslated_words, avg_untranslated_strings),
        ("Avg. Needs Approval", avg_unapproved_words, avg_unapproved_strings),
        ("Total (for a new language)", words_total, strings_total),
    ]

    needs_approval_table_headers = ["Language", "Words", "Strings"]
    untranslated_table_headers = ["Language", "Words", "Strings"]

    logging.info(
        STATS_TEMPLATE.format(
            branch=branch,
            summary_table=tabulate(summary_table, headers=summary_table_headers),
            untranslated_table=tabulate(
                untranslated_table, headers=untranslated_table_headers
            ),
            needs_approval_table=tabulate(
                needs_approval_table, headers=needs_approval_table_headers
            ),
        )
    )
def command_upload_translations(branch):
    """Upload translated strings for every supported language to Crowdin."""
    langs = utils.supported_languages(include_in_context=False, include_english=False)
    for lang_object in langs:
        _upload_translation(branch, lang_object)