def assign_country_codes(filtered_languages, all_languages):
    """Make sure each language in ``filtered_languages`` ends up with a country code.

    ``filtered_languages`` is modified in place and nothing is returned.
    Codes are grouped by their language part (the text before the first
    ``-``) and each group is handled as follows:

    * several country codes: the group is left untouched;
    * exactly one country code: the bare language code is removed and the
      remaining entry gets ``'country'`` set to ``''``;
    * no country code: the country code of that language with the longest
      ``'counter'`` in ``all_languages`` is added (with ``'country'`` set to
      ``''``); failing that, a code is built from babel's ``likely_subtags``.
      In both cases the bare language code is then removed.  If neither
      source yields a code the bare language is kept as is.

    :param filtered_languages: dict mapping language codes to their data
        dicts; mutated in place.
    :param all_languages: dict mapping every known language code to its data
        dict.  Entries copied into ``filtered_languages`` are shared, not
        duplicated, so setting ``'country'`` is visible through both dicts.

    NOTE(review): this file defines ``assign_country_codes`` twice; this
    definition is shadowed by the later one of the same name.
    """
    # Function-scope import so the block does not depend on the file header.
    from itertools import groupby

    # Every known code, best supported (longest 'counter') first.
    sorted_languages = sorted(
        all_languages,
        key=lambda code: len(all_languages[code].get('counter', [])),
        reverse=True)

    def _language_of(code):
        return code.split('-')[0]

    # Iterate over a sorted snapshot: the dict is modified inside the loop.
    # Sorting puts 'xx' directly before its 'xx-YY' variants, so every
    # language forms one contiguous group -- including the last one, which a
    # "compare with the previous item" loop would leave unprocessed.
    for lang, group in groupby(sorted(filtered_languages), key=_language_of):
        country_codes = [code for code in group if code != lang]

        if len(country_codes) > 1:
            # several countries to choose from: keep the bare language too
            continue

        if len(country_codes) == 1:
            # remove countryless version of language if there's only one
            # country; pop() tolerates a group that never had a bare code
            filtered_languages.pop(lang, None)
            filtered_languages[country_codes[0]]['country'] = ''
            continue

        # language has no country code at all:
        # try to get the country code with most supported engines
        best_code = next(
            (code for code in sorted_languages
             if len(code.split('-')) == 2 and _language_of(code) == lang),
            None)
        if best_code is not None:
            filtered_languages[best_code] = all_languages[best_code]
            filtered_languages[best_code]['country'] = ''
            del filtered_languages[lang]
            continue

        # otherwise get the most likely country code from babel
        subtags = get_global('likely_subtags').get(lang)
        if subtags:
            subtag_parts = subtags.split('_')
            new_code = subtag_parts[0] + '-' + subtag_parts[-1]
            filtered_languages[new_code] = all_languages[lang]
            del filtered_languages[lang]
def assign_country_codes(filtered_languages, all_languages):
    """Make sure each language in ``filtered_languages`` ends up with a country code.

    ``filtered_languages`` is modified in place and nothing is returned.
    Codes are grouped by their language part (the text before the first
    ``-``) and each group is handled as follows:

    * several country codes: the group is left untouched;
    * exactly one country code: the bare language code is removed and the
      remaining entry gets ``'country'`` set to ``''``;
    * no country code: the country code of that language with the longest
      ``'counter'`` in ``all_languages`` is added (with ``'country'`` set to
      ``''``); failing that, a code is built from babel's ``likely_subtags``.
      In both cases the bare language code is then removed.  If neither
      source yields a code the bare language is kept as is.

    :param filtered_languages: dict mapping language codes to their data
        dicts; mutated in place.
    :param all_languages: dict mapping every known language code to its data
        dict.  Entries copied into ``filtered_languages`` are shared, not
        duplicated, so setting ``'country'`` is visible through both dicts.

    NOTE(review): this file defines ``assign_country_codes`` twice; this
    later definition overrides the earlier one of the same name.  Consider
    removing the duplicate.
    """
    # Function-scope import so the block does not depend on the file header.
    from itertools import groupby

    # Every known code, best supported (longest 'counter') first.
    sorted_languages = sorted(
        all_languages,
        key=lambda code: len(all_languages[code].get('counter', [])),
        reverse=True)

    def _language_of(code):
        return code.split('-')[0]

    # Iterate over a sorted snapshot: the dict is modified inside the loop.
    # Sorting puts 'xx' directly before its 'xx-YY' variants, so every
    # language forms one contiguous group -- including the last one, which a
    # "compare with the previous item" loop would leave unprocessed.
    for lang, group in groupby(sorted(filtered_languages), key=_language_of):
        country_codes = [code for code in group if code != lang]

        if len(country_codes) > 1:
            # several countries to choose from: keep the bare language too
            continue

        if len(country_codes) == 1:
            # remove countryless version of language if there's only one
            # country; pop() tolerates a group that never had a bare code
            filtered_languages.pop(lang, None)
            filtered_languages[country_codes[0]]['country'] = ''
            continue

        # language has no country code at all:
        # try to get the country code with most supported engines
        best_code = next(
            (code for code in sorted_languages
             if len(code.split('-')) == 2 and _language_of(code) == lang),
            None)
        if best_code is not None:
            filtered_languages[best_code] = all_languages[best_code]
            filtered_languages[best_code]['country'] = ''
            del filtered_languages[lang]
            continue

        # otherwise get the most likely country code from babel
        subtags = get_global('likely_subtags').get(lang)
        if subtags:
            subtag_parts = subtags.split('_')
            new_code = subtag_parts[0] + '-' + subtag_parts[-1]
            filtered_languages[new_code] = all_languages[lang]
            del filtered_languages[lang]
def filter_language_list(all_languages,
                         min_engines_per_lang=15,
                         min_engines_per_country=10):
    """Select the language and country codes that are supported well enough.

    A language is kept when its ``'counter'`` holds at least
    ``min_engines_per_lang`` entries, or when it contains every "main"
    engine.  Main engines are the entries of the module-level ``engines``
    mapping that are in the ``'general'`` category, declare
    ``supported_languages`` and are not disabled.

    For every kept language the result contains:

    * each of its country codes whose ``'counter'`` holds at least
      ``min_engines_per_country`` entries;
    * additionally the bare language code, if more than one country code
      qualified;
    * if exactly one country code qualified, that entry with
      ``'country_name'`` set to ``None``;
    * if none qualified, a ``"<lang>-<country>"`` code derived from babel's
      ``likely_subtags`` (only when its last subtag has two characters),
      otherwise the bare language code.

    :param all_languages: dict mapping a language code to a dict with the
        keys ``'name'``, ``'english_name'``, ``'counter'`` and
        ``'countries'``; ``'countries'`` maps country codes to dicts with
        ``'country_name'`` and ``'counter'``.
    :param min_engines_per_lang: minimum ``'counter'`` length for a language.
    :param min_engines_per_country: minimum ``'counter'`` length for a
        country code.
    :returns: a new dict mapping codes to dicts with ``'name'``,
        ``'english_name'`` and, where set, ``'country_name'``.
    """

    def _is_main_engine(engine):
        return ('general' in engine.categories
                and engine.supported_languages
                and not engine.disabled)

    main_engines = [engine_name for engine_name in engines.keys()
                    if _is_main_engine(engines[engine_name])]

    def _is_supported(lang_data):
        counter = lang_data['counter']
        # NOTE: with no main engines all() is vacuously true, so every
        # language is kept.
        return (len(counter) >= min_engines_per_lang
                or all(main_engine in counter for main_engine in main_engines))

    # filter list to include only languages supported by most engines or all
    # default general engines
    filtered_languages = {code: lang_data
                          for code, lang_data in all_languages.items()
                          if _is_supported(lang_data)}

    def _copy_lang_data(lang, country_name=None):
        # Fresh dict per entry so later edits do not leak into all_languages.
        new_dict = {
            'name': all_languages[lang]['name'],
            'english_name': all_languages[lang]['english_name'],
        }
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict

    # for each language get country codes supported by most engines or at
    # least one country code
    filtered_languages_with_countries = {}
    for lang, lang_data in filtered_languages.items():
        # get language's country codes with enough supported engines
        filtered_countries = {
            lang_country: _copy_lang_data(lang, country_data['country_name'])
            for lang_country, country_data in lang_data['countries'].items()
            if len(country_data['counter']) >= min_engines_per_country
        }

        if len(filtered_countries) > 1:
            # add language without countries too if there's more than one
            # country to choose from
            filtered_countries[lang] = _copy_lang_data(lang)
        elif len(filtered_countries) == 1:
            # if there's only one country per language, it's not necessary
            # to show country name
            lang_country = next(iter(filtered_countries))
            filtered_countries[lang_country]['country_name'] = None
        else:
            # if no country has enough engines try to get most likely
            # country code from babel
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                country_code = subtags.split('_')[-1]
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(
                        lang=lang, country=country_code)

            if lang_country:
                filtered_countries[lang_country] = _copy_lang_data(lang)
            else:
                filtered_countries[lang] = _copy_lang_data(lang)

        filtered_languages_with_countries.update(filtered_countries)

    return filtered_languages_with_countries