def assign_country_codes(filtered_languages, all_languages):
    """Make sure each language in ``filtered_languages`` carries a country code.

    Codes are walked in sorted order and grouped by the part before the
    first ``-``.  For every group:

    * if it holds no country-specific code, the ``lang-XX`` code from
      ``all_languages`` with the longest ``'counter'`` list is added; failing
      that, a code is built from the ``likely_subtags`` data returned by
      ``get_global``;
    * if it then holds exactly one country-specific code, the bare language
      entry is deleted and the remaining entry gets ``'country'`` set to ``''``.

    ``filtered_languages`` is modified in place and nothing is returned.
    Entries copied from ``all_languages`` are the same dict objects, so the
    ``'country'`` assignment is visible through ``all_languages`` as well.

    NOTE(review): the count treats the first code of every group as the bare
    language code; a group without it raises ``KeyError`` on the ``del`` below.

    :param filtered_languages: dict keyed by language code, e.g. ``'en'`` or
        ``'en-US'``; values are dicts.
    :param all_languages: dict keyed by language code; values are dicts that
        may hold a ``'counter'`` sequence.
    """
    # every known code, the ones with the longest 'counter' first
    sorted_languages = sorted(
        all_languages,
        key=lambda lang: len(all_languages[lang].get('counter', [])),
        reverse=True)

    def _finish_language(lang, last_code, countries):
        """Apply the country code rules to one completed language group."""
        # if language has no single country code
        if countries == 0:
            # try to get country code with most supported engines
            for code in sorted_languages:
                code_parts = code.split('-')
                if len(code_parts) == 2 and code_parts[0] == lang:
                    filtered_languages[code] = all_languages[code]
                    filtered_languages[code]['country'] = ''
                    countries = 1
                    break

        if countries == 0:
            # get most likely country code from babel
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                subtag_parts = subtags.split('_')
                new_code = subtag_parts[0] + '-' + subtag_parts[-1]
                filtered_languages[new_code] = all_languages[lang]
                countries = 1

        if countries == 1:
            # remove countryless version of language if there's only one country
            del filtered_languages[lang]
            if last_code in filtered_languages:
                filtered_languages[last_code]['country'] = ''

    previous_lang = None
    previous_code = None
    countries = 0
    # iterate over a sorted copy of the keys: the dict is modified on the way
    for current_code in sorted(filtered_languages):
        current_lang = current_code.split('-')[0]

        # count country codes per language
        if current_lang == previous_lang:
            countries += 1
        else:
            if previous_lang is not None:
                _finish_language(previous_lang, previous_code, countries)
            countries = 0
            previous_lang = current_lang

        previous_code = current_code

    # A group is only closed when the next language starts, so the last
    # group has to be closed explicitly once the loop is over.
    if previous_lang is not None:
        _finish_language(previous_lang, previous_code, countries)
Exemple #2
0
def assign_country_codes(filtered_languages, all_languages):
    """Give every language in ``filtered_languages`` a country-specific code.

    The sorted codes are split into runs sharing the same prefix before the
    first ``-``.  A run without any country-specific code receives the
    ``lang-XX`` entry of ``all_languages`` that has the longest ``'counter'``
    list, or else a code derived from ``get_global('likely_subtags')``.  A run
    that ends up with exactly one country-specific code loses its bare
    language entry and the remaining entry gets ``'country'`` set to ``''``.

    The dict is changed in place; the function returns ``None``.  Values taken
    from ``all_languages`` are not copied, so setting ``'country'`` on them is
    also visible through ``all_languages``.

    NOTE(review): each run is assumed to start with the bare language code;
    if it does not, the ``del`` below raises ``KeyError``.

    :param filtered_languages: dict of language code -> language data dict.
    :param all_languages: dict of language code -> language data dict, which
        may contain a ``'counter'`` sequence.
    """
    from itertools import groupby

    # every known code, ordered by length of its 'counter' list, longest first
    sorted_languages = sorted(all_languages,
                              key=lambda lang: len(all_languages[lang].get('counter', [])),
                              reverse=True)

    # groupby walks a sorted *list* of the keys, so changing the dict inside
    # the loop is safe, and the last run is handled like any other one.
    for lang, run in groupby(sorted(filtered_languages),
                             key=lambda code: code.split('-')[0]):
        codes = list(run)
        # the first code of a run stands for the language itself
        countries = len(codes) - 1
        last_code = codes[-1]

        # if language has no single country code
        if countries == 0:
            # try to get country code with most supported engines
            for candidate in sorted_languages:
                candidate_parts = candidate.split('-')
                if len(candidate_parts) == 2 and candidate_parts[0] == lang:
                    filtered_languages[candidate] = all_languages[candidate]
                    filtered_languages[candidate]['country'] = ''
                    countries = 1
                    break

            if countries == 0:
                # get most likely country code from babel
                subtags = get_global('likely_subtags').get(lang)
                if subtags:
                    subtag_parts = subtags.split('_')
                    new_code = subtag_parts[0] + '-' + subtag_parts[-1]
                    filtered_languages[new_code] = all_languages[lang]
                    countries = 1

        if countries == 1:
            # remove countryless version of language if there's only one country
            del filtered_languages[lang]
            if last_code in filtered_languages:
                filtered_languages[last_code]['country'] = ''
Exemple #3
0
def filter_language_list(all_languages):
    """Reduce ``all_languages`` to the well supported language/country codes.

    A language is kept when its ``'counter'`` has at least
    ``min_engines_per_lang`` entries, or when it contains every engine from
    the module-level ``engines`` mapping that is in the ``'general'``
    category, has ``supported_languages`` set and is not disabled.

    For every kept language the result contains:

    * one entry per country whose ``'counter'`` has at least
      ``min_engines_per_country`` entries;
    * additionally the bare language code when more than one country
      qualified;
    * when exactly one country qualified, only that entry, with its
      ``'country_name'`` set to ``None``;
    * when no country qualified, a single entry keyed by ``lang-XX`` built
      from ``get_global('likely_subtags')`` if that yields a two-character
      country code, otherwise keyed by the bare language code.

    :param all_languages: dict of language code -> dict with the keys
        ``'name'``, ``'english_name'``, ``'counter'`` and ``'countries'``;
        ``'countries'`` maps a country-specific code to a dict with
        ``'counter'`` and ``'country_name'``.
    :return: dict of code -> dict with ``'name'``, ``'english_name'`` and,
        for some entries, ``'country_name'``.
    """
    min_engines_per_lang = 15
    min_engines_per_country = 10
    main_engines = [
        engine_name for engine_name, engine in engines.items()
        if 'general' in engine.categories
        and engine.supported_languages
        and not engine.disabled
    ]

    # filter list to include only languages supported by most engines or all default general engines
    filtered_languages = {
        code: lang
        for code, lang in all_languages.items()
        if (len(lang['counter']) >= min_engines_per_lang or all(
            main_engine in lang['counter'] for main_engine in main_engines))
    }

    def _copy_lang_data(lang, country_name=None):
        """Return a fresh dict with the names of ``lang`` and, if given, the country name."""
        new_dict = {
            'name': all_languages[lang]['name'],
            'english_name': all_languages[lang]['english_name'],
        }
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict

    # for each language get country codes supported by most engines or at least one country code
    filtered_languages_with_countries = {}
    for lang, lang_data in filtered_languages.items():
        countries = lang_data['countries']
        filtered_countries = {}

        # get language's country codes with enough supported engines
        for lang_country, country_data in countries.items():
            if len(country_data['counter']) >= min_engines_per_country:
                filtered_countries[lang_country] = _copy_lang_data(
                    lang, country_data['country_name'])

        # add language without countries too if there's more than one country to choose from
        if len(filtered_countries) > 1:
            filtered_countries[lang] = _copy_lang_data(lang)
        elif len(filtered_countries) == 1:
            # if there's only one country per language, it's not necessary to show country name
            lang_country = next(iter(filtered_countries))
            filtered_countries[lang_country]['country_name'] = None

        # if no country has enough engines try to get most likely country code from babel
        if not filtered_countries:
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                country_code = subtags.split('_')[-1]
                # only a two-character last subtag is used as a country code
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(
                        lang=lang, country=country_code)

            if lang_country:
                filtered_countries[lang_country] = _copy_lang_data(lang)
            else:
                filtered_countries[lang] = _copy_lang_data(lang)

        filtered_languages_with_countries.update(filtered_countries)

    return filtered_languages_with_countries