Beispiel #1
0
 def _transliterate(self):
     """Fill ``self.transliteration`` with the reversed transliteration of
     ``self.name``.

     Records whose language code is ``el_GR`` are handled with the Greek
     pack ('el'); every other record is handled with the Hebrew pack ('he').
     """
     # Both packs are force-registered on every call, Hebrew first.
     for pack in (HebrewLanguagePack, GreekLanguagePack):
         registry.register(pack, force=True)
     pack_code = 'el' if self.lang_id.code == 'el_GR' else 'he'
     self.transliteration = translit(self.name, pack_code, reversed=True)
    def test_33_register_unregister(self):
        """
        Testing register/unregister.

        Checks that an existing language code is only replaced when
        ``force=True`` is passed, that a force-registered pack cannot be
        replaced again, and that only the non-forced pack can be
        unregistered.
        """
        from transliterate.contrib.languages.hy.translit_language_pack import (
            ArmenianLanguagePack)

        class A(TranslitLanguagePack):
            language_code = "ru"
            language_name = "Example"
            mapping = data.test_33_register_unregister_mapping

        # Since key `ru` already exists in the registry it can't be replaced
        # (without force-register).
        res = registry.register(A)
        self.assertFalse(res)

        # Now with force-register it can.
        res = registry.register(A, force=True)
        self.assertTrue(res)

        # Once we have it there and it's forced, we can't register another.
        res = registry.register(A, force=True)
        self.assertFalse(res)

        # Unregister non-forced language pack. The two conditions are
        # asserted separately so a failure names the one that broke.
        res = registry.unregister(ArmenianLanguagePack)
        self.assertTrue(res)
        self.assertNotIn(ArmenianLanguagePack.language_code,
                         get_available_language_codes())

        # Unregistering the force-registered pack is refused and its code
        # stays available.
        res = registry.unregister(A)
        self.assertFalse(res)
        self.assertIn(A.language_code, get_available_language_codes())
Beispiel #3
0
    def test_33_register_unregister(self):
        """
        Testing register/unregister.

        Checks that an existing language code is only replaced when
        ``force=True`` is passed, that a force-registered pack cannot be
        replaced again, and that only the non-forced pack can be
        unregistered.
        """
        from transliterate.contrib.languages.hy.translit_language_pack import ArmenianLanguagePack

        class A(TranslitLanguagePack):
            language_code = "ru"
            language_name = "Example"
            mapping = (
                u"abcdefghij",
                u"1234567890",
            )

        # Since key `ru` already exists in the registry it can't be replaced
        # (without force-register).
        res = registry.register(A)
        self.assertFalse(res)

        # Now with force-register it can.
        res = registry.register(A, force=True)
        self.assertTrue(res)

        # Once we have it there and it's forced, we can't register another.
        res = registry.register(A, force=True)
        self.assertFalse(res)

        # Unregister non-forced language pack. The two conditions are
        # asserted separately so a failure names the one that broke.
        res = registry.unregister(ArmenianLanguagePack)
        self.assertTrue(res)
        self.assertNotIn(ArmenianLanguagePack.language_code,
                         get_available_language_codes())

        # Unregistering the force-registered pack is refused and its code
        # stays available.
        res = registry.unregister(A)
        self.assertFalse(res)
        self.assertIn(A.language_code, get_available_language_codes())
Beispiel #4
0
 def _transliterating(self):
     """Return the reversed transliteration of ``self.content``.

     The pack is chosen from ``self.lang_id.name``: 'Greek' uses the Greek
     pack ('el'), 'Hebrew' uses the Hebrew pack ('he'). The chosen pack is
     force-registered before use.

     Raises:
         ValueError: if the language name is neither 'Greek' nor 'Hebrew'.
     """
     language_name = self.lang_id.name
     if language_name == 'Greek':
         registry.register(GreekLanguagePack, force=True)
         transliteration = translit(self.content, 'el', reversed=True)
     elif language_name == 'Hebrew':
         registry.register(HebrewLanguagePack, force=True)
         transliteration = translit(self.content, 'he', reversed=True)
     else:
         # This path used to fall through to the ``return`` and fail with an
         # UnboundLocalError; fail with an explicit message instead.
         raise ValueError(
             "Unsupported language for transliteration: %r" % (language_name,))
     return transliteration
Beispiel #5
0
def content_file_name(instance, filename):
    """Build the lower-cased storage path for an uploaded portrait.

    The file name has the form ``<pk>_<last name>_<first name>.<ext>``,
    where both names are reverse-transliterated with the 'kz' pack and
    ``<ext>`` is the text after the last dot of ``filename``. The result is
    placed under the ``portraits`` directory.
    """
    autodiscover()
    registry.register(KazakhLanguagePack)

    extension = filename.rsplit('.', 1)[-1]
    name_parts = (
        str(instance.pk),
        translit(instance.last_name, 'kz', reversed=True),
        translit(instance.first_name, 'kz', reversed=True),
    )
    portrait_name = "%s_%s_%s.%s" % (name_parts + (extension,))

    return os.path.join('portraits', portrait_name.lower())
Beispiel #6
0
    def test_15_register_custom_language_pack(self):
        """Register a custom pack and transliterate the Latin sample with it."""
        class ExampleLanguagePack(TranslitLanguagePack):
            """Pack registered under the code ``example``."""

            language_name = "Example"
            language_code = "example"
            mapping = data.test_15_register_custom_language_pack_mapping

        registry.register(ExampleLanguagePack)

        # The new code must be listed before it is used.
        assert 'example' in get_available_language_codes()
        transliterated = translit(self.latin_text, 'example')
        self.assertEqual(transliterated, 'Lor5m 9psum 4olor s9t 1m5t')
        return transliterated
    def test_15_register_custom_language_pack(self):
        """
        Register a custom pack and transliterate the Latin sample with it.
        """
        class ExampleLanguagePack(TranslitLanguagePack):
            """
            Pack registered under the code ``example``.
            """
            language_name = "Example"
            language_code = "example"
            mapping = data.test_15_register_custom_language_pack_mapping

        registry.register(ExampleLanguagePack)

        # The new code must be listed before it is used.
        assert 'example' in get_available_language_codes()
        transliterated = translit(self.latin_text, 'example')
        self.assertEqual(transliterated, 'Lor5m 9psum 4olor s9t 1m5t')
        return transliterated
Beispiel #8
0
    async def cyrillify(self, ctx, *text):
        """Send ``text`` to ``ctx`` transliterated with the "custom" pack.

        The words in ``text`` are joined with single spaces. When no words
        are given, a lorem-ipsum sentence is used instead.
        """
        class ExampleLanguagePack(TranslitLanguagePack):
            language_code = "custom"
            language_name = "Custom"
            # Single Latin letters and their Cyrillic stand-ins.
            mapping = (
                "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYyZz",
                "АаБбКкДдЕеФфГгХхИиЙйКкЛлМмНнОоПпКкРрСсТтУуВвУуЙйЗз",
            )
            # Letter groups (and "x") that map to their own Cyrillic output.
            pre_processor_mapping = {
                "scht": "щ",
                "sht": "щ",
                "sh": "ш",
                "tsch": "ч",
                "tch": "ч",
                "sch": "ш",
                "zh": "ж",
                "tz": "ц",
                "ch": "ч",
                "yu": "ю",
                "ya": "я",
                "x": "кс",
                "ck": "к",
                "ph": "ф",
            }
            # Add a capitalized twin of every entry. The keys are
            # snapshotted first because the dict grows inside the loop.
            chars = list(pre_processor_mapping)
            for lat in chars:
                cyr = pre_processor_mapping[lat]
                pre_processor_mapping[lat.capitalize()] = cyr.capitalize()

        registry.register(ExampleLanguagePack)

        message = " ".join(text) if text else "Lorem ipsum dolor sit amet."

        await ctx.send(translit(message, "custom"))
    def __test_34_latin_to_latin(self):
        """Register a Latin-to-Latin pack and run a strict transliteration."""
        class LatinToLatinLanguagePack(TranslitLanguagePack):
            """
            Pack whose mapping and character sets all come from the ``test_34_latin_to_latin_*`` fixtures.
            """
            language_name = "Latin to Latin"
            language_code = "l2l"
            characters = data.test_34_latin_to_latin_characters
            reversed_characters = data.test_34_latin_to_latin_reversed_characters
            mapping = data.test_34_latin_to_latin_mapping

        registered = registry.register(LatinToLatinLanguagePack)
        self.assertTrue(registered)

        # The pack instance is called directly, bypassing ``translit()``.
        sample = data.test_34_latin_to_latin_text
        res = LatinToLatinLanguagePack().translit(
            sample, strict=True, fail_silently=False)
    def __test_34_latin_to_latin(self):
        """Register a Latin-to-Latin pack and run a strict transliteration."""

        class LatinToLatinLanguagePack(TranslitLanguagePack):
            """
            Pack whose mapping and character sets all come from the ``test_34_latin_to_latin_*`` fixtures.
            """
            language_name = "Latin to Latin"
            language_code = "l2l"
            characters = data.test_34_latin_to_latin_characters
            reversed_characters = data.test_34_latin_to_latin_reversed_characters
            mapping = data.test_34_latin_to_latin_mapping

        registered = registry.register(LatinToLatinLanguagePack)
        self.assertTrue(registered)

        # The pack instance is called directly, bypassing ``translit()``.
        sample = data.test_34_latin_to_latin_text
        res = LatinToLatinLanguagePack().translit(
            sample, strict=True, fail_silently=False)
:example:

>>> from foo.greekunaccented import *
>>> from transliterate import translit
>>> print translit(u'άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ', 'el2')
αεηιιιουυυωΑΕΗΙΙΟΥΥΩ
"""

from transliterate import translit
from transliterate.discover import autodiscover
from transliterate.base import TranslitLanguagePack, registry

# First autodiscover bundled language packs.
autodiscover()


class GreekUnaccentedLanguagePack(TranslitLanguagePack):
    """
    Custom pack that strips the accents from Greek letters and leaves every other character untouched.
    """
    language_name = "Greek without accented characters"
    language_code = "el2"
    # First string: accented letters; second string: the plain letter for
    # the same position.
    mapping = (
        u"άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ",
        u"αεηιιιουυυωΑΕΗΙΙΟΥΥΩ",
    )


# Make the pack available under its ``el2`` code.
registry.register(GreekUnaccentedLanguagePack)
Beispiel #12
0
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.uk.translit_language_pack'
__author__ = 'Timofey Pchelintsev'
__copyright__ = '2014-2015 Timofey Pchelintsev'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('UkrainianLanguagePack', )

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.uk import data


class UkrainianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Ukrainian.
    Background: `http://en.wikipedia.org/wiki/Ukrainian_alphabet`.
    """
    language_name = "Ukrainian"
    language_code = "uk"

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(UkrainianLanguagePack)
Beispiel #13
0
# Progress display made of a percentage, a Bar('>') widget and an ETA.
widgets = [Percentage(), Bar('>'), ' ', ETA(), ' ']
pbar = ProgressBar(widgets=widgets, maxval=10000000)

# Discover the language packs before the custom pack is registered.
autodiscover()


class ReverseInverseRussianLanguagePack(TranslitLanguagePack):
    """Pack pairing Russian letters with the characters of a QWERTY layout.

    The two mapping strings are aligned position by position: each Cyrillic
    letter in the first string is paired with the character at the same
    index of the second string.
    """
    language_code = "ru_inv_en"
    language_name = "ru_inv_en"
    mapping = (
        u"йцукенгшщзхъфывапролджэёячсмитьбю",
        # The backslash (paired with "ё") is escaped explicitly. The former
        # "\z" was an invalid escape sequence that only produced the same
        # two characters by accident and triggers a warning on newer Pythons.
        u"qwertyuiop[]asdfghjkl;'\\zxcvbnm,.",
    )


registry.register(ReverseInverseRussianLanguagePack)


def _reporthook(numblocks, blocksize, filesize, url=None):
    """Write a single-line download progress report to stdout.

    Args:
        numblocks: number of blocks transferred so far.
        blocksize: size of one block.
        filesize: total size; when the percentage cannot be computed from
            it (e.g. it is zero), 100% is reported.
        url: download URL; only its base name is shown. May be omitted.

    The line is 70 characters wide (66 for the name, 3 for the number, 1 for
    the percent sign). On every call after the first, 70 backspaces are
    written first so the previous report is overwritten.
    """
    # ``url`` defaults to None, which ``os.path.basename`` cannot handle.
    base = os.path.basename(url) if url else ""
    try:
        percent = min((numblocks * blocksize * 100) / filesize, 100)
    except (ArithmeticError, TypeError):
        # Only arithmetic/type failures mean "size unknown". A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        percent = 100
    if numblocks != 0:
        sys.stdout.write("\b" * 70)
    sys.stdout.write("%-66s%3d%%" % (base, percent))


def downloaddictionaries(dictionary_strings):
    url = dictionary_urls[dictionary_strings]
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.el import data

__title__ = 'transliterate.contrib.languages.el.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2018 Artur Barseghyan'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('GreekLanguagePack',)


class GreekLanguagePack(TranslitLanguagePack):
    """Transliteration pack for Greek.

    Background: `http://en.wikipedia.org/wiki/Greek_alphabet` and
    `https://en.wikipedia.org/wiki/Romanization_of_Greek#Modern_Greek`.
    """
    language_name = "Greek"
    language_code = "el"
    detectable = True

    # Unicode blocks Greek and Coptic (U+0370-U+03FF) and Greek Extended
    # (U+1F00-U+1FFF).
    character_ranges = ((0x0370, 0x03FF), (0x1F00, 0x1FFF))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(GreekLanguagePack)
    )

    # TODO
    # reversed_specific_mapping = (
    # )

    # TODO
    # reversed_specific_pre_processor_mapping = {
    # }

    pre_processor_mapping = {
        u"ч": u"ch",
        u"щ": u"şh",
        u"ю": u"iu",
        u"я": u"ia",
        u"Ч": u"Ch",
        u"Щ": u"Şh",
        u"Ю": u"İu",
        u"Я": u"İa",
    }


# Register the Kazakh pack, then fetch the translit function for code "kk".
registry.register(KazakhLanguagePack)

translit_kk = get_translit_function("kk")


def translit(value: str) -> str:
    """Return ``value`` passed through the module-level ``translit_kk``."""
    transliterated = translit_kk(value)
    return transliterated
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.sr import data

__title__ = 'transliterate.contrib.languages.sr.translit_language_pack'
__author__ = 'Saša Kelečević'
__copyright__ = '2017 Saša Kelečević'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('SerbianLanguagePack',)


class SerbianLanguagePack(TranslitLanguagePack):
    """Transliteration pack for Serbian.

    Background: https://en.wikipedia.org/wiki/Romanization_of_Serbian
    """
    language_name = "Serbian"
    language_code = "sr"
    detectable = False

    # A Cyrillic range (U+0408-U+04F0) plus U+0000-U+017F, which spans
    # Basic Latin through Latin Extended-A.
    character_ranges = ((0x0408, 0x04F0), (0x0000, 0x017F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(SerbianLanguagePack)
Beispiel #17
0
        u"ю": u"yu",
        u"я": u"ya",
        u"Е": u"Ye",
        u"Ё": u"Yo",
        u"Ж": u"ZH",
        u"Х": u"Kh",
        u"Ц": u"Ts",
        u"Ч": u"Ch",
        u"Ш": u"Sh",
        u"Щ": u"Shch",
        u"Ю": u"Yu",
        u"Я": u"Ya",
    }


# Add LanguagePack to the shared registry (no force: an existing pack with
# the same code is not replaced).
registry.register(LanguagePack)


def get_cities(page, count):
    if page == 1:
        page = 0
    else:
        page = (page - 1) * count

    results = []
    file = open('RU.txt', 'r', encoding='utf-8')
    lines = file.readlines()
    for i in range(page, page + count):
        try:
            city = main_dictionary(lines[i])
            results.append(city)
Beispiel #18
0
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.mk.translit_language_pack'
__author__ = 'Igor Stamatovski'
__copyright__ = '2016 Igor Stamatovski'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('MacedonianLanguagePack', )

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mk import data


class MacedonianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Macedonian.
    Background: http://en.wikipedia.org/wiki/Romanization_of_Macedonian
    """
    language_name = "Macedonian"
    language_code = "mk"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(MacedonianLanguagePack)
Beispiel #19
0
        u'Є': 'E',
        u'Ж': 'J',
        u'З': 'Z',
        u'И': 'I',
        u'І': 'I',
        u'Ї': 'Yi',
        u'Й': 'Y',
        u'К': 'K',
        u'Л': 'L',
        u'М': 'M',
        u'Н': 'N',
        u'О': 'O',
        u'П': 'P',
        u'Р': 'R',
        u'С': 'S',
        u'Т': 'T',
        u'У': 'U',
        u'Ф': 'F',
        u'Х': 'H',
        u'Ц': 'C',
        u'Ч': 'Ch',
        u'Ш': 'Sh',
        u'Щ': "Sh'",
        u'Ь': "'",
        u'Ю': 'Yu',
        u'Я': 'Ya',
    }


# Force-register so this pack replaces any pack already using its code.
registry.register(UkrainskiyLanguagePack, force=True)
Beispiel #20
0
# NOTE: ``print`` is called as a function throughout. With a single argument
# this prints the same text on Python 2 and is valid syntax on Python 3.
print('\nRegistering custom language pack\n---------------------------------------')
from transliterate.base import TranslitLanguagePack, registry

class ExampleLanguagePack(TranslitLanguagePack):
    """
    Example language pack: the letters a-j are paired, position by position,
    with the digits 1-9 and 0.
    """
    language_code = "example"
    language_name = "Example"
    mapping = (
        u"abcdefghij",
        u"1234567890",
    )

registry.register(ExampleLanguagePack)

print('\nList of available (registered) languages after registering a new language pack \n---------------------------------------')
print(get_available_language_codes())

print('\nTransliteration to Example\n---------------------------------------')
print(translit(text, 'example'))

print('\nTransliterated lorem ipsum generator \n---------------------------------------')
from transliterate.contrib.apps.translipsum import TranslipsumGenerator
g_am = TranslipsumGenerator(language_code='hy')

print('Generating paragraphs')
print(g_am.generate_paragraph())

g_ru = TranslipsumGenerator(language_code='ru')
# -*- coding: utf-8 -*-
"""
Latin to Latin example.
"""

from transliterate import translit
from transliterate.discover import autodiscover
from transliterate.base import TranslitLanguagePack, registry

# First autodiscover bundled language packs.
autodiscover()

class LatinToLatinLanguagePack(TranslitLanguagePack):
    """
    Latin-to-Latin example pack. The mapping swaps a/z, m/n and v/w (in both cases) and pairs every other listed character with itself.
    """
    language_name = "Latin to Latin"
    language_code = "l2l"
    # The same character set is declared for both directions.
    reversed_characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"
    characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"
    # The two strings are aligned position by position.
    mapping = (
        u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW",
        u"zbgdeailxkhnjmpswtrcqv&ofZBGDEAILXKHNJMPSWTRCQOFV",
    )


# Make the pack available under its ``l2l`` code.
registry.register(LatinToLatinLanguagePack)
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.mk.translit_language_pack'
__author__ = 'Igor Stamatovski'
__copyright__ = '2016 Igor Stamatovski'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('MacedonianLanguagePack',)

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mk import data

class MacedonianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Macedonian.
    Background: http://en.wikipedia.org/wiki/Romanization_of_Macedonian
    """
    language_name = "Macedonian"
    language_code = "mk"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(MacedonianLanguagePack)
        u"Shch": u"Щ",
        # u"Iu": u"Ю",
        # u"Ia": u"Я",
        u"Yu": u"Ю",
        u"Ya": u"Я",
    }

    reversed_specific_pre_processor_mapping = {
        u"ъ": u"",
        u"ь": u"",
        u"Ъ": u"",
        u"Ь": u""
    }


# Force-register so this pack replaces any pack already using its code.
registry.register(Gost2006RuLangPack, force=True)

# Length of the longest registered language code; used to align log output.
LANG_PADDING = max(map(len, get_available_language_codes()))


@lru_cache
def transliterate(text, lang='ru-gost', reversed=True):
    """Transliterate ``text`` with the ``lang`` pack and print the result.

    Returns the transliterated string when it consists of ASCII characters
    only. Otherwise every non-ASCII character of the result is printed.

    NOTE(review): in the lines visible here the non-ASCII branch returns
    nothing (implicitly ``None``) - confirm this is intended.
    NOTE(review): results are memoized by ``lru_cache``, so a repeated call
    with the same arguments skips the prints - confirm this is acceptable.
    """
    trans = translit(text, lang, reversed)
    # The language code is padded to LANG_PADDING so the log lines align.
    print(f"translit[{lang:{LANG_PADDING}}]: {text} => {trans}")

    if trans.isascii():
        return trans
    else:
        # Report each character that could not be brought down to ASCII.
        for i in trans:
            if not i.isascii():
                print(i, i.isascii())
    See `http://en.wikipedia.org/wiki/Georgian_alphabet` for details.
    """
    language_code = "ka"
    language_name = "Georgian"
    character_ranges = ((0x10A0, 0x10C5), (0x10D0, 0x10FC), (0x2D00, 0x2D25))
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    detectable = True

    def translit(self,
                 value,
                 reversed=False,
                 strict=False,
                 fail_silently=True):
        """Lower-case ``value`` and hand it to the base-class ``translit``.

        ``reversed``, ``strict`` and ``fail_silently`` are passed through
        unchanged.
        """
        # Georgian has no capital letters, so the input is lower-cased
        # before the standard transliteration runs.
        lowered = value.lower()
        options = dict(reversed=reversed,
                       strict=strict,
                       fail_silently=fail_silently)
        return super(GeorgianLanguagePack, self).translit(value=lowered,
                                                          **options)


# Add GeorgianLanguagePack to the shared registry.
registry.register(GeorgianLanguagePack)
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.l1 import data

__title__ = 'transliterate.contrib.languages.l1.translit_language_pack'
__author__ = 'Marco Pattaro'
__copyright__ = '2016-2017 Marco Pattaro'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('Latin1SupplementLanguagePack',)


class Latin1SupplementLanguagePack(TranslitLanguagePack):
    """Pack for the letters of the Latin-1 Supplement block.

    Not a language as such, but a set of commonly found unicode characters.
    Background:
    `http://en.wikipedia.org/wiki/Latin-1_Supplement_%28Unicode_block%29`.
    """
    language_name = "Latin1 Supplement"
    language_code = "l1"
    detectable = True

    # U+00C0-U+00FF without U+00D7 and U+00F7 (the multiplication and
    # division signs).
    character_ranges = ((0x00C0, 0x00D6), (0x00D8, 0x00F6), (0x00F8, 0x00FF))

    # The character tables are kept in the sibling ``data`` module.
    reversed_specific_pre_processor_mapping = (
        data.reversed_specific_pre_processor_mapping)
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(Latin1SupplementLanguagePack)
Beispiel #26
0
from transliterate.base import TranslitLanguagePack, registry


class ExampleLanguagePack(TranslitLanguagePack):
    """
    Example language pack: the letters a-j are paired, position by position,
    with the digits 1-9 and 0.
    """
    language_code = "example"
    language_name = "Example"
    mapping = (
        u"abcdefghij",
        u"1234567890",
    )


registry.register(ExampleLanguagePack)

# NOTE: ``print`` is called as a function below. With a single argument this
# prints the same text on Python 2 and is valid syntax on Python 3.
print('\nList of available (registered) languages after registering a new language pack \n---------------------------------------')
print(get_available_language_codes())

print('\nTransliteration to Example\n---------------------------------------')
print(translit(text, 'example'))

print('\nTransliterated lorem ipsum generator \n---------------------------------------')
from transliterate.contrib.apps.translipsum import TranslipsumGenerator
g_am = TranslipsumGenerator(language_code='hy')

print('Generating paragraphs')
print(g_am.generate_paragraph())

g_ru = TranslipsumGenerator(language_code='ru')
Beispiel #27
0
        u'ф': u'f',
        u'х': u'h',
        u'ц': u'ts',
        u'ч': u'ch',
        u'ш': u'sh',
        u'щ': u'sch',
        u'ъ': u'',
        u'ы': u'y',
        u'ь': u'',
        u'э': u'e',
        u'ю': u'yu',
        u'я': u'ya',
    }


# Force-register so this pack replaces any pack already using its code.
registry.register(ExampleLanguagePack, force=True)

print(get_available_language_codes())

# Example output of the print above: ['el', 'hy', 'ka', 'ru', 'example']
text = '40 лет Октября'


def transliterate(text):
    """Transliterate ``text`` with the 'example' pack, print and return it."""
    result = translit(text, 'example')
    print(result)
    return result


# Script entry point: transliterate one sample word.
if __name__ == "__main__":
    transliterate('Коммунистическая')
from transliterate import get_available_language_codes, translit, slugify
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.ru.translit_language_pack import RussianLanguagePack
from functools import lru_cache

# Register the bundled Russian pack imported above.
registry.register(RussianLanguagePack)


class ExampleLanguagePack(TranslitLanguagePack):
    language_code = "example"
    language_name = "Example"
    mapping = {u"", u""}

    pre_processor_mapping = {
        u'А': u'A',
        u'Б': u'B',
        u'В': u'V',
        u'Г': u'G',
        u'Д': u'D',
        u'Е': u'E',
        u'Ё': u'E',
        u'Ж': u'Zh',
        u'З': u'Z',
        u'И': u'I',
        u'Й': u'Y',
        u'К': u'K',
        u'Л': u'L',
        u'М': u'M',
        u'Н': u'N',
        u'О': u'O',
        u'П': u'P',
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.hy.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = 'Copyright (c) 2013 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('ArmenianLanguagePack', )

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.hy import data


class ArmenianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Armenian. Background: https://en.wikipedia.org/wiki/Armenian_alphabet
    """
    language_name = "Armenian"
    language_code = "hy"
    detectable = True

    # Unicode block Armenian (U+0530-U+058F) plus U+FB10-U+FB1F, the range
    # that holds the Armenian ligatures.
    character_ranges = ((0x0530, 0x058F), (0xFB10, 0xFB1F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_pre_processor_mapping = data.reversed_specific_pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(ArmenianLanguagePack)
Beispiel #30
0
# Progress display made of a percentage, a Bar('>') widget and an ETA.
widgets = [Percentage(), Bar('>'), ' ', ETA(), ' ']
pbar = ProgressBar(widgets=widgets, maxval=10000000)


# Discover the language packs before the custom pack is registered.
autodiscover()


class ReverseInverseRussianLanguagePack(TranslitLanguagePack):
    """Pack pairing Russian letters with the characters of a QWERTY layout.

    The two mapping strings are aligned position by position: each Cyrillic
    letter in the first string is paired with the character at the same
    index of the second string.
    """
    language_code = "ru_inv_en"
    language_name = "ru_inv_en"
    mapping = (
        u"йцукенгшщзхъфывапролджэёячсмитьбю",
        # The backslash (paired with "ё") is escaped explicitly. The former
        # "\z" was an invalid escape sequence that only produced the same
        # two characters by accident and triggers a warning on newer Pythons.
        u"qwertyuiop[]asdfghjkl;'\\zxcvbnm,.",
    )

registry.register(ReverseInverseRussianLanguagePack)



def _reporthook(numblocks, blocksize, filesize, url=None):
    """Write a single-line download progress report to stdout.

    Args:
        numblocks: number of blocks transferred so far.
        blocksize: size of one block.
        filesize: total size; when the percentage cannot be computed from
            it (e.g. it is zero), 100% is reported.
        url: download URL; only its base name is shown. May be omitted.

    The line is 70 characters wide (66 for the name, 3 for the number, 1 for
    the percent sign). On every call after the first, 70 backspaces are
    written first so the previous report is overwritten.
    """
    # ``url`` defaults to None, which ``os.path.basename`` cannot handle.
    base = os.path.basename(url) if url else ""
    try:
        percent = min((numblocks * blocksize * 100) / filesize, 100)
    except (ArithmeticError, TypeError):
        # Only arithmetic/type failures mean "size unknown". A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        percent = 100
    if numblocks != 0:
        sys.stdout.write("\b" * 70)
    sys.stdout.write("%-66s%3d%%" % (base, percent))


def downloaddictionaries(dictionary_strings):
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.ru.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2015 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('RussianLanguagePack',)

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.ru import data

class RussianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Russian. Background:
    `http://en.wikipedia.org/wiki/Russian_alphabet`.
    """
    language_name = "Russian"
    language_code = "ru"
    detectable = True

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(RussianLanguagePack)
Beispiel #32
0
from typing import Iterator, TYPE_CHECKING

from algoliasearch_django import raw_search
from django.db import models
from django.utils.translation import ugettext_lazy as _
from transliterate import translit
from transliterate.base import registry

from cinemanio.api.helpers import global_id
from cinemanio.core.translit.ru import RussianLanguagePack

if TYPE_CHECKING:
    from django.db.models import QuerySet  # noqa
    from typing import Iterable, List  # noqa

# Register the project's own Russian pack (cinemanio.core.translit.ru).
registry.register(RussianLanguagePack)


class BaseModel(models.Model):
    """
    Base model for Movie and Person
    """
    # TODO: remove field
    slug = models.SlugField(_('Slug'),
                            max_length=100,
                            unique=True,
                            null=True,
                            blank=True)

    site_official_url = models.URLField(_('Official site'),
                                        null=True,
Beispiel #33
0
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.bg.translit_language_pack'
__author__ = 'Petar Chakarov'
__copyright__ = 'Copyright (c) 2014 Petar Chakarov'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('BulgarianLanguagePack', )

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.bg import data


class BulgarianLanguagePack(TranslitLanguagePack):
    """
    Transliteration pack for Bulgarian.
    Background: http://en.wikipedia.org/wiki/Romanization_of_Bulgarian
    """
    language_name = "Bulgarian"
    language_code = "bg"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(BulgarianLanguagePack)
Beispiel #34
0
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mn import data

__title__ = 'transliterate.contrib.languages.mn.translit_language_pack'
__author__ = 'Enkhbold Bataa'
__copyright__ = '2016 Enkhbold Bataa'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('MongolianLanguagePack',)


class MongolianLanguagePack(TranslitLanguagePack):
    """Transliteration pack for Mongolian (Cyrillic script).

    Background:
    `https://en.wikipedia.org/wiki/Mongolian_Cyrillic_alphabet`.
    """
    language_name = "Mongolian"
    language_code = "mn"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(MongolianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.uk import data


__title__ = 'transliterate.contrib.languages.uk.translit_language_pack'
__author__ = 'Timofey Pchelintsev'
__copyright__ = '2014-2015 Timofey Pchelintsev'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('UkrainianLanguagePack',)


class UkrainianLanguagePack(TranslitLanguagePack):
    """Transliteration pack for Ukrainian.

    Background: `http://en.wikipedia.org/wiki/Ukrainian_alphabet`.
    """
    language_name = "Ukrainian"
    language_code = "uk"

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # The character tables are kept in the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


registry.register(UkrainianLanguagePack)
Beispiel #36
0
        u"ч": u"ch",
        u"Ш": u"Sh",
        u"ш": u"sh",
        u"Щ": u"Shch",
        u"щ": u"shch",
        u"Ю": u"Yu",
        u"ю": u"iu",
        u"Я": u"Ya",
        u"я": u"ia",
        u"ь": u"",
        u"ъ": u"",
        u"\'": u"",
    }


# Add UkrTranslitLanguagePack to the shared registry (no force: an existing
# pack with the same code is not replaced).
registry.register(UkrTranslitLanguagePack)


def transliterateField(request):
    """Return the transliteration of the ``fieldData`` query parameter.

    The value is lower-cased and then title-cased before it is
    transliterated with the 'ukrTranslit' pack. The response is a
    ``JsonResponse`` of the form ``{'transliteration': <text>}``.

    A request without ``fieldData`` yields an empty transliteration. It used
    to fail because ``None`` was passed on to ``translit``.
    """
    fieldData = request.GET.get('fieldData')
    if fieldData is None:
        return JsonResponse({'transliteration': ''})

    # The debug prints were dropped: they listed all language codes and ran
    # an extra, unused transliteration on every request.
    fieldData = fieldData.lower().title()
    data = {
        'transliteration': translit(fieldData, 'ukrTranslit'),
    }
    return JsonResponse(data)

Beispiel #37
0
def translit2ru(s):
    """Transliterate ``s`` with the 'ru_new' pack.

    "zhy" and "Zhy" are replaced with "жи" and "Жи" first, in that order,
    and the pack is registered on every call.
    """
    registry.register(ImprovedRussianLanguagePack)
    prepared = s
    for latin, cyrillic in ((u"zhy", u"жи"), (u"Zhy", u"Жи")):
        prepared = prepared.replace(latin, cyrillic)
    return translit(prepared, 'ru_new')
# -*- coding: utf-8 -*-
"""
Latin to Latin example.
"""

from transliterate import translit
from transliterate.discover import autodiscover
from transliterate.base import TranslitLanguagePack, registry

# First autodiscover bundled language packs.
autodiscover()


class LatinToLatinLanguagePack(TranslitLanguagePack):
    """
    Latin-to-Latin example pack. The mapping swaps a/z, m/n and v/w (in both cases) and pairs every other listed character with itself.
    """
    language_name = "Latin to Latin"
    language_code = "l2l"
    # The same character set is declared for both directions.
    reversed_characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"
    characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"
    # The two strings are aligned position by position.
    mapping = (
        u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW",
        u"zbgdeailxkhnjmpswtrcqv&ofZBGDEAILXKHNJMPSWTRCQOFV",
    )


# Make the pack available under its ``l2l`` code.
registry.register(LatinToLatinLanguagePack)
Beispiel #39
0
        u"dz": u"ձ",
        u"gh": u"ղ",
        u"tch": u"ճ",
        u"sh": u"շ",
        u"vo": u"ո",
        u"ch": u"չ",
        u"dj": u"ջ",
        u"ph": u"փ",
        u"u": u"ու",

        # uppercase
        u"E'": u"Է",
        u"Y": u"Ը",
        u"Th": u"Թ",
        u"Jh": u"Ժ",
        u"Ts": u"Ծ",
        u"Dz": u"Ձ",
        u"Gh": u"Ղ",
        u"Tch": u"Ճ",
        u"Sh": u"Շ",
        u"Vo": u"Ո",
        u"Ch": u"Չ",
        u"Dj": u"Ջ",
        u"Ph": u"Փ",
        u"U": u"Ու"
    }
    detectable = True


# Add ArmenianLanguagePack to the shared registry.
registry.register(ArmenianLanguagePack)
    """Language pack for Georgian language.

    See `http://en.wikipedia.org/wiki/Georgian_alphabet` for details.
    """
    language_code = "ka"
    language_name = "Georgian"
    character_ranges = ((0x10A0, 0x10C5), (0x10D0, 0x10FC), (0x2D00, 0x2D25))
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    detectable = True

    def translit(self,
                 value,
                 reversed=False,
                 strict=False,
                 fail_silently=True):
        """Lower-case ``value`` and hand it to the base-class ``translit``.

        ``reversed``, ``strict`` and ``fail_silently`` are passed through
        unchanged.
        """
        # Georgian has no capital letters, so the input is lower-cased
        # before the standard transliteration runs.
        lowered = value.lower()
        options = dict(reversed=reversed,
                       strict=strict,
                       fail_silently=fail_silently)
        return super(GeorgianLanguagePack, self).translit(value=lowered,
                                                          **options)


# Add GeorgianLanguagePack to the shared registry.
registry.register(GeorgianLanguagePack)
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.el import data

__title__ = 'transliterate.contrib.languages.el.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2018 Artur Barseghyan'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('GreekLanguagePack', )


class GreekLanguagePack(TranslitLanguagePack):
    """Transliteration language pack for Greek (language code ``el``).

    All mapping tables are taken from the accompanying ``data`` module.

    Background reading:
    `http://en.wikipedia.org/wiki/Greek_alphabet` and
    `https://en.wikipedia.org/wiki/Romanization_of_Greek#Modern_Greek`.
    """
    # Identification of the pack.
    language_name = "Greek"
    language_code = "el"

    # Mapping tables supplied by the ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping

    # Unicode code point ranges declared for this language.
    character_ranges = (
        (0x0370, 0x03FF),
        (0x1F00, 0x1FFF),
    )
    detectable = True


# Module-level registration: runs when this module is imported.
registry.register(GreekLanguagePack)
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mn import data

__title__ = 'transliterate.contrib.languages.mn.translit_language_pack'
__author__ = 'Enkhbold Bataa'
__copyright__ = '2016 Enkhbold Bataa'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('MongolianLanguagePack',)


class MongolianLanguagePack(TranslitLanguagePack):
    """Transliteration language pack for Mongolian (language code ``mn``).

    All mapping tables are taken from the accompanying ``data`` module.

    Background reading:
    `https://en.wikipedia.org/wiki/Mongolian_Cyrillic_alphabet`.
    """
    # Identification of the pack.
    language_name = "Mongolian"
    language_code = "mn"

    # Mapping tables supplied by the ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping

    # Unicode code point ranges declared for this language.
    character_ranges = (
        (0x0400, 0x04FF),
        (0x0500, 0x052F),
    )
    detectable = False


# Module-level registration: runs when this module is imported.
registry.register(MongolianLanguagePack)
Beispiel #43
0
# -*- coding: utf-8 -*-

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.sr import data

__title__ = 'transliterate.contrib.languages.sr.translit_language_pack'
__author__ = 'Saša Kelečević'
__copyright__ = '2017 Saša Kelečević'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('SerbianLanguagePack',)


class SerbianLanguagePack(TranslitLanguagePack):
    """Transliteration language pack for Serbian (language code ``sr``).

    All mapping tables are taken from the accompanying ``data`` module.

    Background reading:
    https://en.wikipedia.org/wiki/Romanization_of_Serbian
    """
    # Identification of the pack.
    language_name = "Serbian"
    language_code = "sr"

    # Mapping tables supplied by the ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping

    # Unicode code point ranges declared for this language.
    character_ranges = (
        (0x0408, 0x04F0),
        (0x0000, 0x017F),
    )
    detectable = False


# Module-level registration: runs when this module is imported.
registry.register(SerbianLanguagePack)
# -*- coding: utf-8 -*-

__title__ = 'transliterate.contrib.languages.bg.translit_language_pack'
__author__ = 'Petar Chakarov'
__copyright__ = 'Copyright (c) 2014 Petar Chakarov'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('BulgarianLanguagePack',)

from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.bg import data

class BulgarianLanguagePack(TranslitLanguagePack):
    """Transliteration language pack for Bulgarian (language code ``bg``).

    All mapping tables are taken from the accompanying ``data`` module.

    Background reading:
    http://en.wikipedia.org/wiki/Romanization_of_Bulgarian
    """
    # Identification of the pack.
    language_name = "Bulgarian"
    language_code = "bg"

    # Mapping tables supplied by the ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping

    # Unicode code point ranges declared for this language.
    character_ranges = (
        (0x0400, 0x04FF),
        (0x0500, 0x052F),
    )
    detectable = False


# Module-level registration: runs when this module is imported.
registry.register(BulgarianLanguagePack)
get rid of accented characters in Greek but leave other characters intact.

:example:

>>> from foo.greekunaccented import *
>>> from transliterate import translit
>>> print translit(u'άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ', 'el2')
αεηιιιουυυωΑΕΗΙΙΟΥΥΩ
"""

from transliterate import translit
from transliterate.discover import autodiscover
from transliterate.base import TranslitLanguagePack, registry

# First autodiscover bundled language packs.
autodiscover()

class GreekUnaccentedLanguagePack(TranslitLanguagePack):
    """Custom pack (code ``el2``) pairing accented Greek letters with
    their unaccented forms.

    The two ``mapping`` strings are aligned position by position: each
    accented letter in the first string corresponds to the plain letter
    at the same index in the second. No other characters are listed.
    """
    language_name = "Greek without accented characters"
    language_code = "el2"
    mapping = (
        u"άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ",  # accented letters
        u"αεηιιιουυυωΑΕΗΙΙΟΥΥΩ",  # unaccented counterparts
    )


# Module-level registration: runs when this module is imported.
registry.register(GreekUnaccentedLanguagePack)
Beispiel #46
0
from transliterate.base import TranslitLanguagePack, registry
from transliterate.discover import autodiscover
from transliterate import translit

autodiscover()


class CzechLanguagePack(TranslitLanguagePack):
    """Custom pack (code ``cs``) pairing lowercase Czech letters with
    their diacritic-free ASCII forms.

    The two ``mapping`` strings are aligned position by position. Only
    lowercase letters are listed.
    """
    language_name = 'Czech'
    language_code = 'cs'
    mapping = (
        u'aábcčdďeéěfghiíjklmnňoópqrřsštťuúůvwxyýzž',
        u'aabccddeeefghiijklmnnoopqrrssttuuuvwxyyzz',
    )
    # The digraph "ch" is mapped onto itself.
    pre_processor_mapping = {u'ch': u'ch'}


# Module-level registration: runs when this module is imported.
registry.register(CzechLanguagePack)


class Translit:
    def __init__(self):
        pass

    @staticmethod
    def translit(string):
        # Try CZ
        string = translit(string, 'cs')

        # Normalize for RU chars
        string = translit(string, 'ru', reversed=True)

        # Remove spaces and punctuation