def _transliterate(self):
    """Store the reverse transliteration of ``self.name``.

    The Hebrew and the Greek language packs are both force-registered on
    every call. A record whose ``lang_id.code`` equals ``'el_GR'`` is
    processed with the ``'el'`` pack; any other record is processed with
    the ``'he'`` pack. The result is written to ``self.transliteration``.
    """
    for language_pack in (HebrewLanguagePack, GreekLanguagePack):
        registry.register(language_pack, force=True)

    pack_code = 'el' if self.lang_id.code == 'el_GR' else 'he'
    self.transliteration = translit(self.name, pack_code, reversed=True)
def test_33_register_unregister(self):
    """Test registering and unregistering of language packs.

    Checks, in order, that:

    * a pack whose code is already taken is rejected without ``force``;
    * the same pack is accepted with ``force=True``;
    * a second forced registration of that code is rejected;
    * a non-forced pack (Armenian) can be unregistered;
    * the force-registered pack cannot be unregistered.
    """
    from transliterate.contrib.languages.hy.translit_language_pack import (
        ArmenianLanguagePack)

    class A(TranslitLanguagePack):
        language_code = "ru"
        language_name = "Example"
        mapping = data.test_33_register_unregister_mapping

    # Since key `ru` already exists in the registry it can't be replaced
    # (without force-register).
    res = registry.register(A)
    self.assertFalse(res)

    # Now with force-register it can.
    res = registry.register(A, force=True)
    self.assertTrue(res)

    # Once we have it there and it's forced, we can't register another.
    res = registry.register(A, force=True)
    self.assertFalse(res)

    # Unregister non-forced language pack. The two conditions are asserted
    # separately so a failure names the one that broke.
    res = registry.unregister(ArmenianLanguagePack)
    self.assertTrue(res)
    self.assertNotIn(
        ArmenianLanguagePack.language_code,
        get_available_language_codes()
    )

    # The force-registered pack must survive an unregister attempt.
    res = registry.unregister(A)
    self.assertFalse(res)
    self.assertIn(A.language_code, get_available_language_codes())
def test_33_register_unregister(self):
    """Test registering and unregistering of language packs.

    Checks, in order, that:

    * a pack whose code is already taken is rejected without ``force``;
    * the same pack is accepted with ``force=True``;
    * a second forced registration of that code is rejected;
    * a non-forced pack (Armenian) can be unregistered;
    * the force-registered pack cannot be unregistered.
    """
    from transliterate.contrib.languages.hy.translit_language_pack import ArmenianLanguagePack

    class A(TranslitLanguagePack):
        language_code = "ru"
        language_name = "Example"
        mapping = (
            u"abcdefghij",
            u"1234567890",
        )

    # Since key `ru` already exists in the registry it can't be replaced (without force-register).
    res = registry.register(A)
    self.assertFalse(res)

    # Now with force-register it can.
    res = registry.register(A, force=True)
    self.assertTrue(res)

    # Once we have it there and it's forced, we can't register another.
    res = registry.register(A, force=True)
    self.assertFalse(res)

    # Unregister non-forced language pack. The two conditions are asserted
    # separately so a failure names the one that broke.
    res = registry.unregister(ArmenianLanguagePack)
    self.assertTrue(res)
    self.assertNotIn(
        ArmenianLanguagePack.language_code,
        get_available_language_codes()
    )

    # The force-registered pack must survive an unregister attempt.
    res = registry.unregister(A)
    self.assertFalse(res)
    self.assertIn(A.language_code, get_available_language_codes())
def _transliterating(self):
    """Return the reverse transliteration of ``self.content``.

    The language is chosen from ``self.lang_id.name``: ``'Greek'`` uses the
    ``'el'`` pack and ``'Hebrew'`` uses the ``'he'`` pack. The matching
    language pack is force-registered right before it is used.

    Returns:
        The value produced by ``translit(self.content, <code>,
        reversed=True)``.

    Raises:
        ValueError: if ``self.lang_id.name`` is neither ``'Greek'`` nor
            ``'Hebrew'``. Previously this case fell through to the
            ``return`` with the result variable unbound and surfaced as an
            ``UnboundLocalError``.
    """
    language_name = self.lang_id.name

    if language_name == 'Greek':
        registry.register(GreekLanguagePack, force=True)
        return translit(self.content, 'el', reversed=True)

    if language_name == 'Hebrew':
        registry.register(HebrewLanguagePack, force=True)
        return translit(self.content, 'he', reversed=True)

    raise ValueError(
        "Unsupported language for transliteration: %r" % (language_name,)
    )
def content_file_name(instance, filename):
    """Build the storage path for a portrait file.

    Bundled language packs are autodiscovered and ``KazakhLanguagePack`` is
    registered on every call. The resulting name has the form
    ``<pk>_<last name>_<first name>.<ext>``, where both names are
    reverse-transliterated with the ``'kz'`` pack, ``<ext>`` is whatever
    follows the last dot of ``filename`` (the whole name if there is no
    dot), and the complete name is lower-cased.

    Returns:
        The file name joined onto the ``'portraits'`` directory.
    """
    autodiscover()
    registry.register(KazakhLanguagePack)

    extension = filename.rsplit('.', 1)[-1]
    primary_key = str(instance.pk)
    last_name = translit(instance.last_name, 'kz', reversed=True)
    first_name = translit(instance.first_name, 'kz', reversed=True)

    portrait_name = "%s_%s_%s.%s" % (
        primary_key, last_name, first_name, extension)
    return os.path.join('portraits', portrait_name.lower())
def test_15_register_custom_language_pack(self):
    """Register a custom language pack and transliterate with it."""

    class ExampleLanguagePack(TranslitLanguagePack):
        """Pack whose mapping is taken from the shared test data."""

        language_name = "Example"
        language_code = "example"
        mapping = data.test_15_register_custom_language_pack_mapping

    registry.register(ExampleLanguagePack)

    # The new code must be visible through the public listing helper.
    available_codes = get_available_language_codes()
    assert 'example' in available_codes

    expected = 'Lor5m 9psum 4olor s9t 1m5t'
    res = translit(self.latin_text, 'example')
    self.assertEqual(res, expected)
    return res
def test_15_register_custom_language_pack(self):
    """Test that a custom language pack can be registered and used."""

    class ExampleLanguagePack(TranslitLanguagePack):
        """Example pack; its mapping lives in the test data module."""

        mapping = data.test_15_register_custom_language_pack_mapping
        language_code = "example"
        language_name = "Example"

    registry.register(ExampleLanguagePack)

    # After registration the code has to show up among the available ones.
    codes_after_registration = get_available_language_codes()
    assert 'example' in codes_after_registration

    res = translit(self.latin_text, 'example')
    self.assertEqual(res, 'Lor5m 9psum 4olor s9t 1m5t')
    return res
async def cyrillify(self, ctx, *text):
    """Send ``text`` rewritten with Cyrillic letters through ``ctx.send``.

    A language pack with code ``"custom"`` is defined and registered on
    every call. The words in ``text`` are joined with single spaces; when
    no words are given, a lorem-ipsum sentence is used instead.
    """

    class ExampleLanguagePack(TranslitLanguagePack):
        language_code = "custom"
        language_name = "Custom"
        mapping = (
            "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYyZz",
            "АаБбКкДдЕеФфГгХхИиЙйКкЛлМмНнОоПпКкРрСсТтУуВвУуЙйЗз",
        )
        pre_processor_mapping = {
            "scht": "щ",
            "sht": "щ",
            "sh": "ш",
            "tsch": "ч",
            "tch": "ч",
            "sch": "ш",
            "zh": "ж",
            "tz": "ц",
            "ch": "ч",
            "yu": "ю",
            "ya": "я",
            "x": "кс",
            "ck": "к",
            "ph": "ф",
        }
        # Give every lowercase entry a capitalized twin ("sh" -> "Sh").
        # The items are snapshotted first because the dict grows here.
        pre_processor_mapping.update({
            latin.capitalize(): cyrillic.capitalize()
            for latin, cyrillic in list(pre_processor_mapping.items())
        })

    registry.register(ExampleLanguagePack)

    message = " ".join(text) if text else "Lorem ipsum dolor sit amet."
    await ctx.send(translit(message, "custom"))
def __test_34_latin_to_latin(self):
    """Register a Latin-to-Latin pack and run a strict transliteration.

    The name does not start with ``test``, so default unittest discovery
    does not collect this method.
    """

    class LatinToLatinLanguagePack(TranslitLanguagePack):
        """Pack whose mapping and character sets come from the test data."""

        language_name = "Latin to Latin"
        language_code = "l2l"
        reversed_characters = data.test_34_latin_to_latin_reversed_characters
        characters = data.test_34_latin_to_latin_characters
        mapping = data.test_34_latin_to_latin_mapping

    registered = registry.register(LatinToLatinLanguagePack)
    self.assertTrue(registered)

    source_text = data.test_34_latin_to_latin_text
    language_pack = LatinToLatinLanguagePack()
    # Strict mode with errors enabled; the result is not asserted on.
    res = language_pack.translit(
        source_text,
        strict=True,
        fail_silently=False
    )
:example: >>> from foo.greekunaccented import * >>> from transliterate import translit >>> print translit(u'άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ', 'el2') αεηιιιουυυωΑΕΗΙΙΟΥΥΩ """ from transliterate import translit from transliterate.discover import autodiscover from transliterate.base import TranslitLanguagePack, registry # First autodicover bundled language packs. autodiscover() class GreekUnaccentedLanguagePack(TranslitLanguagePack): """ Custom language pack which gets rid of accented characters in Greek but leaves other characters intact. """ language_code = "el2" language_name = "Greek without accented characters" mapping = ( u"άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ", u"αεηιιιουυυωΑΕΗΙΙΟΥΥΩ", ) # Register registry.register(GreekUnaccentedLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.uk import data

__title__ = 'transliterate.contrib.languages.uk.translit_language_pack'
__author__ = 'Timofey Pchelintsev'
__copyright__ = '2014-2015 Timofey Pchelintsev'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('UkrainianLanguagePack',)


class UkrainianLanguagePack(TranslitLanguagePack):
    """Language pack for the Ukrainian language.

    See `http://en.wikipedia.org/wiki/Ukrainian_alphabet` for details.
    """

    language_name = "Ukrainian"
    language_code = "uk"

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(UkrainianLanguagePack)
# Progress bar shared by this script: percentage, bar and ETA.
widgets = [Percentage(), Bar('>'), ' ', ETA(), ' ']
pbar = ProgressBar(widgets=widgets, maxval=10000000)

# Discover the bundled language packs before adding the custom one below.
autodiscover()


class ReverseInverseRussianLanguagePack(TranslitLanguagePack):
    """Keyboard-layout pack registered under the code ``ru_inv_en``.

    The two mapping strings have the same length; each lowercase Russian
    letter in the first string is paired with the character at the same
    position in the second string, which lists keys in QWERTY order.
    """

    language_code = "ru_inv_en"
    language_name = "ru_inv_en"
    mapping = (
        u"йцукенгшщзхъфывапролджэёячсмитьбю",
        # The backslash is a real mapped character. It is written as an
        # escaped "\\" because a bare "\z" is an invalid escape sequence
        # that newer Python versions warn about.
        u"qwertyuiop[]asdfghjkl;'\\zxcvbnm,.",
    )


registry.register(ReverseInverseRussianLanguagePack)


def _reporthook(numblocks, blocksize, filesize, url=None):
    """Write a one-line download progress report to stdout.

    Args:
        numblocks: number of blocks transferred so far.
        blocksize: size of one block.
        filesize: total size; when the percentage cannot be computed from
            it (for example it is zero or not a number), 100 is reported.
        url: address being downloaded; only its base name is printed.
    """
    base = os.path.basename(url)
    try:
        percent = min((numblocks * blocksize * 100) / filesize, 100)
    except (ArithmeticError, TypeError):
        # Only arithmetic/type failures are treated as "size unknown"; a
        # bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        percent = 100
    if numblocks != 0:
        # Back up over the previously written report so it is overwritten.
        sys.stdout.write("\b" * 70)
    sys.stdout.write("%-66s%3d%%" % (base, percent))


def downloaddictionaries(dictionary_strings):
    # NOTE(review): only this first statement is visible in this chunk;
    # the rest of the function is assumed to follow elsewhere.
    url = dictionary_urls[dictionary_strings]
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.el import data

__all__ = ('GreekLanguagePack',)
__title__ = 'transliterate.contrib.languages.el.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2018 Artur Barseghyan'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'


class GreekLanguagePack(TranslitLanguagePack):
    """Language pack for the Greek language.

    References: `http://en.wikipedia.org/wiki/Greek_alphabet` and
    `https://en.wikipedia.org/wiki/Romanization_of_Greek#Modern_Greek`.
    """

    language_name = "Greek"
    language_code = "el"
    detectable = True

    # Unicode blocks Greek and Coptic (U+0370-U+03FF) and Greek Extended
    # (U+1F00-U+1FFF).
    character_ranges = ((0x0370, 0x03FF), (0x1F00, 0x1FFF))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(GreekLanguagePack)
) # TODO # reversed_specific_mapping = ( # ) # TODO # reversed_specific_pre_processor_mapping = { # } pre_processor_mapping = { u"ч": u"ch", u"щ": u"şh", u"ю": u"iu", u"я": u"ia", u"Ч": u"Ch", u"Щ": u"Şh", u"Ю": u"İu", u"Я": u"İa", } registry.register(KazakhLanguagePack) translit_kk = get_translit_function("kk") def translit(value: str) -> str: """Transliterate the text.""" return translit_kk(value)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.sr import data

__all__ = ('SerbianLanguagePack',)
__title__ = 'transliterate.contrib.languages.sr.translit_language_pack'
__author__ = 'Saša Kelečević'
__copyright__ = '2017 Saša Kelečević'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'


class SerbianLanguagePack(TranslitLanguagePack):
    """Language pack for the Serbian language.

    See https://en.wikipedia.org/wiki/Romanization_of_Serbian for details.
    """

    language_name = "Serbian"
    language_code = "sr"
    detectable = False

    # A slice of the Cyrillic block (U+0408-U+04F0) plus everything from
    # Basic Latin up to the end of Latin Extended-A (U+0000-U+017F).
    character_ranges = ((0x0408, 0x04F0), (0x0000, 0x017F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(SerbianLanguagePack)
u"ю": u"yu", u"я": u"ya", u"Е": u"Ye", u"Ё": u"Yo", u"Ж": u"ZH", u"Х": u"Kh", u"Ц": u"Ts", u"Ч": u"Ch", u"Ш": u"Sh", u"Щ": u"Shch", u"Ю": u"Yu", u"Я": u"Ya", } registry.register(LanguagePack) def get_cities(page, count): if page == 1: page = 0 else: page = (page - 1) * count results = [] file = open('RU.txt', 'r', encoding='utf-8') lines = file.readlines() for i in range(page, page + count): try: city = main_dictionary(lines[i]) results.append(city)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mk import data

__title__ = 'transliterate.contrib.languages.mk.translit_language_pack'
__author__ = 'Igor Stamatovski'
__copyright__ = '2016 Igor Stamatovski'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('MacedonianLanguagePack',)


class MacedonianLanguagePack(TranslitLanguagePack):
    """Language pack for the Macedonian language.

    See http://en.wikipedia.org/wiki/Romanization_of_Macedonian for details.
    """

    language_name = "Macedonian"
    language_code = "mk"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(MacedonianLanguagePack)
u'Є': 'E', u'Ж': 'J', u'З': 'Z', u'И': 'I', u'І': 'I', u'Ї': 'Yi', u'Й': 'Y', u'К': 'K', u'Л': 'L', u'М': 'M', u'Н': 'N', u'О': 'O', u'П': 'P', u'Р': 'R', u'С': 'S', u'Т': 'T', u'У': 'U', u'Ф': 'F', u'Х': 'H', u'Ц': 'C', u'Ч': 'Ch', u'Ш': 'Sh', u'Щ': "Sh'", u'Ь': "'", u'Ю': 'Yu', u'Я': 'Ya', } registry.register(UkrainskiyLanguagePack, force=True)
# NOTE: every print below is the single-argument ``print(...)`` form, which
# produces the same output as the old Python 2 ``print`` statement and is
# also valid on Python 3.
print('\nRegistering custom language pack\n---------------------------------------')

from transliterate.base import TranslitLanguagePack, registry


class ExampleLanguagePack(TranslitLanguagePack):
    """Example language pack pairing the letters a-j with the digits 1-0."""

    language_code = "example"
    language_name = "Example"
    mapping = (
        u"abcdefghij",
        u"1234567890",
    )


registry.register(ExampleLanguagePack)

print('\nList of available (registered) languages after registering a new language pack \n---------------------------------------')
print(get_available_language_codes())

print('\nTransliteration to Example\n---------------------------------------')
print(translit(text, 'example'))

print('\nTransliterated lorem ipsum generator \n---------------------------------------')

from transliterate.contrib.apps.translipsum import TranslipsumGenerator

g_am = TranslipsumGenerator(language_code='hy')

print('Generating paragraphs')
print(g_am.generate_paragraph())

g_ru = TranslipsumGenerator(language_code='ru')
# -*- coding: utf-8 -*-
"""Latin to Latin example."""
from transliterate import translit
from transliterate.discover import autodiscover
from transliterate.base import TranslitLanguagePack, registry

# Discover the bundled language packs first; the custom pack defined below
# is added to the registry afterwards.
autodiscover()


class LatinToLatinLanguagePack(TranslitLanguagePack):
    """Custom language pack that maps Latin characters to Latin characters.

    Comparing the two mapping strings position by position, the pack swaps
    ``a``/``z``, ``m``/``n`` and ``v``/``w`` (in both cases) and maps every
    other listed character onto itself.
    """

    language_name = "Latin to Latin"
    language_code = "l2l"

    # Both character sets equal the first mapping string.
    reversed_characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"
    characters = u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW"

    mapping = (
        u"abgdezilxkhmjnpsvtrcqw&ofABGDEZILXKHMJNPSVTRCQOFW",
        u"zbgdeailxkhnjmpswtrcqv&ofZBGDEAILXKHNJMPSWTRCQOFV",
    )


# Make the pack available under the code "l2l".
registry.register(LatinToLatinLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mk import data

__all__ = ('MacedonianLanguagePack',)
__title__ = 'transliterate.contrib.languages.mk.translit_language_pack'
__author__ = 'Igor Stamatovski'
__copyright__ = '2016 Igor Stamatovski'
__license__ = 'GPL 2.0/LGPL 2.1'


class MacedonianLanguagePack(TranslitLanguagePack):
    """Macedonian language pack.

    Background: http://en.wikipedia.org/wiki/Romanization_of_Macedonian
    """

    # Identification of the pack inside the registry.
    language_code = "mk"
    language_name = "Macedonian"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))
    detectable = False


# Register the pack as an import side effect.
registry.register(MacedonianLanguagePack)
u"Shch": u"Щ", # u"Iu": u"Ю", # u"Ia": u"Я", u"Yu": u"Ю", u"Ya": u"Я", } reversed_specific_pre_processor_mapping = { u"ъ": u"", u"ь": u"", u"Ъ": u"", u"Ь": u"" } registry.register(Gost2006RuLangPack, force=True) LANG_PADDING = max([len(lang) for lang in get_available_language_codes()]) @lru_cache def transliterate(text, lang='ru-gost', reversed=True): trans = translit(text, lang, reversed) print(f"translit[{lang:{LANG_PADDING}}]: {text} => {trans}") if trans.isascii(): return trans else: for i in trans: if not i.isascii(): print(i, i.isascii())
See `http://en.wikipedia.org/wiki/Georgian_alphabet for details. """ language_code = "ka" language_name = "Georgian" character_ranges = ((0x10A0, 0x10C5), (0x10D0, 0x10FC), (0x2D00, 0x2D25)) mapping = data.mapping pre_processor_mapping = data.pre_processor_mapping detectable = True def translit(self, value, reversed=False, strict=False, fail_silently=True): # Georgian language knows no capitals. Therefore, we convert # everything to lowercase. value = value.lower() # Continue the standard way return super(GeorgianLanguagePack, self).translit( value=value, reversed=reversed, strict=strict, fail_silently=fail_silently ) registry.register(GeorgianLanguagePack)
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.l1 import data

__all__ = ('Latin1SupplementLanguagePack',)
__title__ = 'transliterate.contrib.languages.l1.translit_language_pack'
__author__ = 'Marco Pattaro'
__copyright__ = '2016-2017 Marco Pattaro'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'


class Latin1SupplementLanguagePack(TranslitLanguagePack):
    """Language pack for the Latin-1 Supplement block.

    Not a language as such, but a set of commonly found unicode characters.

    See
    `http://en.wikipedia.org/wiki/Latin-1_Supplement_%28Unicode_block%29`
    for details.
    """

    language_name = "Latin1 Supplement"
    language_code = "l1"
    detectable = True

    # U+00C0-U+00FF with two gaps: U+00D7 and U+00F7 are left out.
    character_ranges = ((0x00C0, 0x00D6), (0x00D8, 0x00F6), (0x00F8, 0x00FF))

    # All transliteration tables come from the sibling ``data`` module.
    reversed_specific_pre_processor_mapping = (
        data.reversed_specific_pre_processor_mapping
    )
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(Latin1SupplementLanguagePack)
from transliterate.base import TranslitLanguagePack, registry


class ExampleLanguagePack(TranslitLanguagePack):
    """Example language pack pairing the letters a-j with the digits 1-0."""

    language_code = "example"
    language_name = "Example"
    mapping = (
        u"abcdefghij",
        u"1234567890",
    )


registry.register(ExampleLanguagePack)

# NOTE: every print below is the single-argument ``print(...)`` form, which
# produces the same output as the old Python 2 ``print`` statement and is
# also valid on Python 3.
print('\nList of available (registered) languages after registering a new language pack \n---------------------------------------')
print(get_available_language_codes())

print('\nTransliteration to Example\n---------------------------------------')
print(translit(text, 'example'))

print('\nTransliterated lorem ipsum generator \n---------------------------------------')

from transliterate.contrib.apps.translipsum import TranslipsumGenerator

g_am = TranslipsumGenerator(language_code='hy')

print('Generating paragraphs')
print(g_am.generate_paragraph())

g_ru = TranslipsumGenerator(language_code='ru')
u'ф': u'f', u'х': u'h', u'ц': u'ts', u'ч': u'ch', u'ш': u'sh', u'щ': u'sch', u'ъ': u'', u'ы': u'y', u'ь': u'', u'э': u'e', u'ю': u'yu', u'я': u'ya', } registry.register(ExampleLanguagePack, force=True) print(get_available_language_codes()) # ['el', 'hy', 'ka', 'ru', 'example'] text = '40 лет Октября' def transliterate(text): trans = translit(text, 'example') print(trans) return trans if __name__ == "__main__": transliterate('Коммунистическая')
from transliterate import get_available_language_codes, translit, slugify from transliterate.base import TranslitLanguagePack, registry from transliterate.contrib.languages.ru.translit_language_pack import RussianLanguagePack from functools import lru_cache registry.register(RussianLanguagePack) class ExampleLanguagePack(TranslitLanguagePack): language_code = "example" language_name = "Example" mapping = {u"", u""} pre_processor_mapping = { u'А': u'A', u'Б': u'B', u'В': u'V', u'Г': u'G', u'Д': u'D', u'Е': u'E', u'Ё': u'E', u'Ж': u'Zh', u'З': u'Z', u'И': u'I', u'Й': u'Y', u'К': u'K', u'Л': u'L', u'М': u'M', u'Н': u'N', u'О': u'O', u'П': u'P',
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.hy import data

__title__ = 'transliterate.contrib.languages.hy.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = 'Copyright (c) 2013 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('ArmenianLanguagePack',)


class ArmenianLanguagePack(TranslitLanguagePack):
    """Language pack for the Armenian language.

    See https://en.wikipedia.org/wiki/Armenian_alphabet for details.
    """

    language_name = "Armenian"
    language_code = "hy"
    detectable = True

    # The Armenian block (U+0530-U+058F) plus U+FB10-U+FB1F, the range that
    # holds the Armenian ligatures.
    character_ranges = ((0x0530, 0x058F), (0xFB10, 0xFB1F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_pre_processor_mapping = (
        data.reversed_specific_pre_processor_mapping
    )
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(ArmenianLanguagePack)
widgets = [Percentage(), Bar('>'), ' ', ETA(), ' '] pbar = ProgressBar(widgets=widgets, maxval=10000000) autodiscover() class ReverseInverseRussianLanguagePack(TranslitLanguagePack): language_code = "ru_inv_en" language_name = "ru_inv_en" mapping = ( u"йцукенгшщзхъфывапролджэёячсмитьбю", u"qwertyuiop[]asdfghjkl;'\zxcvbnm,.", ) registry.register(ReverseInverseRussianLanguagePack) def _reporthook(numblocks, blocksize, filesize, url=None): base = os.path.basename(url) try: percent = min((numblocks * blocksize * 100) / filesize, 100) except: percent = 100 if numblocks != 0: sys.stdout.write("\b" * 70) sys.stdout.write("%-66s%3d%%" % (base, percent)) def downloaddictionaries(dictionary_strings):
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.ru import data

__title__ = 'transliterate.contrib.languages.ru.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2015 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('RussianLanguagePack',)


class RussianLanguagePack(TranslitLanguagePack):
    """Language pack for the Russian language.

    See `http://en.wikipedia.org/wiki/Russian_alphabet` for details.
    """

    language_name = "Russian"
    language_code = "ru"
    detectable = True

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(RussianLanguagePack)
from typing import Iterator, TYPE_CHECKING from algoliasearch_django import raw_search from django.db import models from django.utils.translation import ugettext_lazy as _ from transliterate import translit from transliterate.base import registry from cinemanio.api.helpers import global_id from cinemanio.core.translit.ru import RussianLanguagePack if TYPE_CHECKING: from django.db.models import QuerySet # noqa from typing import Iterable, List # noqa registry.register(RussianLanguagePack) class BaseModel(models.Model): """ Base model for Movie and Person """ # TODO: remove field slug = models.SlugField(_('Slug'), max_length=100, unique=True, null=True, blank=True) site_official_url = models.URLField(_('Official site'), null=True,
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.bg import data

__title__ = 'transliterate.contrib.languages.bg.translit_language_pack'
__author__ = 'Petar Chakarov'
__copyright__ = 'Copyright (c) 2014 Petar Chakarov'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('BulgarianLanguagePack',)


class BulgarianLanguagePack(TranslitLanguagePack):
    """Language pack for the Bulgarian language.

    See http://en.wikipedia.org/wiki/Romanization_of_Bulgarian for details.
    """

    language_name = "Bulgarian"
    language_code = "bg"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(BulgarianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mn import data

__all__ = ('MongolianLanguagePack',)
__title__ = 'transliterate.contrib.languages.mn.translit_language_pack'
__author__ = 'Enkhbold Bataa'
__copyright__ = '2016 Enkhbold Bataa'
__license__ = 'GPL 2.0/LGPL 2.1'


class MongolianLanguagePack(TranslitLanguagePack):
    """Language pack for the Mongolian language.

    See `https://en.wikipedia.org/wiki/Mongolian_Cyrillic_alphabet` for
    details.
    """

    language_name = "Mongolian"
    language_code = "mn"
    detectable = False

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))

    # All transliteration tables come from the sibling ``data`` module.
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping
    mapping = data.mapping


# Make the pack available through the shared registry on import.
registry.register(MongolianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.uk import data

__all__ = ('UkrainianLanguagePack',)
__title__ = 'transliterate.contrib.languages.uk.translit_language_pack'
__author__ = 'Timofey Pchelintsev'
__copyright__ = '2014-2015 Timofey Pchelintsev'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'


class UkrainianLanguagePack(TranslitLanguagePack):
    """Ukrainian language pack.

    Background: `http://en.wikipedia.org/wiki/Ukrainian_alphabet`.
    """

    # Identification of the pack inside the registry.
    language_code = "uk"
    language_name = "Ukrainian"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))


# Register the pack as an import side effect.
registry.register(UkrainianLanguagePack)
u"ч": u"ch", u"Ш": u"Sh", u"ш": u"sh", u"Щ": u"Shch", u"щ": u"shch", u"Ю": u"Yu", u"ю": u"iu", u"Я": u"Ya", u"я": u"ia", u"ь": u"", u"ъ": u"", u"\'": u"", } registry.register(UkrTranslitLanguagePack) def transliterateField(request): fieldData = request.GET.get('fieldData') # print(translit(fieldData, 'en')) print(get_available_language_codes()) print(translit(fieldData, 'ukrTranslit')) fieldData = fieldData.lower().title() data = { 'transliteration': translit(fieldData, 'ukrTranslit'), } return JsonResponse(data)
def translit2ru(s):
    """Transliterate ``s`` with the ``'ru_new'`` language pack.

    ``ImprovedRussianLanguagePack`` is registered on every call. Before the
    text is handed to ``translit``, the sequences ``zhy`` and ``Zhy`` are
    replaced by ``жи`` and ``Жи`` respectively.
    """
    registry.register(ImprovedRussianLanguagePack)

    prepared = s
    for latin, cyrillic in ((u"zhy", u"жи"), (u"Zhy", u"Жи")):
        prepared = prepared.replace(latin, cyrillic)

    return translit(prepared, 'ru_new')
u"dz": u"ձ", u"gh": u"ղ", u"tch": u"ճ", u"sh": u"շ", u"vo": u"ո", u"ch": u"չ", u"dj": u"ջ", u"ph": u"փ", u"u": u"ու", # uppercase u"E'": u"Է", u"Y": u"Ը", u"Th": u"Թ", u"Jh": u"Ժ", u"Ts": u"Ծ", u"Dz": u"Ձ", u"Gh": u"Ղ", u"Tch": u"Ճ", u"Sh": u"Շ", u"Vo": u"Ո", u"Ch": u"Չ", u"Dj": u"Ջ", u"Ph": u"Փ", u"U": u"Ու" } detectable = True registry.register(ArmenianLanguagePack)
"""Language pack for Georgian language. See `http://en.wikipedia.org/wiki/Georgian_alphabet for details. """ language_code = "ka" language_name = "Georgian" character_ranges = ((0x10A0, 0x10C5), (0x10D0, 0x10FC), (0x2D00, 0x2D25)) mapping = data.mapping pre_processor_mapping = data.pre_processor_mapping detectable = True def translit(self, value, reversed=False, strict=False, fail_silently=True): # Georgian language knows no capitals. Therefore, we convert # everything to lowercase. value = value.lower() # Continue the standard way return super(GeorgianLanguagePack, self).translit(value=value, reversed=reversed, strict=strict, fail_silently=fail_silently) registry.register(GeorgianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.el import data

__title__ = 'transliterate.contrib.languages.el.translit_language_pack'
__author__ = 'Artur Barseghyan'
__copyright__ = '2013-2018 Artur Barseghyan'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('GreekLanguagePack',)


class GreekLanguagePack(TranslitLanguagePack):
    """Greek language pack.

    Background: `http://en.wikipedia.org/wiki/Greek_alphabet` and
    `https://en.wikipedia.org/wiki/Romanization_of_Greek#Modern_Greek`.
    """

    # Identification of the pack inside the registry.
    language_code = "el"
    language_name = "Greek"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # Unicode blocks Greek and Coptic (U+0370-U+03FF) and Greek Extended
    # (U+1F00-U+1FFF).
    character_ranges = ((0x0370, 0x03FF), (0x1F00, 0x1FFF))
    detectable = True


# Register the pack as an import side effect.
registry.register(GreekLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.mn import data

__title__ = 'transliterate.contrib.languages.mn.translit_language_pack'
__author__ = 'Enkhbold Bataa'
__copyright__ = '2016 Enkhbold Bataa'
__license__ = 'GPL-2.0-only OR LGPL-2.1-or-later'
__all__ = ('MongolianLanguagePack',)


class MongolianLanguagePack(TranslitLanguagePack):
    """Mongolian language pack.

    Background: `https://en.wikipedia.org/wiki/Mongolian_Cyrillic_alphabet`.
    """

    # Identification of the pack inside the registry.
    language_code = "mn"
    language_name = "Mongolian"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))
    detectable = False


# Register the pack as an import side effect.
registry.register(MongolianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.sr import data

__title__ = 'transliterate.contrib.languages.sr.translit_language_pack'
__author__ = 'Saša Kelečević'
__copyright__ = '2017 Saša Kelečević'
__license__ = 'GPL 2.0/LGPL 2.1'
__all__ = ('SerbianLanguagePack',)


class SerbianLanguagePack(TranslitLanguagePack):
    """Serbian language pack.

    Background: https://en.wikipedia.org/wiki/Romanization_of_Serbian
    """

    # Identification of the pack inside the registry.
    language_code = "sr"
    language_name = "Serbian"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # A slice of the Cyrillic block (U+0408-U+04F0) plus everything from
    # Basic Latin up to the end of Latin Extended-A (U+0000-U+017F).
    character_ranges = ((0x0408, 0x04F0), (0x0000, 0x017F))
    detectable = False


# Register the pack as an import side effect.
registry.register(SerbianLanguagePack)
# -*- coding: utf-8 -*-
from transliterate.base import TranslitLanguagePack, registry
from transliterate.contrib.languages.bg import data

__all__ = ('BulgarianLanguagePack',)
__title__ = 'transliterate.contrib.languages.bg.translit_language_pack'
__author__ = 'Petar Chakarov'
__copyright__ = 'Copyright (c) 2014 Petar Chakarov'
__license__ = 'GPL 2.0/LGPL 2.1'


class BulgarianLanguagePack(TranslitLanguagePack):
    """Bulgarian language pack.

    Background: http://en.wikipedia.org/wiki/Romanization_of_Bulgarian
    """

    # Identification of the pack inside the registry.
    language_code = "bg"
    language_name = "Bulgarian"

    # Transliteration tables, all taken from the sibling ``data`` module.
    mapping = data.mapping
    pre_processor_mapping = data.pre_processor_mapping
    reversed_specific_mapping = data.reversed_specific_mapping

    # Unicode blocks Cyrillic (U+0400-U+04FF) and Cyrillic Supplement
    # (U+0500-U+052F).
    character_ranges = ((0x0400, 0x04FF), (0x0500, 0x052F))
    detectable = False


# Register the pack as an import side effect.
registry.register(BulgarianLanguagePack)
get rid of accented characters in Greek but leave other characters intact. :example: >>> from foo.greekunaccented import * >>> from transliterate import translit >>> print translit(u'άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ', 'el2') αεηιιιουυυωΑΕΗΙΙΟΥΥΩ """ from transliterate import translit from transliterate.discover import autodiscover from transliterate.base import TranslitLanguagePack, registry # First autodicover bundled language packs. autodiscover() class GreekUnaccentedLanguagePack(TranslitLanguagePack): """ Custom language pack which gets rid of accented characters in Greek but leaves other characters intact. """ language_code = "el2" language_name = "Greek without accented characters" mapping = ( u"άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ", u"αεηιιιουυυωΑΕΗΙΙΟΥΥΩ", ) # Register registry.register(GreekUnaccentedLanguagePack)
from transliterate.base import TranslitLanguagePack, registry from transliterate.discover import autodiscover from transliterate import translit autodiscover() class CzechLanguagePack(TranslitLanguagePack): language_code = 'cs' language_name = 'Czech' pre_processor_mapping = {u'ch': u'ch'} mapping = (u'aábcčdďeéěfghiíjklmnňoópqrřsštťuúůvwxyýzž', u'aabccddeeefghiijklmnnoopqrrssttuuuvwxyyzz') registry.register(CzechLanguagePack) class Translit: def __init__(self): pass @staticmethod def translit(string): # Try CZ string = translit(string, 'cs') # Normalize for RU chars string = translit(string, 'ru', reversed=True) # Remove spaces and punctuation