def get_transcriptor(lang="es_ES", alphabet="IPA", syllabic_separator=u".",
                     stress_mark=u"'", word_separator=u"|"):
    """
    Return a `Transcriptor` object for the requested language.

    :param lang: string with the ISO 639-1 code (``"es"``) or the language
                 tag (``"es_ES"`` or the IETF form ``"es-ES"``) of the
                 language to transcribe; matching is case-insensitive
    :param alphabet: string with the name of the phonetic alphabet to use;
                     it is lower-cased before being handed to the transcriptor
    :param syllabic_separator: string with the syllabic separator character;
                               a falsy value falls back to ``u"."``
    :param stress_mark: string to mark the stress in words
    :param word_separator: string with the word separator character
    :return: a `Transcriptor` object
    :raises NotLanguageSupported: if `lang` is not a supported language
    """
    # A falsy separator (empty string, None) falls back to the default.
    if not syllabic_separator:
        syllabic_separator = u"."
    alphabet = alphabet.lower()

    # Language identification. IETF tags separate subtags with "-" while the
    # locale-style identifiers used here use "_"; accept both spellings.
    normalized_lang = lang.lower().replace("-", "_")
    if normalized_lang not in ("es", "es_es"):
        raise NotLanguageSupported(lang)

    lang = "es_ES"
    hyphenator = get_hyphenator(lang)
    # The language module is imported lazily, only once it is requested.
    module = import_module("stevens.languages.es.castillian")
    return module.Transcriptor(
        hyphenator=hyphenator,
        syllabic_separator=syllabic_separator,
        word_separator=word_separator,
        alphabet=alphabet,
        stress_mark=stress_mark,
    )
def __init__(self, *args, **kwargs):
    """
    Initialize the transcriptor and the lookup tables it works with.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. If no hyphenator is available afterwards, an
    ``"es_ES"`` one is obtained through `get_hyphenator`.
    """
    super(Transcriptor, self).__init__(*args, **kwargs)

    # NOTE(review): `_hyphenator` is presumably set by the parent
    # initializer from the `hyphenator` keyword -- confirm.
    if not self._hyphenator:
        self._hyphenator = get_hyphenator("es_ES")

    # Compiled patterns.
    # Runs of spaces, punctuation marks, newlines and tabs.
    self._punctuation = re.compile(r"[ \.,\?\!¡¿\n\t]+")
    # A single unaccented vowel, "n" or "s" (presumably used for the
    # default stress rule -- confirm against the callers).
    self._grave = re.compile(u'[aeiouns]')
    # A single vowel carrying an acute accent.
    self._irregular = re.compile(u'[áéíóú]')

    # Two-letter sequences mapped to the single symbol that replaces them.
    digraphs = {
        u'rr': u'R',
        u'll': u'ʎ',
        u'ch': u'ʧ',
        u'gu': u'g',
        u'qu': u'q',
    }
    self._double_consonants = digraphs
    self._double_consonants_set = set(digraphs)

    # Character classes, each stored as a plain string of member symbols.
    self._nasals = u'mnñ'
    self._laterals = u'l'
    self._vowels = u'aeiouáéíóú'
    self._voiced = u'aeioubdglmnrRvw'
    self._voiced_consonants = u'bdglmnrRv'
    self._labiodentals = u'fv'
    self._coronals = u'dlrnstzʧ'
    self._palatals = u'yʎ'
    self._bilabials = u'bmp'
    self._velars = u'gjq'

    # Symbol stored as the pause marker.
    self._pause = u'‖'