def get_transformation_list(key, im, case=0):
    """
    Return the list of transformations inferred from the entered key.

    The map between keys and transformations is given by ``im`` (per the
    original notes: taken from module bogo_config if it exists, otherwise
    from simple_telex_im).  A value in ``im`` may be a single
    transformation string or a list of them.

    Parameters:
        key  - the entered key.  An exact match in ``im`` is preferred;
               otherwise ``key.lower()`` is looked up.
        im   - mapping from key to transformation(s).  It is never
               modified by this function.
        case - forwarded to ``utils.change_case`` for transformations that
               start with u'<'.

    Returns:
        A new list of transformation strings.  Transformations starting
        with u'<' are rebuilt as u'<' plus their case-adjusted second
        character (anything after the second character is dropped).
        If the key is not in ``im``, returns [u"+key"], meaning "append
        the entered key to the current text".
    """
    lkey = key if key in im else key.lower()

    if lkey not in im:
        # NOTE: relies on the Python 2 ``unicode`` builtin (unless the
        # module defines a compatibility alias elsewhere).
        return [u'+' + unicode(key)]

    entry = im[lkey]
    candidates = entry if isinstance(entry, list) else [entry]

    # Build a fresh list instead of assigning into ``candidates``: when the
    # table entry is a list, ``candidates`` *is* that list, and writing to it
    # would silently rewrite the caller's input-method definition.
    return [
        trans[0] + utils.change_case(trans[1], case)
        if trans[0] == u'<' else trans
        for trans in candidates
    ]
def rep_chars(word, offset):
    """Walk ``word`` and advance ``offset`` past its leading repeated char.

    Every character of ``word`` is visited from the start and ``offset`` is
    incremented once per visited character.  The walk stops at the first
    character that differs from both ``word[0]`` and the value
    ``utils.change_case`` returns for ``word[0]``, as long as
    ``word[offset:]`` is still non-empty at that point.

    Returns:
        ``offset`` as it stands when the walk stops early, or the final
        ``offset`` minus one when all characters have been visited.

    Raises:
        IndexError: if ``word`` is empty.
    """
    first = word[0]
    swapped = utils.change_case(first)
    for current in word:
        breaks_run = current not in (first, swapped)
        if breaks_run and word[offset:]:
            return offset
        offset += 1
    return offset - 1
def _next(possible, word, prev):
    """Choose a character from ``possible`` based on the start of ``word``.

    Parameters:
        possible - collection of candidate characters; it is tested with
                   ``in`` and passed to ``set.intersection``.
        word     - non-empty sequence of characters to read from.
        prev     - the previously chosen character; compared against the
                   first character of ``word`` and its case-changed form.

    Returns:
        A ``(char, offset)`` tuple.  When a match is found in the loop,
        ``offset`` is one past the position in ``word`` that produced it.
        Otherwise the result of ``check_final_run(word, prev, offset)``
        is returned unchanged.
    """
    offset = 0
    vowels = {'A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u'}
    cur = word[offset]
    temp = utils.change_case(cur)
    if prev == cur or prev == temp:
        # The word starts with the same letter as the previous result
        # (in either case): jump to the position reported by rep_chars.
        offset = rep_chars(word, offset)
    # Scan backwards from ``offset`` towards the start of the word until
    # some position yields a usable character.
    while 1:
        cur = word[offset]
        temp = utils.change_case(cur)
        # Exact character is allowed.
        if cur in possible:
            offset += 1
            return cur, offset
        # The case-changed character is allowed.
        if temp in possible:
            offset += 1
            return temp, offset
        # An ASCII vowel may stand in for any allowed ASCII vowel.
        if cur in vowels:
            pos_v = vowels.intersection(possible)
            if pos_v:
                offset += 1
                # set.pop() returns an arbitrary element, so the vowel
                # chosen is unspecified when several are allowed.
                return pos_v.pop(), offset
        # Nothing matched at position 0: give up scanning.
        if offset < 1:
            break
        offset -= 1
    # No position matched; defer to check_final_run for the result.
    char, offset = check_final_run(word, prev, offset)
    return char, offset
def add_accent_char(char, accent):
    """
    Add accent to a single char.

    Parameters:
        char   - the character to modify; u'' is returned unchanged.
        accent - described by the original notes as a member of class
                 Accent.  It is subtracted from an index into
                 ``utils.VOWELS``, so it must behave like an integer.

    Returns:
        The re-accented character, passed through ``utils.change_case``
        with the upper-case flag of the input.  Characters not found in
        ``utils.VOWELS`` (after lower-casing) only go through that case
        step.
    """
    if char == u'':
        return u''

    was_upper = char.isupper()
    lowered = char.lower()
    position = utils.VOWELS.find(lowered)
    if position == -1:
        return utils.change_case(lowered, was_upper)

    # Round down to the start of the 6-wide group containing ``position``
    # and move to its last slot; ``accent`` then counts back from there.
    # (Presumably each group holds the accent variants of one vowel.)
    group_last = position - position % 6 + 5
    return utils.change_case(utils.VOWELS[group_last - accent], was_upper)
def add_mark_char(char, mark):
    """
    Add mark to a single char.

    The character's accent (from ``accent.get_accent_char``) and its
    upper-case flag are remembered, the accent is removed and the char is
    lower-cased, the mark substitution is applied, and the accent and case
    are then put back.

    Parameters:
        char - the character to modify; u'' is returned unchanged.
        mark - compared against Mark.HAT, Mark.HORN, Mark.BREVE, Mark.BAR
               and Mark.NONE.  Any other value, or a char outside the
               families listed for the mark, leaves the base char as is.

    Returns:
        The marked character with the original accent and case restored.
    """
    if char == u'':
        return u''

    was_upper = char.isupper()
    tone = accent.get_accent_char(char)
    base = accent.add_accent_char(char.lower(), Accent.NONE)

    # For each mark: ordered (family, replacement) rules; first hit wins.
    substitutions = (
        (Mark.HAT, ((FAMILY_A, u"â"), (FAMILY_O, u"ô"), (FAMILY_E, u"ê"))),
        (Mark.HORN, ((FAMILY_O, u"ơ"), (FAMILY_U, u"ư"))),
        (Mark.BREVE, ((FAMILY_A, u"ă"),)),
        (Mark.BAR, ((FAMILY_D, u"đ"),)),
        (Mark.NONE, ((FAMILY_A, u"a"), (FAMILY_E, u"e"), (FAMILY_O, u"o"),
                     (FAMILY_U, u"u"), (FAMILY_D, u"d"))),
    )

    marked = base
    for known_mark, rules in substitutions:
        if mark == known_mark:
            for family, replacement in rules:
                if base in family:
                    marked = replacement
                    break
            # Only the first matching mark is ever considered.
            break

    restored = accent.add_accent_char(marked, tone)
    return utils.change_case(restored, was_upper)
def gen_error(word):
    """Return ``word`` with random typing errors injected.

    Non-alphabetic characters are copied through untouched.  For each
    alphabetic character a number from 1 to 4 is drawn and compared with
    the module constants:

    * VOWEL (only if ``utils.is_vowel`` accepts the char): replace it with
      a random element of the module-level ``vowels``.
    * CAPITALIZATION: replace it with ``utils.change_case(char)``.
    * REPETITION: emit the char one or more times, stopping after the
      first non-zero draw of ``random.randint(0, 1)``.
    * anything else: keep the char as is.
    """
    pieces = []
    for letter in word:
        if letter.isalpha():
            action = random.randint(1, 4)
            if action == VOWEL and utils.is_vowel(letter):
                pieces.append(random.choice(vowels))
            elif action == CAPITALIZATION:
                pieces.append(utils.change_case(letter))
            elif action == REPETITION:
                # Always emit once, then keep repeating on each zero draw.
                pieces.append(letter)
                while not random.randint(0, 1):
                    pieces.append(letter)
            else:
                pieces.append(letter)
        else:
            pieces.append(letter)
    return ''.join(pieces)
def is_valid_combination(components):
    """Check if a character combination complies to Vietnamese spelling.

    Input:
        components - a list of the form [u'c', u'a', u'm']: starting
                     consonant, vowel, ending consonant.  The list itself
                     is not modified.
    Output:
        True if OK, False otherwise.
    """
    # All checks run on a lower-cased private copy.
    comps = [utils.change_case(part, 0) for part in components]

    # A lone starting chunk with no vowel and no ending is always accepted.
    # This deliberately lets 'đ' appear in abbreviations like 'đm', 'đc',
    # 'kgcđ', etc.; a stricter per-character consonant check is disabled.
    if comps[0] and not (comps[1] or comps[2]):
        return True

    # The starting sound, if any, must be a proper consonant.
    if comps[0] != u'' and comps[0] not in CONSONANTS:
        return False

    # The ending sound, if any, must be a proper ending consonant.
    if comps[2] != u'' and comps[2] not in ENDING_CONSONANTS:
        return False

    vowel = accent.remove_accent_string(comps[1])
    if len(vowel) > 1:
        in_open = vowel in OPEN_COMPOUND_VOWELS
        in_closed = vowel in CLOSED_COMPOUND_VOWELS
        # A compound vowel must be known at all...
        if not (in_open or in_closed):
            return False
        # ...and a closed-only compound cannot take an ending consonant.
        if in_closed and not in_open and comps[2] != u'':
            return False

    # 'ăch'?
    if comps[2] == u'ch':
        # Substring test against the string of vowels (so an empty vowel
        # also matches).
        if vowel in u'ăâeôơuư':
            return False
        if vowel in OPEN_COMPOUND_VOWELS and \
                vowel not in CLOSED_COMPOUND_VOWELS:
            return False

    # 'ương' is ok but 'ơng' ?
    # Again a substring test, so an empty vowel is rejected here too.
    if comps[2] == u'ng' and vowel in u'ơ':
        return False

    # A rule rejecting 'ê' before a final 'c' is intentionally absent:
    # sadly, it interferes with 'nhếch'.

    # Get the first accent carried by any character of the vowel part.
    found = (accent.get_accent_char(ch) for ch in comps[1])
    ac = next((tone for tone in found if tone != Accent.NONE), Accent.NONE)

    # These consonants can only go with ACUTE, DOT or NONE accents
    if comps[2] in (u'c', u'p', u't', u'ch') and \
            ac not in (Accent.NONE, Accent.ACUTE, Accent.DOT):
        return False

    return True