def add_mark(components, mark):
    """
    Return a copy of `components` with `mark` applied.

    `components` is indexed at positions 0 and 1 (the part that may end
    with a character from FAMILY_D, and the vowel part). The input is
    copied first; the copy is what gets modified and returned.

    Mark.BAR is applied to the last character of components[0] when that
    character belongs to FAMILY_D. Every other mark is placed in the vowel
    part, at a position chosen from the vowel with accents and marks
    stripped.

    Mark.NONE is not handled here; callers deal with it separately.
    """
    parts = list(components)

    # The bar goes on the trailing character of the first component.
    if mark == Mark.BAR and parts[0] and parts[0][-1].lower() in FAMILY_D:
        parts[0] = add_mark_at(parts[0], len(parts[0]) - 1, Mark.BAR)
        return parts

    # Strip every accent and mark from the vowel so positions can be
    # located on plain letters.
    unaccented = accent.add_accent(parts, Accent.NONE)[1].lower()
    bare_vowel = utils.join(
        [add_mark_char(ch, Mark.NONE) for ch in unaccented])

    if mark == Mark.HAT:
        # Right-most first occurrence among a / o / e (-1 if none).
        target = max(bare_vowel.find(v) for v in (u"a", u"o", u"e"))
        parts[1] = add_mark_at(parts[1], target, Mark.HAT)
    elif mark == Mark.BREVE:
        if bare_vowel != u"ua":
            target = bare_vowel.find(u"a")
            parts[1] = add_mark_at(parts[1], target, Mark.BREVE)
    elif mark == Mark.HORN:
        if bare_vowel in (u"uo", u"uoi", u"uou"):
            # Both of the first two vowel characters take the horn.
            horned_pair = utils.join(
                [add_mark_char(ch, Mark.HORN) for ch in parts[1][:2]])
            parts[1] = horned_pair + parts[1][2:]
        elif bare_vowel == u"oa":
            parts[1] = add_mark_at(parts[1], 1, Mark.HORN)
        else:
            target = max(bare_vowel.find(v) for v in (u"u", u"o"))
            parts[1] = add_mark_at(parts[1], target, Mark.HORN)

    return parts
def add_mark(components, mark):
    """
    Return a copy of `components` with `mark` applied.

    `components` is indexed at positions 0 and 1 (the part that may end
    with a character from FAMILY_D, and the vowel part). The input is
    copied first; the copy is what gets modified and returned.

    Mark.BAR is applied to the last character of components[0] when that
    character belongs to FAMILY_D. Every other mark is placed in the vowel
    part, at a position chosen from the vowel with accents and marks
    stripped.

    Mark.NONE is not handled here; callers deal with it separately.
    """
    parts = list(components)

    # The bar goes on the trailing character of the first component.
    if mark == Mark.BAR and parts[0] and parts[0][-1].lower() in FAMILY_D:
        parts[0] = add_mark_at(parts[0], len(parts[0]) - 1, Mark.BAR)
        return parts

    # Strip every accent and mark from the vowel so positions can be
    # located on plain letters.
    unaccented = accent.add_accent(parts, Accent.NONE)[1].lower()
    bare_vowel = utils.join(
        [add_mark_char(ch, Mark.NONE) for ch in unaccented])

    if mark == Mark.HAT:
        # Right-most first occurrence among a / o / e (-1 if none).
        target = max(bare_vowel.find(v) for v in (u"a", u"o", u"e"))
        parts[1] = add_mark_at(parts[1], target, Mark.HAT)
    elif mark == Mark.BREVE:
        if bare_vowel != u"ua":
            target = bare_vowel.find(u"a")
            parts[1] = add_mark_at(parts[1], target, Mark.BREVE)
    elif mark == Mark.HORN:
        if bare_vowel in (u"uo", u"uoi", u"uou"):
            # Both of the first two vowel characters take the horn.
            horned_pair = utils.join(
                [add_mark_char(ch, Mark.HORN) for ch in parts[1][:2]])
            parts[1] = horned_pair + parts[1][2:]
        elif bare_vowel == u"oa":
            parts[1] = add_mark_at(parts[1], 1, Mark.HORN)
        else:
            target = max(bare_vowel.find(v) for v in (u"u", u"o"))
            parts[1] = add_mark_at(parts[1], target, Mark.HORN)

    return parts
def reverse(components, trans):
    """
    Reverse the effect of transformation `trans` on `components`.

    If the transformation does not affect the components, an unchanged
    copy of them is returned.

    Workflow:
    - Find the part of the components that is affected by the
      transformation.
    - Bring that part back to its original state: drop the trailing
      character for an ADD_CHAR action, remove the accent for an
      ADD_ACCENT action, remove the mark for an ADD_MARK action.

    `components` is indexed at positions 0, 1 and 2; `trans` is the
    transformation string decoded by get_action(), with trans[1] being
    the character compared against for ADD_CHAR.

    Returns a list; the input sequence is copied before any change.
    """
    action, factor = get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # A slice is used instead of string[-1] so that an entirely empty set
    # of components yields "" rather than raising IndexError.
    if action == Action.ADD_CHAR and string[-1:] == trans[1]:
        # The appended character lives in the last non-empty component.
        if comps[2]:
            i = 2
        elif comps[1]:
            i = 1
        else:
            i = 0
        comps[i] = comps[i][:-1]
    elif action == Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == Action.ADD_MARK:
        if factor == Mark.BAR:
            # Only the last character of the first component carries a bar.
            comps[0] = comps[0][:-1] + \
                mark.add_mark_char(comps[0][-1:], Mark.NONE)
        elif mark.is_valid_mark(comps, trans):
            comps[1] = u"".join(
                [mark.add_mark_char(c, Mark.NONE) for c in comps[1]])
    return comps
def reverse(components, trans):
    """
    Reverse the effect of transformation `trans` on `components`.

    If the transformation does not affect the components, an unchanged
    copy of them is returned.

    Workflow:
    - Find the part of the components that is affected by the
      transformation.
    - Bring that part back to its original state: drop the trailing
      character for an ADD_CHAR action, remove the accent for an
      ADD_ACCENT action, remove the mark for an ADD_MARK action.

    `components` is indexed at positions 0, 1 and 2; `trans` is the
    transformation string decoded by get_action(), with trans[1] being
    the character compared against for ADD_CHAR.

    Returns a list; the input sequence is copied before any change.
    """
    action, factor = get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # A slice is used instead of string[-1] so that an entirely empty set
    # of components yields "" rather than raising IndexError.
    if action == Action.ADD_CHAR and string[-1:] == trans[1]:
        # The appended character lives in the last non-empty component.
        if comps[2]:
            i = 2
        elif comps[1]:
            i = 1
        else:
            i = 0
        comps[i] = comps[i][:-1]
    elif action == Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == Action.ADD_MARK:
        if factor == Mark.BAR:
            # Only the last character of the first component carries a bar.
            comps[0] = comps[0][:-1] + \
                mark.add_mark_char(comps[0][-1:], Mark.NONE)
        elif mark.is_valid_mark(comps, trans):
            comps[1] = u"".join(
                [mark.add_mark_char(c, Mark.NONE) for c in comps[1]])
    return comps
def is_valid_mark(comps, mark_trans):
    """
    Tell whether the mark described by `mark_trans` can be added to `comps`.

    The mark is accepted in two situations:
    - mark_trans[0] is 'd' and the first component ends with 'd' or 'đ'
      (case-insensitive);
    - the vowel part (comps[1]) is non-empty and, once its accents and
      marks are stripped and it is lower-cased, contains mark_trans[0].

    Returns True or False.
    """
    parts = list(comps)

    has_vowel = parts[1] != u""
    bare_vowel = u""
    if has_vowel:
        # Reduce the vowel to plain lower-case letters before searching it.
        unaccented = accent.add_accent(parts, Accent.NONE)[1].lower()
        bare_vowel = utils.join(
            [add_mark_char(ch, Mark.NONE) for ch in unaccented])

    if mark_trans[0] == 'd' and parts[0] \
            and parts[0][-1].lower() in (u"d", u"đ"):
        return True

    return has_vowel and mark_trans[0] in bare_vowel
def is_valid_mark(comps, mark_trans):
    """
    Tell whether the mark described by `mark_trans` can be added to `comps`.

    The mark is accepted in two situations:
    - mark_trans[0] is 'd' and the first component ends with 'd' or 'đ'
      (case-insensitive);
    - the vowel part (comps[1]) is non-empty and, once its accents and
      marks are stripped and it is lower-cased, contains mark_trans[0].

    Returns True or False.
    """
    parts = list(comps)

    has_vowel = parts[1] != u""
    bare_vowel = u""
    if has_vowel:
        # Reduce the vowel to plain lower-case letters before searching it.
        unaccented = accent.add_accent(parts, Accent.NONE)[1].lower()
        bare_vowel = utils.join(
            [add_mark_char(ch, Mark.NONE) for ch in unaccented])

    if mark_trans[0] == 'd' and parts[0] \
            and parts[0][-1].lower() in (u"d", u"đ"):
        return True

    return has_vowel and mark_trans[0] in bare_vowel
def transform(comps, trans):
    """
    Apply the transformation `trans` to `comps` and return the result.

    `comps` is indexed at positions 0, 1 and 2 (presumably leading
    consonant, vowel and trailing consonant -- see the examples in the
    '+' branch). It is copied into a new list before anything is changed.

    `trans` is indexable: trans[0] selects the kind of operation and
    trans[1] is the character involved.
    - '<': conditionally append trans[1] to the vowel part; only
      considered while comps[2] is empty.
    - '+': append trans[1] to the appropriate component, then re-apply
      any accent found in the vowel part, and return.
    - anything else: decoded by get_action() into an accent or a mark to
      add.

    Returns the transformed list of components.
    """
    components = list(comps)

    # Special case for 'ư, ơ'
    #if trans[0] == '<' and not trans[1] in (u'ư', u'ơ', u'Ư', u'Ơ'):
    #    trans = '+' + trans[1]
    # (Not our job)

    # NOTE(review): apart from the undo case, this branch does not return;
    # control continues to the get_action() dispatch at the bottom.
    # Confirm this is intended.
    if trans[0] == u'<':
        if not components[2]:
            # Undo operation
            if components[1][-1:] == trans[1]:
                return components
            # Only allow ư, ơ or ươ sitting alone in the middle part
            elif not components[1] or \
                    (components[1].lower() == u'ư' and trans[1].lower() == u'ơ'):
                components[1] += trans[1]
            # Quite a hack. If you want to type gi[f = 'giờ', separate()
            # will create ['g', 'i', '']. Therefore we have to allow
            # components[1] == 'i'.
            elif components[1].lower() == 'i' and components[0].lower() == 'g':
                components[1] += trans[1]
                # Re-split the joined string so the components are
                # recomputed after the append.
                components = separate(utils.join(components))

    if trans[0] == u'+':
        # See this and you'll understand:
        #   transform([u'nn', '', ''],'+n') = [u'nnn', '', '']
        #   transform([u'c', '', ''],'+o')  = [u'c', 'o', '']
        #   transform([u'c', 'o', ''],'+o') = [u'c', 'oo', '']
        #   transform([u'c', 'o', ''],'+n') = [u'c', 'o', 'n']
        if components[1] == u'':
            # No vowel yet: a vowel starts the vowel part, anything else
            # extends the first component.
            if utils.is_vowel(trans[1]):
                components[1] += trans[1]
            else:
                components[0] += trans[1]
        else:
            # A vowel extends the vowel part only while the last component
            # is still empty; otherwise the character goes to the last
            # component.
            if components[2] == u'' and utils.is_vowel(trans[1]):
                components[1] += trans[1]
            else:
                components[2] += trans[1]

        # If there is any accent, remove and reapply it
        # because it is likely to be misplaced in previous transformations
        ac = accent.Accent.NONE
        for c in components[1]:
            ac = accent.get_accent_char(c)
            # Stop at the first character whose accent value is truthy.
            if ac:
                break

        if ac != accent.Accent.NONE:
            # Remove accent
            components = accent.add_accent(components, Accent.NONE)
            # Put the same accent back on the updated components.
            components = accent.add_accent(components, ac)
        return components

    # Remaining transformations: get_action() yields the action kind and
    # its factor (the accent or mark to apply).
    action, factor = get_action(trans)
    if action == Action.ADD_ACCENT:
        components = accent.add_accent(components, factor)
    elif action == Action.ADD_MARK:
        # A mark that is not valid for these components is ignored.
        if (mark.is_valid_mark(components, trans)):
            components = mark.add_mark(components, factor)
    return components
def transform(comps, trans):
    """
    Apply the transformation `trans` to `comps` and return the result.

    `comps` is indexed at positions 0, 1 and 2 (presumably leading
    consonant, vowel and trailing consonant -- see the examples in the
    '+' branch). It is copied into a new list before anything is changed.

    `trans` is indexable: trans[0] selects the kind of operation and
    trans[1] is the character involved.
    - '<': conditionally append trans[1] to the vowel part; only
      considered while comps[2] is empty.
    - '+': append trans[1] to the appropriate component, then re-apply
      any accent found in the vowel part, and return.
    - anything else: decoded by get_action() into an accent or a mark to
      add.

    Returns the transformed list of components.
    """
    components = list(comps)

    # Special case for 'ư, ơ'
    #if trans[0] == '<' and not trans[1] in (u'ư', u'ơ', u'Ư', u'Ơ'):
    #    trans = '+' + trans[1]
    # (Not our job)

    # NOTE(review): apart from the undo case, this branch does not return;
    # control continues to the get_action() dispatch at the bottom.
    # Confirm this is intended.
    if trans[0] == u'<':
        if not components[2]:
            # Undo operation
            if components[1][-1:] == trans[1]:
                return components
            # Only allow ư, ơ or ươ sitting alone in the middle part
            elif not components[1] or \
                    (components[1].lower() == u'ư' and trans[1].lower() == u'ơ'):
                components[1] += trans[1]
            # Quite a hack. If you want to type gi[f = 'giờ', separate()
            # will create ['g', 'i', '']. Therefore we have to allow
            # components[1] == 'i'.
            elif components[1].lower() == 'i' and components[0].lower() == 'g':
                components[1] += trans[1]
                # Re-split the joined string so the components are
                # recomputed after the append.
                components = separate(utils.join(components))

    if trans[0] == u'+':
        # See this and you'll understand:
        #   transform([u'nn', '', ''],'+n') = [u'nnn', '', '']
        #   transform([u'c', '', ''],'+o')  = [u'c', 'o', '']
        #   transform([u'c', 'o', ''],'+o') = [u'c', 'oo', '']
        #   transform([u'c', 'o', ''],'+n') = [u'c', 'o', 'n']
        if components[1] == u'':
            # No vowel yet: a vowel starts the vowel part, anything else
            # extends the first component.
            if utils.is_vowel(trans[1]):
                components[1] += trans[1]
            else:
                components[0] += trans[1]
        else:
            # A vowel extends the vowel part only while the last component
            # is still empty; otherwise the character goes to the last
            # component.
            if components[2] == u'' and utils.is_vowel(trans[1]):
                components[1] += trans[1]
            else:
                components[2] += trans[1]

        # If there is any accent, remove and reapply it
        # because it is likely to be misplaced in previous transformations
        ac = accent.Accent.NONE
        for c in components[1]:
            ac = accent.get_accent_char(c)
            # Stop at the first character whose accent value is truthy.
            if ac:
                break

        if ac != accent.Accent.NONE:
            # Remove accent
            components = accent.add_accent(components, Accent.NONE)
            # Put the same accent back on the updated components.
            components = accent.add_accent(components, ac)
        return components

    # Remaining transformations: get_action() yields the action kind and
    # its factor (the accent or mark to apply).
    action, factor = get_action (trans)
    if action == Action.ADD_ACCENT:
        components = accent.add_accent(components, factor)
    elif action == Action.ADD_MARK:
        # A mark that is not valid for these components is ignored.
        if (mark.is_valid_mark(components, trans)):
            components = mark.add_mark(components, factor)
    return components