Example #1
0
def handle_backspace(converted_string, raw_sequence):
    """
    Returns a new converted_string and a new raw_sequence
    after a backspace.

    The last character of converted_string is removed, and the keys taken
    to have produced it are removed from raw_sequence:

    - the character carries both a mark and an accent: the last three keys;
    - it carries only one of the two: the last two keys;
    - otherwise: the right-most occurrence of that character in
      raw_sequence, or simply the last key when it does not occur at all.

    If converted_string is empty there is nothing to delete and both
    arguments are returned unchanged.
    """
    # I can't find a simple explanation for this, so
    # I hope this example can help clarify it:
    #
    # handle_backspace(thương, thuwongw) -> (thươn, thuwonw)
    # handle_backspace(thươn, thuwonw) -> (thươ, thuwow)
    # handle_backspace(thươ, thuwow) -> (thư, thuw)
    # handle_backspace(thư, thuw) -> (th, th)
    #
    # The algorithm for handle_backspace was contributed by @hainp.

    # Nothing on screen to delete: avoid indexing into an empty string.
    if not converted_string:
        return converted_string, raw_sequence

    deleted_char = converted_string[-1]
    converted_string = converted_string[:-1]

    _accent = accent.get_accent_char(deleted_char)
    _mark = mark.get_mark_char(deleted_char)

    if _mark and _accent:
        # Base letter + mark key + accent key.
        raw_sequence = raw_sequence[:-3]
    elif _mark or _accent:
        # Base letter + one modifier key.
        raw_sequence = raw_sequence[:-2]
    else:
        index = raw_sequence.rfind(deleted_char)
        if index == -1:
            # rfind() found nothing. Slicing with -1 here would glue
            # raw_sequence[:-1] and the whole raw_sequence together, so
            # fall back to dropping the last key instead.
            raw_sequence = raw_sequence[:-1]
        else:
            raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):]

    return converted_string, raw_sequence
def make_im_list(word):
    """
    Build the list of input method keys that can be used to type `word`.

    Every entry is a list with one slot per character of `word`. The slot
    at the position of the character the key applies to holds the key; all
    other slots are empty strings.

    For each character, an entry is added for its accent (looked up in
    `reversed_accents`) when it has one, followed by an entry for its mark
    when it has one: "w" for horn and breve, the stripped character itself
    for any other mark.

    >>> make_im_list("bến")
    [['', 's', ''], ['', 'e', '']]
    """
    width = len(word)
    rows = []

    def add_row(position, key):
        # One row per key: empty strings everywhere except at `position`.
        padding_before = [""] * position
        padding_after = [""] * (width - position - 1)
        rows.append(padding_before + [key] + padding_after)

    for position, letter in enumerate(word):
        letter_accent = accent.get_accent_char(letter)
        letter_mark = mark.get_mark_char(letter)
        bare_letter = strip(letter)

        if letter_accent != accent.Accent.NONE:
            add_row(position, reversed_accents[letter_accent])

        if letter_mark != mark.Mark.NONE:
            uses_w = letter_mark in [mark.Mark.HORN, mark.Mark.BREVE]
            add_row(position, "w" if uses_w else bare_letter)

    return rows
Example #3
0
def handle_backspace(converted_string, raw_sequence, im_rules=None):
    """
    Returns a new raw_sequence after a backspace. This raw_sequence should
    be pushed back to process_sequence().

    converted_string is the text currently shown; its last character is
    the one being deleted. raw_sequence is the key sequence that was typed.
    im_rules is the input method definition; when it is None the result of
    get_telex_definition() is used.

    If converted_string is empty there is nothing to delete and
    raw_sequence is returned unchanged.
    """
    # I can't find a simple explanation for this, so
    # I hope this example can help clarify it:
    #
    # handle_backspace(thương, thuwongw) -> thuwonw
    # handle_backspace(thươn, thuwonw) -> thuwow
    # handle_backspace(thươ, thuwow) -> thuw
    # handle_backspace(thư, thuw) -> th
    #
    # The algorithm for handle_backspace was contributed by @hainp.

    # Nothing on screen to delete: avoid indexing into an empty string.
    if not converted_string:
        return raw_sequence

    if im_rules is None:
        im_rules = get_telex_definition()

    deleted_char = converted_string[-1]

    _accent = accent.get_accent_char(deleted_char)
    _mark = mark.get_mark_char(deleted_char)

    if _mark or _accent:
        # Find where the trailing run of IM keys / "aeiouyd" letters
        # starts in raw_sequence. `start` stops just after the first key
        # (scanning from the right) that is neither, or at 0 when the
        # whole sequence qualifies -- never at -1, which would make the
        # search below begin at the last key only.
        start = len(raw_sequence)
        while start > 0:
            key = raw_sequence[start - 1]
            if key not in im_rules and key not in "aeiouyd":
                break
            start -= 1

        # Try to find a subsequence from that run that can be converted
        # to the deleted_char, longest candidate first.
        for cut in range(start, len(raw_sequence)):
            if process_sequence(raw_sequence[cut:],
                                im_rules) == deleted_char:
                # Delete that subsequence
                raw_sequence = raw_sequence[:cut]
                break
    else:
        index = raw_sequence.rfind(deleted_char)
        if index == -1:
            # rfind() found nothing. Slicing with -1 here would glue
            # raw_sequence[:-1] and the whole raw_sequence together, so
            # fall back to dropping the last key instead.
            raw_sequence = raw_sequence[:-1]
        else:
            raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):]

    return raw_sequence
Example #4
0
def handle_backspace(converted_string, raw_sequence, im_rules=None):
    """
    Returns a new raw_sequence after a backspace. This raw_sequence should
    be pushed back to process_sequence().

    converted_string is the text currently shown; its last character is
    the one being deleted. raw_sequence is the key sequence that was typed.
    im_rules is the input method definition; when it is None the result of
    get_telex_definition() is used.

    If converted_string is empty there is nothing to delete and
    raw_sequence is returned unchanged.
    """
    # I can't find a simple explanation for this, so
    # I hope this example can help clarify it:
    #
    # handle_backspace(thương, thuwongw) -> thuwonw
    # handle_backspace(thươn, thuwonw) -> thuwow
    # handle_backspace(thươ, thuwow) -> thuw
    # handle_backspace(thư, thuw) -> th
    #
    # The algorithm for handle_backspace was contributed by @hainp.

    # Nothing on screen to delete: avoid indexing into an empty string.
    if not converted_string:
        return raw_sequence

    if im_rules is None:
        im_rules = get_telex_definition()

    deleted_char = converted_string[-1]

    _accent = accent.get_accent_char(deleted_char)
    _mark = mark.get_mark_char(deleted_char)

    if _mark or _accent:
        # Find where the trailing run of IM keys / "aeiouyd" letters
        # starts in raw_sequence. `start` stops just after the first key
        # (scanning from the right) that is neither, or at 0 when the
        # whole sequence qualifies -- never at -1, which would make the
        # search below begin at the last key only.
        start = len(raw_sequence)
        while start > 0:
            key = raw_sequence[start - 1]
            if key not in im_rules and key not in "aeiouyd":
                break
            start -= 1

        # Try to find a subsequence from that run that can be converted
        # to the deleted_char, longest candidate first.
        for cut in range(start, len(raw_sequence)):
            if process_sequence(raw_sequence[cut:], im_rules) == deleted_char:
                # Delete that subsequence
                raw_sequence = raw_sequence[:cut]
                break
    else:
        index = raw_sequence.rfind(deleted_char)
        if index == -1:
            # rfind() found nothing. Slicing with -1 here would glue
            # raw_sequence[:-1] and the whole raw_sequence together, so
            # fall back to dropping the last key instead.
            raw_sequence = raw_sequence[:-1]
        else:
            raw_sequence = raw_sequence[:index] + raw_sequence[(index + 1):]

    return raw_sequence
Example #5
0
def add_mark_char(char, mark):
    """
    Add mark to a single char.

    An empty `char` yields an empty string. Otherwise the character's
    upper-case status and accent are recorded, the character is lower-cased
    and passed through accent.add_accent_char(..., Accent.NONE), the letter
    for the requested mark is chosen, and finally the recorded accent and
    case are re-applied through accent.add_accent_char and
    utils.change_case.

    When the letter family does not fit the mark (for example Mark.BAR on a
    character outside FAMILY_D), or the mark is not one of the Mark values
    handled here, the base letter is left as it is.
    """
    if char == "":
        return ""

    was_upper = char.isupper()
    original_accent = accent.get_accent_char(char)
    base = accent.add_accent_char(char.lower(), Accent.NONE)

    def pick(*candidates):
        # First (family, letter) pair whose family contains the base
        # letter wins; with no match the base letter is kept.
        for family, letter in candidates:
            if base in family:
                return letter
        return base

    marked = base
    if mark == Mark.HAT:
        marked = pick((FAMILY_A, "â"), (FAMILY_O, "ô"), (FAMILY_E, "ê"))
    elif mark == Mark.HORN:
        marked = pick((FAMILY_O, "ơ"), (FAMILY_U, "ư"))
    elif mark == Mark.BREVE:
        marked = pick((FAMILY_A, "ă"))
    elif mark == Mark.BAR:
        marked = pick((FAMILY_D, "đ"))
    elif mark == Mark.NONE:
        marked = pick(
            (FAMILY_A, "a"),
            (FAMILY_E, "e"),
            (FAMILY_O, "o"),
            (FAMILY_U, "u"),
            (FAMILY_D, "d"),
        )

    accented = accent.add_accent_char(marked, original_accent)
    return utils.change_case(accented, was_upper)