コード例 #1
0
def add_mark(components, mark):
    """
    Apply `mark` to a word given as a sequence of components.

    Args:
        components: indexable sequence of strings. Index 0 is the part that
            may receive the bar (its last character is checked against
            FAMILY_D); index 1 is the vowel part that receives every other
            mark.
            NOTE(review): presumably the [first consonant, vowel, last
            consonant] triple used by the rest of the project -- confirm.
        mark: one of the `Mark` members (BAR, HAT, BREVE, HORN, NONE).

    Returns:
        A new list of components; `components` itself is not modified.
    """
    comp = list(components)

    # The bar only ever applies to the last character of the first part.
    if mark == Mark.BAR and comp[0] and comp[0][-1].lower() in FAMILY_D:
        comp[0] = add_mark_at(comp[0], len(comp[0]) - 1, Mark.BAR)
        return comp

    # Remove all marks and accents in the vowel part (lower-cased) so the
    # position of the target letter can be located with plain str.find().
    raw_vowel = accent.add_accent(comp, Accent.NONE)[1].lower()
    raw_vowel = utils.join([add_mark_char(c, Mark.NONE) for c in raw_vowel])

    # In the branches below str.find() yields -1 when the letter is absent;
    # that -1 is handed to add_mark_at() unchanged.
    # NOTE(review): add_mark_at() is assumed to treat -1 as "nothing to do"
    # -- confirm.
    if mark == Mark.HAT:
        pos = max(raw_vowel.find("a"), raw_vowel.find("o"),
                  raw_vowel.find("e"))
        comp[1] = add_mark_at(comp[1], pos, Mark.HAT)
    elif mark == Mark.BREVE:
        # "ua" is deliberately left untouched by the breve.
        if raw_vowel != "ua":
            comp[1] = add_mark_at(comp[1], raw_vowel.find("a"), Mark.BREVE)
    elif mark == Mark.HORN:
        if raw_vowel in ("uo", "uoi", "uou"):
            # Both of the first two letters take the horn.
            horned = utils.join(
                [add_mark_char(c, Mark.HORN) for c in comp[1][:2]])
            comp[1] = horned + comp[1][2:]
        elif raw_vowel == "oa":
            comp[1] = add_mark_at(comp[1], 1, Mark.HORN)
        else:
            # Look for the letters that can carry a horn.  Searching for the
            # empty string (as the code previously did) always yields 0 and
            # therefore never actually looked for "u".
            pos = max(raw_vowel.find("u"), raw_vowel.find("o"))
            comp[1] = add_mark_at(comp[1], pos, Mark.HORN)
    elif mark == Mark.NONE:
        if raw_vowel != comp[1].lower():
            # The vowel carried something: replace it by the stripped form.
            comp[1] = raw_vowel
        elif comp[0] and comp[0][-1] == "đ":
            # Nothing to strip from the vowel, so undo the bar instead.
            comp[0] = comp[0][:-1] + "d"
    return comp
コード例 #2
0
ファイル: core.py プロジェクト: fuzzysource/bogo-python
def _reverse(components, trans):
    """
    Reverse the effect of transformation 'trans' on 'components'.

    Args:
        components: indexable sequence of three strings (indices 0, 1, 2
            are all read).
        trans: transformation string understood by _get_action().

    Returns:
        The resulting components as a list. If the transformation does not
        affect the components, the list holds the same parts as
        `components`. The input sequence is never modified.
    """
    action, parameter = _get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # `string` is checked for emptiness first: indexing string[-1] on an
    # empty word would raise IndexError instead of leaving it unchanged.
    if action == _Action.ADD_CHAR and string and \
            string[-1].lower() == parameter.lower():
        # Drop the last character of the last non-empty component.
        if comps[2]:
            i = 2
        elif comps[1]:
            i = 1
        else:
            i = 0
        comps[i] = comps[i][:-1]
    elif action == _Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == _Action.ADD_MARK:
        if parameter == Mark.BAR:
            # Only the last character of the first component is processed;
            # the [-1:] slice keeps this safe when that component is empty.
            comps[0] = comps[0][:-1] + \
                mark.add_mark_char(comps[0][-1:], Mark.NONE)
        elif mark.is_valid_mark(comps, trans):
            comps[1] = "".join([mark.add_mark_char(c, Mark.NONE)
                                for c in comps[1]])
    return comps
コード例 #3
0
def _reverse(components, trans):
    """
    Reverse the effect of transformation 'trans' on 'components'.

    Args:
        components: indexable sequence of three strings (indices 0, 1, 2
            are all read).
        trans: transformation string understood by _get_action().

    Returns:
        The resulting components as a list. If the transformation does not
        affect the components, the list holds the same parts as
        `components`. The input sequence is never modified.
    """
    action, parameter = _get_action(trans)
    comps = list(components)
    string = utils.join(comps)

    # `string` is checked for emptiness first: indexing string[-1] on an
    # empty word would raise IndexError instead of leaving it unchanged.
    if action == _Action.ADD_CHAR and string and \
            string[-1].lower() == parameter.lower():
        # Drop the last character of the last non-empty component.
        if comps[2]:
            i = 2
        elif comps[1]:
            i = 1
        else:
            i = 0
        comps[i] = comps[i][:-1]
    elif action == _Action.ADD_ACCENT:
        comps = accent.add_accent(comps, Accent.NONE)
    elif action == _Action.ADD_MARK:
        if parameter == Mark.BAR:
            # Only the last character of the first component is processed;
            # the [-1:] slice keeps this safe when that component is empty.
            comps[0] = comps[0][:-1] + \
                mark.add_mark_char(comps[0][-1:], Mark.NONE)
        elif mark.is_valid_mark(comps, trans):
            comps[1] = "".join(
                [mark.add_mark_char(c, Mark.NONE) for c in comps[1]])
    return comps
コード例 #4
0
ファイル: core.py プロジェクト: fuzzysource/bogo-python
def _can_undo(comps, trans_list):
    """
    Return whether a components can be undone with one of the transformation in
    trans_list.

    Args:
        comps: indexable sequence of strings; index 1 is read as the vowel
            part, and all parts are joined to look for marks.
        trans_list: iterable of transformation strings understood by
            _get_action().

    Returns:
        True if at least one transformation matches an accent, a mark or the
        last vowel character currently present in `comps`, else False.
    """
    comps = list(comps)
    accent_list = [accent.get_accent_char(c) for c in comps[1]]
    mark_list = [mark.get_mark_char(c) for c in utils.join(comps)]
    action_list = [_get_action(trans) for trans in trans_list]

    def atomic_check(action):
        """
        Check if the `action` created one of the marks, accents, or characters
        in `comps`.
        """
        kind, parameter = action
        if kind == _Action.ADD_ACCENT:
            return parameter in accent_list
        if kind == _Action.ADD_MARK:
            return parameter in mark_list
        if kind == _Action.ADD_CHAR:
            # ơ, ư
            # An empty vowel part has no last character to compare with;
            # indexing it would raise IndexError, so treat it as "no match".
            return bool(comps[1]) and \
                parameter == accent.remove_accent_char(comps[1][-1])
        return False

    return any(atomic_check(action) for action in action_list)
コード例 #5
0
def _can_undo(comps, trans_list):
    """
    Return whether a components can be undone with one of the transformation in
    trans_list.

    Args:
        comps: indexable sequence of strings; index 1 is read as the vowel
            part, and all parts are joined to look for marks.
        trans_list: iterable of transformation strings understood by
            _get_action().

    Returns:
        True if at least one transformation matches an accent, a mark or the
        last vowel character currently present in `comps`, else False.
    """
    comps = list(comps)
    accent_list = [accent.get_accent_char(c) for c in comps[1]]
    mark_list = [mark.get_mark_char(c) for c in utils.join(comps)]
    action_list = [_get_action(trans) for trans in trans_list]

    def atomic_check(action):
        """
        Check if the `action` created one of the marks, accents, or characters
        in `comps`.
        """
        kind, parameter = action
        if kind == _Action.ADD_ACCENT:
            return parameter in accent_list
        if kind == _Action.ADD_MARK:
            return parameter in mark_list
        if kind == _Action.ADD_CHAR:
            # ơ, ư
            # An empty vowel part has no last character to compare with;
            # indexing it would raise IndexError, so treat it as "no match".
            return bool(comps[1]) and \
                parameter == accent.remove_accent_char(comps[1][-1])
        return False

    return any(atomic_check(action) for action in action_list)
コード例 #6
0
ファイル: core.py プロジェクト: fuzzysource/bogo-python
def process_key(string, key,
                fallback_sequence="", rules=None,
                skip_non_vietnamese=True):
    """Process a keystroke.

    Args:
        string: The previously processed string or "".
        key: The keystroke.
        fallback_sequence: The previous keystrokes.
        rules (optional): A dictionary listing
            transformation rules. Defaults to get_telex_definition().
        skip_non_vietnamese (optional): Whether to skip results that
            doesn't seem like Vietnamese. Defaults to True.

    Returns a tuple. The first item of which is the processed
    Vietnamese string, the second item is the next fallback sequence.
    The two items are to be fed back into the next call of process_key()
    as `string` and `fallback_sequence`. If `skip_non_vietnamese` is
    True and the resulting string doesn't look like Vietnamese,
    both items contain the `fallback_sequence`.

    >>> process_key('a', 'a', 'a')
    ('â', 'aa')

    Note that when a key is an undo key, it won't get appended to
    `fallback_sequence`.

    >>> process_key('â', 'a', 'aa')
    ('aa', 'aa')

    `rules` is a dictionary that maps keystrokes to
    their effect string. The effects can be one of the following:

    'a^': a with circumflex (â), only affect an existing 'a family'
    'a+': a with breve (ă), only affect an existing 'a family'
    'e^': e with circumflex (ê), only affect an existing 'e family'
    'o^': o with circumflex (ô), only affect an existing 'o family'
    'o*': o with horn (ơ), only affect an existing 'o family'
    'd-': d with bar (đ), only affect an existing 'd'
    '/': acute (sắc), affect an existing vowel
    '\\': grave (huyền), affect an existing vowel
    '?': hook (hỏi), affect an existing vowel
    '~': tilde (ngã), affect an existing vowel
    '.': dot (nặng), affect an existing vowel
    '<ư': append ư
    '<ơ': append ơ

    A keystroke entry can have multiple effects, in which case the
    dictionary entry's value should be a list of the possible
    effect strings. Although you should try to avoid this if
    you are defining a custom input method rule.
    """
    # TODO Figure out a way to remove the `string` argument. Perhaps only the
    #      key sequence is needed?
    if rules is None:
        rules = get_telex_definition()

    comps = utils.separate(string)

    # Find all possible transformations this keypress can generate
    trans_list = _get_transformation_list(
        key, rules, fallback_sequence)

    # Then apply them one by one
    new_comps = list(comps)
    for trans in trans_list:
        new_comps = _transform(new_comps, trans)

    if new_comps == comps:
        # Snapshot used below to tell whether the undo changed anything.
        tmp = list(new_comps)

        # If none of the transformations (if any) work
        # then this keystroke is probably an undo key.
        if _can_undo(new_comps, trans_list):
            # The prefix "_" means undo.
            for trans in trans_list:
                new_comps = _transform(new_comps, "_" + trans)

            # Undoing the w key with the TELEX input method with the
            # w:<ư extension requires some care.
            #
            # The input (ư, w) should be undone as w
            # on the other hand, (ư, uw) should return uw.
            #
            # _transform() is not aware of the 2 ways to generate
            # ư in TELEX and always think ư was created by uw.
            # Therefore, after calling _transform() to undo ư,
            # we always get ['', 'u', ''].
            #
            # So we have to clean it up a bit.
            def is_telex_like():
                # Rule sets without a "w" entry are simply not TELEX-like;
                # indexing rules["w"] directly would raise KeyError for
                # them on every undo.
                return "w" in rules and '<ư' in rules["w"]

            def undone_vowel_ends_with_u():
                return new_comps[1] and new_comps[1][-1].lower() == "u"

            def not_first_key_press():
                return len(fallback_sequence) >= 1

            def user_typed_ww():
                return (fallback_sequence[-1:]+key).lower() == "ww"

            def user_didnt_type_uww():
                return not (len(fallback_sequence) >= 2 and
                            fallback_sequence[-2].lower() == "u")

            if is_telex_like() and \
                    not_first_key_press() and \
                    undone_vowel_ends_with_u() and \
                    user_typed_ww() and \
                    user_didnt_type_uww():
                # The vowel part of new_comps is supposed to end with
                # u now. That u should be removed.
                new_comps[1] = new_comps[1][:-1]

        # Only a key that undid nothing is recorded in the fallback
        # sequence; a real undo key is left out of it.
        if tmp == new_comps:
            fallback_sequence += key
        new_comps = utils.append_comps(new_comps, key)
    else:
        fallback_sequence += key

    if skip_non_vietnamese is True and key.isalpha() and \
            not is_valid_combination(new_comps, final_form=False):
        result = fallback_sequence, fallback_sequence
    else:
        result = utils.join(new_comps), fallback_sequence

    return result
コード例 #7
0
def remove_mark_string(string):
    """
    Apply remove_mark_char() to every character of `string` and join the
    results with utils.join().
    """
    stripped = []
    for char in string:
        stripped.append(remove_mark_char(char))
    return utils.join(stripped)
コード例 #8
0
ファイル: accent.py プロジェクト: BoGoEngine/bogo-python
def remove_accent_string(string):
    """
    Strip the accent from each character of `string`.

    Every character is passed through add_accent_char() with Accent.NONE
    and the results are joined with utils.join().
    """
    unaccented = []
    for char in string:
        unaccented.append(add_accent_char(char, Accent.NONE))
    return utils.join(unaccented)
コード例 #9
0
ファイル: accent.py プロジェクト: nguyenvulebinh/bogo-python
def remove_accent_string(string):
    """
    Strip the accent from each character of `string`.

    Every character is passed through add_accent_char() with Accent.NONE
    and the results are joined with utils.join().
    """
    unaccented = []
    for char in string:
        unaccented.append(add_accent_char(char, Accent.NONE))
    return utils.join(unaccented)
コード例 #10
0
def process_key(string,
                key,
                fallback_sequence="",
                rules=None,
                skip_non_vietnamese=True):
    """Process a keystroke.

    Args:
        string: The previously processed string or "".
        key: The keystroke.
        fallback_sequence: The previous keystrokes.
        rules (optional): A dictionary listing
            transformation rules. Defaults to get_telex_definition().
        skip_non_vietnamese (optional): Whether to skip results that
            doesn't seem like Vietnamese. Defaults to True.

    Returns a tuple. The first item of which is the processed
    Vietnamese string, the second item is the next fallback sequence.
    The two items are to be fed back into the next call of process_key()
    as `string` and `fallback_sequence`. If `skip_non_vietnamese` is
    True and the resulting string doesn't look like Vietnamese,
    both items contain the `fallback_sequence`.

    >>> process_key('a', 'a', 'a')
    ('â', 'aa')

    Note that when a key is an undo key, it won't get appended to
    `fallback_sequence`.

    >>> process_key('â', 'a', 'aa')
    ('aa', 'aa')

    `rules` is a dictionary that maps keystrokes to
    their effect string. The effects can be one of the following:

    'a^': a with circumflex (â), only affect an existing 'a family'
    'a+': a with breve (ă), only affect an existing 'a family'
    'e^': e with circumflex (ê), only affect an existing 'e family'
    'o^': o with circumflex (ô), only affect an existing 'o family'
    'o*': o with horn (ơ), only affect an existing 'o family'
    'd-': d with bar (đ), only affect an existing 'd'
    '/': acute (sắc), affect an existing vowel
    '\\': grave (huyền), affect an existing vowel
    '?': hook (hỏi), affect an existing vowel
    '~': tilde (ngã), affect an existing vowel
    '.': dot (nặng), affect an existing vowel
    '<ư': append ư
    '<ơ': append ơ

    A keystroke entry can have multiple effects, in which case the
    dictionary entry's value should be a list of the possible
    effect strings. Although you should try to avoid this if
    you are defining a custom input method rule.
    """

    # TODO Figure out a way to remove the `string` argument. Perhaps only the
    #      key sequence is needed?
    if rules is None:
        rules = get_telex_definition()

    comps = utils.separate(string)

    # Find all possible transformations this keypress can generate
    trans_list = _get_transformation_list(key, rules, fallback_sequence)

    # Then apply them one by one
    new_comps = list(comps)
    for trans in trans_list:
        new_comps = _transform(new_comps, trans)

    if new_comps == comps:
        # Snapshot used below to tell whether the undo changed anything.
        tmp = list(new_comps)

        # If none of the transformations (if any) work
        # then this keystroke is probably an undo key.
        if _can_undo(new_comps, trans_list):
            # The prefix "_" means undo.
            for trans in trans_list:
                new_comps = _transform(new_comps, "_" + trans)

            # Undoing the w key with the TELEX input method with the
            # w:<ư extension requires some care.
            #
            # The input (ư, w) should be undone as w
            # on the other hand, (ư, uw) should return uw.
            #
            # _transform() is not aware of the 2 ways to generate
            # ư in TELEX and always think ư was created by uw.
            # Therefore, after calling _transform() to undo ư,
            # we always get ['', 'u', ''].
            #
            # So we have to clean it up a bit.
            def is_telex_like():
                # Rule sets without a "w" entry are simply not TELEX-like;
                # indexing rules["w"] directly would raise KeyError for
                # them on every undo.
                return "w" in rules and '<ư' in rules["w"]

            def undone_vowel_ends_with_u():
                return new_comps[1] and new_comps[1][-1].lower() == "u"

            def not_first_key_press():
                return len(fallback_sequence) >= 1

            def user_typed_ww():
                return (fallback_sequence[-1:] + key).lower() == "ww"

            def user_didnt_type_uww():
                return not (len(fallback_sequence) >= 2
                            and fallback_sequence[-2].lower() == "u")

            if is_telex_like() and \
                    not_first_key_press() and \
                    undone_vowel_ends_with_u() and \
                    user_typed_ww() and \
                    user_didnt_type_uww():
                # The vowel part of new_comps is supposed to end with
                # u now. That u should be removed.
                new_comps[1] = new_comps[1][:-1]

        # Only a key that undid nothing is recorded in the fallback
        # sequence; a real undo key is left out of it.
        if tmp == new_comps:
            fallback_sequence += key
        new_comps = utils.append_comps(new_comps, key)
    else:
        fallback_sequence += key

    if skip_non_vietnamese is True and key.isalpha() and \
            not is_valid_combination(new_comps, final_form=False):
        result = fallback_sequence, fallback_sequence
    else:
        result = utils.join(new_comps), fallback_sequence

    return result