Example #1
0
 def test_remove_invalid_ipa_characters_invalid_single(self):
     # Each case pairs an input with the result expected from
     # remove_invalid_ipa_characters(..., return_invalid=True,
     # single_char_parsing=True): a (valid, invalid) pair of lists,
     # or None when the input itself is None.
     cases = [
         (None,
          None),
         (u"",
          ([], [])),
         (u"f",
          ([u"f"], [])),
         (u"foo",
          ([u"f", u"o", u"o"], [])),
         # single (\u0066 + \u02BC)
         (u"\u0066\u02BCoo",
          ([u"\u0066", u"\u02BC", u"o", u"o"], [])),
         (u"f\u031Aoo",
          ([u"f", u"\u031A", u"o", u"o"], [])),
         # single (\u006e + \u0361 + \u006d)
         (u"f\u006e\u0361\u006doo",
          ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [])),
         (u"L",
          ([], [u"L"])),
         (u"LfM",
          ([u"f"], [u"L", u"M"])),
         (u"fLoMo",
          ([u"f", u"o", u"o"], [u"L", u"M"])),
         (u"L\u0066\u02BCMoo",
          ([u"\u0066", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
         (u"LfM\u02BCoo",
          ([u"f", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
         (u"fL\u031AMoo",
          ([u"f", u"\u031A", u"o", u"o"], [u"L", u"M"])),
         (u"f\u006eL\u0361\u006doo",
          ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [u"L"])),
     ]
     for text, expected in cases:
         actual = remove_invalid_ipa_characters(
             text, return_invalid=True, single_char_parsing=True)
         self.assertEqual(actual, expected)
Example #2
0
def command_clean(string, vargs):
    """
    Print the given string stripped of the characters that are not
    IPA valid, then pass the invalid ones to ``print_invalid_chars``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments;
                       the ``"single_char_parsing"`` entry is read here
    """
    single_char_parsing = vargs["single_char_parsing"]
    cleaned = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True,
        single_char_parsing=single_char_parsing
    )
    # with return_invalid=True the result unpacks as (valid, invalid)
    kept, rejected = cleaned
    print(u"".join(kept))
    print_invalid_chars(rejected, vargs)
Example #3
0
def command_clean(string, vargs):
    """
    Drop from the given string every character that is not IPA valid
    and print what remains, joined back into a single string.

    The invalid characters are then handed to ``print_invalid_chars``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    # keyword arguments for the cleaning call, gathered in one place
    cleaning_options = {
        "unicode_string": string,
        "return_invalid": True,
        "single_char_parsing": vargs["single_char_parsing"],
    }
    good, bad = remove_invalid_ipa_characters(**cleaning_options)
    print(u"".join(good))
    print_invalid_chars(bad, vargs)
Example #4
0
 def __init__(self, ipa_chars=None, unicode_string=None, ignore=False, single_char_parsing=False):
     """
     Set ``self.ipa_chars`` from ``ipa_chars`` or, failing that,
     from ``unicode_string``.

     ``ipa_chars`` takes precedence when it is not ``None``.
     If both are ``None``, ``self.ipa_chars`` is left as an empty list.

     :param ipa_chars: value assigned to ``self.ipa_chars`` when not ``None``
     :param unicode_string: string to be split into substrings, each of which
                            is looked up in ``UNICODE_TO_IPA``
     :param bool ignore: if true, skip the ``is_valid_ipa`` check
                         on ``unicode_string``
     :param bool single_char_parsing: forwarded to
                                      ``remove_invalid_ipa_characters``
     :raise ValueError: if ``unicode_string`` is not a Unicode string, or
                        if it fails ``is_valid_ipa`` and ``ignore`` is false
     """
     self.ipa_chars = []

     if ipa_chars is not None:
         self.ipa_chars = ipa_chars
         return
     if unicode_string is None:
         return

     if not is_unicode_string(unicode_string):
         raise ValueError("The given string is not a Unicode string.")
     # the validity check is only run when ``ignore`` is false
     if not (ignore or is_valid_ipa(unicode_string)):
         raise ValueError("The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.")

     pieces = remove_invalid_ipa_characters(
         unicode_string=unicode_string,
         return_invalid=False,
         single_char_parsing=single_char_parsing
     )
     converted = []
     for piece in pieces:
         converted.append(UNICODE_TO_IPA[piece])
     # assign the finished list in one step
     self.ipa_chars = converted
Example #5
0
def command_check(string, vargs):
    """
    Print whether the given string is IPA valid.

    When it is not, additionally pass its invalid characters
    to ``print_invalid_chars``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    valid = is_valid_ipa(string)
    print(valid)
    if valid:
        # nothing else to report
        return
    # only the invalid characters are needed here
    unused_valid, rejected = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True
    )
    print_invalid_chars(rejected, vargs)
Example #6
0
def command_check(string, vargs):
    """
    Report the IPA validity of the given string.

    The result of ``is_valid_ipa`` is printed first; if it is false,
    the invalid characters of the string are passed
    to ``print_invalid_chars`` as well.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    outcome = is_valid_ipa(string)
    print(outcome)
    if not outcome:
        split = remove_invalid_ipa_characters(unicode_string=string, return_invalid=True)
        # the result unpacks as (valid, invalid); only the latter is used
        accepted, offending = split
        print_invalid_chars(offending, vargs)
Example #7
0
 def test_remove_invalid_ipa_characters_invalid_single(self):
     # Function under test, with the options shared by every case.
     def clean(text):
         return remove_invalid_ipa_characters(text, return_invalid=True, single_char_parsing=True)

     # None in, None out.
     self.assertEqual(clean(None), None)

     # (input, expected valid chars, expected invalid chars)
     expectations = [
         (u"", [], []),
         (u"f", [u"f"], []),
         (u"foo", [u"f", u"o", u"o"], []),
         # single (\u0066 + \u02BC)
         (u"\u0066\u02BCoo", [u"\u0066", u"\u02BC", u"o", u"o"], []),
         (u"f\u031Aoo", [u"f", u"\u031A", u"o", u"o"], []),
         # single (\u006e + \u0361 + \u006d)
         (u"f\u006e\u0361\u006doo", [u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], []),
         (u"L", [], [u"L"]),
         (u"LfM", [u"f"], [u"L", u"M"]),
         (u"fLoMo", [u"f", u"o", u"o"], [u"L", u"M"]),
         (u"L\u0066\u02BCMoo", [u"\u0066", u"\u02BC", u"o", u"o"], [u"L", u"M"]),
         (u"LfM\u02BCoo", [u"f", u"\u02BC", u"o", u"o"], [u"L", u"M"]),
         (u"fL\u031AMoo", [u"f", u"\u031A", u"o", u"o"], [u"L", u"M"]),
         (u"f\u006eL\u0361\u006doo", [u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [u"L"]),
     ]
     for text, valid, invalid in expectations:
         self.assertEqual(clean(text), (valid, invalid))