def test_remove_invalid_ipa_characters_invalid_single(self):
    """
    Run ``remove_invalid_ipa_characters`` with ``return_invalid=True`` and
    ``single_char_parsing=True`` over a table of inputs.

    Each table entry is ``(input, expected)``, where ``expected`` is either
    ``None`` (for a ``None`` input) or a ``(valid, invalid)`` pair of lists
    of characters.
    """
    cases = [
        (None, None),
        (u"", ([], [])),
        (u"f", ([u"f"], [])),
        (u"foo", ([u"f", u"o", u"o"], [])),
        (u"\u0066\u02BCoo", ([u"\u0066", u"\u02BC", u"o", u"o"], [])),  # single (\u0066 + \u02BC)
        (u"f\u031Aoo", ([u"f", u"\u031A", u"o", u"o"], [])),
        (u"f\u006e\u0361\u006doo", ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [])),  # single (\u006e + \u0361 + \u006d)
        (u"L", ([], [u"L"])),
        (u"LfM", ([u"f"], [u"L", u"M"])),
        (u"fLoMo", ([u"f", u"o", u"o"], [u"L", u"M"])),
        (u"L\u0066\u02BCMoo", ([u"\u0066", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
        (u"LfM\u02BCoo", ([u"f", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
        (u"fL\u031AMoo", ([u"f", u"\u031A", u"o", u"o"], [u"L", u"M"])),
        (u"f\u006eL\u0361\u006doo", ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [u"L"])),
    ]
    for text, expected in cases:
        # Positional input, both options passed by keyword.
        actual = remove_invalid_ipa_characters(
            text,
            return_invalid=True,
            single_char_parsing=True,
        )
        self.assertEqual(actual, expected)
def command_clean(string, vargs):
    """
    Strip the characters that are not IPA valid from the given string,
    and print what is left.

    The removed characters are then passed, together with ``vargs``,
    to ``print_invalid_chars``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments;
                       the ``"single_char_parsing"`` entry is forwarded
                       to ``remove_invalid_ipa_characters``
    """
    single_char_parsing = vargs["single_char_parsing"]
    kept, rejected = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True,
        single_char_parsing=single_char_parsing,
    )
    cleaned = u"".join(kept)
    print(cleaned)
    print_invalid_chars(rejected, vargs)
def command_clean(string, vargs):
    """
    Print the given string with every character that is not IPA valid
    removed, then report the removed characters.

    Reporting is delegated to ``print_invalid_chars``, which receives
    the removed characters and ``vargs``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments;
                       must contain the ``"single_char_parsing"`` key
    """
    parse_single = vargs["single_char_parsing"]
    result = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True,
        single_char_parsing=parse_single,
    )
    # With return_invalid=True the result unpacks into (valid, invalid).
    valid_part, invalid_part = result
    print(u"".join(valid_part))
    print_invalid_chars(invalid_part, vargs)
def __init__(self, ipa_chars=None, unicode_string=None, ignore=False, single_char_parsing=False):
    """
    Initialize ``self.ipa_chars`` from a ready-made collection of IPA
    characters or by parsing a Unicode string.

    If ``ipa_chars`` is not ``None`` it is stored as is and
    ``unicode_string`` is not looked at. If both are ``None``,
    ``self.ipa_chars`` is left as an empty list.

    :param ipa_chars: the IPA characters to store directly
    :param unicode_string: the Unicode string to parse
    :param bool ignore: if true, skip the IPA validity check on ``unicode_string``
    :param bool single_char_parsing: forwarded to ``remove_invalid_ipa_characters``
    :raises ValueError: if ``unicode_string`` is not a Unicode string, or if
                        it is not IPA valid and ``ignore`` is false
    """
    self.ipa_chars = []

    if ipa_chars is not None:
        self.ipa_chars = ipa_chars
        return

    if unicode_string is None:
        return

    if not is_unicode_string(unicode_string):
        raise ValueError("The given string is not a Unicode string.")

    must_validate = not ignore
    if must_validate and not is_valid_ipa(unicode_string):
        raise ValueError("The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.")

    valid_substrings = remove_invalid_ipa_characters(
        unicode_string=unicode_string,
        return_invalid=False,
        single_char_parsing=single_char_parsing,
    )

    # Map every surviving substring to its entry in UNICODE_TO_IPA.
    converted = []
    for piece in valid_substrings:
        converted.append(UNICODE_TO_IPA[piece])
    self.ipa_chars = converted
def command_check(string, vargs):
    """
    Print whether the given string is IPA valid.

    When it is not, the invalid characters are additionally passed,
    together with ``vargs``, to ``print_invalid_chars``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    outcome = is_valid_ipa(string)
    print(outcome)
    if outcome:
        # Nothing more to report for a valid string.
        return
    # Only the invalid characters are needed here.
    _, rejected = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True,
    )
    print_invalid_chars(rejected, vargs)
def command_check(string, vargs):
    """
    Report the IPA validity of the given string.

    The result of ``is_valid_ipa`` is printed first. If the string
    is not IPA valid, its invalid characters are collected and handed
    to ``print_invalid_chars`` along with ``vargs``.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    string_is_valid = is_valid_ipa(string)
    print(string_is_valid)
    if string_is_valid:
        return
    split_result = remove_invalid_ipa_characters(unicode_string=string, return_invalid=True)
    # The valid half of the pair is not used by this command.
    _unused_valid, invalid_part = split_result
    print_invalid_chars(invalid_part, vargs)
def test_remove_invalid_ipa_characters_invalid_single(self):
    """
    Table-driven check of ``remove_invalid_ipa_characters`` called with
    ``return_invalid=True`` and ``single_char_parsing=True``.

    The table pairs each input with the value the call must return:
    ``None`` for a ``None`` input, otherwise a ``(valid, invalid)``
    pair of character lists.
    """
    table = [
        (None, None),
        (u"", ([], [])),
        (u"f", ([u"f"], [])),
        (u"foo", ([u"f", u"o", u"o"], [])),
        (u"\u0066\u02BCoo", ([u"\u0066", u"\u02BC", u"o", u"o"], [])),  # single (\u0066 + \u02BC)
        (u"f\u031Aoo", ([u"f", u"\u031A", u"o", u"o"], [])),
        (u"f\u006e\u0361\u006doo", ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [])),  # single (\u006e + \u0361 + \u006d)
        (u"L", ([], [u"L"])),
        (u"LfM", ([u"f"], [u"L", u"M"])),
        (u"fLoMo", ([u"f", u"o", u"o"], [u"L", u"M"])),
        (u"L\u0066\u02BCMoo", ([u"\u0066", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
        (u"LfM\u02BCoo", ([u"f", u"\u02BC", u"o", u"o"], [u"L", u"M"])),
        (u"fL\u031AMoo", ([u"f", u"\u031A", u"o", u"o"], [u"L", u"M"])),
        (u"f\u006eL\u0361\u006doo", ([u"f", u"\u006e", u"\u0361", u"\u006d", u"o", u"o"], [u"L"])),
    ]
    for given, wanted in table:
        got = remove_invalid_ipa_characters(given, return_invalid=True, single_char_parsing=True)
        self.assertEqual(got, wanted)