def vocalism(self, str1, str2):
    """Flag translated words whose ending breaks the vocalism rule.

    Words are expected to end with a/i/u, so this check reports words
    ending in "e" or "o", apart from a handful of accepted exceptions:

    - me, to, so (possessive pronouns)
    - po (verb "putiri")
    - no
    - jo (personal pronoun)
    - se (yes)
    - nne (in)

    A word whose text also occurs in the source string is not reported.

    :param str1: the source string
    :param str2: the target (translated) string
    :return: True if no offending word ending was found
    :raises FilterFailure: listing every offending word (lowercased, each
        one only once, in order of first appearance)
    """
    allowed = ["me", "to", "so", "po", "no", "jo", "se", "nne"]
    offenders = []
    for token in self.config.lang.words(str2):
        lowered = token.lower()
        # Skip text that is also present in the source (substring match)
        # and the explicitly accepted exceptions.
        if token in str1 or lowered in allowed:
            continue
        if not lowered.endswith(("e", "o")):
            continue
        # Report each offending word a single time.
        if lowered not in offenders:
            offenders.append(lowered)
    if not offenders:
        return True
    raise FilterFailure("Please respect vocalism: %s" % (", ".join(offenders)))
def italianisms(self, str1, str2):
    """Check if the translation contains common errors made by italophones.

    Mainly inspired by musttranslatewords(), but with a different logic:
    a known italianism is reported when it appears in the translation but
    not in the source (if it is in the source as well, then presumably it
    is being kept untranslated on purpose).

    The italianism lookup is case-insensitive; the comparison against the
    source words is case-sensitive.

    :param str1: the source string
    :param str2: the target (translated) string
    :return: True if str2 doesn't contain an "italianism"
    :raises FilterFailure: listing each italianism found, followed by the
        suggested replacement in parentheses
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)

    # Italianism (lowercase) -> suggested replacement shown to the user.
    errors = {
        "io": "ju/jeu/iu/...",
        "tantu": "assai",
        "menu": "cchiù picca",
    }

    # Turn every configured punctuation mark into a space so that a word
    # directly followed or preceded by punctuation is still split off.
    for separator in self.config.punctuation:
        str1 = str1.replace(separator, u" ")
        str2 = str2.replace(separator, u" ")

    words1 = self.filteraccelerators(str1).split()
    words2 = self.filteraccelerators(str2).split()

    stopwords = []
    for word in words2:
        lowered = word.lower()
        if lowered in errors and word not in words1:
            # Index with the lowercased key: the membership test above is
            # case-insensitive, so indexing with the original word would
            # raise KeyError for e.g. a sentence-initial "Io".
            stopwords.append("%s (%s)" % (word, errors[lowered]))

    if stopwords:
        raise FilterFailure(u"Please translate: %s" % (u", ".join(stopwords)))
    return True
def niciun_nicio(self, str1, str2):
    """Reject translations using the split forms 'nici un' / 'nici o'.

    These sequences are obsolete Romanian syntax; the correct spellings
    are 'niciun' / 'nicio'.

    :param str1: the source string (not used by this check)
    :param str2: the target (translated) string
    :return: True if contains_illegal() reports nothing for str2
    :raises FilterFailure: if contains_illegal() reports a match in str2
    """
    obsolete_forms = ['nici un', 'nici o']
    if not contains_illegal(obsolete_forms, str2):
        return True
    raise FilterFailure("String contains 'nici un' or 'nici o'")
def cedillas(self, str1, str2):
    """Reject translations that contain a cedilla character.

    Cedillas are obsoleted diacritics for Romanian:

    - U+0162 Latin capital letter T with cedilla
    - U+0163 Latin small letter T with cedilla
    - U+015E Latin capital letter S with cedilla
    - U+015F Latin small letter S with cedilla

    Cedilla letters are only valid for Turkish (S-cedilla) and Gagauz.

    :param str1: the source string (not used by this check)
    :param str2: the target (translated) string
    :return: True if contains_illegal() finds none of the four letters
        in str2
    :raises FilterFailure: if contains_illegal() reports a cedilla letter
        in str2
    """
    cedilla_letters = ['Ţ', 'Ş', 'ţ', 'ş']
    if not contains_illegal(cedilla_letters, str2):
        return True
    raise FilterFailure("String contains illegal cedillas")
def suffixes(self, str1, str2):
    """Verify that common word suffixes are spelled correctly.

    Each word of the translation that ends (case-insensitively) with a
    known misspelled suffix is reported together with the expected
    suffix. A word whose text also occurs in the source string is not
    reported.

    :param str1: the source string
    :param str2: the target (translated) string
    :return: True if no wrongly written suffix was found
    :raises FilterFailure: listing each offending word followed by the
        correct suffix in parentheses
    """
    # Misspelled suffix -> correct suffix.
    corrections = {
        "zzioni": "zziuni",
    }
    flagged = []
    for token in self.config.lang.words(str2):
        if token in str1:
            continue
        lowered = token.lower()
        for wrong, right in corrections.items():
            if lowered.endswith(wrong):
                flagged.append("%s (-%s)" % (token, right))
    if not flagged:
        return True
    raise FilterFailure(u"Please use the correct word endings: %s" % (u", ".join(flagged)))