def stem(self, word):
    """
    Stem a Norwegian word and return the stemmed form.

    The word is lower-cased first. A word found in ``self.stopwords`` is
    returned lower-cased and otherwise untouched. For any other word, three
    suffix passes are run against the R1 region computed by
    ``self._r1_scandinavian``; in each pass only the first listed suffix
    that ends R1 is acted upon.

    :param word: The word that is stemmed.
    :type word: str or unicode
    :return: The stemmed form.
    :rtype: unicode
    """
    token = word.lower()
    if token in self.stopwords:
        return token

    region = self._r1_scandinavian(token, self.__vowels)

    # STEP 1
    hit = next(
        (cand for cand in self.__step1_suffixes if region.endswith(cand)),
        None,
    )
    if hit is not None:
        if hit == "s":
            # A bare "s" is dropped only after a character listed in
            # __s_ending, or after a "k" that itself follows a non-vowel.
            before = token[-2]
            if before in self.__s_ending or (
                before == "k" and token[-3] not in self.__vowels
            ):
                token, region = token[:-1], region[:-1]
        elif hit in ("erte", "ert"):
            token = suffix_replace(token, hit, "er")
            region = suffix_replace(region, hit, "er")
        else:
            cut = len(hit)
            token, region = token[:-cut], region[:-cut]

    # STEP 2: a matching suffix costs the word exactly one trailing character.
    if any(region.endswith(cand) for cand in self.__step2_suffixes):
        token, region = token[:-1], region[:-1]

    # STEP 3: the whole matching suffix is removed from the word.
    tail = next(
        (cand for cand in self.__step3_suffixes if region.endswith(cand)),
        None,
    )
    if tail is not None:
        token = token[: -len(tail)]

    return token
def normalize(self, token):
    """
    Return *token* with marks stripped and hamza/alef forms unified.

    The steps run in a fixed order: three deletions, one trailing-hamza
    replacement, then four pattern substitutions.

    :param token: The text to normalize.
    :return: The normalized text.
    """
    # Deletions: diacritics, then kasheeda, then punctuation marks.
    for stripper in (
        self.__vocalization,
        self.__kasheeda,
        self.__arabic_punctuation_marks,
    ):
        token = stripper.sub('', token)

    # Normalize the last hamza: only the first listed ending that matches
    # is replaced.
    ending = next(
        (cand for cand in self.__last_hamzat if token.endswith(cand)),
        None,
    )
    if ending is not None:
        token = suffix_replace(token, ending, '\u0621')

    # Normalize the remaining hamzat and alef variants, in this order.
    rewrites = (
        (self.__initial_hamzat, '\u0627'),
        (self.__waw_hamza, '\u0648'),
        (self.__yeh_hamza, '\u064a'),
        (self.__alefat, '\u0627'),
    )
    for pattern, replacement in rewrites:
        token = pattern.sub(replacement, token)

    return token
def stem(self, word):
    """
    Split a Romanian word into a stem followed by the suffix pieces
    that were stripped from it.

    Unlike a plain stemmer this method does not lower-case the word and
    does not return only the stem. It builds a space-separated string:
    the stem, then every removed piece (each prefixed with ``tok_s``,
    innermost piece first), then any trailing characters that were found
    in ``special``, and finally a trailing space.

    NOTE(review): ``special``, ``tok_s``, ``stopwords`` and
    ``suffix_replace`` are not defined in this method; presumably they are
    module-level names (a set of punctuation characters, a marker string,
    the NLTK stopword corpus and the NLTK helper) -- confirm.

    :param word: The word that is segmented.
    :type word: str or unicode
    :return: ``"<stem> <tok_s+piece> ... <punctuation> "``; for a stop
        word, ``word + " " + punctuation + " "``.
    :rtype: unicode
    """
    # Disabled code kept from earlier revisions:
    # word = word.lower()
    # nltk.download('stopwords')
    # print (self.__step0_suffixes)
    # print (self.__step1_suffixes)
    # print (self.__step2_suffixes)
    # print (self.__step3_suffixes)

    punctuations = ""
    result = ""

    # Peel trailing characters listed in `special` off the word, keeping
    # them in their original order.
    while word:
        if word[-1] in special:
            punctuations = word[-1] + punctuations
            word = word[:-1]
        else:
            break

    # Stop words are returned unsegmented (original casing preserved).
    if word.lower() in list(stopwords.words('romanian')):
        return word + " " + punctuations + " "

    step1_success = False
    step2_success = False

    # 'u' and 'i' between two vowels are upper-cased so that the region
    # helpers below do not see them as vowels; undone before returning.
    for i in range(1, len(word)-1):
        if word[i-1] in self.__vowels and word[i+1] in self.__vowels:
            if word[i] == "u":
                word = "".join((word[:i], "U", word[i+1:]))
            elif word[i] == "i":
                word = "".join((word[:i], "I", word[i+1:]))

    r1, r2 = self._r1r2_standard(word, self.__vowels)
    rv = self._rv_standard(word, self.__vowels)

    # STEP 0: Removal of plurals and other simplifications
    # Only the first suffix that ends the word is considered. Note the
    # region test is `suffix in r1` (substring), not `r1.endswith`.
    for suffix in self.__step0_suffixes:
        if word.endswith(suffix):
            if suffix in r1:
                if suffix in ("ul", "ului"):
                    result = tok_s + suffix + " " + result
                    word = word[:-len(suffix)]

                    if suffix in rv:
                        rv = rv[:-len(suffix)]
                    else:
                        rv = ""

                elif (suffix == "aua" or suffix == "atei" or (suffix == "ile" and word[-5:-3] != "ab")):
                    # Only the last two characters are split off.
                    result = tok_s + suffix[-2:] + " " + result
                    word = word[:-2]

                elif suffix in ("ea", "ele", "elor"):
                    # The leading 'e' stays on the stem.
                    result = tok_s + suffix[1:] + " " + result
                    word = word[:-len(suffix)+1]

                    if suffix in rv:
                        rv = suffix_replace(rv, suffix, "e")
                    else:
                        rv = ""

                elif suffix in ("ii", "iua", "iei", "iile", "iilor", "ilor"):
                    # The leading 'i' stays on the stem.
                    result = tok_s + suffix[1:] + " " + result
                    word = word[:-len(suffix)+1]

                    if suffix in rv:
                        rv = suffix_replace(rv, suffix, "i")
                    else:
                        rv = ""

                elif suffix in ("a\u0163ie", "a\u0163ia"):
                    # NOTE(review): the whole suffix is recorded although
                    # its first character stays on the stem; the sibling
                    # branches record suffix[1:] in that situation --
                    # confirm this is intended.
                    result = tok_s + suffix + " " + result
                    word = word[:-len(suffix)+1]
            break

    # STEP 1: Reduction of combining suffixes
    # Repeats until a pass makes no replacement. r1 is not recomputed
    # between passes.
    while True:

        replacement_done = False

        for suffix in self.__step1_suffixes:
            if word.endswith(suffix):
                if suffix in r1:
                    step1_success = True
                    replacement_done = True

                    if suffix in ("abilitate", "abilitati", "abilit\u0103i", "abilit\u0103\u0163i"):
                        # First four characters stay on the stem.
                        result = tok_s + suffix[4:] + " " + result
                        word = word[:-len(suffix)+4]

                    elif suffix == "ibilitate":
                        result = tok_s + suffix[4:] + " " + result
                        word = word[:-len(suffix)+4]

                    elif suffix in ("ivitate", "ivitati", "ivit\u0103i", "ivit\u0103\u0163i"):
                        # First two characters stay on the stem (also in
                        # every branch below).
                        result = tok_s + suffix[2:] + " " + result
                        word = word[:-len(suffix)+2]

                    elif suffix in ("icitate", "icitati", "icit\u0103i", "icit\u0103\u0163i", "icator", "icatori", "iciv", "iciva", "icive", "icivi", "iciv\u0103", "ical", "icala", "icale", "icali", "ical\u0103"):
                        result = tok_s + suffix[2:] + " " + result
                        word = word[:-len(suffix)+2]

                    elif suffix in ("ativ", "ativa", "ative", "ativi", "ativ\u0103", "a\u0163iune", "atoare", "ator", "atori", "\u0103toare", "\u0103tor", "\u0103tori"):
                        result = tok_s + suffix[2:] + " " + result
                        word = word[:-len(suffix)+2]

                        if suffix in r2:
                            r2 = suffix_replace(r2, suffix, "at")

                    elif suffix in ("itiv", "itiva", "itive", "itivi", "itiv\u0103", "i\u0163iune", "itoare", "itor", "itori"):
                        result = tok_s + suffix[2:] + " " + result
                        word = word[:-len(suffix)+2]

                        if suffix in r2:
                            r2 = suffix_replace(r2, suffix, "it")
                else:
                    step1_success = False
                break

        if not replacement_done:
            break

    # STEP 2: Removal of standard suffixes
    for suffix in self.__step2_suffixes:
        if word.endswith(suffix):
            if suffix in r2:
                step2_success = True

                # Disabled handling of "iune"/"iuni":
                # if suffix in ("iune", "iuni"):
                #     if word[-5] == "\u0163":
                #         word = "".join((word[:-5], "t"))

                if suffix in ("ism", "isme", "ist", "ista", "iste", "isti", "ist\u0103", "i\u015Fti"):
                    # First three characters stay on the stem.
                    result = tok_s + suffix[3:] + " " + result
                    word = word[:-len(suffix)+3]

                else:
                    result = tok_s + suffix + " " + result
                    word = word[:-len(suffix)]
            break

    # STEP 3: Removal of verb suffixes
    # Runs only when neither step 1 nor step 2 removed anything.
    if not step1_success and not step2_success:
        for suffix in self.__step3_suffixes:
            if word.endswith(suffix):
                if suffix in rv:
                    if suffix in ('seser\u0103\u0163i', 'seser\u0103m', 'ser\u0103\u0163i', 'sese\u015Fi', 'seser\u0103', 'ser\u0103m', 'sesem', 'se\u015Fi', 'ser\u0103', 'sese', 'a\u0163i', 'e\u0163i', 'i\u0163i', '\xE2\u0163i', 'sei', '\u0103m', 'em', 'im', '\xE2m', 'se'):
                        result = tok_s + suffix + " " + result
                        word = word[:-len(suffix)]
                        rv = rv[:-len(suffix)]
                    else:
                        # Any other suffix is removed only when, at its
                        # first occurrence in rv, it is not at the start
                        # and the preceding character is not one of the
                        # listed vowels.
                        if (not rv.startswith(suffix) and rv[rv.index(suffix)-1] not in "aeio\u0103\xE2\xEE"):
                            result = tok_s + suffix + " " + result
                            word = word[:-len(suffix)]
                    break

    # STEP 4: Removal of final vowel
    for suffix in ("ie", "a", "e", "i", "\u0103"):
        if word.endswith(suffix):
            if suffix in rv:
                result = tok_s + suffix + " " + result
                word = word[:-len(suffix)]
            break

    # Restore the 'i'/'u' that were upper-cased before region detection.
    word = word.replace("I", "i").replace("U", "u")

    return (word + " " + result).rstrip(" ") + " " + punctuations + " "
def stem(self, word):
    """
    Split a Dutch word into a stem followed by the suffix pieces that
    were stripped from it.

    The word is not lower-cased and accents are not removed (both steps
    are disabled in this variant). The return value is a space-separated
    string: the stem, then every removed piece (each prefixed with
    ``tok_s``, innermost piece first), then any trailing characters that
    were found in ``special``, and finally a trailing space. Consonants
    dropped by undoubling ("kk", "dd", "tt") are recorded as pieces too.

    NOTE(review): ``special``, ``tok_s``, ``stopwords`` and
    ``suffix_replace`` are not defined in this method; presumably they are
    module-level names -- confirm.

    :param word: The word that is segmented.
    :type word: str or unicode
    :return: ``"<stem> <tok_s+piece> ... <punctuation> "``; for a stop
        word, ``word + " " + punctuation + " "``.
    :rtype: unicode
    """
    punctuations = ""
    result = ""

    # Peel trailing characters listed in `special` off the word, keeping
    # them in their original order.
    while word:
        if word[-1] in special:
            punctuations = word[-1] + punctuations
            word = word[:-1]
        else:
            break

    # Set when step 1 rewrites "heden" to "heid", so that step 3a can
    # record the piece under its original spelling.
    heden = False

    # Stop words are returned unsegmented (original casing preserved).
    if word.lower() in list(stopwords.words('dutch')):
        return word + " " + punctuations + " "

    step2_success = False

    # An initial 'y', a 'y' after a vowel, and an 'i' between vowels are
    # put into upper case. From now on these are treated as consonants.
    if word.startswith("y"):
        word = "".join(("Y", word[1:]))

    for i in range(1, len(word)):
        if word[i-1] in self.__vowels and word[i] == "y":
            word = "".join((word[:i], "Y", word[i+1:]))

    for i in range(1, len(word)-1):
        if (word[i-1] in self.__vowels and word[i] == "i"
                and word[i+1] in self.__vowels):
            word = "".join((word[:i], "I", word[i+1:]))

    r1, r2 = self._r1r2_standard(word, self.__vowels)

    # R1 is adjusted so that the region before it contains at least
    # 3 letters. Only the first vowel/non-vowel boundary matters; the
    # prefix up to and including it has i + 1 letters.
    for i in range(1, len(word)):
        if word[i] not in self.__vowels and word[i-1] in self.__vowels:
            if i + 1 < 3:
                r1 = word[3:]
            break

    # STEP 1
    for suffix in self.__step1_suffixes:
        if r1.endswith(suffix):
            if suffix == "heden":
                # Rewritten in place; nothing is recorded until step 3a
                # removes the resulting "heid".
                word = suffix_replace(word, suffix, "heid")
                r1 = suffix_replace(r1, suffix, "heid")
                heden = True
                if r2.endswith("heden"):
                    r2 = suffix_replace(r2, suffix, "heid")

            elif (suffix in ("ene", "en")
                  and not word.endswith("heden")
                  and word[-len(suffix)-1] not in self.__vowels
                  and word[-len(suffix)-3:-len(suffix)] != "gem"):
                result = tok_s + suffix + " " + result
                word = word[:-len(suffix)]
                r1 = r1[:-len(suffix)]
                r2 = r2[:-len(suffix)]
                if word.endswith(("kk", "dd", "tt")):
                    result = tok_s + word[-1] + " " + result
                    word = word[:-1]
                    r1 = r1[:-1]
                    r2 = r2[:-1]

            elif (suffix in ("se", "s")
                  and word[-len(suffix)-1] not in self.__vowels
                  and word[-len(suffix)-1] != "j"):
                result = tok_s + suffix + " " + result
                word = word[:-len(suffix)]
                r1 = r1[:-len(suffix)]
                r2 = r2[:-len(suffix)]
            break

    # STEP 2
    if r1.endswith("e") and word[-2] not in self.__vowels:
        step2_success = True
        result = tok_s + word[-1] + " " + result
        word = word[:-1]
        r1 = r1[:-1]
        r2 = r2[:-1]

        if word.endswith(("kk", "dd", "tt")):
            result = tok_s + word[-1] + " " + result
            word = word[:-1]
            r1 = r1[:-1]
            r2 = r2[:-1]

    # STEP 3a
    if r2.endswith("heid") and word[-5] != "c":
        if heden:
            result = tok_s + "heden " + result
        else:
            result = tok_s + "heid " + result
        word = word[:-4]
        r1 = r1[:-4]
        r2 = r2[:-4]

        if (r1.endswith("en") and word[-3] not in self.__vowels
                and word[-5:-2] != "gem"):
            result = tok_s + "en " + result
            word = word[:-2]
            r1 = r1[:-2]
            r2 = r2[:-2]

            if word.endswith(("kk", "dd", "tt")):
                result = tok_s + word[-1] + " " + result
                word = word[:-1]
                r1 = r1[:-1]
                r2 = r2[:-1]

    # STEP 3b: Derivational suffixes
    for suffix in self.__step3b_suffixes:
        if r2.endswith(suffix):
            if suffix in ("end", "ing"):
                result = tok_s + suffix + " " + result
                word = word[:-3]
                r2 = r2[:-3]

                if r2.endswith("ig") and word[-3] != "e":
                    # The piece removed here is "ig", so that is what is
                    # recorded (not the enclosing "end"/"ing" again).
                    result = tok_s + "ig" + " " + result
                    word = word[:-2]
                else:
                    if word.endswith(("kk", "dd", "tt")):
                        result = tok_s + word[-1] + " " + result
                        word = word[:-1]

            elif suffix == "ig" and word[-3] != "e":
                result = tok_s + suffix + " " + result
                word = word[:-2]

            elif suffix == "lijk":
                result = tok_s + suffix + " " + result
                word = word[:-4]
                r1 = r1[:-4]

                if r1.endswith("e") and word[-2] not in self.__vowels:
                    result = tok_s + word[-1] + " " + result
                    word = word[:-1]
                    if word.endswith(("kk", "dd", "tt")):
                        result = tok_s + word[-1] + " " + result
                        word = word[:-1]

            elif suffix == "baar":
                result = tok_s + suffix + " " + result
                word = word[:-4]

            elif suffix == "bar" and step2_success:
                result = tok_s + suffix + " " + result
                word = word[:-3]
            break

    # STEP 4 (undouble vowel) is intentionally not applied in this variant.

    # All occurrences of 'I' and 'Y' are put back into lower case.
    word = word.replace("I", "i").replace("Y", "y")

    return (word + " " + result).rstrip(" ") + " " + punctuations + " "
def stem(word, verbose=False):
    """
    Lower-case *word* and, when *verbose* is true, stem it as English.

    With the default ``verbose=False`` the function only lower-cases the
    word. With ``verbose=True`` the lower-cased word is returned unchanged
    if it is an English stop word; otherwise a leading apostrophe is
    dropped and the suffix-stripping steps 0 to 5 below are applied.

    NOTE(review): ``stopwords`` and ``suffix_replace`` are not defined in
    this function; presumably they are module-level imports -- confirm.

    :param word: The word to process.
    :type word: str
    :param verbose: Run the full stemmer instead of only lower-casing.
    :type verbose: bool
    :return: The lower-cased word, or its stem when *verbose* is true.
    :rtype: str
    """
    if not verbose:
        return word.lower()

    # 'y' is listed as a vowel; consonant y's are marked as 'Y' below.
    vowels = "aeiouy"
    double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn",
                         "pp", "rr", "tt")
    li_ending = "cdeghkmnrt"
    step0_suffixes = ("'s'", "'s", "'")
    step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s")
    step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed")
    step2_suffixes = ('ization', 'ational', 'fulness', 'ousness',
                      'iveness', 'tional', 'biliti', 'lessli',
                      'entli', 'ation', 'alism', 'aliti', 'ousli',
                      'iviti', 'fulli', 'enci', 'anci', 'abli',
                      'izer', 'ator', 'alli', 'bli', 'ogi', 'li')
    step3_suffixes = ('ational', 'tional', 'alize', 'icate', 'iciti',
                      'ative', 'ical', 'ness', 'ful')
    step4_suffixes = ('ement', 'ance', 'ence', 'able', 'ible', 'ment',
                      'ant', 'ent', 'ism', 'ate', 'iti', 'ous',
                      'ive', 'ize', 'ion', 'al', 'er', 'ic')

    word = word.lower()
    if word in stopwords.words('english'):
        return word

    # Remove a starting apostrophe.
    if word.startswith("\x27"):
        word = word[1:]

    # Special cases with Y's. Y is considered a vowel (according to
    # Merriam-Webster) in three cases:
    #   * Y at the end of a word, e.g. candy, deny;
    #   * the word has no other vowel than Y, e.g. gym;
    #   * Y in the middle of a syllable, e.g. system, borborygmus.
    # The non-vowel y's are therefore found and replaced with 'Y':
    # a y that starts the word, and any y that follows a vowel.
    if word.startswith("y"):
        word = "".join(("Y", word[1:]))

    for i in range(1, len(word)):
        if word[i - 1] in vowels and word[i] == "y":
            word = "".join((word[:i], "Y", word[i + 1:]))

    step1b_vowel_found = False
    r1 = ""
    r2 = ""

    # R1 is the region after the first non-vowel following a vowel,
    # or is the null region at the end of the word if there is no
    # such non-vowel. Three prefixes fix R1 explicitly.
    if word.startswith(("gener", "arsen")):
        r1 = word[5:]
    elif word.startswith("commun"):
        r1 = word[6:]
    else:
        for i in range(1, len(word)):
            if word[i] not in vowels and word[i - 1] in vowels:
                r1 = word[i + 1:]
                break

    # R2 is the region after the first non-vowel following a vowel
    # in R1, or is the null region at the end of the word if there
    # is no such non-vowel.
    for i in range(1, len(r1)):
        if r1[i] not in vowels and r1[i - 1] in vowels:
            r2 = r1[i + 1:]
            break

    # Step 0
    # Remove the possessive suffixes 's', 's and '.
    for suffix in step0_suffixes:
        if word.endswith(suffix):
            word = word[:-len(suffix)]
            r1 = r1[:-len(suffix)]
            r2 = r2[:-len(suffix)]
            break

    # Step 1a
    # Deal with "regular" suffixes such as ied, ies, sses.
    # "us" and "ss" match but are deliberately left alone.
    for suffix in step1a_suffixes:
        if word.endswith(suffix):
            if suffix == 'sses':
                word = word[:-2]
                r1 = r1[:-2]
                r2 = r2[:-2]  # was assigned to a stray name, leaving R2 stale

            elif suffix in ("ied", "ies"):
                if len(word[:-len(suffix)]) > 1:
                    # For regular words, remove the last 2 letters.
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]
                else:
                    # For short words, like pies, only remove the s.
                    word = word[:-1]
                    r1 = r1[:-1]
                    r2 = r2[:-1]

            # When the suffix is just s, remove the last letter.
            elif suffix == "s":
                word = word[:-1]
                r1 = r1[:-1]
                r2 = r2[:-1]
            break

    # Step 1b
    for suffix in step1b_suffixes:
        if word.endswith(suffix):
            if suffix in ("eed", "eedly"):
                if r1.endswith(suffix):
                    word = word[:-len(suffix)] + "ee"

                    if len(r1) >= len(suffix):
                        r1 = r1[:-len(suffix)] + 'ee'
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        # R2 keeps its own replacement; it must not
                        # overwrite R1.
                        r2 = r2[:-len(suffix)] + 'ee'
                    else:
                        r2 = ""
            else:
                # For ed, edly, ing, ingly: the part before the suffix
                # must contain a vowel.
                for letter in word[:-len(suffix)]:
                    if letter in vowels:
                        step1b_vowel_found = True
                        break

                if step1b_vowel_found:
                    word = word[:-len(suffix)]
                    r1 = r1[:-len(suffix)]
                    r2 = r2[:-len(suffix)]

                    # After deletion: at, bl, iz become ate, ble, ize.
                    if word.endswith(("at", "bl", "iz")):
                        word = "".join((word, "e"))
                        r1 = "".join((r1, "e"))

                        if len(word) > 5 or len(r1) >= 3:
                            r2 = "".join((r2, "e"))

                    # A double consonant loses one consonant.
                    elif word.endswith(double_consonants):
                        word = word[:-1]
                        r1 = r1[:-1]
                        r2 = r2[:-1]

                    # A short word gets an e appended.
                    elif (r1 == "" and len(word) >= 3
                          and word[-1] not in vowels
                          and word[-1] not in "wxY"
                          and word[-2] in vowels
                          and word[-3] not in vowels) or (
                            r1 == "" and len(word) == 2
                            and word[0] in vowels
                            and word[1] not in vowels):
                        word = "".join((word, "e"))

                        if len(r1) > 0:
                            r1 = "".join((r1, "e"))

                        if len(r2) > 0:
                            r2 = "".join((r2, "e"))
            break

    # Step 1c
    # A final Y or y after a non-vowel is replaced with i.
    if len(word) > 2 and word[-1] in "yY" and word[-2] not in vowels:
        word = "".join((word[:-1], "i"))
        if len(r1) >= 1:
            r1 = "".join((r1[:-1], "i"))
        else:
            r1 = ""

        if len(r2) >= 1:
            r2 = "".join((r2[:-1], "i"))
        else:
            r2 = ""

    # Step 2
    # The first suffix that ends the word is replaced with the desired
    # ending, provided it also ends R1.
    for suffix in step2_suffixes:
        if word.endswith(suffix):
            if r1.endswith(suffix):
                if suffix == "tional":
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]

                elif suffix in ("enci", "anci", "abli"):
                    word = "".join((word[:-1], "e"))

                    if len(r1) >= 1:
                        r1 = "".join((r1[:-1], "e"))
                    else:
                        r1 = ""

                    if len(r2) >= 1:
                        r2 = "".join((r2[:-1], "e"))
                    else:
                        r2 = ""

                elif suffix == "entli":
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]

                elif suffix in ("izer", "ization"):
                    word = word[:-len(suffix)] + 'ize'

                    if len(r1) >= len(suffix):
                        r1 = r1[:-len(suffix)] + 'ize'
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = r2[:-len(suffix)] + 'ize'
                    else:
                        r2 = ""

                elif suffix in ("ational", "ation", "ator"):
                    word = word[:-len(suffix)] + 'ate'

                    if len(r1) >= len(suffix):
                        r1 = r1[:-len(suffix)] + 'ate'
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = r2[:-len(suffix)] + 'ate'
                    else:
                        r2 = "e"

                elif suffix in ("alism", "aliti", "alli"):
                    word = suffix_replace(word, suffix, "al")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "al")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "al")
                    else:
                        r2 = ""

                elif suffix == "fulness":
                    word = word[:-4]
                    r1 = r1[:-4]
                    r2 = r2[:-4]

                elif suffix in ("ousli", "ousness"):
                    word = suffix_replace(word, suffix, "ous")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "ous")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "ous")
                    else:
                        r2 = ""

                elif suffix in ("iveness", "iviti"):
                    word = suffix_replace(word, suffix, "ive")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "ive")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "ive")
                    else:
                        r2 = "e"

                elif suffix in ("biliti", "bli"):
                    word = suffix_replace(word, suffix, "ble")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "ble")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "ble")
                    else:
                        r2 = ""

                elif suffix == "ogi" and word[-4] == "l":
                    word = word[:-1]
                    r1 = r1[:-1]
                    r2 = r2[:-1]

                elif suffix in ("fulli", "lessli"):
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]

                elif suffix == "li" and word[-3] in li_ending:
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]
            break

    # Step 3
    for suffix in step3_suffixes:
        if word.endswith(suffix):
            if r1.endswith(suffix):
                if suffix == "tional":
                    word = word[:-2]
                    r1 = r1[:-2]
                    r2 = r2[:-2]

                elif suffix == "ational":
                    word = suffix_replace(word, suffix, "ate")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "ate")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "ate")
                    else:
                        r2 = ""

                elif suffix == "alize":
                    word = word[:-3]
                    r1 = r1[:-3]
                    r2 = r2[:-3]

                elif suffix in ("icate", "iciti", "ical"):
                    word = suffix_replace(word, suffix, "ic")

                    if len(r1) >= len(suffix):
                        r1 = suffix_replace(r1, suffix, "ic")
                    else:
                        r1 = ""

                    if len(r2) >= len(suffix):
                        r2 = suffix_replace(r2, suffix, "ic")
                    else:
                        r2 = ""

                elif suffix in ("ful", "ness"):
                    word = word[:-len(suffix)]
                    r1 = r1[:-len(suffix)]
                    r2 = r2[:-len(suffix)]

                elif suffix == "ative" and r2.endswith(suffix):
                    word = word[:-5]
                    r1 = r1[:-5]
                    r2 = r2[:-5]
            break

    # Step 4
    # Suffixes are deleted only when they also end R2; "ion" additionally
    # needs a preceding s or t.
    for suffix in step4_suffixes:
        if word.endswith(suffix):
            if r2.endswith(suffix):
                if suffix == "ion":
                    if word[-4] in "st":
                        word = word[:-3]
                        r1 = r1[:-3]
                        r2 = r2[:-3]
                else:
                    word = word[:-len(suffix)]
                    r1 = r1[:-len(suffix)]
                    r2 = r2[:-len(suffix)]
            break

    # Step 5
    if r2.endswith("l") and word[-2] == "l":
        word = word[:-1]
    elif r2.endswith("e"):
        word = word[:-1]
    elif r1.endswith("e"):
        if len(word) >= 4 and (word[-2] in vowels
                               or word[-2] in "wxY"
                               or word[-3] not in vowels
                               or word[-4] in vowels):
            word = word[:-1]

    # Put the consonant markers back into lower case.
    word = word.replace("Y", "y")

    return word
def stem(self, word):
    """
    Stem a Spanish word and return the stemmed form.

    The word is lower-cased first, and a word found in ``self.stopwords``
    is returned as-is (lower-cased). Otherwise the steps below are applied
    in order against the R1, R2 and RV regions, and the result is passed
    through the class's accent-replacement helper before being returned.

    :param word: The word that is stemmed.
    :type word: str or unicode
    :return: The stemmed form.
    :rtype: unicode
    """
    word = word.lower()
    if word in self.stopwords:
        return word

    # Step 1 suffixes that are swapped for a fixed ending.
    swapped_for = {
        "logia": "log",
        "logias": "log",
        "ucion": "u",
        "uciones": "u",
        "encia": "ente",
        "encias": "ente",
    }
    # Step 1 suffixes that are deleted and may take one further ending
    # (tested against R2) along with them.
    followed_by = {
        key: endings
        for keys, endings in (
            (
                (
                    "adora", "ador", "acion", "adoras", "adores",
                    "aciones", "ante", "antes", "ancia", "ancias",
                ),
                ("ic",),
            ),
            (("mente",), ("ante", "able", "ible")),
            (("idad", "idades"), ("abil", "ic", "iv")),
            (("ivo", "iva", "ivos", "ivas"), ("at",)),
        )
        for key in keys
    }

    standard_suffix_removed = False
    r1, r2 = self._r1r2_standard(word, self.__vowels)
    rv = self._rv_standard(word, self.__vowels)

    # STEP 0: Attached pronoun
    for suffix in self.__step0_suffixes:
        if word.endswith(suffix) and rv.endswith(suffix):
            cut = len(suffix)
            rv_head = rv[:-cut]
            if rv_head.endswith(("ando", "ar", "er", "iendo", "ir")) or (
                rv_head.endswith("yendo")
                and word[:-cut].endswith("uyendo")
            ):
                word = self.__replace_accented(word[:-cut])
                r1 = self.__replace_accented(r1[:-cut])
                r2 = self.__replace_accented(r2[:-cut])
                rv = self.__replace_accented(rv_head)
            # Only the first pronoun that ends both word and RV counts.
            break

    # STEP 1: Standard suffix removal
    for suffix in self.__step1_suffixes:
        if not word.endswith(suffix):
            continue

        cut = len(suffix)
        if suffix == "amente" and r1.endswith(suffix):
            standard_suffix_removed = True
            word, r2, rv = word[:-6], r2[:-6], rv[:-6]

            if r2.endswith("iv"):
                word, r2, rv = word[:-2], r2[:-2], rv[:-2]
                if r2.endswith("at"):
                    word, rv = word[:-2], rv[:-2]
            elif r2.endswith(("os", "ic", "ad")):
                word, rv = word[:-2], rv[:-2]

        elif r2.endswith(suffix):
            standard_suffix_removed = True
            if suffix in swapped_for:
                ending = swapped_for[suffix]
                word = suffix_replace(word, suffix, ending)
                rv = suffix_replace(rv, suffix, ending)
            else:
                word, rv = word[:-cut], rv[:-cut]
                if suffix in followed_by:
                    r2 = r2[:-cut]
                    for extra in followed_by[suffix]:
                        if r2.endswith(extra):
                            word = word[:-len(extra)]
                            rv = rv[:-len(extra)]
        break

    if not standard_suffix_removed:
        # STEP 2a: Verb suffixes beginning 'y'
        for suffix in self.__step2a_suffixes:
            if (
                rv.endswith(suffix)
                and word[-len(suffix) - 1:-len(suffix)] == "u"
            ):
                word, rv = word[:-len(suffix)], rv[:-len(suffix)]
                break

        # STEP 2b: Other verb suffixes
        verb_suffix = next(
            (cand for cand in self.__step2b_suffixes if rv.endswith(cand)),
            None,
        )
        if verb_suffix is not None:
            word = word[:-len(verb_suffix)]
            rv = rv[:-len(verb_suffix)]
            if verb_suffix in ("en", "es", "eis", "emos"):
                # A "gu" left at the end loses its "u".
                if word.endswith("gu"):
                    word = word[:-1]
                if rv.endswith("gu"):
                    rv = rv[:-1]

    # STEP 3: Residual suffix
    residual = next(
        (cand for cand in self.__step3_suffixes if rv.endswith(cand)),
        None,
    )
    if residual is not None:
        word = word[:-len(residual)]
        if residual in ("e", "\xE9"):
            rv = rv[:-len(residual)]
            if word[-2:] == "gu" and rv.endswith("u"):
                word = word[:-1]

    return self.__replace_accented(word)