def _postprocess_kalenjin(word_as_list, fst):
    """Run the Kalenjin second pass ('24B') over stem + suffixes.

    ``word_as_list`` is split at its first '+': everything from that '+'
    onwards is kept (reversed) as the final prefixes and is not re-processed;
    everything before it is reversed and treated as the stem.  The stem is
    cut at its first '-', the flattened ``fst.suffix`` is appended, and the
    result is stepped through ``FST(vh_dataset['24B'])``.

    Returns the final prefixes followed by the output of that step.
    """
    if '+' in word_as_list:
        boundary = word_as_list.index('+')
        final_prefixes = word_as_list[boundary:][::-1]
        stem = word_as_list[:boundary][::-1]
    else:
        final_prefixes = []
        # NOTE(review): unlike the '+' case above, the stem is NOT reversed
        # here.  Kept as in the original; confirm this asymmetry is intended.
        stem = word_as_list
    if '-' in stem:
        stem = stem[:stem.index('-')]

    post_fst = FST(vh_dataset['24B'])
    # fst.suffix is iterated as a sequence of suffixes, each a sequence of
    # symbols; flatten it into one list of symbols.
    suffix_symbols = [symbol for suffix in fst.suffix for symbol in suffix]
    return final_prefixes + post_fst.step(stem + suffix_symbols)


def postprocess(word_as_list, fst):
    """Apply language-specific post-processing and join the result.

    Args:
        word_as_list: sequence of string symbols.
        fst: transducer object.  Reads ``postprocess_req``; when that is
            truthy also reads ``left_subseq`` and ``name``, and for some
            languages ``suffix`` and ``step``.

    Returns:
        The processed symbols concatenated with ``"".join``.

    When ``fst.postprocess_req`` is falsy the input is joined unchanged.
    Otherwise, for transducers with a falsy ``left_subseq`` the word is
    reversed and then, for the languages named below, pushed through one or
    more follow-up transducers.  For transducers with a truthy
    ``left_subseq`` only Maasai and Nawuri get a follow-up pass.
    """
    if fst.postprocess_req:
        if not fst.left_subseq:
            word_as_list = word_as_list[::-1]
            if fst.name == "Igbo ATR harmony":
                post_fst = FST(vh_dataset['22B'])
                word_as_list = post_fst.step(word_as_list)[::-1]
            elif fst.name == "Diola-Fogny (Jola-Fonyi) ATR harmony":
                # Second pass through the very same transducer.
                word_as_list = fst.step(word_as_list)[::-1]
            elif fst.name == (
                    "Pasiego vowel harmony "
                    "(metaphony, raising, and centralization)"):
                # Undo the reversal above, then chain 26B and 26C.
                word_as_list = word_as_list[::-1]
                fst_b = FST(vh_dataset['26B'])
                word_as_list = fst_b.step(word_as_list)
                fst_c = FST(vh_dataset['26C'])
                word_as_list = fst_c.step(word_as_list)[::-1]
            elif fst.name == "Kalenjin ATR harmony":
                # Undo the reversal above before parsing the word.
                word_as_list = _postprocess_kalenjin(word_as_list[::-1], fst)
        else:
            follow_up_key = {
                "Maasai (Eastern Nilotic) ATR harmony": '28B',
                "Nawuri (North Guang) ATR harmony": '31B',
            }.get(fst.name)
            if follow_up_key is not None:
                # The dataset entry has to be wrapped in FST() before it can
                # be stepped, as every other branch does; the original called
                # .step() on the raw entry.
                fst_b = FST(vh_dataset[follow_up_key])
                # The follow-up pass runs on the reversed word; restore the
                # original direction afterwards.
                word_as_list = fst_b.step(word_as_list[::-1])[::-1]
    return "".join(word_as_list)
def _preprocess_kalenjin(prefix_as_list, stem_as_list, suffix_as_list):
    """Mark Kalenjin affixes with delimiters and build the reversed input.

    Every element of the three lists that is found in
    ``language['n-a_suff']`` gets '&' inserted at its front; otherwise, if it
    is found in ``language['n-a_r&pre']``, it gets '!' appended.  The
    elements are modified IN PLACE, so the change is also visible through
    ``fst.prefix`` / ``fst.suffix``, which alias these lists.

    Returns the flattened prefixes + stem + first suffix (if any), reversed.
    """
    # NOTE(review): integer key 24 here, while every other dataset lookup in
    # this file uses a string key such as '24B' -- confirm this is intended.
    language = vh_dataset[24]

    def add_delimiters(affix_list):
        for affix in affix_list:
            if affix in language['n-a_suff']:
                affix.insert(0, '&')
            elif affix in language['n-a_r&pre']:
                affix.append('!')

    add_delimiters(prefix_as_list)
    add_delimiters(suffix_as_list)
    # NOTE(review): the stem is concatenated below as a flat list, yet it is
    # passed through the same per-affix marking -- verify against callers.
    add_delimiters(stem_as_list)

    prefix_flat = [symbol for prefix in prefix_as_list for symbol in prefix]
    # Only the first suffix is fed to the transducer.
    suffix_to_add = suffix_as_list[0] if suffix_as_list else []

    # Example of the word layout, from the original author:
    # h e h i + h i + h i + a w q - a e
    return (prefix_flat + stem_as_list + suffix_to_add)[::-1]


def preprocess(word_as_list, prefix_as_list, stem_as_list, suffix_as_list,
               fst):
    """Prepare an input word for ``fst`` with language-specific tweaks.

    Args:
        word_as_list: the whole word as a list of symbols.
        prefix_as_list: prefixes of the word; stored on ``fst.prefix``.
        stem_as_list: stem of the word as a list of symbols.
        suffix_as_list: suffixes of the word; stored on ``fst.suffix``.
        fst: transducer object.  ``prefix`` and ``suffix`` are always
            assigned; ``preprocess_req``, ``left_subseq``, ``name`` and (for
            Kalmyk) ``alphabet`` are read.

    Returns:
        A ``(flag, symbols)`` tuple.  ``flag`` is False for empty input and
        for the Kalmyk case where 'i' is the only alphabet symbol in the
        stem; it is True otherwise.  (Presumably the flag tells the caller
        whether the main transducer still has to run -- confirm at the call
        site.)

    Side effects:
        Several branches modify ``stem_as_list``, ``word_as_list`` or the
        affix lists in place (delimiter symbols are inserted/appended), and
        the Kalmyk branch may overwrite ``fst.suffix``.
    """
    fst.prefix = prefix_as_list
    fst.suffix = suffix_as_list

    # empty input
    if word_as_list == []:
        return False, []

    # preprocessing isn't required on the input string
    if not fst.preprocess_req:
        return True, word_as_list

    # Preprocessing for right-subsequential languages
    if not fst.left_subseq:
        if fst.name == "Kalenjin ATR harmony":
            return True, _preprocess_kalenjin(
                prefix_as_list, stem_as_list, suffix_as_list)
        # Every other right-subsequential language (including the Jingulu
        # languages 9-13, Yoruba and Asturian Lena, which the original
        # listed explicitly) is simply fed in reverse.
        return True, word_as_list[::-1]

    # Preprocessing for left-subsequential languages
    if fst.name == "Kisa applicative suffix Vlɑ" \
            or fst.name == "Kisa reversative suffix Vlɑ":
        # Drop the last three symbols.
        return True, word_as_list[:-3]

    uyghur_dative = 'Uyghur dative suffix -' + U_F_V + 'V'
    if fst.name in {
            "Uyghur backness harmony",
            "Uyghur plural suffix -lVr",
            uyghur_dative,
    }:
        # run the preprocess step
        preliminary_fst = FST(vh_dataset['5P'])
        if fst.name == "Uyghur plural suffix -lVr":
            word_as_list = word_as_list[:-3]
        elif fst.name == uyghur_dative:
            word_as_list = word_as_list[:-2]
        return True, preliminary_fst.step(word_as_list)

    if fst.name == "Halh (Mongolic) rounding harmony":
        # run the preprocess step
        preliminary_fst = FST(vh_dataset['8P'])
        return True, preliminary_fst.step(word_as_list)

    if fst.name == "Kalmyk (Oirat) harmony":
        # Rules for language 17
        # If there is a suffix, hyphenate
        # Check if i is the only type of vowel in the stem
        #   if not, run stem + suffix through 17
        #   if yes, don't process the stem. Input the suffix into 17P.
        #   Output is the stored stem + output of 17P
        alphabet_as_set = set(fst.alphabet)
        vowels_in_stem = {ch for ch in stem_as_list if ch in fst.alphabet}
        if vowels_in_stem.intersection(alphabet_as_set) != {'i'}:
            return True, word_as_list
        if suffix_as_list != []:
            preliminary_fst = FST(vh_dataset['17P'])
            # Only the first suffix is processed; the result replaces the
            # suffix stored on the transducer.
            fst.suffix = preliminary_fst.step(suffix_as_list[0])
        return False, stem_as_list + fst.suffix

    if fst.name in {
            "Maasai (Eastern Nilotic) ATR harmony",
            "Nawuri (North Guang) ATR harmony",
    }:
        # Wrap the stem in '!' ... '&' (in place).
        stem_as_list.insert(0, '!')
        stem_as_list.append('&')
        return True, prefix_as_list + stem_as_list + suffix_as_list

    if fst.name == "Kashaya (Pomoan) translaryngeal harmony":
        # The dataset entry has to be wrapped in FST() before it can be
        # stepped, as the 5P / 8P / 17P branches do; the original called
        # .step() on the raw entry.
        fst_p = FST(vh_dataset['29P'])
        return True, fst_p.step(word_as_list)

    if fst.name == \
            "Standard Hungarian palatal harmony of alternating suffixes":
        # Mark the start of the stem with '!' (in place).
        stem_as_list.insert(0, '!')
        return True, prefix_as_list + stem_as_list + suffix_as_list

    if fst.name == "Tunica harmony":
        # Append '#' to the word (in place).
        word_as_list.append('#')
        return True, word_as_list

    return True, word_as_list