import re

# Assumed imports: `analyze`/`synthesize` match estnltk's vabamorf wrappers and
# `distance` the python-Levenshtein package; adjust to the project's actual setup.
from estnltk.vabamorf.morf import analyze, synthesize
from Levenshtein import distance

# `cases`, `lines_list`, `new_analyses` and add_manual_correction_if_available()
# are module-level definitions shared with the rest of this file.


def synt(i, analysis, tag, mult_anal):
    """Synthesize the expected surface form and record the token under `tag` if it matches."""
    case = analysis["case"][0]
    # Map the analyzer's case label through `cases` where a mapping exists.
    form = analysis["number"][0] + " " + cases.get(case, case)
    new_word = synthesize(analysis["lemma"], form=form)
    if len(new_word) > 0 and i.lower() in new_word:
        lines_list.append(i + "\t" + i + "\t" + tag)
    elif not mult_anal:
        add_manual_correction_if_available(i)
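
# Illustrative sketch (assuming the estnltk vabamorf backend): synthesize()
# takes a lemma plus a form string like "sg in" and returns a list of candidate
# surface forms, which is why synt() tests membership with `i.lower() in new_word`:
#
#   >>> synthesize("raamat", "sg in")
#   ['raamatus']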
def synthesizeWord(word, f):
    """
    Inflects the word into the desired case without changing it from plural to
    singular or vice versa.
    :param word: word to be formed
    :param f: desired form (case part only, e.g. "g")
    :return: word in the desired form
    """
    # Check whether the word is plural or not: the analysis form starts with
    # the number ("sg"/"pl"), so form[:2] preserves it.
    form = analyze(word)[0]["analysis"][0]["form"]
    return synthesize(word, form[:2] + " " + f, "S")[0]
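
# Hypothetical usage sketch: analyze("raamat") yields the form "sg n", so the
# call becomes synthesize("raamat", "sg g", "S") and the expected result is the
# singular genitive (actual output depends on the vabamorf backend):
#
#   >>> synthesizeWord("raamat", "g")
#   'raamatu'
#
# For a plural input the analysis form starts with "pl", so form[:2] keeps the
# word plural while only the case changes.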
def answer(self, layer):
    """
    Creates the answer for an email question.
    :param layer: Current frame layer
    :return: structural units' email addresses
    """
    results = []
    structUnits = layer[wt.structureUnitCode]
    for id in structUnits:
        json = oisStructuralUnits.getStructuralUnit(id)
        name = json['name']['et'].split(" ")
        # Put the last word of the unit name into the singular genitive.
        name[-1] = synthesize(name[-1], 'sg g')[0]
        results.append(" ".join(name).capitalize() + "(" + json['code'] + ")"
                       + " email on " + json['email'])
    return "\n".join(results) + "."
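
# Hedged example of the 'sg g' step above: the last word of a unit name goes
# into the singular genitive so the sentence reads naturally before
# "email on ...", e.g. (assuming the vabamorf backend):
#
#   >>> synthesize("instituut", "sg g")[0]
#   'instituudi'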
def answer(self, layer):
    """
    Creates the answer for a language question.
    :param layer: Current frame layer
    :return: courses' languages
    """
    courseIds = layer[wt.courseID]
    results = []
    for id in courseIds:
        json = oisCourses.coursesId(id)
        lang = json['target']['language']['et'].split(" ")
        # Put the language name into the singular inessive ("in <language>").
        lang[-1] = synthesize(lang[-1], "sg in", "S")[0]
        results.append("Aine " + json['title']['et'] + "(" + id + ")"
                       + " on " + " ".join(lang))
    return "\n".join(results) + "."
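
# Hedged example of the "sg in" step above, again assuming the vabamorf
# backend: the inessive expresses "in <language>", e.g.
#
#   >>> synthesize("keel", "sg in", "S")[0]
#   'keeles'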
def others_check(morph, analysis, tagged_i, i, mult_anal):
    """Tag emoticons, punctuation, proper names, abbreviations and other non-verb tokens."""
    if ("unknown_attribute" in analysis and "Emo" in analysis["unknown_attribute"]) \
            or analysis["partofspeech"] == "E":
        lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Emo")
    elif analysis["partofspeech"] == "Z":
        lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Punct")
    elif ("subtype" in analysis and "prop" in analysis["subtype"]
          and not re.search(r"\d", analysis["lemma"])):
        # Proper name without digits: synthesize the expected form and compare.
        case = analysis["case"][0]
        form = analysis["number"][0] + " " + cases.get(case, case)
        new_word = synthesize(analysis["lemma"], form=form)
        if len(new_word) > 0:
            if i.lower() in new_word:
                lines_list.append(i + "\t" + i + "\t" + "Name")
            elif case == "gen" or case == "part":
                if (new_word[-1][-1] in ["a", "e", "i", "o", "u", "õ", "ä", "ö"]
                        and new_word[-1][-1] != i[-1]):
                    # The suggestion ends in a different stem vowel.
                    tag = "Spell_Unknown_Gen" if case == "gen" else "Spell_Unknown_Part"
                    lines_list.append(i + "\t" + new_word[-1] + "\t" + tag)
                else:
                    lines_list.append(i + "\t" + new_word[-1].capitalize() + "\t" + "Name")
            else:
                lines_list.append(i + "\t" + new_word[-1].capitalize() + "\t" + "Name")
        elif not mult_anal:
            add_manual_correction_if_available(i)
    elif ("subtype" in analysis and "prop" in analysis["subtype"]) \
            or (analysis["partofspeech"] in ("Y", "S") and re.search(r"\d", analysis["lemma"])):
        # Proper names with digits, or Y/S tokens containing digits.
        if ("case" not in analysis
                or ("nom" in analysis["case"] and "sg" in analysis["number"])):
            if i in new_analyses:
                lines_list.append(new_analyses[i])
            else:
                lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Abbrev_Num")
        elif i.lower() == analysis["lemma"].lower():
            lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Abbrev_Num")
        elif not mult_anal:
            add_manual_correction_if_available(i)
    elif analysis["partofspeech"] == "Y" and not re.search(r"\d", analysis["lemma"]):
        # Abbreviation without digits.
        if ("case" not in analysis
                or ("nom" in analysis["case"] and "sg" in analysis["number"])):
            lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Abbrev")
        elif "case" in analysis:
            case = analysis["case"][0]
            new_word = synthesize(analysis["lemma"],
                                  form=analysis["number"][0] + " " + cases.get(case, case))
            if len(new_word) > 0:
                lines_list.append(i + "\t" + new_word[-1] + "\t" + "Abbrev")
            elif not mult_anal:
                add_manual_correction_if_available(i)
        elif not mult_anal:
            add_manual_correction_if_available(i)
    elif morph.lemma is not None and analysis["lemma"].lower() == morph.lemma.lower():
        lines_list.append(i + "\t" + tagged_i.text + "\t" + "Ok")
    elif "-" in i or ":" in i or "/" in i:
        # Token contains internal punctuation.
        if i == analysis["lemma"]:
            lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Word_w_Punct")
        elif "case" in analysis:
            synt(i, analysis, "Word_w_Punct", mult_anal)
        elif "partofspeech" in analysis and analysis["partofspeech"] in ("J", "K", "D", "B"):
            if analysis["lemma"].lower() == i.lower():
                lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Ok")
            else:
                fixed = analysis["lemma"].capitalize() if i[0].isupper() else analysis["lemma"]
                ld = distance(i.lower(), analysis["lemma"].lower())
                if ld > 1:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED>1")
                elif ld == 1:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED_1")
                elif not mult_anal:
                    add_manual_correction_if_available(i)
        elif not mult_anal:
            add_manual_correction_if_available(i)
    elif "partofspeech" in analysis and analysis["partofspeech"] in ("J", "K", "D", "B"):
        if analysis["lemma"].lower() == i.lower():
            lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Ok")
        else:
            fixed = analysis["lemma"].capitalize() if i[0].isupper() else analysis["lemma"]
            if len(re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", i)) < len(
                    re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", analysis["lemma"])):
                lines_list.append(i + "\t" + fixed + "\t" + "Spell_Missing_Diacritics")
            else:
                ld = distance(i.lower(), analysis["lemma"].lower())
                if ld > 1:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED>1")
                elif ld == 1:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED_1")
                elif not mult_anal:
                    add_manual_correction_if_available(i)
    elif "case" in analysis:
        case = analysis["case"][0]
        form = analysis["number"][0] + " " + cases.get(case, case)
        # NB: i2 (the token's full analysis list) must be available from the
        # enclosing scope; it is not a parameter of this function.
        use_edit_distance(analysis, form, i, i2, mult_anal)
    elif i.lower() == analysis["lemma"].lower():
        lines_list.append(i + "\t" + analysis["lemma"] + "\t" + "Ok")
    elif not mult_anal:
        add_manual_correction_if_available(i)
def verb_check(morph_root, analysis, tagged_i, i, mult_anal):
    """Tag verb tokens by comparing them with the synthesized expected verb form."""
    if morph_root.lemma is not None:
        morph_root.root = morph_root.root.replace("_", "")
    if morph_root.lemma is not None and analysis["lemma"] == morph_root.root:
        lines_list.append(i + "\t" + tagged_i.text + "\t" + "Ok")
        return
    # The analysis lemma here lacks the "ma" infinitive suffix, so it is appended
    # before synthesis; the analysis "ending" names the form to synthesize.
    if (analysis["ending"] == "0" and "tense" in analysis and "polarity" in analysis
            and "pres" in analysis["tense"] and "neg" in analysis["polarity"]):
        # Present-tense negation has a zero ending: synthesize the bare "o" form.
        new_word = synthesize(analysis["lemma"] + "ma", form="o")
    else:
        new_word = synthesize(analysis["lemma"] + "ma", form=analysis["ending"])
    if len(re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", i)) < len(
            re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", analysis["lemma"])):
        # Token has fewer diacritics than the lemma: likely typed without ä/ö/ü/õ.
        if len(new_word) > 0:
            if i.lower() in new_word:
                lines_list.append(i + "\t" + i + "\t" + "Spell_Missing_Diacritics")
            else:
                fixed = new_word[-1].capitalize() if i[0].isupper() else new_word[-1]
                lines_list.append(i + "\t" + fixed + "\t" + "Spell_Missing_Diacritics")
        elif not mult_anal:
            add_manual_correction_if_available(i)
    elif len(new_word) > 0:
        ld = distance(i.lower(), new_word[-1].lower())
        fixed = new_word[-1].capitalize() if i[0].isupper() else new_word[-1]
        if i.lower() in new_word:
            lines_list.append(i + "\t" + i + "\t" + "Ok")
        elif ld > 1:
            if i in new_analyses:
                lines_list.append(new_analyses[i])
            else:
                lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED>1")
        elif ld == 1:
            lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED_1")
        else:  # ld == 0
            lines_list.append(i + "\t" + fixed + "\t" + "Ok")
    elif not mult_anal:
        add_manual_correction_if_available(i)
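
# Hedged sketch of the verb synthesis above (assuming the vabamorf backend):
# the "ma" suffix is appended to the bare lemma and the analysis ending doubles
# as the form name, so an ending of "b" should yield the present 3rd person:
#
#   >>> synthesize("luge" + "ma", form="b")
#   ['loeb']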
def use_edit_distance(analysis, form, i, i2, mult_anal):
    """Compare the token against the synthesized form and tag it by edit distance."""
    case = analysis["case"][0]
    # The incoming form is recomputed from the analysis (callers pass the same value).
    form = analysis["number"][0] + " " + cases.get(case, case)
    new_word = synthesize(analysis["lemma"], form=form)
    if len(new_word) > 0:
        fixed = new_word[-1].capitalize() if i[0].isupper() else new_word[-1]
        if i.lower() in new_word:
            lines_list.append(i + "\t" + i + "\t" + "Ok")
        elif case == "gen" or case == "part":
            if (new_word[-1][-1] in ["a", "e", "i", "o", "u", "õ", "ä", "ö"]
                    and new_word[-1][-1] != i[-1]):
                # The suggestion ends in a different stem vowel.
                tag = "Spell_Unknown_Gen" if case == "gen" else "Spell_Unknown_Part"
                lines_list.append(i + "\t" + new_word[-1] + "\t" + tag)
            elif len(re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", i)) < len(
                    re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", new_word[-1])):
                lines_list.append(i + "\t" + new_word[-1] + "\t" + "Spell_Missing_Diacritics")
            else:
                ld = distance(i.lower(), new_word[-1].lower())
                if ld > 1:
                    # Also try lemma + raw ending before settling on ED>1.
                    new_word2 = analysis["lemma"] + analysis["ending"].replace("_", "")
                    if new_word2.lower() == i.lower():
                        fixed2 = new_word2.capitalize() if i[0].isupper() else new_word2
                        lines_list.append(i + "\t" + fixed2 + "\t" + "Ok")
                    else:
                        lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED>1")
                elif ld == 1:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED_1")
                elif ld == 0:
                    lines_list.append(i + "\t" + fixed + "\t" + "Ok")
        else:
            ld = distance(i.lower(), new_word[-1].lower())
            if len(re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", i)) < len(
                    re.findall("ä|Ä|ö|Ö|ü|Ü|Õ|õ", new_word[-1])):
                lines_list.append(i + "\t" + new_word[-1] + "\t" + "Spell_Missing_Diacritics")
            elif ld > 1:
                # Also try lemma + raw ending before settling on ED>1.
                new_word2 = analysis["lemma"] + analysis["ending"].replace("_", "")
                if new_word2.lower() == i.lower():
                    fixed2 = new_word2.capitalize() if i[0].isupper() else new_word2
                    lines_list.append(i + "\t" + fixed2 + "\t" + "Ok")
                else:
                    lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED>1")
            elif ld == 1:
                lines_list.append(i + "\t" + fixed + "\t" + "Spell_ED_1")
            elif ld == 0:
                lines_list.append(i + "\t" + fixed + "\t" + "Ok")
    else:
        # Synthesis from the chosen analysis failed: fall back to the lemma of
        # the first analysis in i2.
        new_word = synthesize(i2[0].lemma, form=form)
        if len(new_word) > 0 and i.lower() in new_word:
            lines_list.append(i + "\t" + i + "\t" + "Ok")
        elif not mult_anal:
            add_manual_correction_if_available(i)
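
# Hedged sketch of the distance() calls above (assuming the python-Levenshtein
# package): a single-character slip is tagged Spell_ED_1, anything farther apart
# falls into Spell_ED>1:
#
#   >>> from Levenshtein import distance
#   >>> distance("majja", "maja"), distance("mjaa", "maja")
#   (1, 2)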