def _adjust_noun_line(line, main_flag, flags):
    """Return *line* with its case tags adjusted for a noun paradigm.

    Helper for adjust_affix_tags; only called when main_flag[1] == "n".
    Assumes *line* is whitespace-separated with the inflected form first
    and the base word second (that is how the code indexes it).
    """
    is_n2 = main_flag.startswith("/n2")
    # True when the paradigm carries plural forms (".p") or is flagged "np".
    has_plural = ".p" in main_flag or "np" in main_flag

    if is_n2 and re_search("^/n2[01234]", main_flag):
        base_word = line.split()[1]

        if util.istota(flags) and "m:v_rod" in line and "/v_zna" not in line:
            line = line.replace("m:v_rod", "m:v_rod/v_zna")

        # Base word not ending in one of the listed vowels and no ".a" flag:
        # a form ending in у/ю is tagged v_rod in addition to v_dav.
        if base_word[-1:] not in "аеєиіїоюя" and ".a" not in flags:
            form = line.split()[0]
            if form[-1:] in "ую":
                logger.debug("u/rod %s - %s", line, base_word)
                line = line.replace("v_dav", "v_rod/v_dav")

    if is_n2 and "@" in flags:
        form = line.split(" ", 1)[0]
        if form[-1:] in "ая" and "m:v_rod" in line:
            line = line.replace("m:v_rod", "m:v_rod/v_zna")

    if not has_plural and "n2adj" not in flags:
        if ":p:" in line:
            logger.debug("skipping line with p: " + line)
        elif "//p:" in line:
            # Cut the "//p:..." alternative; the log shows the trimmed line.
            line = re_sub("//p:.*", "", line)
            logger.debug("removing //p from: " + line)

    if "/v_kly" in line:
        if main_flag.startswith("/n1"):
            # e.g. "Єремія /n10.ko.patr.<"
            base_word = line.split()[1]

        # base_word is only read for "/n20" flags, where the "/n2[0-4]"
        # branch above has already assigned it.
        if (
            ("<+" in flags and ":p:" not in line)
            or not util.person(flags)
            or (":patr" not in line and re_search("\\.k[eo]", flags))
            or (":m:" in line and "<+" in flags)
            or (
                main_flag.startswith("/n20")
                and base_word.endswith("ло")
                and "v_dav" in line
            )
        ):
            logger.debug("removing v_kly from: %s, %s", line, flags)
            line = line.replace("/v_kly", "")

    if has_plural:
        if util.person(flags):
            line = line.replace("p:v_naz", "p:v_naz/v_kly")

        if util.istota(flags):
            line = line.replace("p:v_rod", "p:v_rod/v_zna")
            if ">" in flags:  # animal
                line = line.replace("p:v_naz", "p:v_naz/v_zna")
        else:
            line = line.replace("p:v_naz", "p:v_naz/v_zna")

    return line


def _adjust_adj_line(line, flags):
    """Return *line* adjusted for an adjective paradigm, or None to drop it.

    Helper for adjust_affix_tags; only called for "/adj" main flags.
    """
    lt_marked = "<" in flags
    noun_marked = "^noun" in flags

    # ":uncontr" forms are not emitted for "<" / "^noun" entries.
    if (lt_marked or noun_marked) and ":uncontr" in line:
        return None

    if lt_marked:
        if ">" not in flags and ":p:v_naz/v_zna" in line:
            line = line.replace("v_naz/v_zna", "v_naz/v_kly")
        if ":m:v_naz" in line and "<+" not in flags:
            line = line.replace("v_naz", "v_naz/v_kly")
    elif noun_marked:
        if ":m:v_rod/v_zna" in line or ":p:v_rod/v_zna" in line:
            line = line.replace("v_rod/v_zna", "v_rod")

    return line


def adjust_affix_tags(lines, main_flag, flags, modifiers):
    """Adjust the tags of expanded lines according to the affix flags.

    Args:
        lines: iterable of expanded lines (strings).
        main_flag: main affix flag; its second character selects the noun
            branch ("n"), and a "/adj" prefix selects the adjective branch.
        flags: full flag string, searched for markers such as "<", ">",
            "<+", "@", "^noun" and ":perf".
        modifiers: accepted for interface compatibility; not used here.

    Returns:
        A new list with the adjusted lines. Adjective lines containing
        ":uncontr" are omitted when flags contain "<" or "^noun".
    """
    adjusted = []

    for line in lines:
        if main_flag[1] == "n":
            line = _adjust_noun_line(line, main_flag, flags)
        elif ":perf" in flags and ":pres" in line:
            # Present-tense tag is relabelled as future when flags carry ":perf".
            line = line.replace(":pres", ":futr")
        elif main_flag.startswith("/adj"):
            line = _adjust_adj_line(line, flags)
            if line is None:
                continue

        adjusted.append(line)

    return adjusted
def get_base_tags(word, affixFlag, allAffixFlags, extra):
    """Return the tag suffix for the base (dictionary) form of *word*.

    Args:
        word: the base word; its ending selects the tag in some branches.
        affixFlag: kept for interface compatibility. Its value is
            overwritten with allAffixFlags on entry, so it is never read.
        allAffixFlags: full affix flag string; its prefix ("v", "vr",
            "adj", "numr", "n2n", "np", "n2adj", "n2", "n1", "n4", "n3")
            selects the branch.
        extra: container/string checked for the "^noun" marker.

    Returns:
        A tag string such as ":inf", ":m:v_naz/v_zna" or ":f:v_naz".

    Raises:
        IndexError: if allAffixFlags is empty (first character is indexed).
        Exception: if the flags match none of the known prefixes.
    """
    # The per-affix argument is deliberately replaced by the full flag string.
    affixFlag = allAffixFlags

    # Verbs: reflexive ("vr") is checked first since it also starts with "v".
    if affixFlag[0:2] == "vr":
        return ":rev:inf"
    if affixFlag[0] == "v":
        return ":inf"

    v_zna_for_inanim = ""
    v_kly_for_anim = ""

    if not util.istota(allAffixFlags):
        v_zna_for_inanim = "/v_zna"
    elif ".ko" not in allAffixFlags and ".ke" not in allAffixFlags:
        v_kly_for_anim = "/v_kly"

    if affixFlag.startswith("adj"):
        if word.endswith("е"):
            tag = ":n:v_naz/v_zna"
        elif word.endswith("і"):
            tag = ":p:v_naz/v_zna:ns"
        elif word.endswith("а"):
            tag = ":f:v_naz"
            if "<+" not in allAffixFlags:
                tag += v_kly_for_anim
        elif word.endswith("ій"):
            tag = ":m:v_naz/v_zna//f:v_dav/v_mis"
            if util.istota(allAffixFlags):
                tag = tag.replace(":m:v_naz/v_zna", ":m:v_naz")
            else:
                tag = tag.replace("v_zna", "v_zn2")
        elif util.istota(allAffixFlags):
            tag = ":m:v_naz"
        else:
            tag = ":m:v_naz/v_zna"
            if "^noun" not in extra:
                tag = tag.replace("v_zna", "v_zn2")

        return tag

    if affixFlag == "numr":
        return ":p:v_naz/v_zna"

    if affixFlag.startswith("n2n"):
        if ending_i_nnia_re.match(word):
            tag = ":n:v_naz/v_rod/v_zna//p:v_naz"
        else:
            tag = ":n:v_naz/v_zna"
            # Fixed: the old test `(word[-2:] in "ще", "ко", "ло")` built a
            # tuple, which is always truthy, so the ending was never checked.
            if util.person(allAffixFlags) and word[-2:] in ("ще", "ко", "ло"):
                tag += "/v_kly"
    elif affixFlag.startswith("np"):
        tag = ":p:v_naz"
    elif affixFlag.startswith("n2adj1") and word.endswith("е"):
        tag = ":n:v_naz" + v_zna_for_inanim
    elif affixFlag.startswith("n2adj"):
        tag = ":m:v_naz"
    elif affixFlag[:2] == "n2":
        tag = ":m:v_naz" + v_zna_for_inanim
        if (
            affixFlag.startswith("n20")
            and util.person(allAffixFlags)
            and (word[-2:] in "ло")
        ):
            tag += "/v_kly"
    elif affixFlag[:2] == "n1":
        tag = ":f:v_naz"
    elif affixFlag[:2] == "n4":
        tag = ":n:v_naz/v_zna" + v_kly_for_anim
    elif affixFlag[:2] == "n3":
        tag = ":f:v_naz/v_zna"
    else:
        raise Exception("Unkown base for " + word + " " + allAffixFlags)

    return tag
def _extra_masc_forms(line, suffix, flags):
    """Return the extra masculine lines derived from *line* for a ":+m" entry.

    Helper for post_expand. *suffix* is the extra-flag string already
    stripped of ":+m"; it is appended to every produced line.
    """
    if ":f:" in line:
        return [line.replace(":f:", ":m:") + suffix]
    if ":n:" not in line:
        return []

    produced = []
    masc = line.replace(":n:", ":m:") + suffix

    if util.istota(flags):
        if "m:v_rod" in masc:
            # Also emit the v_rod form under the v_zna tag.
            produced.append(masc.replace("m:v_rod", "m:v_zna"))
        elif "m:v_zna" in masc:
            # The converted v_zna line itself is dropped.
            masc = ""

        if "m:v_kly" in masc:
            # Replace the last letter of the form with "е".
            form, lemma, tags = masc.split()
            masc = form[:-1] + "е " + lemma + " " + tags

    if masc:
        produced.append(masc)
    return produced


def post_expand(lines, flags):
    """Append the extra flags to expanded lines and add derived gender forms.

    Args:
        lines: non-empty list of expanded lines.
        flags: flag string passed to get_extra_flags and the util helpers.

    Returns:
        *lines* itself when get_extra_flags yields nothing; otherwise a new
        list with every line suffixed by its extra flags, followed by the
        additional ":+m" / ":+f" derived lines.

    Raises:
        Exception: if *lines* is empty.
    """
    if len(lines) == 0:
        raise Exception("emtpy lines")

    extra_flags = get_extra_flags(flags)
    if not extra_flags:
        return lines

    first_name_base = util.firstname(lines[0], flags)

    main_forms = []
    derived_forms = []

    for line in lines:
        suffix = extra_flags

        if first_name_base and ":patr" not in line:
            suffix += ":fname"

        if " advp" in line:
            if ":imperf" in line:
                suffix = re_sub(":(im)?perf", "", suffix)
            else:
                line = line.replace(":perf", "")
        elif "adj.adv" in flags and " adv" in line:
            suffix = re_sub(
                r":&?adjp(:pasv|:actv|:pres|:past|:perf|:imperf)+", "", suffix
            )
        elif ":+m" in extra_flags:
            suffix = suffix.replace(":+m", "")
            derived_forms.extend(_extra_masc_forms(line, suffix, flags))
        elif ":+f" in extra_flags:
            suffix = suffix.replace(":+f", "")
            if ":m:" in line:
                derived_forms.append(line.replace(":m:", ":f:") + suffix)
            elif ":n:" in line:
                derived_forms.append(line.replace(":n:", ":f:") + suffix)
        elif ":patr" in line and ":anim" in suffix:
            # Move ":anim" from the suffix to just before ":patr" in the line.
            line = line.replace(":patr", ":anim:patr")
            suffix = suffix.replace(":anim", "")

        main_forms.append(line + suffix)

    return main_forms + derived_forms