Example #1
0
def get_base_tags(word, affixFlag, allAffixFlags, extra):
    """Return the tag suffix for the base (dictionary) form of ``word``.

    The result is a string starting with ``":"`` (for example ``":inf"`` or
    ``":m:v_naz/v_zna"``) chosen from the prefix of the flag string and, for
    some flag classes, from the ending of ``word``.

    Args:
        word: The base word form whose ending is inspected.
        affixFlag: Accepted for interface compatibility only; its value is
            overwritten with ``allAffixFlags`` before any use.
        allAffixFlags: Full flag string; its prefix ("v", "vr", "adj",
            "numr", "n1".."n4", "n2n", "n2adj", "np") selects the tag.
        extra: Container or string checked for the ``"^noun"`` marker
            (only consulted on the default adjective branch).

    Returns:
        The tag suffix string.

    Raises:
        Exception: If the flag string matches none of the known prefixes
            (this now includes an empty flag string, which previously
            failed with ``IndexError``).
    """
    # The function has always worked off the complete flag string; the
    # separate per-affix argument is deliberately ignored.
    affixFlag = allAffixFlags

    # Verbs: decided by the flag prefix alone.  ``startswith`` (rather than
    # indexing) keeps an empty flag string from raising IndexError here.
    if affixFlag.startswith("vr"):
        return ":rev:inf"
    if affixFlag.startswith("v"):
        return ":inf"

    is_animate = util.istota(allAffixFlags)

    # Optional case suffixes appended by several branches below.
    v_zna_for_inanim = "" if is_animate else "/v_zna"
    if is_animate and ".ko" not in allAffixFlags and ".ke" not in allAffixFlags:
        v_kly_for_anim = "/v_kly"
    else:
        v_kly_for_anim = ""

    if affixFlag.startswith("adj"):
        if word.endswith("е"):
            return ":n:v_naz/v_zna"
        if word.endswith("і"):
            return ":p:v_naz/v_zna:ns"
        if word.endswith("а"):
            tag = ":f:v_naz"
            if "<+" not in allAffixFlags:
                tag += v_kly_for_anim
            return tag
        if word.endswith("ій"):
            if is_animate:
                return ":m:v_naz//f:v_dav/v_mis"
            return ":m:v_naz/v_zn2//f:v_dav/v_mis"
        # Any other ending.
        if is_animate:
            return ":m:v_naz"
        if "^noun" in extra:
            return ":m:v_naz/v_zna"
        return ":m:v_naz/v_zn2"

    if affixFlag == "numr":
        return ":p:v_naz/v_zna"

    # Order matters: the more specific "n2n" / "n2adj1" / "n2adj" prefixes
    # must be tested before the generic "n2" prefix.
    if affixFlag.startswith("n2n"):
        if ending_i_nnia_re.match(word):
            tag = ":n:v_naz/v_rod/v_zna//p:v_naz"
        else:
            tag = ":n:v_naz/v_zna"
            # A real suffix test; the earlier form built a tuple that was
            # always truthy, so the ending was never actually checked.
            if util.person(allAffixFlags) and word.endswith(("ще", "ко", "ло")):
                tag += "/v_kly"
    elif affixFlag.startswith("np"):
        tag = ":p:v_naz"
    elif affixFlag.startswith("n2adj1") and word.endswith("е"):
        tag = ":n:v_naz" + v_zna_for_inanim
    elif affixFlag.startswith("n2adj"):
        tag = ":m:v_naz"
    elif affixFlag.startswith("n2"):
        tag = ":m:v_naz" + v_zna_for_inanim
        # ``endswith`` instead of a substring test, so one-letter words
        # cannot match by accident.
        if (affixFlag.startswith("n20") and util.person(allAffixFlags)
                and word.endswith("ло")):
            tag += "/v_kly"
    elif affixFlag.startswith("n1"):
        tag = ":f:v_naz"
    elif affixFlag.startswith("n4"):
        tag = ":n:v_naz/v_zna" + v_kly_for_anim
    elif affixFlag.startswith("n3"):
        tag = ":f:v_naz/v_zna"
    else:
        raise Exception("Unkown base for " + word + " " + allAffixFlags)

    return tag
Example #2
0
def adjust_affix_tags(lines, main_flag, flags, modifiers):
    """Post-process generated lines, patching the case tags they contain.

    Each element of ``lines`` is a whitespace-separated string; the first
    token is read as the inflected form and the second as the base word.
    Which rewrites apply depends on ``main_flag`` (e.g. ``"/n20"``,
    ``"/adj"``) and on markers found in ``flags``.

    Args:
        lines: Iterable of line strings to adjust.
        main_flag: Main flag string; its second character and prefix select
            the noun / adjective handling.
        flags: Full flag string checked for markers such as ``"<"``,
            ``"<+"``, ``">"``, ``"@"``, ``".a"``, ``"^noun"``, ``":perf"``.
        modifiers: Not used by this function.

    Returns:
        A new list with the adjusted lines; adjective lines containing
        ``":uncontr"`` are omitted when ``flags`` has ``"<"`` or ``"^noun"``.
    """
    adjusted = []

    for entry in lines:
        if main_flag[1] == "n":
            is_n2 = main_flag.startswith("/n2")

            if is_n2 and re_search("^/n2[01234]", main_flag):
                base = entry.split()[1]

                if util.istota(flags) and "m:v_rod" in entry and "/v_zna" not in entry:
                    entry = entry.replace("m:v_rod", "m:v_rod/v_zna")

                if base[-1:] not in "аеєиіїоюя" and ".a" not in flags:
                    form = entry.split()[0]
                    if form[-1:] in "ую":
                        logger.debug("u/rod %s - %s", entry, base)
                        entry = entry.replace("v_dav", "v_rod/v_dav")

            if is_n2 and "@" in flags:
                form = entry.split(" ", 1)[0]
                if form[-1:] in "ая" and "m:v_rod" in entry:
                    entry = entry.replace("m:v_rod", "m:v_rod/v_zna")

            if "np" not in main_flag and ".p" not in main_flag and "n2adj" not in flags:
                if ":p:" in entry:
                    logger.debug("skipping line with p: " + entry)
                elif "//p:" in entry:
                    entry = re_sub("//p:.*", "", entry)
                    logger.debug("removing //p from: " + entry)

            if "/v_kly" in entry:
                if main_flag.startswith("/n1"):  # Єремія /n10.ko.patr.<
                    base = entry.split()[1]

                # Evaluated left to right; the helper calls only run when
                # the preceding alternatives were false.
                drop_kly = (
                    ("<+" in flags and ":p:" not in entry)
                    or not util.person(flags)
                    or (":patr" not in entry and re_search("\\.k[eo]", flags))
                    or (":m:" in entry and "<+" in flags)
                    or (main_flag.startswith("/n20")
                        and base.endswith("ло")
                        and "v_dav" in entry)
                )
                if drop_kly:
                    logger.debug("removing v_kly from: %s, %s", entry, flags)
                    entry = entry.replace("/v_kly", "")

            if ".p" in main_flag or "np" in main_flag:
                if util.person(flags):
                    entry = entry.replace("p:v_naz", "p:v_naz/v_kly")

                animate = util.istota(flags)
                if animate:
                    entry = entry.replace("p:v_rod", "p:v_rod/v_zna")
                # ">" in flags was labelled "animal" in the original code.
                if not animate or ">" in flags:
                    entry = entry.replace("p:v_naz", "p:v_naz/v_zna")

        elif ":perf" in flags and ":pres" in entry:
            entry = entry.replace(":pres", ":futr")

        elif main_flag.startswith("/adj"):
            has_lt = "<" in flags
            has_noun = "^noun" in flags

            # Such lines are dropped from the result entirely.
            if (has_lt or has_noun) and ":uncontr" in entry:
                continue

            if has_lt:
                if ">" not in flags and ":p:v_naz/v_zna" in entry:
                    entry = entry.replace("v_naz/v_zna", "v_naz/v_kly")
                if ":m:v_naz" in entry and "<+" not in flags:
                    entry = entry.replace("v_naz", "v_naz/v_kly")
            elif has_noun:
                if ":m:v_rod/v_zna" in entry or ":p:v_rod/v_zna" in entry:
                    entry = entry.replace("v_rod/v_zna", "v_rod")

        adjusted.append(entry)

    return adjusted