Esempio n. 1
0
def adjust_affix_tags(lines, main_flag, flags, modifiers):
    """Rewrite tag substrings of expanded lines according to the affix flags.

    Every entry of ``lines`` is processed on its own and is either kept
    (possibly with tag substrings replaced) or dropped.  The rule set is
    chosen per line:

    * ``main_flag[1] == "n"``: noun rules (merging of ``v_rod``/``v_zna``/
      ``v_dav`` tags, ``//p:`` stripping, ``/v_kly`` removal and the
      ``p:``-tag adjustments);
    * otherwise, ``":perf"`` in ``flags`` and ``":pres"`` in the line:
      ``:pres`` becomes ``:futr``;
    * otherwise, ``main_flag`` starting with ``"/adj"``: adjective rules,
      which are the only ones that can drop a line (``:uncontr``).

    Args:
        lines: iterable of strings; the noun rules read the first and the
            second whitespace-separated token of a line.
        main_flag: main affix flag string; its second character is read
            whenever ``lines`` is non-empty.
        flags: full flag string, tested by substring and handed to the
            external ``util.istota`` / ``util.person`` helpers.
        modifiers: not used by this function.

    Returns:
        A new list with the surviving lines in their original order.
    """
    adjusted = []

    for line in lines:
        if main_flag[1] == "n":
            is_n2 = main_flag.startswith("/n2")
            has_p_flag = ".p" in main_flag or "np" in main_flag

            if is_n2 and re_search("^/n2[01234]", main_flag):
                base_word = line.split()[1]

                if util.istota(flags) and "m:v_rod" in line and "/v_zna" not in line:
                    line = line.replace("m:v_rod", "m:v_rod/v_zna")

                if base_word[-1:] not in "аеєиіїоюя" and ".a" not in flags:
                    form = line.split()[0]
                    if form[-1:] in "ую":
                        logger.debug("u/rod %s - %s", line, base_word)
                        line = line.replace("v_dav", "v_rod/v_dav")

            if is_n2 and "@" in flags:
                form = line.split(" ", 1)[0]
                if form[-1:] in "ая" and "m:v_rod" in line:
                    line = line.replace("m:v_rod", "m:v_rod/v_zna")

            if not has_p_flag and "n2adj" not in flags:
                if ":p:" in line:
                    logger.debug("skipping line with p: " + line)
                elif "//p:" in line:
                    # The log call intentionally sees the already-trimmed line.
                    line = re_sub("//p:.*", "", line)
                    logger.debug("removing //p from: " + line)

            if "/v_kly" in line:
                if main_flag.startswith("/n1"):  # e.g. Єремія /n10.ko.patr.<
                    base_word = line.split()[1]

                # base_word is only read for "/n20" flags, for which the
                # "/n2[01234]" section above has already bound it.
                drop_v_kly = (
                    ("<+" in flags and ":p:" not in line)
                    or not util.person(flags)
                    or (":patr" not in line and re_search("\\.k[eo]", flags))
                    or (":m:" in line and "<+" in flags)
                    or (
                        main_flag.startswith("/n20")
                        and base_word.endswith("ло")
                        and "v_dav" in line
                    )
                )
                if drop_v_kly:
                    logger.debug("removing v_kly from: %s, %s", line, flags)
                    line = line.replace("/v_kly", "")

            if has_p_flag:
                # Order matters: the later "p:v_naz" replacements also hit
                # a "p:v_naz/v_kly" produced by the first one.
                if util.person(flags):
                    line = line.replace("p:v_naz", "p:v_naz/v_kly")

                if util.istota(flags):
                    line = line.replace("p:v_rod", "p:v_rod/v_zna")
                    if ">" in flags:  # animal
                        line = line.replace("p:v_naz", "p:v_naz/v_zna")
                else:
                    line = line.replace("p:v_naz", "p:v_naz/v_zna")

        elif ":perf" in flags and ":pres" in line:
            line = line.replace(":pres", ":futr")

        elif main_flag.startswith("/adj"):
            has_lt = "<" in flags
            has_noun_mark = "^noun" in flags

            if (has_lt or has_noun_mark) and ":uncontr" in line:
                # Dropped entirely: the line never reaches the result.
                continue

            if has_lt:
                if ">" not in flags and ":p:v_naz/v_zna" in line:
                    line = line.replace("v_naz/v_zna", "v_naz/v_kly")
                if ":m:v_naz" in line and "<+" not in flags:
                    line = line.replace("v_naz", "v_naz/v_kly")
            elif has_noun_mark:
                if ":m:v_rod/v_zna" in line or ":p:v_rod/v_zna" in line:
                    line = line.replace("v_rod/v_zna", "v_rod")

        adjusted.append(line)

    return adjusted
Esempio n. 2
0
def get_base_tags(word, affixFlag, allAffixFlags, extra):
    """Return the tag suffix for the base form of ``word``.

    The decision is driven by the prefix of the full flag string and, for
    some prefixes, by the ending of ``word``.

    Args:
        word: base word form; only its ending is inspected (and it is
            matched against the module-level ``ending_i_nnia_re``).
        affixFlag: ignored; it is overwritten with ``allAffixFlags``
            before any check is made.
        allAffixFlags: full affix flag string.
        extra: extra marker string; only tested for ``"^noun"``.

    Returns:
        A tag string beginning with ``":"``, e.g. ``":inf"`` or
        ``":m:v_naz/v_zna"``.

    Raises:
        Exception: when the flags match none of the handled prefixes.
    """
    # The affixFlag argument is overwritten: every check below uses the
    # full flag string.
    affixFlag = allAffixFlags

    if affixFlag[0:2] == "vr":
        return ":rev:inf"
    if affixFlag[0] == "v":
        return ":inf"

    is_istota = util.istota(allAffixFlags)

    v_zna_for_inanim = "" if is_istota else "/v_zna"
    v_kly_for_anim = ""
    if is_istota and ".ko" not in allAffixFlags and ".ke" not in allAffixFlags:
        v_kly_for_anim = "/v_kly"

    if affixFlag.startswith("adj"):
        if word.endswith("е"):
            return ":n:v_naz/v_zna"
        if word.endswith("і"):
            return ":p:v_naz/v_zna:ns"
        if word.endswith("а"):
            tag = ":f:v_naz"
            if "<+" not in allAffixFlags:
                tag += v_kly_for_anim
            return tag
        if word.endswith("ій"):
            if is_istota:
                return ":m:v_naz//f:v_dav/v_mis"
            return ":m:v_naz/v_zn2//f:v_dav/v_mis"
        if is_istota:
            return ":m:v_naz"
        if "^noun" in extra:
            return ":m:v_naz/v_zna"
        return ":m:v_naz/v_zn2"

    if affixFlag == "numr":
        return ":p:v_naz/v_zna"

    if affixFlag.startswith("n2n"):
        if ending_i_nnia_re.match(word):
            tag = ":n:v_naz/v_rod/v_zna//p:v_naz"
        else:
            tag = ":n:v_naz/v_zna"
            # Previously written as `(word[-2:] in "ще", "ко", "ло")`, which
            # is a non-empty tuple and therefore always true; the ending is
            # now really checked.
            if util.person(allAffixFlags) and word.endswith(("ще", "ко", "ло")):
                tag += "/v_kly"
    elif affixFlag.startswith("np"):
        tag = ":p:v_naz"
    elif affixFlag.startswith("n2adj1") and word.endswith("е"):
        tag = ":n:v_naz" + v_zna_for_inanim
    elif affixFlag.startswith("n2adj"):
        tag = ":m:v_naz"
    elif affixFlag[:2] == "n2":
        tag = ":m:v_naz" + v_zna_for_inanim
        # endswith() instead of a substring test, so a one-letter word
        # "л" or "о" no longer counts as ending in "ло".
        if affixFlag.startswith("n20") and util.person(allAffixFlags) and word.endswith("ло"):
            tag += "/v_kly"
    elif affixFlag[:2] == "n1":
        tag = ":f:v_naz"
    elif affixFlag[:2] == "n4":
        tag = ":n:v_naz/v_zna" + v_kly_for_anim
    elif affixFlag[:2] == "n3":
        tag = ":f:v_naz/v_zna"
    else:
        raise Exception("Unkown base for " + word + " " + allAffixFlags)

    return tag
Esempio n. 3
0
def post_expand(lines, flags):
    """Append the extra flags computed from ``flags`` to each expanded line.

    The extra flag string comes from the external ``get_extra_flags``.  If
    it is empty, ``lines`` is returned unchanged (the same object).
    Otherwise every line gets the extra flags appended, after per-line
    adjustments:

    * ``:fname`` is added when ``util.firstname`` reported a value for the
      first line and the current line has no ``:patr``;
    * `` advp`` lines and, for ``adj.adv`` flags, `` adv`` lines get the
      aspect / ``adjp`` parts removed from the line or from the flags;
    * ``:+m`` / ``:+f`` are stripped from the flags and additional lines
      with the gender marker switched to ``:m:`` / ``:f:`` are generated;
    * for ``:patr`` lines ``:anim`` is moved in front of ``:patr``.

    Args:
        lines: non-empty sequence of line strings.
        flags: flag string passed to the helpers and tested by substring.

    Returns:
        ``lines`` itself when there are no extra flags, else a new list:
        all processed lines first, then the generated additional lines.

    Raises:
        Exception: if ``lines`` is empty.
    """
    if len(lines) == 0:
        raise Exception("emtpy lines")

    extra_flags = get_extra_flags(flags)
    if not extra_flags:
        return lines

    first_name_base = util.firstname(lines[0], flags)

    primary = []
    additional = []

    for line in lines:
        suffix = extra_flags
        if first_name_base and ":patr" not in line:
            suffix += ":fname"

        if " advp" in line:
            if ":imperf" in line:
                suffix = re_sub(":(im)?perf", "", suffix)
            else:
                line = line.replace(":perf", "")

        elif "adj.adv" in flags and " adv" in line:
            suffix = re_sub(r":&?adjp(:pasv|:actv|:pres|:past|:perf|:imperf)+", "", suffix)

        elif ":+m" in extra_flags:
            suffix = suffix.replace(":+m", "")

            if ":f:" in line:
                additional.append(line.replace(":f:", ":m:") + suffix)
            elif ":n:" in line:
                variant = line.replace(":n:", ":m:") + suffix

                if util.istota(flags):
                    if "m:v_rod" in variant:
                        # Extra v_zna copy; the v_rod variant itself is
                        # still appended below.
                        additional.append(variant.replace("m:v_rod", "m:v_zna"))
                    elif "m:v_zna" in variant:
                        variant = ""
                    if "m:v_kly" in variant:
                        form, lemma, tags = variant.split()
                        variant = form[:-1] + "е " + lemma + " " + tags

                if variant:
                    additional.append(variant)

        elif ":+f" in extra_flags:
            suffix = suffix.replace(":+f", "")

            # ":m:" takes precedence over ":n:"; only the first marker
            # found produces an additional line.
            for marker in (":m:", ":n:"):
                if marker in line:
                    additional.append(line.replace(marker, ":f:") + suffix)
                    break

        elif ":patr" in line and ":anim" in suffix:
            line = line.replace(":patr", ":anim:patr")
            suffix = suffix.replace(":anim", "")

        primary.append(line + suffix)

    return primary + additional