Esempio n. 1
0
def filter_word(w, modifiers):
    """Return True when the tagged line *w* passes every filter in *modifiers*.

    Recognised keys of *modifiers* (each one is optional):

    * ``"gen"``  - characters for a regex class; *w* must contain
      ``:<one of those chars>:``.
    * ``"pers"`` - characters for a regex class; *w* must contain
      ``:<one of those chars>``.  Not applied to lines tagged ``:inf`` or
      ``:past``.
    * ``"tag"``  - a regular expression that must be found in *w*.
    """
    if "gen" in modifiers and not re_search(":[" + modifiers["gen"] + "]:", w):
        return False

    if "pers" in modifiers:
        # The person filter is skipped for infinitive and past-tense lines.
        person_applies = not re.search(":(inf|past)", w)
        if person_applies and not re_search(":[" + modifiers["pers"] + "]", w):
            return False

    # With no "tag" filter the word is accepted; otherwise the pattern decides.
    return "tag" not in modifiers or re.search(modifiers["tag"], w) is not None
Esempio n. 2
0
def post_process_sorted(lines):
    """Re-base ``patr``/feminine ``lname`` lines and drop repeated perf ``advp``.

    For lines containing ``patr``, and for lines containing ``lname`` and
    ``:f:`` but not ``:nv``, the first token of the most recent matching
    nominative line is remembered and passed to ``replace_base`` together
    with the line.  If such a line shows up before any lemma has been
    remembered, the lemma name is unbound and ``UnboundLocalError`` is raised.

    A line identical to the previously kept one is dropped when it contains
    ``advp:perf`` or ``advp:rev:perf``.

    Returns a new list; *lines* itself is not modified.
    """
    result = []
    previous = ""

    for entry in lines:
        if "patr" in entry:
            if re_search(":[mf]:v_naz:.*patr", entry):
                logger.debug("promoting patr lemma %s", entry)
                current_lemma = entry.split()[0]
            entry = replace_base(entry, current_lemma)
        elif "lname" in entry and ":f:" in entry and ":nv" not in entry:
            if ":f:v_naz" in entry:
                logger.debug("promoting f lname lemma %s", entry)
                current_lemma = entry.split()[0]
            entry = replace_base(entry, current_lemma)

        # Only perfective adverbial participles are de-duplicated.
        is_perf_advp = "advp:perf" in entry or "advp:rev:perf" in entry
        if is_perf_advp and entry == previous:
            continue

        previous = entry
        result.append(entry)

    return result
Esempio n. 3
0
def get_extra_flags(flags):
    """Build the extra tag suffix implied by the flag string *flags*.

    The result is the concatenation, in this order, of:

    * the first space-preceded ``:...`` token of *flags* (when *flags*
      contains ``" :"``),
    * ``":anim"`` when *flags* contains ``"<"`` or ``"patr"``,
    * ``":lname"`` when *flags* contains ``"<+"``.

    Returns an empty string when none of these apply.
    """
    pieces = []

    if " :" in flags:
        pieces.append(re_search(" (:[^ ]+)", flags).group(1))

    if "<" in flags or "patr" in flags:
        pieces.append(":anim")

    if "<+" in flags:
        pieces.append(":lname")

    return "".join(pieces)
Esempio n. 4
0
def _subposition_extra_flags(flags):
    """Return the tag suffix of *flags* handed to sub-position expansion.

    ``:compb`` is always removed from the result.  An empty string is
    returned when *flags* carries no ``:``-prefixed tag token.
    """
    if flags.startswith("adv:"):
        return flags[3:].replace(":compb", "")
    if " :" in flags or flags.startswith(":"):
        return re_search("(^| )(:[^ ]+)", flags).group(2).replace(":compb", "")
    return ""


def expand_line(line, flush_stdout):
    """Expand one dictionary source line into its output lines.

    The line is first run through ``preprocess``; each resulting entry is
    handled in one of three ways:

    * an entry containing ``"\\ +"`` is split on backslashes into a main
      entry (which gets ``:compb`` appended) and sub-lines; the main entry
      then goes through the regular flag expansion below and every sub-line
      is expanded relative to it;
    * an entry matching ``word_lemma_re`` has its ``//`` and ``/``
      alternatives expanded via ``affix.expand_alts`` (and ``util.expand_nv``
      for ``:nv`` entries without ``v_``) and is emitted directly;
    * anything else is split into ``word`` and ``flags`` and passed to
      ``expand``.

    Side effects: assigns the module globals ``main_word`` and (for
    ``.adv``/``/adj`` entries with sub-lines) ``last_adv``.

    Raises whatever the helpers raise; also raises ``Exception`` when
    ``expand`` yields a blank line.  Returns ``post_process`` applied to the
    collected lines.
    """
    global main_word, main_flag, last_adv

    collected = []

    for entry in preprocess(line):
        sub_lines = []

        #  +cs
        if "\\ +" in entry:
            entry, *sub_lines = entry.split("\\")
            entry = entry.rstrip()
            if " :" in entry or " /" not in entry:
                entry += ":compb"
            else:
                entry += " :compb"

        # word lemma tags
        elif word_lemma_re.search(entry):
            if "/" in entry:
                # TODO: change this to some single-char splitter?
                exp_lines = affix.expand_alts([entry], "//")
                try:
                    exp_lines = affix.expand_alts(exp_lines, "/")
                except BaseException:
                    # Diagnostics only - the error is always propagated.
                    print("Failed to expand", exp_lines, file=sys.stderr)
                    raise
            else:
                exp_lines = [entry]

            if ":nv" in entry and "v_" not in entry:
                exp_lines = util.expand_nv(exp_lines)

            collected.extend(exp_lines)
            continue

        # word tags
        # word /flags [mods] [tags]
        try:
            word, flags = entry.split(" ", 1)
        except BaseException:
            # Diagnostics only - the error is always propagated.
            print("Failed to find flags in", entry, file=sys.stderr)
            raise

        main_word = word

        inflected_lines = expand(word, flags, flush_stdout)

        # Sub-position output is emitted before the inflected forms.
        for idx, sub_line in enumerate(sub_lines):
            extra_flags = _subposition_extra_flags(flags)

            if " adv" in entry:
                sublines = expand_subposition_adv_main(main_word, sub_line, extra_flags)
            else:
                sublines = expand_subposition(main_word, sub_line, extra_flags, idx)

            collected.extend(sublines)

            if ".adv" in entry and "/adj" in entry:
                # Only the first adverb among the inflected forms is used.
                for inflected_line in inflected_lines:
                    if " adv" in inflected_line:
                        last_adv = inflected_line.split()[0]
                        cs_lines = expand_subposition_adv(last_adv, sub_line, extra_flags)
                        collected.extend(cs_lines)
                        break

        collected.extend(inflected_lines)

        if any(not inflected.strip() for inflected in inflected_lines):
            raise Exception("empty liner", inflected_lines)

    return post_process(collected)
Esempio n. 5
0
def _adjust_noun_tags(line, main_flag, flags):
    """Apply the noun-specific case-tag rewrites to *line* and return it."""
    if main_flag.startswith("/n2") and re_search("^/n2[01234]", main_flag):
        base_word = line.split()[1]

        if util.istota(flags) and "m:v_rod" in line and "/v_zna" not in line:
            line = line.replace("m:v_rod", "m:v_rod/v_zna")

        if base_word[-1:] not in "аеєиіїоюя" and ".a" not in flags:
            word = line.split()[0]
            if word[-1:] in "ую":
                logger.debug("u/rod %s - %s", line, base_word)
                line = line.replace("v_dav", "v_rod/v_dav")

    if main_flag.startswith("/n2") and "@" in flags:
        word = line.split(" ", 1)[0]
        if word[-1:] in "ая" and "m:v_rod" in line:
            line = line.replace("m:v_rod", "m:v_rod/v_zna")

    if "np" not in main_flag and ".p" not in main_flag and "n2adj" not in flags:
        if ":p:" in line:
            # NOTE(review): the message says "skipping" but the line is still
            # returned to the caller unchanged - confirm this is intended.
            logger.debug("skipping line with p: " + line)
        elif "//p:" in line:
            line = re_sub("//p:.*", "", line)
            logger.debug("removing //p from: " + line)

    if "/v_kly" in line:
        if main_flag.startswith("/n1"):  # Єремія /n10.ko.patr.<
            base_word = line.split()[1]

        # base_word is only consulted for /n20 flags, which always bind it in
        # the first branch above.
        drop_vocative = (
            ("<+" in flags and ":p:" not in line)
            or not util.person(flags)
            or (":patr" not in line and re_search("\\.k[eo]", flags))
            or (":m:" in line and "<+" in flags)
            or (main_flag.startswith("/n20") and base_word.endswith("ло") and "v_dav" in line)
        )
        if drop_vocative:
            logger.debug("removing v_kly from: %s, %s", line, flags)
            line = line.replace("/v_kly", "")

    if ".p" in main_flag or "np" in main_flag:
        if util.person(flags):
            line = line.replace("p:v_naz", "p:v_naz/v_kly")

        if not util.istota(flags):
            line = line.replace("p:v_naz", "p:v_naz/v_zna")
        else:
            line = line.replace("p:v_rod", "p:v_rod/v_zna")
            if ">" in flags:  # animal
                line = line.replace("p:v_naz", "p:v_naz/v_zna")

    return line


def _adjust_adj_tags(line, flags):
    """Apply the adjective-specific rewrites; return None to drop *line*."""
    if ("<" in flags or "^noun" in flags) and ":uncontr" in line:
        return None

    if "<" in flags:
        if ">" not in flags and ":p:v_naz/v_zna" in line:
            line = line.replace("v_naz/v_zna", "v_naz/v_kly")
        if ":m:v_naz" in line and "<+" not in flags:
            line = line.replace("v_naz", "v_naz/v_kly")
    elif "^noun" in flags:
        if ":m:v_rod/v_zna" in line or ":p:v_rod/v_zna" in line:
            line = line.replace("v_rod/v_zna", "v_rod")

    return line


def adjust_affix_tags(lines, main_flag, flags, modifiers):
    """Rewrite the case/tense tags of expanded *lines* according to the flags.

    Exactly one rule set is applied per line, chosen in this order:

    * noun rules when the second character of *main_flag* is ``"n"``;
    * otherwise ``:pres`` is turned into ``:futr`` when *flags* contains
      ``:perf`` and the line contains ``:pres``;
    * otherwise adjective rules when *main_flag* starts with ``/adj``; these
      may drop ``:uncontr`` lines entirely.

    Lines matching none of the above are passed through unchanged.
    *modifiers* is accepted but not used by this function.

    Returns a new list of lines.
    """
    adjusted = []

    for line in lines:
        if main_flag[1] == "n":
            line = _adjust_noun_tags(line, main_flag, flags)
        elif ":perf" in flags and ":pres" in line:
            line = line.replace(":pres", ":futr")
        elif main_flag.startswith("/adj"):
            line = _adjust_adj_tags(line, flags)
            if line is None:
                continue

        adjusted.append(line)

    return adjusted