Ejemplo n.º 1
0
def expand(word, flags, flush_stdout):
    """Expand one ``word`` with its ``flags`` string into output lines.

    ``flags`` is split at the first space: the leading token is the main
    flag and the remainder (possibly empty) is handed to the helpers as
    ``extra``.  When the main flag starts with ``/`` the word goes through
    ``expand_suffixes`` / ``adjust_affix_tags``; otherwise a single
    ``"<word> <word> <flags>"`` line is produced and later passed through
    ``util.expand_nv``.

    Args:
        word: The word to expand.
        flags: Flag string; must be non-empty, because its first character
            is inspected.
        flush_stdout: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``post_expand`` returns for the expanded lines.

    Raises:
        IndexError: If ``flags`` is empty or starts with a space (the main
            flag is then an empty string).
    """
    main_flag, _, extra = flags.partition(" ")

    modifiers = get_modifiers(extra, flags, word)

    uses_affix_flag = main_flag[0] == "/"
    if uses_affix_flag:
        forms = expand_suffixes(word, main_flag[1:], modifiers, extra)
        forms = adjust_affix_tags(forms, main_flag, flags, modifiers)
    else:
        forms = [" ".join((word, word, flags))]

    # Temporarily rename v_zna to v_zn1 on lines containing
    # "adj:m:v_rod/v_zna", before the "/" alternatives are split apart.
    marked = []
    for entry in forms:
        if "adj:m:v_rod/v_zna" in entry:
            entry = entry.replace("v_zna", "v_zn1")
        marked.append(entry)

    # TODO: change the "//" splitter to some single-char splitter?
    forms = affix.expand_alts(marked, "//")
    forms = affix.expand_alts(forms, "/")

    if "/adj" in flags:
        # With "^noun" or "<" in the flags the temporary markers collapse
        # back to plain v_zna; otherwise they get a :ranim / :rinanim suffix
        # (presumably animate / inanimate -- confirm against the tag set).
        keep_plain = "^noun" in flags or "<" in flags
        zn1_tag = "v_zna" if keep_plain else "v_zna:ranim"
        zn2_tag = "v_zna" if keep_plain else "v_zna:rinanim"

        resolved = []
        for entry in forms:
            if "v_zn1" in entry:
                entry = entry.replace("v_zn1", zn1_tag)
            elif "v_zn2" in entry:
                entry = entry.replace("v_zn2", zn2_tag)
            resolved.append(entry)
        forms = resolved

    if not uses_affix_flag:
        forms = util.expand_nv(forms)

    forms = modify(forms, modifiers)

    # A backslash anywhere in the flags tags every resulting line as :compb.
    if "\\" in flags:
        for pos, entry in enumerate(forms):
            forms[pos] = entry + ":compb"

    return post_expand(forms, flags)
Ejemplo n.º 2
0
def expand_line(line, flush_stdout):
    """Expand one dictionary source line into its output lines.

    The line is first run through ``preprocess``; each resulting entry is
    then handled as one of:

    * a "word lemma tags" entry (matches ``word_lemma_re``): only its
      ``//`` and ``/`` alternatives are expanded, plus ``util.expand_nv``
      when it carries ``:nv`` without any ``v_`` tag;
    * a "word /flags [mods] [tags]" entry: expanded through ``expand``.
      If the entry contains ``"\\ +"``, the parts after each backslash are
      treated as sub-lines, the head gets a ``:compb`` tag, and the
      sub-lines are expanded through the ``expand_subposition*`` helpers.

    Side effects: sets the module globals ``main_word`` and, for
    ``.adv`` + ``/adj`` entries that have sub-lines, ``last_adv``.

    Args:
        line: Raw dictionary line.
        flush_stdout: Forwarded unchanged to ``expand``.

    Returns:
        The result of ``post_process`` applied to all collected lines.

    Raises:
        ValueError: If an entry has no space separating word and flags.
        Exception: If ``expand`` yields a blank line, or re-raised from
            ``affix.expand_alts`` after logging the offending lines.
    """
    global main_word
    global main_flag
    global last_adv

    out_lines = []

    for entry in preprocess(line):
        sub_lines = []

        #  +cs
        if "\\ +" in entry:
            entry, *sub_lines = entry.split("\\")
            entry = entry.rstrip()
            # Append to an existing tag group (or to a flag-less entry)
            # directly; otherwise start a new " :" tag group.
            if " :" in entry or " /" not in entry:
                entry += ":compb"
            else:
                entry += " :compb"

        # word lemma tags
        elif word_lemma_re.search(entry):
            if "/" in entry:
                # TODO: change this to some single-char splitter?
                exp_lines = affix.expand_alts([entry], "//")
                try:
                    exp_lines = affix.expand_alts(exp_lines, "/")
                except Exception:
                    print("Failed to expand", exp_lines, file=sys.stderr)
                    raise
            else:
                exp_lines = [entry]

            if ":nv" in entry and "v_" not in entry:
                exp_lines = util.expand_nv(exp_lines)

            out_lines.extend(exp_lines)
            continue

        # word tags
        # word /flags [mods] [tags]
        try:
            word, flags = entry.split(" ", 1)
        except ValueError:
            print("Failed to find flags in", entry, file=sys.stderr)
            raise

        main_word = word

        inflected_lines = expand(word, flags, flush_stdout)

        if sub_lines:
            # The extra flags depend only on ``flags``, so compute them once
            # rather than for every sub-line.
            if flags.startswith("adv:"):
                extra_flags = flags[3:].replace(":compb", "")
            elif " :" in flags or flags.startswith(":"):
                tag_match = re_search("(^| )(:[^ ]+)", flags)
                extra_flags = tag_match.group(2).replace(":compb", "")
            else:
                extra_flags = ""

            is_adv_entry = " adv" in entry
            derives_adv = ".adv" in entry and "/adj" in entry

            for idx, sub_line in enumerate(sub_lines):
                if is_adv_entry:
                    sublines = expand_subposition_adv_main(
                        main_word, sub_line, extra_flags
                    )
                else:
                    sublines = expand_subposition(
                        main_word, sub_line, extra_flags, idx
                    )
                out_lines.extend(sublines)

                if derives_adv:
                    # Use the first inflected line containing " adv" as the
                    # base word for this sub-line's adverb expansion.
                    for inflected_line in inflected_lines:
                        if " adv" in inflected_line:
                            last_adv = inflected_line.split()[0]
                            cs_lines = expand_subposition_adv(
                                last_adv, sub_line, extra_flags
                            )
                            out_lines.extend(cs_lines)
                            break

        out_lines.extend(inflected_lines)

        for inflected_line in inflected_lines:
            if not inflected_line.strip():
                raise Exception("empty liner", inflected_lines)

    return post_process(out_lines)