Ejemplo n.º 1
0
def get_wordarr(st):
    '''Collect the text of the terminals below ``st``.

    A terminal node (``is_terminalc(st.type)`` is true) returns its own
    ``st.text`` unchanged.  For any other node the children are visited in
    ``left + blocks + right`` order and their results are concatenated into
    a new list.  Terminal children whose ``meaning`` is ``None`` are skipped,
    and so are children whose own result is ``None``.

    Returns:
        ``st.text`` for a terminal, otherwise a freshly built list.
    '''
    if is_terminalc(st.type):
        return st.text
    r = []
    for c in st.left + st.blocks + st.right:
        if is_terminalc(c.type) and c.meaning is None:
            continue
        # Recurse exactly once per child; testing and then re-computing the
        # result would double the work at every level of the tree.
        words = get_wordarr(c)
        if words is not None:
            r += words
    return r
Ejemplo n.º 2
0
def pre_morphen(lang, st, ind = 0):
    '''Init's the main morpho-groups before doing morphological generation.

    This is a generator.  ``st.parent`` is reset to ``None`` first.

    Terminal node (``is_terminalc(st.type)``):
      * if ``st.fixed`` is empty/falsy or ``is_empty_word(st)`` holds, ``st``
        itself is yielded and nothing else;
      * otherwise the words of ``lang.words`` that pass ``eqx`` for every key
        of ``concept`` (the ``concept["tags"]`` key is exempt) are run through
        ``score_tags``.  A candidate is dropped when it has the same values as
        an already kept candidate on every attribute in ``st.fixed``, or when
        any of those attributes equals ``NONE_VALUE``.  For each remaining
        candidate a deep copy of ``st`` is yielded with the ``fixed``
        attributes overwritten from the candidate.

    Non-terminal node: the child at position ``ind`` of
    ``left + blocks + right`` is expanded recursively; each alternative is
    placed into a deep copy of ``st``, kept only if the alternative's
    ``refresh()`` is truthy, the copies are passed through
    ``the_bests(lang, eupony_score, ...)`` and then either yielded (last
    child) or expanded further at ``ind + 1``.
    '''
    st.parent = None

    if is_terminalc(st.type):
        if not st.fixed or is_empty_word(st):
            yield st
            return

        matching = [word for word in lang.words
                    if all(key == concept["tags"] or eqx(key, word, st) for key in concept)] #musteq
        candidates = score_tags(matching, st)
        if PRINT_TO_CONSOLE and len(candidates) > 1:
            print("CAUTION: Ambiguity in 'pre_morphen' at %s" % st.descr())

        distinct = []
        for candidate in candidates:
            # Duplicate = no fixed attribute differs from some kept candidate.
            already_kept = any(
                not any(candidate.attr(f) != kept.attr(f) for f in st.fixed)
                for kept in distinct)
            if already_kept:
                continue
            # Candidates with an unset fixed attribute are reported and dropped.
            if any(candidate.attr(f) == NONE_VALUE for f in st.fixed):
                print("xxxxxxxxxxxpre_morphenpre_morphenpre_morphenpre_morphen")
                continue
            distinct.append(candidate)

        for candidate in distinct:
            clone = deepcopy(st)
            for f in clone.fixed:
                clone.attr(f, candidate.attr(f))
            yield clone
        return

    expanded = []
    for child in pre_morphen(lang, (st.left + st.blocks + st.right)[ind]):
        clone = deepcopy(st)
        # Put the alternative into the slot that position ``ind`` maps to.
        if ind < len(clone.left):
            clone.left[ind] = child
        elif ind < len(clone.left + clone.blocks):
            clone.blocks[ind - len(clone.left)] = child
        else:
            clone.right[ind - len(clone.left + clone.blocks)] = child
        child.parent = clone
        if child.refresh():
            expanded.append(clone)

    expanded = the_bests(lang, eupony_score, expanded)

    for clone in expanded:
        is_last_child = ind + 1 == len(clone.left + clone.blocks + clone.right)
        if is_last_child:
            yield clone
        else:
            yield from pre_morphen(lang, clone, ind + 1)
Ejemplo n.º 3
0
def get_tranarr(st):
    '''Collect the transcriptions of the terminals below ``st``.

    A terminal node (``is_terminalc(st.type)`` is true) returns its own
    ``st.transcription`` unchanged.  For any other node the children are
    visited in ``left + cblocks + right`` order and every result that is not
    ``None`` is concatenated into a new list.

    Returns:
        ``st.transcription`` for a terminal, otherwise a freshly built list.

    Raises:
        Exception: if ``st.type`` is ``None``.
    '''
    if st.type is None:
        raise Exception('%s has None type' % st.meaning)
    if is_terminalc(st.type):
        return st.transcription
    r = []
    # NOTE(review): this walks ``cblocks``, not ``blocks`` -- confirm that is
    # the intended attribute.
    for c in st.left + st.cblocks + st.right:
        t = get_tranarr(c)
        # Identity check: ``None`` is a singleton, ``!=`` may be overridden.
        if t is not None:
            r += t
    return r
Ejemplo n.º 4
0
def lang_to_case_frame(unit):
    '''Translates the source language AST to Interlingua.

    Builds a case frame (a plain dict) for ``unit`` and returns it wrapped as
    ``{unit.type: frame}``.  Three shapes of ``unit`` are handled:

    * terminal (``is_terminalc(unit.type)``): every attribute listed in
      ``transferred_attributes[unit.type]`` is copied, falling back to
      ``default`` when the value is ``None``;
    * more than one block and a ``relation``: the conjunction data of the
      relation is stored and the frames of all blocks are listed under the
      type of the first block;
    * anything else: frames of the ``left``/``right`` elements whose type is
      not in ``modificators`` are merged in with ``update_dict``, the
      unit's own transferred attributes are copied, and the blocks are
      translated recursively.

    Raises:
        NotImplementedError: a non-terminal unit has ``None`` for a
            transferred attribute that has no entry in ``default`` and is
            not listed in ``exclusions``.
    '''
    cf = {}
    if is_terminalc(unit.type):
        for c in transferred_attributes[unit.type]:
            cf[c] = unit.attr(c)
            if cf[c] is None and c in default:
                cf[c] = default[c]
    elif len(unit.blocks) > 1 and unit.relation is not None:
        t = unit.blocks[0].type
        cf[concept["conj-str"]] = unit.relation.conj_str
        cf[concept["conj-type"]] = unit.relation.conj_type
        cf[concept["conj-function"]] = unit.relation.function_number
        cf[t] = [lang_to_case_frame(y)[y.type] for y in unit.blocks]
    else:
        for e in unit.left + unit.right:
            if e.type not in modificators: ##and (e.type != TERMINAL_DETERMINER or not e.attr(concept["difinity"])): #OR JUST TERMINAL_ARTICLE
                update_dict(cf, lang_to_case_frame(e))
        for c in transferred_attributes[unit.type]:
            value = unit.attr(c)
            if value is None:
                if c in default:
                    cf[c] = default[c]
                elif c not in exclusions:
                    print(unit.type, c)
                    raise NotImplementedError(
                        "no value and no default for attribute %r of unit type %r"
                        % (c, unit.type))
                    #continue
            else:
                cf[c] = value

        if len(unit.blocks) == 1:
            # While a quantity is set on this unit, "real-number" is hidden
            # from the noun attribute list for the duration of the recursive
            # call.  The decision is taken once, the entry is removed only if
            # it is actually present (a nested call may already have removed
            # it), and it is restored in ``finally`` so the shared
            # ``transferred_attributes`` table is never left modified.
            removed = False
            if cf.get(concept["quantity"], None) not in [None, NONE_VALUE]:
                noun_attributes = transferred_attributes[const.type["noun"]]
                if concept["real-number"] in noun_attributes:
                    noun_attributes.remove(concept["real-number"])
                    removed = True
            try:
                cf.update(lang_to_case_frame(unit.blocks[0]))
            finally:
                if removed:
                    noun_attributes.append(concept["real-number"])
        else:
            # Group the frames of the blocks by block type.
            for b in unit.blocks:
                if b.type in cf:
                    cf[b.type] += [lang_to_case_frame(b)[b.type]]
                else:
                    cf[b.type] = [lang_to_case_frame(b)[b.type]]

#        #mosaic translation
#        if not curr_type in cf.keys():
#            for i in min_str.get(curr_type, []):
#                if not i in cf.keys():
#                    print(str(i), str(curr_type), cf)
#                    raise NotImplementedError()

    return {unit.type: cf}
Ejemplo n.º 5
0
def morphen(lang, st, ind = 0):
    '''Morphological generation.

    This is a generator.  ``st.parent`` is reset to ``None`` first.

    Terminal node (``is_terminalc(st.type)``):
      * if ``is_empty_word(st)`` holds, ``st`` itself is yielded and nothing
        else;
      * otherwise the words of ``lang.words`` that pass ``eqx`` for every key
        of ``concept`` (the ``concept["tags"]`` key is exempt) are run through
        ``score_tags``.  For each resulting variant a deep copy of ``st`` is
        yielded with ``text``, ``transcription`` and every ``concept``
        attribute taken from the variant.

    Non-terminal node: the child at position ``ind`` of
    ``left + blocks + right`` is expanded recursively; each alternative is
    placed into a deep copy of ``st``, kept only if the alternative's
    ``refresh()`` is truthy, the copies are passed through
    ``the_bests(lang, eupony_score, ...)`` and then either yielded (last
    child) or expanded further at ``ind + 1``.

    Raises:
        Exception: no variant was found for a terminal, or a copied terminal
            has ``type`` set to ``None``.
    '''
    st.parent = None

    if is_terminalc(st.type):
        if is_empty_word(st):
            yield st
            return

        matching = [word for word in lang.words
                    if all(key == concept["tags"] or eqx(key, word, st) for key in concept)] #musteq
        variants = score_tags(matching, st)

        if variants is None or variants == []:
            raise Exception("Not found in the '%s' dictionary %s" % (lang.name,  st.descr()))
        if PRINT_TO_CONSOLE and len(variants) > 1:
            alternatives = str([v.descr() for v in variants])
            print("CAUTION: Ambiguity in 'morphen' at %s alternatives %d (%s)" % (st.descr(), len(variants), alternatives))

        for variant in variants:
            clone = deepcopy(st)
            if clone.type is None:
                raise Exception('%s hasn\'t type' % str(st))
            clone.text = variant.text
            clone.transcription = variant.transcription
            for key in concept:
                clone.attr(key, variant.attr(key))
            yield clone
        return

    expanded = []
    for child in morphen(lang, (st.left+st.blocks+st.right)[ind]):
        clone = deepcopy(st)
        # Put the alternative into the slot that position ``ind`` maps to.
        if ind < len(clone.left):
            clone.left[ind] = child
        elif ind < len(clone.left + clone.blocks):
            clone.blocks[ind - len(clone.left)] = child
        else:
            clone.right[ind - len(clone.left + clone.blocks)] = child
        child.parent = clone
        if child.refresh():
            expanded.append(clone)

    expanded = the_bests(lang, eupony_score, expanded)

    for clone in expanded:
        is_last_child = ind + 1 == len(clone.left+clone.blocks+clone.right)
        if is_last_child:
            yield clone
        else:
            yield from morphen(lang, clone, ind + 1)
Ejemplo n.º 6
0
Archivo: kb.py Proyecto: arne-cl/fosay
def meaning_shift(cf, lang):
    '''Interlingua approximation.

    Anything that is not exactly a ``dict`` is returned untouched.  A dict is
    rewritten in place and returned:

    * values under non-terminal keys are processed recursively;
    * a list under a terminal key is replaced by the list of its recursively
      processed items;
    * any other value under a terminal key is looked at by lemma: when the
      lemma is not ``None``, is missing from ``lang.meanings`` and has an
      entry in ``cf_diff``, the key is deleted and the contents of the
      (recursively processed) ``cf_diff`` entry are merged into ``cf``,
      keeping the original ``order-number``.
    '''
    if type(cf) is not dict:
        return cf

    # Iterate over a snapshot of the keys: the body adds and deletes keys.
    for key in list(cf.keys()):
        if not is_terminalc(key):
            cf[key] = meaning_shift(cf[key], lang)
            continue

        if type(cf[key]) is list:
            cf[key] = [meaning_shift(item, lang) for item in cf[key]]
            continue

        lemma = cf[key][concept["lemma"]]
        if lemma is None:
            continue
        if lemma in lang.meanings.keys():
            continue
        if lemma not in cf_diff:
            continue

        # Replace the entry with its definition from ``cf_diff``.
        replacement = meaning_shift(cf_diff[lemma], lang)
        order_number = cf[key][concept["order-number"]]
        del cf[key]
        # The definition is wrapped one level deep; unwrap via its first key.
        first_key = list(replacement.keys())[0]
        definition = replacement[first_key]
        # Merged keys that are also in the snapshot get visited again later.
        for merged_key in definition.keys():
            cf[merged_key] = definition[merged_key]
        cf[concept["order-number"]] = order_number

    return cf