Esempio n. 1
0
def get_stem(text):
    """Return the candidate stems of ``text`` as a list.

    Returns None when ``text`` has fewer than 3 characters.  When
    ``daw_araby.stripTashkeel(text)`` equals the special-cased word, a
    one-element list holding that word is returned.  Otherwise the harakat
    are stripped, every result of ``_del_prefix`` is passed through
    ``_del_suffix``, and the outcomes of at most 9 characters are kept in
    the order they are produced.
    """
    if len(text) < 3:
        return None

    # This word is returned as its own single stem, bypassing affix removal.
    if daw_araby.stripTashkeel(text) == u'الله':
        return [u'الله']

    bare = daw_araby.stripHarakat(text)
    return [
        candidate
        for unprefixed in _del_prefix(bare)
        for candidate in _del_suffix(unprefixed)
        if len(candidate) <= 9
    ]
Esempio n. 2
0
def get_root(text):
    """Collect the root candidates of ``text``, grouped by root checker.

    The harakat are stripped from ``text`` and its stems are obtained from
    ``get_stem``.  For every stem, up to three derived words are examined:
    the stem with a trailing yeh appended (only when the stem ends with the
    same character as the stripped text and that character is not yeh), and
    the one or two forms returned by ``daw_araby.del_tense``.

    Returns:
        None when ``text`` is shorter than 3 characters, either as given or
        after the harakat are stripped.  Otherwise a tuple
        ``(roots, all_term)`` where ``roots`` maps each of ``'lisan'``,
        ``'assas'``, ``'taje'`` and ``'mekhtar'`` to the list of distinct
        forms accepted by the matching ``is_root_*`` function, and
        ``all_term`` is the list of stems followed by every form that was
        examined (duplicates included).
    """
    roots = {'lisan': [], 'assas': [], 'taje': [], 'mekhtar': []}
    if len(text) < 3:
        return None
    text = daw_araby.stripHarakat(text)
    stems = get_stem(text)
    if stems is None:
        # Stripping the harakat can bring the text under get_stem's minimum
        # length; treat that like any other too-short input instead of
        # failing while iterating over None.
        return None
    # Must be a copy: all_term grows inside the loop over stems.
    all_term = list(stems)
    for stem in stems:
        if stem[-1] == text[-1] != u"ي":
            _collect_roots(stem + u"ي", roots, all_term)
        first, second = daw_araby.del_tense(stem)
        _collect_roots(first, roots, all_term)
        if first != second:
            _collect_roots(second, roots, all_term)
    return roots, all_term


def _collect_roots(word, roots, all_term):
    """Record the root candidates derived from ``word``.

    Each result of ``_del_augment(word)`` that is at most 5 characters long
    is expanded with ``daw_araby.i3lal``; every expanded form is appended to
    ``all_term`` and added once to each ``roots`` list whose checker accepts
    it.  Both ``roots`` and ``all_term`` are modified in place.
    """
    # Built at call time so the is_root_* functions only need to exist when
    # the helper runs; the order matches the order the checks are applied.
    checkers = (
        ('lisan', is_root_lisan),
        ('taje', is_root_taje),
        ('assas', is_root_assas),
        ('mekhtar', is_root_mekhtar),
    )
    for candidate in _del_augment(word):
        # NOTE(review): ``candidate in roots`` tests the dict *keys*
        # ('lisan', 'assas', ...), not the collected roots, so it only
        # filters a candidate that equals one of those names.  Kept for
        # behavioural compatibility -- confirm the intended check.
        if len(candidate) > 5 or candidate in roots:
            continue
        for form in daw_araby.i3lal(candidate):
            all_term.append(form)
            for name, is_root in checkers:
                if is_root(form) and form not in roots[name]:
                    roots[name].append(form)