def get_stem(text):
    """Return the list of candidate stems for *text*.

    The word is reduced by every prefix removal offered by ``_del_prefix``
    and, for each of those, every suffix removal offered by ``_del_suffix``.
    Candidates longer than 9 characters are discarded.

    Returns:
        ``None`` when *text* has fewer than 3 characters; ``[u'الله']`` when
        ``daw_araby.stripTashkeel(text)`` equals that word; otherwise the
        list of retained candidates, in generation order (duplicates are
        not removed).
    """
    # Inputs shorter than three characters are not stemmed at all.
    if len(text) < 3:
        return None

    # This word is special-cased and is its own (only) stem.
    if daw_araby.stripTashkeel(text) == u'الله':
        return [u'الله']

    bare = daw_araby.stripHarakat(text)
    candidates = []
    for prefixless in _del_prefix(bare):
        candidates.extend(
            stem for stem in _del_suffix(prefixless) if len(stem) <= 9
        )
    return candidates
def _collect_roots(candidate, roots, all_term):
    """Record the roots derivable from *candidate* into *roots* / *all_term*.

    Every form produced by ``_del_augment(candidate)`` that is at most 5
    characters long is expanded with ``daw_araby.i3lal``. Each expanded form
    is appended to *all_term* (duplicates allowed) and, for every
    ``is_root_*`` checker that accepts it, added once to the matching list
    in *roots*. Both containers are mutated in place.
    """
    # Built per call so the checker names are resolved lazily; they may be
    # defined further down in the module.
    checkers = (
        ('lisan', is_root_lisan),
        ('taje', is_root_taje),
        ('assas', is_root_assas),
        ('mekhtar', is_root_mekhtar),
    )
    for reduced in _del_augment(candidate):
        # NOTE(review): ``reduced in roots`` tests membership against the
        # dict's KEYS ('lisan', 'assas', ...), not the collected roots, so
        # it practically never filters anything. Kept unchanged to preserve
        # existing behavior -- confirm the intent.
        if len(reduced) > 5 or reduced in roots:
            continue
        for form in daw_araby.i3lal(reduced):
            all_term.append(form)
            for source, is_root in checkers:
                if is_root(form) and form not in roots[source]:
                    roots[source].append(form)


def get_root(text):
    """Return the candidate roots of *text*, grouped by root checker.

    For every stem from ``get_stem`` the function tries, in this order:
    the stem with a trailing ``ي`` appended (only when the stem ends with
    the same character as the stripped text and that character is not
    ``ي``), then the one or two forms returned by ``daw_araby.del_tense``.

    Returns:
        ``None`` when *text* has fewer than 3 characters, before or after
        ``daw_araby.stripHarakat``. Otherwise a tuple ``(roots, all_term)``
        where ``roots`` maps ``'lisan'``, ``'assas'``, ``'taje'`` and
        ``'mekhtar'`` to lists of unique accepted roots, and ``all_term`` is
        the list of stems followed by every expanded form that was examined.
    """
    roots = {'lisan': [], 'assas': [], 'taje': [], 'mekhtar': []}
    if len(text) < 3:
        return None

    text = daw_araby.stripHarakat(text)
    stems = get_stem(text)
    if stems is None:
        # Stripping can shorten the text below get_stem's 3-character
        # minimum; previously this crashed with TypeError when iterating
        # over None. Mirror the short-input result instead.
        return None

    # Independent copy: all_term grows while stems is being iterated.
    all_term = list(stems)

    for stem in stems:
        if stem[-1] == text[-1] and text[-1] != u"ي":
            _collect_roots(stem + u"ي", roots, all_term)

        first, second = daw_araby.del_tense(stem)
        _collect_roots(first, roots, all_term)
        if first != second:
            _collect_roots(second, roots, all_term)

    return roots, all_term