Ejemplo n.º 1
0
    def Lemmatisation(self):
        """Light-stem every preprocessed word and return the stems.

        Each word produced by ``self.Pretraitement()`` is run through an
        ``ArabicLightStemmer``. A ``"prefix + stem + suffix"`` string is built
        for every word, and the collected strings are passed to ``self.aff``
        inside a dict under the ``'words'`` key.

        Returns:
            list: the stem of each word, in the order the words were produced.
        """
        ArListem = ArabicLightStemmer()
        words_root = []
        breakdowns = []
        for w in self.Pretraitement():
            # The return value is not needed: the segmentation is read back
            # from the stemmer through the getters below.
            ArListem.light_stem(w)
            prefix = ArListem.get_prefix()
            stem = ArListem.get_stem()
            suffix = ArListem.get_suffix()
            breakdowns.append(prefix + " + " + stem + " + " + suffix)
            words_root.append(stem)

        self.aff({'words': breakdowns})
        return words_root
Ejemplo n.º 2
0
# Tag every sentence and append one "|"-separated line per token to `ls`.
for sentence in corps:
    tagged = nlp.pos_tag(sentence)
    # Drop a leading U+FEFF (ZERO WIDTH NO-BREAK SPACE) token when present.
    if tagged[0][0] == u'\ufeff':
        tagged = tagged[1:]

    parsed = nlp.dependency_parse(sentence)
    if len(parsed) != len(tagged):
        relations = parsed
    else:
        # Same length: copy the parse and insert a ("NONE", k, k) placeholder
        # right after the k-th entry, where k is the third field of the first
        # parse entry.
        pivot = parsed[0][2]
        relations = []
        for position, entry in enumerate(parsed, start=1):
            relations.append(entry)
            if position == pivot:
                relations.append(("NONE", pivot, pivot))
    # The first entry is discarded; the remainder is indexed in step with
    # `tagged` below.
    relations = relations[1:]

    for index, pair in enumerate(tagged):
        token = pair[0]
        # The return value is unused; prefix and suffix are read back from
        # the stemmer right after the call.
        ArListem.light_stem(token)
        prefix = ArListem.get_prefix()
        suffix = ArListem.get_suffix()

        line = token + "|" + pair[1] + "|"
        relation = relations[index]
        line += relation[0] + "|" + str(relation[1] - 1) + "|"
        line += func([token, pair[1]], classifier)
        ls.append(line + "p=" + prefix + "|s=" + suffix + "\n")
    # Sentence terminator line.
    ls.append(". PUNC\n")

# Flush all collected lines, then release both file handles.
corpw.writelines(ls)

corp.close()
corpw.close()
Ejemplo n.º 3
0
'''
Created on 15 juin 2019

@author: KHALID-RAMI
'''
# coding=utf8
import pyarabic.arabrepr
from tashaphyne.stemming import ArabicLightStemmer
# Demo: light-stem one Arabic word and print what each stemmer getter returns.
arepr = pyarabic.arabrepr.ArabicRepr()
# NOTE(review): this rebinds the builtin name `repr` at module level; it is
# kept because code outside this view may rely on it.
repr = arepr.repr

ArListem = ArabicLightStemmer()
word = u'قال'
stem = ArListem.light_stem(word)

# Each getter is looked up and called immediately before its value is
# printed, one after another, in this order.
for accessor, args in (
    ("get_stem", ()),
    ("get_root", ()),
    ("get_left", ()),
    ("get_prefix", (2,)),
    ("get_right", ()),
    ("get_unvocalized", ()),
):
    print(getattr(ArListem, accessor)(*args))