def lightStemAr(word_list):
    """Light-stem a list of Arabic tokens using NLTK's ISRI stemmer.

    Each token is first normalized (diacritics / short-vowel marks removed).
    Stop words are then excluded; every remaining token has its length-3 and
    length-2 prefixes and suffixes stripped, a connective waw removed, and
    its initial hamza normalized to a bare alif.

    Parameters:
        word_list: iterable of Arabic word strings.

    Returns:
        A single space-separated string of the processed tokens.
    """
    stemmer = ISRIStemmer()
    stemmed = []
    for token in word_list:
        token = stemmer.norm(token, num=1)      # strip diacritics (short vowels)
        if token not in stemmer.stop_words:     # stop words are dropped from output
            token = stemmer.pre32(token)        # remove length-3 then length-2 prefixes
            token = stemmer.suf32(token)        # remove length-3 then length-2 suffixes
            token = stemmer.waw(token)          # remove connective waw before waw-initial word
            token = stemmer.norm(token, num=2)  # initial hamza -> bare alif
            stemmed.append(token)
    return ' '.join(stemmed)
def lightStemAr(word_list):
    """Light Arabic stemming via the ISRI stemmer.

    NOTE(review): this is a byte-for-byte duplicate of the ``lightStemAr``
    defined immediately above and shadows it at import time — consider
    deleting one of the two copies.

    Normalizes each token, skips stop words entirely, and for the rest
    strips length-3/length-2 affixes, a leading connective waw, and
    normalizes an initial hamza to bare alif. Returns the surviving tokens
    joined by single spaces.
    """
    arstemmer = ISRIStemmer()

    def _light_stem(token):
        # Returns the stemmed token, or None when it is a stop word.
        token = arstemmer.norm(token, num=1)   # remove diacritics
        if token in arstemmer.stop_words:
            return None
        token = arstemmer.pre32(token)         # 3- then 2-char prefixes
        token = arstemmer.suf32(token)         # 3- then 2-char suffixes
        token = arstemmer.waw(token)           # connective waw
        return arstemmer.norm(token, num=2)    # hamza -> bare alif

    return ' '.join(s for s in map(_light_stem, word_list) if s is not None)
def light_stem(text):
    """Light-stem whitespace-separated Arabic text with the ISRI stemmer.

    Each word is normalized (diacritics removed); non-stopwords additionally
    have length-3/length-2 prefixes and suffixes stripped, a connective waw
    removed, and an initial hamza normalized to bare alif.

    Parameters:
        text: a string of whitespace-separated Arabic words.

    Returns:
        The processed words joined into a single space-separated string.
    """
    # BUG FIX: the original did ``words = text``, which iterated the string
    # character-by-character — stemming single characters is meaningless.
    # Split on whitespace instead, matching the corrected sibling definition
    # later in this file.
    words = text.split()
    result = list()
    stemmer = ISRIStemmer()
    for word in words:
        word = stemmer.norm(word, num=1)      # remove diacritics
        if word not in stemmer.stop_words:    # stop words excluded from output
            word = stemmer.pre32(word)        # length-3 then length-2 prefixes
            word = stemmer.suf32(word)        # length-3 then length-2 suffixes
            word = stemmer.waw(word)          # connective waw
            word = stemmer.norm(word, num=2)  # initial hamza -> bare alif
            result.append(word)
    return ' '.join(result)
def light_stem(text):
    """Apply ISRI light stemming to whitespace-separated Arabic text.

    Pipeline per word: diacritic removal (norm num=1); stop words are then
    dropped; surviving words get length-3/length-2 prefix and suffix
    stripping, connective-waw removal, and hamza-to-alif normalization
    (norm num=2). The full ISRI root-extraction steps (pattern processing
    for words of length 4-6, short affix normalization) are deliberately
    left out — this is light stemming only.

    Parameters:
        text: a string of whitespace-separated Arabic words.

    Returns:
        A space-joined string of the stemmed, non-stopword tokens.
    """
    stemmer = ISRIStemmer()
    kept = []
    for token in text.split():
        token = stemmer.norm(token, num=1)    # strip short-vowel diacritics
        if token in stemmer.stop_words:
            continue                          # stop words excluded from result
        for step in (stemmer.pre32, stemmer.suf32, stemmer.waw):
            token = step(token)               # affix and connective-waw removal
        token = stemmer.norm(token, num=2)    # initial hamza -> bare alif
        kept.append(token)
    return ' '.join(kept)