def get_root(text):
    """Return the ISRI root of every word in *text*.

    Parameters
    ----------
    text : str | list[str]
        Either a whitespace-separated string or an already-tokenized list
        of words.

    Returns
    -------
    list[str]
        The stemmed root of each word, in input order.
    """
    arabic_stemmer = ISRIStemmer()
    # Accept both a raw string and a pre-tokenized list.
    if not isinstance(text, list):
        word_list = text.split()
    else:
        word_list = text
    # BUG FIX: the original iterated over `text` instead of `word_list`,
    # so a string input was stemmed character-by-character rather than
    # word-by-word.
    return [arabic_stemmer.stem(word) for word in word_list]
def lookUpWordScore(self, word, lexicon, use_lemma):
    """Return the sentiment score for *word* from *lexicon*.

    Lookup strategy, in decreasing confidence:
      1. exact key match            -> full score
      2. shared ISRI stem           -> 25% of the key's score
      3. edit-distance similarity   -> 25% of the key's score
         (normalized match ratio > 0.7)
    Returns 0 when nothing matches.

    Note: *use_lemma* is currently unused; it is kept for interface
    compatibility with callers.
    """
    # FIX: replaced Python-2-only constructs — bare `print word` statements
    # (a syntax error on Python 3, and debug noise regardless) and
    # `dict.iterkeys()` (removed in Python 3) — with direct iteration.
    # Exact match: O(1) membership test instead of a full key scan.
    if word in lexicon:
        return lexicon[word]
    stemmer = ISRIStemmer()
    word_stem = stemmer.stem(word)  # hoisted: invariant across the loop
    for key in lexicon:
        if stemmer.stem(key) == word_stem:
            return lexicon[key] * 0.25
    # Fuzzy match via normalized edit distance; guard against an empty
    # word to avoid ZeroDivisionError.
    if word:
        for key in lexicon:
            med = nltk.metrics.edit_distance(word, key)
            match = 1 - (float(med) / len(word))
            if match > 0.7:
                return lexicon[key] * 0.25
    return 0
def lookUpWordScore(self, word, lexicon, use_lemma):
    """Return the sentiment score for *word* from *lexicon*.

    Falls through three matching tiers:
      1. exact key match            -> full score
      2. shared ISRI stem           -> 25% of the key's score
      3. edit-distance similarity   -> 25% of the key's score
         (normalized match ratio > 0.7)
    Returns 0 when no tier matches.

    Note: *use_lemma* is currently unused; it is kept for interface
    compatibility with callers.
    """
    # FIX: removed Python-2-only constructs — bare `print` statements
    # (a syntax error on Python 3, and debug noise regardless) and
    # `dict.iterkeys()` (removed in Python 3).
    # Exact match: O(1) membership test instead of scanning every key.
    if word in lexicon:
        return lexicon[word]
    stemmer = ISRIStemmer()
    word_stem = stemmer.stem(word)  # hoisted: invariant across the loop
    for key in lexicon:
        if stemmer.stem(key) == word_stem:
            return lexicon[key] * 0.25
    # Fuzzy match via normalized edit distance; guard against an empty
    # word to avoid ZeroDivisionError.
    if word:
        for key in lexicon:
            med = nltk.metrics.edit_distance(word, key)
            match = 1 - (float(med) / len(word))
            if match > 0.7:
                return lexicon[key] * 0.25
    return 0
def preprocessing_test_data(user_string):
    """Tokenize *user_string* and light-stem each token with the ISRI rules.

    Tokens that are Arabic stop words (after diacritic removal) are dropped
    from the result entirely.

    Returns a list of processed tokens.
    """
    stemmer = ISRIStemmer()
    arabic_stop_words = stopwords.words('arabic')
    tokens = n.tokenize._treebank_word_tokenizer.tokenize(user_string)
    processed = []
    for token in tokens:
        # Strip diacritics (Arabic short-vowel marks) before any other step.
        token = stemmer.norm(token, num=1)
        # Stop words are excluded from further processing and from the output.
        if token in arabic_stop_words:
            continue
        token = stemmer.pre32(token)        # strip 3- then 2-letter prefixes
        token = stemmer.suf32(token)        # strip 3- then 2-letter suffixes
        token = stemmer.waw(token)          # drop leading connective waw
        token = stemmer.norm(token, num=2)  # normalize initial hamza to bare alif
        processed.append(token)
    return processed
def light_stem_word(word):
    """Light-stem a single Arabic *word* using the ISRI affix rules.

    The input is returned unchanged when the stemmed form is not a known
    spelling (i.e. not present in the `ar_spell` lexicon); otherwise the
    stemmed form is returned. Stop words skip the affix-stripping steps.
    """
    stemmer = ISRIStemmer()
    # Remove diacritics (short-vowel marks) first.
    stemmed = stemmer.norm(word, num=1)
    # Stop words bypass affix stripping but still pass the spell check below.
    if stemmed not in stemmer.stop_words:
        stemmed = stemmer.pre32(stemmed)        # 3- then 2-letter prefixes
        stemmed = stemmer.suf32(stemmed)        # 3- then 2-letter suffixes
        stemmed = stemmer.waw(stemmed)          # leading connective 'و'
        stemmed = stemmer.norm(stemmed, num=2)  # initial hamza -> bare alif
    # Fall back to the untouched input when stemming produced an unknown word.
    return stemmed if stemmed in ar_spell else word
def Word_Steamer(self, arr):
    """Return a new list holding the ISRI stem of every word in *arr*."""
    stemmer = ISRIStemmer()
    return [stemmer.stem(word) for word in arr]
def get_root_word(word):
    """Return the ISRI root of a single Arabic *word*."""
    return ISRIStemmer().stem(word)