Exemple #1
0
import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb

# Sample vocabulary used to compare the three NLTK stemmers side by side.
words = [
    'table',
    'probably',
    'wolves',
    'playing',
    'is',
    'dog',
    'the',
    'beaches',
    'grounded',
    'dreamt',
    'envision',
]

pt_stemmer = pt.PorterStemmer()  # Porter stemmer: relatively lenient
lc_stemmer = lc.LancasterStemmer()  # Lancaster stemmer: relatively strict
sb_stemmer = sb.SnowballStemmer('english')  # Snowball stemmer: in between

# One row per word: the word itself followed by its Porter, Lancaster and
# Snowball stems, each right-aligned in an 8-character column.
row_format = '%8s %8s %8s %8s'
for word in words:
    pt_stem, lc_stem, sb_stem = (
        stemmer.stem(word)
        for stemmer in (pt_stemmer, lc_stemmer, sb_stemmer)
    )
    print(row_format % (word, pt_stem, lc_stem, sb_stem))
Exemple #2
0
def word_lemmatizer(word):
    """Return the Lancaster stem of ``word``.

    Despite the function's name, this performs stemming, not lemmatization:
    a new ``lc.LancasterStemmer`` is built on every call and the result of
    its ``stem`` method is returned as-is.

    ``lc`` must already be bound at module level (the file imports
    ``nltk.stem.lancaster`` under that alias).
    """
    return lc.LancasterStemmer().stem(word)
Exemple #3
0
import nltk.stem.porter as pt 
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb 
import nltk.stem as ns 

# Words that each stemmer below is applied to.
words = [
    'table', 'probably', 'wolves', 'playing', 'is',
    'the', 'beaches', 'grounded', 'dreamt', 'envision',
]

pt_stemmer = pt.PorterStemmer()  # relatively lenient
lc_stemmer = lc.LancasterStemmer()  # relatively strict
sb_stemmer = sb.SnowballStemmer('english')  # in between
# WordNet lemmatizer instance; the loop below does not use it.
lemmetizer = ns.WordNetLemmatizer()

# Print the word and its three stems in 8-character, right-aligned columns.
for word in words:
    stems = [
        stemmer.stem(word)
        for stemmer in (pt_stemmer, lc_stemmer, sb_stemmer)
    ]
    pt_stem, lc_stem, sb_stem = stems
    print('%8s %8s %8s %8s' % (word, pt_stem, lc_stem, sb_stem))
Exemple #4
0
import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb
# Words fed to every stemmer.
# NOTE(review): 'the ' carries a trailing space, unlike the other entries;
# it looks like a typo but is kept as-is - confirm before changing.
words = [
    'table', 'probably', 'wolves', 'playing', 'is', 'dog', 'the ', 'beeches',
    'grounded', 'dreamt', 'envision'
]

# One factory per algorithm, so each stemmer is only constructed right
# before its own pass over the words.
stemmer_factories = (
    pt.PorterStemmer,
    lc.LancasterStemmer,
    lambda: sb.SnowballStemmer('english'),
)

for position, make_stemmer in enumerate(stemmer_factories):
    # An 80-dash rule separates consecutive passes (none before the first).
    if position:
        print('-' * 80)
    stemmer = make_stemmer()
    for word in words:
        stem = stemmer.stem(word)
        print(stem)
Exemple #5
0
"""
@author: Administrator
"""

import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb

# Words to run through each stemmer.
# NOTE(review): 'palying' looks like a typo for 'playing'; it is kept because
# the recorded output at the bottom was produced with it.
words = [
    'table',
    'probably',
    'wolves',
    'dreamt',
    'palying',
    'is',
    'beaches',
    'envision',
    'grounded',
]

# Extract word stems.
stemmer_porter = pt.PorterStemmer()  # relatively lenient
stemmer_lancaster = lc.LancasterStemmer()  # relatively strict
stemmer_snowball = sb.SnowballStemmer('english')  # moderate

# Each row shows the word and its Porter, Lancaster and Snowball stems,
# every field padded to at least 10 characters.
row_template = '{:10} {:10} {:10} {:10}'
for word in words:
    pstem, lstem, sstem = (
        stemmer_porter.stem(word),
        stemmer_lancaster.stem(word),
        stemmer_snowball.stem(word),
    )
    print(row_template.format(word, pstem, lstem, sstem))

# Recorded output (word, Porter, Lancaster, Snowball):
# table      tabl       tabl       tabl
# probably   probabl    prob       probabl
# wolves     wolv       wolv       wolv
# dreamt     dreamt     dreamt     dreamt
# palying    pali       paly       pali
# is         is         is         is
# beaches    beach      beach      beach
# envision   envis      envid      envis
# grounded   ground     ground     ground