import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb

# Sample vocabulary used to compare the three stemmers side by side.
words = [
    'table', 'probably', 'wolves', 'playing', 'is', 'dog',
    'the', 'beaches', 'grounded', 'dreamt', 'envision',
]

pt_stemmer = pt.PorterStemmer()  # Porter stemmer: relatively lenient
lc_stemmer = lc.LancasterStemmer()  # Lancaster stemmer: relatively strict
sb_stemmer = sb.SnowballStemmer('english')  # Snowball stemmer: in between

# Print one row per word: the word itself followed by its Porter, Lancaster
# and Snowball stems, each right-aligned in an 8-character column.
for word in words:
    pt_stem, lc_stem, sb_stem = (
        stemmer.stem(word)
        for stemmer in (pt_stemmer, lc_stemmer, sb_stemmer)
    )
    print('%8s %8s %8s %8s' % (word, pt_stem, lc_stem, sb_stem))
def word_lemmatizer(word):
    """Return the Lancaster stem of ``word``.

    Despite the function's name, this performs stemming rather than
    lemmatization: it builds a fresh ``lc.LancasterStemmer`` on every call
    and returns the result of that stemmer's ``stem`` method.
    """
    return lc.LancasterStemmer().stem(word)
import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb
import nltk.stem as ns

# Sample vocabulary used to compare the stemmers.
words = [
    'table', 'probably', 'wolves', 'playing', 'is',
    'the', 'beaches', 'grounded', 'dreamt', 'envision',
]

pt_stemmer = pt.PorterStemmer()  # relatively lenient
lc_stemmer = lc.LancasterStemmer()  # relatively strict
sb_stemmer = sb.SnowballStemmer('english')  # in between

# WordNet lemmatizer instance; it is not used by the statements visible here.
lemmetizer = ns.WordNetLemmatizer()

# One output row per word: the word, then its Porter, Lancaster and Snowball
# stems, each right-aligned in an 8-character column.
for word in words:
    stems = tuple(
        stemmer.stem(word)
        for stemmer in (pt_stemmer, lc_stemmer, sb_stemmer)
    )
    pt_stem, lc_stem, sb_stem = stems
    print('%8s %8s %8s %8s' % ((word,) + stems))
import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb

# NOTE(review): 'the ' carries a trailing space -- confirm whether that is
# intentional before relying on its stem.
words = [
    'table', 'probably', 'wolves', 'playing', 'is', 'dog',
    'the ', 'beeches', 'grounded', 'dreamt', 'envision',
]

# Stemmers are built lazily, one at a time, in the order Porter, Lancaster,
# Snowball, so each is constructed only after the previous one's output.
stemmer_factories = (
    pt.PorterStemmer,
    lc.LancasterStemmer,
    lambda: sb.SnowballStemmer('english'),
)

for position, make_stemmer in enumerate(stemmer_factories):
    # An 80-dash rule separates consecutive stemmers' output; none is
    # printed before the first stemmer or after the last.
    if position:
        print('-' * 80)
    stemmer = make_stemmer()
    for word in words:
        stem = stemmer.stem(word)
        print(stem)
"""
@author: Administrator
"""
import nltk.stem.porter as pt
import nltk.stem.lancaster as lc
import nltk.stem.snowball as sb

# NOTE(review): 'palying' looks like a typo for 'playing'; it is kept as-is
# because the recorded output at the end of this script was produced with it.
words = [
    'table', 'probably', 'wolves', 'dreamt', 'palying',
    'is', 'beaches', 'envision', 'grounded',
]

# Extract stems with three different stemmers.
stemmer_porter = pt.PorterStemmer()  # relatively lenient
stemmer_lancaster = lc.LancasterStemmer()  # relatively strict
stemmer_snowball = sb.SnowballStemmer('english')  # moderate

# One output row per word: the word, then its Porter, Lancaster and Snowball
# stems, each padded to a 10-character column.
for word in words:
    pstem = stemmer_porter.stem(word)
    lstem = stemmer_lancaster.stem(word)
    sstem = stemmer_snowball.stem(word)
    print('{:10} {:10} {:10} {:10}'.format(word, pstem, lstem, sstem))

# Output recorded by the original author:
# table      tabl       tabl       tabl
# probably   probabl    prob       probabl
# wolves     wolv       wolv       wolv
# dreamt     dreamt     dreamt     dreamt
# palying    pali       paly       pali
# is         is         is         is
# beaches    beach      beach      beach
# envision   envis      envid      envis
# grounded   ground     ground     ground