예제 #1
0
파일: textanalysis.py 프로젝트: 72Zn/pyling
def getAllWords(stopword, allDocs, porterstemmer):
    """Count stemmed, non-stopword terms across all documents.

    Args:
        stopword: Container of stopwords supporting ``in`` membership tests
            (for example a dict keyed by word).
        allDocs: Iterable of document strings; each one is split on
            whitespace into tokens.
        porterstemmer: Object exposing ``stem(word, start, end)``; it is
            called with the first and last index of each token.

    Returns:
        A dict mapping every kept stem to the number of times it occurred.
    """
    wordlist = {}
    for news in allDocs:
        for token in news.split():
            # Raw stopwords are skipped before stemming.
            if token in stopword:
                continue
            stem = porterstemmer.stem(token, 0, len(token) - 1)
            # Re-check after stemming: the stem must be longer than one
            # character and must not itself be a stopword.
            if len(stem) <= 1 or stem in stopword:
                continue
            if "'" in stem:
                # Diagnostic output for stems that still contain an apostrophe.
                print("!" + stem + "!")
            wordlist[stem] = wordlist.get(stem, 0) + 1
    return wordlist
예제 #2
0
 def process(self, words):
     """Return a list holding ``porterstemmer.stem(word)`` for each word, in order.

     ``porterstemmer`` is resolved from the enclosing scope; it is not
     defined in this snippet.
     """
     stemmed = []
     for token in words:
         stemmed.append(porterstemmer.stem(token))
     return stemmed
예제 #3
0
 def process(self, words):
     """Stem every entry of ``words`` and return the results as a new list.

     Each entry is passed to ``porterstemmer.stem``; ``porterstemmer`` comes
     from the enclosing scope and is not defined in this snippet.
     """
     results = []
     results.extend(porterstemmer.stem(item) for item in words)
     return results