Beispiel #1
0
def getAllWords(stopword, allDocs, porterstemmer):
    """Count stemmed, non-stopword tokens across all documents.

    Each document is split on whitespace. A token is dropped if it is a
    stopword, if its stem is a stopword, or if its stem is at most one
    character long. Every remaining stem is tallied.

    Args:
        stopword: Container of stopwords supporting the ``in`` operator
            (a dict keyed by word, a set, ...).
        allDocs: Iterable of document strings.
        porterstemmer: Object whose ``stem(word, start, end)`` method is
            called as ``stem(word, 0, len(word) - 1)`` and returns the
            stemmed string.

    Returns:
        A dict mapping each kept stem to its number of occurrences.
    """
    wordlist = {}
    for news in allDocs:
        for token in news.split():
            # Filter raw stopwords first so they are never stemmed.
            if token in stopword:
                continue
            stem = porterstemmer.stem(token, 0, len(token) - 1)
            # The stem itself may be a stopword or too short to be useful.
            if len(stem) <= 1 or stem in stopword:
                continue
            if "'" in stem:
                # Diagnostic output for stems that still carry an apostrophe.
                # Parenthesised single-argument form prints the same text on
                # Python 2 and Python 3.
                print("!" + stem + "!")
            wordlist[stem] = wordlist.get(stem, 0) + 1
    return wordlist
Beispiel #2
0
 def process(self, words):
     """Return a new list holding the stem of every item in ``words``.

     Each item is passed, in order, to ``porterstemmer.stem``;
     ``porterstemmer`` is resolved from the enclosing scope, which is
     not visible in this block.
     """
     stemmed = []
     for token in words:
         stemmed.append(porterstemmer.stem(token))
     return stemmed
Beispiel #3
0
 def process(self, words):
     """Stem each element of ``words`` and return the results as a list.

     Output order follows input order. Stemming is delegated to
     ``porterstemmer.stem``, a name defined outside this block.
     """
     result = []
     for item in words:
         result.append(porterstemmer.stem(item))
     return result