def _extractFeatures(self):
        """Pick the words that best separate the good text from the bad text.

        Every distinct word returned by ``TextHelpers.getwords`` for
        ``self._goodText`` is scored as

            occurrences in good text / self._goodWordCount
            - occurrences in bad text / self._badWordCount

        The ``self._featureNumber`` highest-scoring words are kept and each
        one is wrapped in a ``WordClassificationFeature``.

        Returns:
            A list of ``WordClassificationFeature`` objects, highest score
            first.  Words with equal scores keep the iteration order of the
            candidate set.

        NOTE(review): both word counts are used as divisors without a check,
        so a zero count presumably raises ``ZeroDivisionError`` as soon as
        the good text contains a word -- confirm callers never pass an
        empty corpus.
        """
        goodText = self._goodText
        badText = self._badText
        countOccurrences = TextHelpers.countwordoccurences

        # Only words of the good text can become features, so a single
        # vocabulary drives both frequency computations.  Building it once
        # replaces two identical tokenisation passes and a bad-frequency
        # table that was keyed by the good vocabulary anyway.
        candidateWords = set(TextHelpers.getwords(goodText))

        # A candidate that never occurs in the bad text simply gets a bad
        # frequency of 0 here; no lookup default is needed.
        scores = {
            word: (countOccurrences(word, goodText) / self._goodWordCount -
                   countOccurrences(word, badText) / self._badWordCount)
            for word in candidateWords
        }

        rankedWords = sorted(scores, key=scores.get, reverse=True)

        return [WordClassificationFeature(word)
                for word in rankedWords[:self._featureNumber]]
Esempio n. 2
0
 def CountOccurrences(self, text):
     """Return the number of occurrences of this feature's word in ``text``.

     The counting itself is delegated to
     ``TextHelpers.countwordoccurences``, called with ``self._word``.
     """
     occurrences = TextHelpers.countwordoccurences(self._word, text)
     return occurrences