def _extractFeatures(self):
    """Pick the words most characteristic of the good text as features.

    Every distinct word returned by ``TextHelpers.getwords(self._goodText)``
    gets a relative frequency in the good text (occurrences divided by
    ``self._goodWordCount``) and one in the bad text (occurrences divided by
    ``self._badWordCount``).  Words are ranked by good frequency minus bad
    frequency, highest first, and the first ``self._featureNumber`` of them
    are kept.

    Returns:
        A list of ``WordClassificationFeature`` objects, one per kept word,
        in ranking order.
    """
    # Only words seen in the good text are candidates, so extract them once
    # and reuse the same set for both frequency tables.
    candidateWords = set(TextHelpers.getwords(self._goodText))

    def frequency(word, text, totalWords):
        # A zero word count (e.g. an empty text) would otherwise raise
        # ZeroDivisionError; treat the word as never occurring instead.
        if not totalWords:
            return 0
        # NOTE(review): relies on true division; if this file runs under
        # Python 2 without `from __future__ import division`, confirm the
        # counts are floats.
        return TextHelpers.countwordoccurences(word, text) / totalWords

    goodWordFrequencies = {
        word: frequency(word, self._goodText, self._goodWordCount)
        for word in candidateWords
    }
    badWordFrequencies = {
        word: frequency(word, self._badText, self._badWordCount)
        for word in candidateWords
    }

    def rankingKey(word):
        # Negating the score sorts the best words first; the word itself
        # breaks ties so the selection does not depend on set iteration
        # order, which varies between runs under hash randomization.
        score = goodWordFrequencies[word] - badWordFrequencies[word]
        return (-score, word)

    keptWords = sorted(candidateWords, key=rankingKey)[:self._featureNumber]
    return [WordClassificationFeature(word) for word in keptWords]
def CountOccurrences(self, text):
    """Count occurrences of this feature's word in ``text``.

    Delegates to ``TextHelpers.countwordoccurences`` with ``self._word`` and
    returns its result unchanged.
    """
    word = self._word
    occurrences = TextHelpers.countwordoccurences(word, text)
    return occurrences