def _extractFeatures(self):
        """Select the words that best separate the good text from the bad.

        Every distinct word returned by ``TextHelpers.getwords`` for
        ``self._goodText`` is scored as its frequency in the good text
        minus its frequency in the bad text, where a frequency is
        ``TextHelpers.countwordoccurences(word, text)`` divided by the
        matching word count (``self._goodWordCount`` or
        ``self._badWordCount``).

        Returns:
            A list of ``WordClassificationFeature`` objects, one for each
            of the ``self._featureNumber`` highest-scoring words, in
            descending score order.
        """
        # Build the vocabulary once; both frequency tables are keyed by it.
        # An empty vocabulary never reaches either division below.
        goodWords = set(TextHelpers.getwords(self._goodText))

        goodWordFrequencies = {
            word: TextHelpers.countwordoccurences(word, self._goodText) /
            self._goodWordCount
            for word in goodWords
        }

        if self._badWordCount:
            badWordFrequencies = {
                word: TextHelpers.countwordoccurences(word, self._badText) /
                self._badWordCount
                for word in goodWords
            }
        else:
            # A zero bad word count would raise ZeroDivisionError; with no
            # bad words to count, every bad frequency is taken as 0.
            badWordFrequencies = dict.fromkeys(goodWords, 0)

        # Both tables share the same keys, so direct indexing is safe.
        keptWords = sorted(goodWordFrequencies,
                           key=(lambda k: goodWordFrequencies[k] -
                                badWordFrequencies[k]),
                           reverse=True)[:self._featureNumber]

        return [WordClassificationFeature(x) for x in keptWords]
    def _extractFeatures(self):
        """Build string features from the highest-gain candidates.

        The distinct words of ``self._goodText`` are split by length:
        words no longer than ``self._minlength`` are scored directly with
        ``self._stringGain``, and the remaining words are handed to
        ``self._getSubstringGains``. The short-word scores are merged into
        the mapping returned by that helper (in place, overriding any
        duplicate keys).

        Returns:
            A list of ``StringClassificationFeature`` objects for those of
            the ``self._featureNumber`` best-scoring candidates whose gain
            is strictly greater than 1.0, in descending gain order.
        """
        vocabulary = set(TextHelpers.getwords(self._goodText))
        shortOnes = {w for w in vocabulary if len(w) <= self._minlength}

        # Short words are scored before the substring search runs.
        shortScores = {w: self._stringGain(w) for w in shortOnes}

        gains = self._getSubstringGains(vocabulary - shortOnes)
        gains.update(shortScores)

        ranked = sorted(gains.keys(), key=gains.get, reverse=True)
        candidates = ranked[:self._featureNumber]

        # The gain threshold is applied only after truncating to the
        # feature budget, so fewer than _featureNumber items may remain.
        features = []
        for candidate in candidates:
            if gains[candidate] > 1.0:
                features.append(StringClassificationFeature(candidate))
        return features