def test_full_gain(self):
    """
    When the same function is used as both attribute and target, the
    reported gain must equal the entropy of that target.

    Feeds the integers 0..49, classified by ``x % 7``, to both an
    OnlineInformationGain and an OnlineEntropy and compares the results.
    """
    def mod_seven(value):
        return value % 7

    info_gain = OnlineInformationGain(mod_seven, mod_seven)
    target_entropy = OnlineEntropy(mod_seven)

    # Both counters must see exactly the same stream of samples.
    for sample in range(50):
        info_gain.add(sample)
        target_entropy.add(sample)

    self.assertEqual(info_gain.get_gain(), target_entropy.get_entropy())
    # The gain is also expected to be non-negative.
    self.assertGreaterEqual(info_gain.get_gain(), 0)
def test_full_gain(self):
    """
    When the same function is used as both attribute and target, the
    reported gain must equal the entropy of that target.

    Feeds the integers 0..49, classified by ``x % 7``, to both an
    OnlineInformationGain and an OnlineEntropy and compares the results.
    """
    target = lambda x: x % 7
    gain = OnlineInformationGain(target, target)
    entropy = OnlineEntropy(target)
    # ``range`` instead of ``xrange``: ``xrange`` is undefined on Python 3,
    # and for 50 items the list built on Python 2 is negligible.
    for i in range(50):
        gain.add(i)
        entropy.add(i)
    self.assertEqual(gain.get_gain(), entropy.get_entropy())
    # The gain is also expected to be non-negative.
    self.assertGreaterEqual(gain.get_gain(), 0)
def importance(self, attribute, examples):
    """
    Score `attribute` by its information gain with respect to
    `self.target`, accumulated over every item in `examples`.

    AIMA suggests that importance should be the information gain, but
    only defines it for binary features, so this implementation follows
    the Wikipedia article instead:
    http://en.wikipedia.org/wiki/Information_gain_in_decision_trees

    Returns whatever ``OnlineInformationGain.get_gain()`` reports after
    all examples have been added.
    """
    tracker = OnlineInformationGain(attribute, self.target)
    feed = tracker.add
    for item in examples:
        feed(item)
    return tracker.get_gain()
def importance(self, attribute, examples):
    """
    Compute the information gain obtained by splitting `examples` on
    `attribute`, relative to `self.target`.

    AIMA hints that importance should be the information gain; since it
    only spells that out for binary features, the computation here is
    based on the Wikipedia article:
    http://en.wikipedia.org/wiki/Information_gain_in_decision_trees
    """
    online_gain = OnlineInformationGain(attribute, self.target)
    # Stream the examples one at a time into the online counter.
    for sample in examples:
        online_gain.add(sample)
    result = online_gain.get_gain()
    return result
def test_no_gain(self):
    """
    With a function that always returns None used as both attribute and
    target, the gain after adding 30 samples must be exactly zero.
    """
    def always_none(_value):
        return None

    counter = OnlineInformationGain(always_none, always_none)
    for sample in range(30):
        counter.add(sample)

    self.assertEqual(counter.get_gain(), 0)
def test_starts_in_zero(self):
    """
    A freshly built OnlineInformationGain, with nothing added yet, must
    report zero gain, no target class counts and no branches.
    """
    fresh = OnlineInformationGain(lambda x: None, lambda x: None)

    self.assertEqual(fresh.get_gain(), 0)

    # Materialise the (key, count) pairs so they can be compared to a list.
    class_counts = fresh.get_target_class_counts()
    self.assertEqual(list(class_counts.items()), [])

    self.assertEqual(fresh.get_branches(), [])
def test_no_gain(self):
    """
    With a function that always returns None used as both attribute and
    target, the gain after adding 30 samples must be exactly zero.
    """
    f = lambda x: None
    gain = OnlineInformationGain(f, f)
    # ``range`` instead of ``xrange``: ``xrange`` is undefined on Python 3,
    # and for 30 items the list built on Python 2 is negligible.
    for i in range(30):
        gain.add(i)
    self.assertEqual(gain.get_gain(), 0)
def test_starts_in_zero(self):
    """
    A freshly built OnlineInformationGain, with nothing added yet, must
    report zero gain, no target class counts and no branches.
    """
    gain = OnlineInformationGain(lambda x: None, lambda x: None)
    self.assertEqual(gain.get_gain(), 0)
    # Wrap in list(): on Python 3 a dict's items() is a view object that
    # never compares equal to a list, so the bare comparison would fail
    # even when there are no counts. On Python 2 list() just copies.
    self.assertEqual(list(gain.get_target_class_counts().items()), [])
    self.assertEqual(gain.get_branches(), [])