def Choosing_the_Best_Split(): print '>>Choosing the Best Split' reload(treepredict) print treepredict.giniimpurity(treepredict.my_data) print treepredict.entropy(treepredict.my_data) set1,set2=treepredict.divideset(treepredict.my_data,2,'yes') print treepredict.entropy(set1) print treepredict.giniimpurity(set1)
import treepredict print treepredict.divideset(treepredict.my_data,2,'yes') print treepredict.divideset1(treepredict.my_data,2,'yes') print treepredict.divideset(treepredict.my_data,3,20) print treepredict.divideset1(treepredict.my_data,3,20)
#!/usr/bin/python # # Script to demonstrate the CART-like DT classifier from # Chapter 7 of "Programming Collective Intelligence" by # T. Segaran, O'Reilly, (c) 2007 # import treepredict results = treepredict.divideset(treepredict.my_data, 2, "yes") # results is now a list of lists # See if records are divided according to FAQ field (column) ... print "\nDivision on Read FAQ field...\n" for list in results: for item in list: print "%15s %15s %5s %10d %15s" % tuple(item) # Let's see the difference between gini- and entropy-based impurities # of the current data (no splitting) print "\nParent node...\n" gini = treepredict.giniimpurity(treepredict.my_data) entr = treepredict.entropy(treepredict.my_data) print "Gini: %8f Entropy: %8f" % (gini, entr) # Let's now split on the Read FAQ field and assess impurity node1, node2 = treepredict.divideset(treepredict.my_data, 2, "yes") print "\nRead FAQ = Yes leaf node...\n" gini = treepredict.giniimpurity(node1) entr = treepredict.entropy(node1)
#! /usr/bin/.env python2 import treepredict print "Gini impurity\n" print treepredict.giniimpurity(treepredict.my_data) print "\n" print "treepredict.entropy\n" print treepredict.entropy(treepredict.my_data) print "\n" set1, set2 = treepredict.divideset(treepredict.my_data, 2, 'yes') print "Gini impurity\n" print treepredict.giniimpurity(set1) print "treepredict.entropy\n" print treepredict.entropy(set1) print '\n' tree = treepredict.buildtree(treepredict.my_data) print 'tree: ', tree print '\n' print 'classify: ', treepredict.classify(['(direct)', 'USA', 'yes', 5], tree)
def Training_the_Tree(): print '>>Training the Tree' setdata=treepredict.divideset(treepredict.my_data,2,'yes') print setdata[0] print setdata[1]
def testBasics(self):
    """Entropy of the full test data and of the FAQ='yes' branch.

    Fix: assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    """
    d = treepredict.testdata()
    self.assertAlmostEqual(1.5052408, treepredict.entropy(d))
    s1, s2 = treepredict.divideset(d, 2, 'yes')
    self.assertAlmostEqual(1.2987949, treepredict.entropy(s1))
def testStringDivide(self):
    """divideset on a string column puts the exact match in the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([('a',)], [('b',), ('c',)]),
                     treepredict.divideset([('a',), ('b',), ('c',)], 0, 'a'))
def testFloatDivide(self):
    """divideset on a float column groups values >= 3.0 into the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([(3.0, ), (4.0, )], [(1.0, ), (2.0, )]),
                     treepredict.divideset([(1.0, ), (2.0, ), (3.0, ), (4.0, )], 0, 3.0))
def testIntegerDivide(self):
    """divideset on an int column groups values >= 3 into the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([(3,), (4,)], [(1,), (2,)]),
                     treepredict.divideset([(1,), (2,), (3,), (4,)], 0, 3))
def testFloatDivide(self):
    """divideset on a float column groups values >= 3.0 into the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([(3.0,), (4.0,)], [(1.0,), (2.0,)]),
                     treepredict.divideset([(1.0,), (2.0,), (3.0,), (4.0,)], 0, 3.0))
def testBasics(self):
    """Gini impurity of the full test data and of the FAQ='yes' branch.

    Fix: assertAlmostEquals is a deprecated alias; use assertAlmostEqual.
    """
    d = treepredict.testdata()
    self.assertAlmostEqual(0.6328125, treepredict.giniimpurity(d))
    s1, s2 = treepredict.divideset(d, 2, 'yes')
    self.assertAlmostEqual(0.53125, treepredict.giniimpurity(s1))
def testStringDivide(self):
    """divideset on a string column puts the exact match in the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([('a', )], [('b', ), ('c', )]),
                     treepredict.divideset([('a', ), ('b', ), ('c', )], 0, 'a'))
def test_divideset_with_continuous_attribute(self):
    """Splitting on the numeric column 3 at threshold 20 with discrete=False."""
    matched, unmatched = treepredict.divideset(treepredict.my_data, 3, 20, False)
    self.assertEqual(len(matched), 6)
def test_divideset_with_discrete_attribute(self):
    """Splitting on the categorical column 2 == 'yes' with discrete=True."""
    yes_rows, no_rows = treepredict.divideset(treepredict.my_data, 2, 'yes', True)
    self.assertEqual(len(yes_rows), 8)
    self.assertEqual(len(no_rows), 7)
def testIntegerDivide(self):
    """divideset on an int column groups values >= 3 into the first set.

    Fix: assertEquals is a deprecated alias; use assertEqual.
    """
    self.assertEqual(([(3, ), (4, )], [(1, ), (2, )]),
                     treepredict.divideset([(1, ), (2, ), (3, ), (4, )], 0, 3))