Esempio n. 1
0
File: run.py Progetto: wz125/courses
def Choosing_the_Best_Split():
  """Print impurity measures for the full data set and for one split of it.

  Reloads ``treepredict``, prints the Gini impurity and the entropy of
  ``treepredict.my_data``, then divides that data on column 2 with the
  value 'yes' and prints the entropy and Gini impurity of the first of the
  two resulting sets.
  """
  # The print statement and the builtin reload() exist only on Python 2.
  # Parenthesised single-argument prints and the lookup below behave the
  # same on Python 2 and also run on Python 3.
  try:
    from importlib import reload as _reload  # Python 3.4+
  except ImportError:
    _reload = reload  # Python 2: reload() is a builtin
  print('>>Choosing the Best Split')
  _reload(treepredict)
  print(treepredict.giniimpurity(treepredict.my_data))
  print(treepredict.entropy(treepredict.my_data))
  first_set, _second_set = treepredict.divideset(treepredict.my_data, 2, 'yes')
  print(treepredict.entropy(first_set))
  print(treepredict.giniimpurity(first_set))
Esempio n. 2
0
import treepredict

# Print the results of divideset and divideset1 for the same arguments so
# they can be compared by eye: first a split on column 2 with the value
# 'yes', then a split on column 3 with the value 20.
# Each print takes a single parenthesised expression, which produces the
# same output on Python 2 and is also valid on Python 3.
print(treepredict.divideset(treepredict.my_data, 2, 'yes'))
print(treepredict.divideset1(treepredict.my_data, 2, 'yes'))

print(treepredict.divideset(treepredict.my_data, 3, 20))
print(treepredict.divideset1(treepredict.my_data, 3, 20))
#!/usr/bin/python
#
# Script to demonstrate the CART-like DT classifier from
# Chapter 7 of "Programming Collective Intelligence" by
# T. Segaran, O'Reilly, (c) 2007
#
import treepredict

results = treepredict.divideset(treepredict.my_data, 2, "yes")
# results is now a list of lists

# See if records are divided according to the Read FAQ field (column 2) ...

print("\nDivision on Read FAQ field...\n")
# The outer loop variable used to be called `list`, which rebound the
# builtin `list` for the rest of the module; it is now `subset`.
for subset in results:
    for item in subset:
        # The format consumes exactly five fields per record, the fourth
        # of which must be an integer (%10d).
        print("%15s %15s %5s %10d %15s" % tuple(item))


# Let's see the difference between gini- and entropy-based impurities
# of the current data (no splitting)
print("\nParent node...\n")
gini = treepredict.giniimpurity(treepredict.my_data)
entr = treepredict.entropy(treepredict.my_data)
print("Gini: %8f    Entropy: %8f" % (gini, entr))

# Let's now split on the Read FAQ field and assess impurity
node1, node2 = treepredict.divideset(treepredict.my_data, 2, "yes")
print("\nRead FAQ =  Yes leaf node...\n")
gini = treepredict.giniimpurity(node1)
entr = treepredict.entropy(node1)
# NOTE(review): the leaf-node gini/entr values are computed here but not
# printed within this excerpt; the script appears to be cut off at this point.
Esempio n. 4
0
#!/usr/bin/env python2

import treepredict

# Impurity of the complete data set.
# All prints use a single parenthesised expression so the script produces
# the same output on Python 2 and also runs on Python 3.
print("Gini impurity\n")
print(treepredict.giniimpurity(treepredict.my_data))

print("\n")

print("treepredict.entropy\n")
print(treepredict.entropy(treepredict.my_data))

print("\n")

# Divide the data on column 2 with the value 'yes' and report the impurity
# of the first resulting set.
set1, set2 = treepredict.divideset(treepredict.my_data, 2, 'yes')

print("Gini impurity\n")
print(treepredict.giniimpurity(set1))
print("treepredict.entropy\n")
print(treepredict.entropy(set1))

print('\n')
tree = treepredict.buildtree(treepredict.my_data)
# The Python 2 statement `print 'tree: ', tree` wrote the label, a single
# separating space and then the value; '%s %s' reproduces that exactly.
print('%s %s' % ('tree: ', tree))

print('\n')

# Classify one observation with the tree built above.
print('%s %s' % ('classify: ',
                 treepredict.classify(['(direct)', 'USA', 'yes', 5], tree)))
Esempio n. 5
0
File: run.py Progetto: wz125/courses
def Training_the_Tree():
  """Divide ``treepredict.my_data`` on column 2 / 'yes' and print both parts.

  Prints a banner line, then the first and the second element of the value
  returned by ``treepredict.divideset``.
  """
  # Parenthesised single-argument prints give the same output on Python 2
  # and are also valid on Python 3, unlike the bare print statement.
  print('>>Training the Tree')
  setdata = treepredict.divideset(treepredict.my_data, 2, 'yes')
  print(setdata[0])
  print(setdata[1])
  def testBasics(self):
    # Checks the entropy of the sample data, then the entropy of the first
    # set obtained by dividing it on column 2 with the value 'yes'.
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed in Python 3.12.
    sample = treepredict.testdata()
    self.assertAlmostEqual(1.5052408, treepredict.entropy(sample))

    first_set, _second_set = treepredict.divideset(sample, 2, 'yes')
    self.assertAlmostEqual(1.2987949, treepredict.entropy(first_set))
 def testStringDivide(self):
   # Dividing on a string value is expected to put the rows whose column 0
   # equals 'a' in the first set and all remaining rows in the second.
   # assertEqual replaces the deprecated assertEquals alias (removed in
   # Python 3.12).
   rows = [('a',), ('b',), ('c',)]
   expected = ([('a',)], [('b',), ('c',)])
   self.assertEqual(expected, treepredict.divideset(rows, 0, 'a'))
 def testFloatDivide(self):
     # Dividing float rows on 3.0 is expected to put 3.0 and 4.0 in the
     # first set and 1.0 and 2.0 in the second.
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     rows = [(1.0, ), (2.0, ), (3.0, ), (4.0, )]
     expected = ([(3.0, ), (4.0, )], [(1.0, ), (2.0, )])
     self.assertEqual(expected, treepredict.divideset(rows, 0, 3.0))
 def testIntegerDivide(self):
   # Dividing integer rows on 3 is expected to put 3 and 4 in the first
   # set and 1 and 2 in the second.
   # assertEqual replaces the deprecated assertEquals alias (removed in
   # Python 3.12).
   rows = [(1,), (2,), (3,), (4,)]
   expected = ([(3,), (4,)], [(1,), (2,)])
   self.assertEqual(expected, treepredict.divideset(rows, 0, 3))
 def testFloatDivide(self):
   # Dividing float rows on 3.0 is expected to put 3.0 and 4.0 in the
   # first set and 1.0 and 2.0 in the second.
   # assertEqual replaces the deprecated assertEquals alias (removed in
   # Python 3.12).
   rows = [(1.0,), (2.0,), (3.0,), (4.0,)]
   expected = ([(3.0,), (4.0,)], [(1.0,), (2.0,)])
   self.assertEqual(expected, treepredict.divideset(rows, 0, 3.0))
    def testBasics(self):
        # Checks the entropy of the sample data, then the entropy of the
        # first set obtained by dividing it on column 2 with the value 'yes'.
        # assertAlmostEqual replaces the deprecated assertAlmostEquals
        # alias, which was removed in Python 3.12.
        sample = treepredict.testdata()
        self.assertAlmostEqual(1.5052408, treepredict.entropy(sample))

        first_set, _second_set = treepredict.divideset(sample, 2, 'yes')
        self.assertAlmostEqual(1.2987949, treepredict.entropy(first_set))
    def testBasics(self):
        # Checks the Gini impurity of the sample data, then that of the
        # first set obtained by dividing it on column 2 with the value 'yes'.
        # assertAlmostEqual replaces the deprecated assertAlmostEquals
        # alias, which was removed in Python 3.12.
        sample = treepredict.testdata()
        self.assertAlmostEqual(0.6328125, treepredict.giniimpurity(sample))

        first_set, _second_set = treepredict.divideset(sample, 2, 'yes')
        self.assertAlmostEqual(0.53125, treepredict.giniimpurity(first_set))
 def testStringDivide(self):
     # Dividing on a string value is expected to put the rows whose column
     # 0 equals 'a' in the first set and all remaining rows in the second.
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     rows = [('a', ), ('b', ), ('c', )]
     expected = ([('a', )], [('b', ), ('c', )])
     self.assertEqual(expected, treepredict.divideset(rows, 0, 'a'))
Esempio n. 14
0
 def test_divideset_with_continuous_attribute(self):
     # Divide my_data on column 3 with the value 20, passing False as the
     # fourth argument, and check that the first returned set has 6 rows.
     split = treepredict.divideset(treepredict.my_data, 3, 20, False)
     first_set, second_set = split
     self.assertEqual(len(first_set), 6)
  def testBasics(self):
    # Checks the Gini impurity of the sample data, then that of the first
    # set obtained by dividing it on column 2 with the value 'yes'.
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed in Python 3.12.
    sample = treepredict.testdata()
    self.assertAlmostEqual(0.6328125, treepredict.giniimpurity(sample))

    first_set, _second_set = treepredict.divideset(sample, 2, 'yes')
    self.assertAlmostEqual(0.53125, treepredict.giniimpurity(first_set))
Esempio n. 16
0
 def test_divideset_with_discrete_attribute(self):
     # Divide my_data on column 2 with the value 'yes', passing True as the
     # fourth argument, and check the sizes of both returned sets (8 and 7).
     split = treepredict.divideset(treepredict.my_data, 2, 'yes', True)
     first_set, second_set = split
     self.assertEqual(len(first_set), 8)
     self.assertEqual(len(second_set), 7)
 def testIntegerDivide(self):
     # Dividing integer rows on 3 is expected to put 3 and 4 in the first
     # set and 1 and 2 in the second.
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     rows = [(1, ), (2, ), (3, ), (4, )]
     expected = ([(3, ), (4, )], [(1, ), (2, )])
     self.assertEqual(expected, treepredict.divideset(rows, 0, 3))