예제 #1
0
 def test_build_tree_depth_limit(self):
     """Build a tree with a random depth limit and validate the result.

     Instances come from ``build_consistent_generator(10)`` called with 100;
     the limit is drawn from 0..3 and passed to ``build_tree`` as
     ``cMaxLevel``. The tree is then handed to ``assert_dt_members`` and to
     ``check_dt`` together with the same limit.
     """
     instances = build_consistent_generator(10)(100)
     max_level = random.randint(0, 3)
     tree = dtree.build_tree(instances, cMaxLevel=max_level)
     self.assert_dt_members(tree)
     self.check_dt(tree, max_level)
 def test_build_tree_depth_limit(self):
     """Check a tree built under a randomly chosen ``cMaxLevel`` (0 to 3)."""
     make_instances = build_consistent_generator(10)
     sample = make_instances(100)
     # Draw the limit only after the sample exists, then reuse it for the
     # build and for the check.
     depth_cap = random.randint(0, 3)
     built = dtree.build_tree(sample, cMaxLevel=depth_cap)
     self.assert_dt_members(built)
     self.check_dt(built, depth_cap)
예제 #3
0
 def test_build_tree_no_gain(self):
     """Check that identical instances produce a single majority-label leaf.

     The data set is one ``dtree.Instance`` repeated 25 to 30 times, so every
     entry has the same attributes and label. The built tree is expected to
     be a leaf whose ``fLabel`` equals ``dtree.majority_label`` of the data.
     """
     listAttr = randlist(0, 5, 10)
     # List repetition reuses the very same Instance object for every entry.
     listInst = [dtree.Instance(listAttr, randbool())] * random.randint(25, 30)
     dt = dtree.build_tree(listInst)
     fMajorityLabel = dtree.majority_label(listInst)
     self.assertTrue(dt.is_leaf())
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(dt.fLabel, fMajorityLabel)
예제 #4
0
    def task(self):
        """Chart the training-set accuracy of the clean and noisy data sets.

        Each instance list is evaluated through a ``dtree.TreeFold`` that is
        given the same list for both of its arguments. Accuracy is the
        correct weight divided by the sum of correct and incorrect weight.

        Returns:
            A dict describing a column chart with one accuracy value per
            data set, in the order "Clean", "Noisy".
        """
        categories = ["Clean", "Noisy"]
        accuracies = []
        for instances in (get_clean_insts(), get_noisy_insts()):
            # The tree built here is not used afterwards; the call is kept.
            dtree.build_tree(instances)
            fold = dtree.TreeFold(instances, instances)
            correct, incorrect = dtree.weight_correct_incorrect(
                dtree.evaluate_classification(fold))
            accuracies.append(correct / (correct + incorrect))

        y_axis = {"title": {"text": "Accuracy"}, "min": 0.0, "max": 1.0}
        series = [{"name": "Training Set Accuracy", "data": accuracies}]
        return {
            "chart": {"defaultSeriesType": "column"},
            "title": {"text": "Clean vs. Noisy Training Set Accuracy"},
            "xAxis": {"categories": categories},
            "yAxis": y_axis,
            "series": series,
        }
예제 #5
0
 def task(self):
     """Dump the clean instances to ``view.txt`` and serialize their tree.

     Every instance from ``get_clean_insts()`` is written to ``view.txt`` as
     ``str(inst)`` followed by a newline. A tree is then built from the same
     instances.

     Returns:
         The result of ``serialize_tree`` applied to the built tree.
     """
     listInst = get_clean_insts()
     # The context manager closes the file even if a write raises.
     with open('view.txt', 'w+') as f:
         for inst in listInst:
             f.write(str(inst) + '\n')
     dt = dtree.build_tree(listInst)
     return serialize_tree(dt)
 def task(self):
     """Write each clean instance to ``view.txt``, then serialize the tree.

     Returns:
         ``serialize_tree`` of the tree built from ``get_clean_insts()``.
     """
     listInst = get_clean_insts()
     # Using ``with`` guarantees the handle is released on errors too; the
     # original open()/close() pair leaked it if a write failed.
     with open('view.txt', 'w+') as f:
         f.writelines(str(inst) + '\n' for inst in listInst)
     dt = dtree.build_tree(listInst)
     return serialize_tree(dt)
def random_forest(data, T):
    """Build ``T`` trees, each from a fresh ``bagging(data)`` sample.

    Every tree is created with ``build_tree(sample, 5, 1)``.

    Returns:
        A list of the ``T`` trees in the order they were built.
    """
    return [build_tree(bagging(data), 5, 1) for _ in range(T)]
예제 #8
0
파일: testdtree.py 프로젝트: slymnefe/cs181
 def test_build_tree_gain_limit(self):
     """Build a tree with ``dblMinGain=0.55`` on single-attribute data.

     Each instance has 5 to 10 attributes from ``randlist(0, 1, n)`` and a
     label equal to the truth value of one randomly chosen attribute. The
     resulting tree must be a node and is passed to ``check_dt`` with 1.
     """
     num_attrs = random.randint(5, 10)
     key_index = random.randint(0, num_attrs - 1)
     num_instances = random.randint(25, 150)
     instances = []
     for _ in xrange(num_instances):
         attrs = randlist(0, 1, num_attrs)
         instances.append(dtree.Instance(attrs, bool(attrs[key_index])))
     tree = dtree.build_tree(instances, dblMinGain=0.55)
     self.assertTrue(tree.is_node())
     self.check_dt(tree, 1)
예제 #9
0
파일: testdtree.py 프로젝트: dzhu/cs181
 def test_build_tree_gain_limit(self):
     """Check ``build_tree`` with a minimum gain of 0.55.

     The label of every generated instance is the boolean value of one
     fixed, randomly selected attribute.
     """
     width = random.randint(5, 10)
     decisive = random.randint(0, width - 1)

     def make_instance():
         # Label is taken directly from the decisive attribute.
         values = randlist(0, 1, width)
         return dtree.Instance(values, bool(values[decisive]))

     data = [make_instance() for _ in xrange(random.randint(25, 150))]
     root = dtree.build_tree(data, dblMinGain=0.55)
     self.assertTrue(root.is_node())
     self.check_dt(root, 1)
예제 #10
0
파일: dttasks.py 프로젝트: dzhu/cs181
    def task(self):
        """Return a column-chart description of training-set accuracy.

        One accuracy value is produced for the clean instances and one for
        the noisy instances. Each value comes from evaluating a
        ``dtree.TreeFold`` constructed with the same list passed twice, and
        equals correct weight over total (correct plus incorrect) weight.
        """
        labels = ["Clean", "Noisy"]
        datasets = [get_clean_insts(), get_noisy_insts()]

        scores = []
        for dataset in datasets:
            # Result of this build is never read; the call itself is retained.
            dtree.build_tree(dataset)
            outcome = dtree.evaluate_classification(
                dtree.TreeFold(dataset, dataset))
            weight_ok, weight_bad = dtree.weight_correct_incorrect(outcome)
            scores.append(weight_ok / (weight_ok + weight_bad))

        return {
            "chart": {
                "defaultSeriesType": "column",
            },
            "title": {
                "text": "Clean vs. Noisy Training Set Accuracy",
            },
            "xAxis": {
                "categories": labels,
            },
            "yAxis": {
                "title": {"text": "Accuracy"},
                "min": 0.0,
                "max": 1.0,
            },
            "series": [
                {"name": "Training Set Accuracy", "data": scores},
            ],
        }
예제 #11
0
 def test_build_tree(self):
     """Check the shape of the tree built from a staircase-like data set.

     For each index ``i`` below the attribute count ``n``, ``2 ** (n - i)``
     instances are created with ``i`` leading ones followed by zeros and the
     label ``bool(i % 2)``. The test then walks down the children keyed by
     1, expecting attribute ``i`` at level ``i`` and a leaf with label
     ``bool(i % 2)`` under key 0.
     """
     # NOTE: the number of generated instances grows exponentially with this.
     num_attrs = random.randint(1, 5)
     instances = []
     for depth in xrange(num_attrs):
         zeros = num_attrs - depth
         label = bool(depth % 2)
         # A new attribute list is created for every instance.
         instances.extend(
             dtree.Instance([1] * depth + [0] * zeros, label)
             for _ in xrange(2 ** zeros))
     node = dtree.build_tree(instances)
     for depth in xrange(num_attrs - 1):
         self.assertEqual(node.ixAttr, depth)
         zero_child = node.dictChildren[0]
         self.assertTrue(zero_child.is_leaf())
         self.assertEqual(zero_child.fLabel, bool(depth % 2))
         node = node.dictChildren[1]
     self.assertTrue(node.is_leaf())
     self.assertEqual(node.fLabel, not (num_attrs % 2))
 def test_build_tree(self):
     """Verify ``build_tree`` on data whose label alternates per prefix.

     Instances with ``k`` leading ones (the rest zeros) carry the label
     ``bool(k % 2)``, and there are ``2 ** (total - k)`` of them. The built
     tree is expected to test attributes in index order, with a leaf under
     key 0 at every level and a final leaf at the end of the key-1 chain.
     """
     # The data set size is exponential in the attribute count.
     total = random.randint(1, 5)

     data = []
     for ones in xrange(total):
         remaining = total - ones
         copies = 2 ** remaining
         for _ in xrange(copies):
             # Concatenation yields a new list for each instance.
             row = [1] * ones + [0] * remaining
             data.append(dtree.Instance(row, bool(ones % 2)))

     current = dtree.build_tree(data)
     for level in xrange(total - 1):
         expected_label = bool(level % 2)
         self.assertEqual(current.ixAttr, level)
         leaf = current.dictChildren[0]
         self.assertTrue(leaf.is_leaf())
         self.assertEqual(leaf.fLabel, expected_label)
         current = current.dictChildren[1]

     self.assertTrue(current.is_leaf())
     self.assertEqual(current.fLabel, not (total % 2))
예제 #13
0
 def task(self):
     """Serialize a tree built from ``data.csv`` with ``cMaxLevel=1``."""
     instances = dtree.load_csv_dataset(datadir("data.csv"))
     return serialize_tree(dtree.build_tree(instances, cMaxLevel=1))
예제 #14
0
 def task(self):
     """Build a tree from ``data.csv``, prune it, and serialize it.

     All but the last 10 loaded instances are used to build the tree; the
     last 10 are passed to ``dtree.prune_tree``.
     """
     instances = dtree.load_csv_dataset(datadir("data.csv"))
     tree = dtree.build_tree(instances[:-10])
     holdout = instances[-10:]
     dtree.prune_tree(tree, holdout)
     return serialize_tree(tree)
예제 #15
0
파일: dttasks.py 프로젝트: dzhu/cs181
 def task(self):
     """Return the serialized, pruned tree for the ``data.csv`` data set.

     The tree is built without the final 10 instances, which are instead
     supplied to ``dtree.prune_tree``.
     """
     dataset = dtree.load_csv_dataset(datadir("data.csv"))
     pruned = dtree.build_tree(dataset[:-10])
     # prune_tree is called for its effect on the tree; its return value is
     # not used.
     dtree.prune_tree(pruned, dataset[-10:])
     return serialize_tree(pruned)
예제 #16
0
 def task(self):
     """Serialize the tree built from ``get_clean_insts()``."""
     return serialize_tree(dtree.build_tree(get_clean_insts()))
 def filter_unclassifiable(listInst):
     """Keep the instances that their own tree classifies correctly.

     A tree is built from ``listInst``; an instance is kept when
     ``dtree.classify`` on that tree equals the instance's ``fLabel``.

     Returns:
         A new list with the kept instances in their original order.
     """
     tree = dtree.build_tree(listInst)
     kept = []
     for candidate in listInst:
         if dtree.classify(tree, candidate) == candidate.fLabel:
             kept.append(candidate)
     return kept
예제 #18
0
파일: dttasks.py 프로젝트: dzhu/cs181
 def task(self):
     """Load ``data.csv`` and return its serialized ``cMaxLevel=1`` tree."""
     csv_path = datadir("data.csv")
     shallow_tree = dtree.build_tree(
         dtree.load_csv_dataset(csv_path), cMaxLevel=1)
     return serialize_tree(shallow_tree)
예제 #19
0
 def filter_unclassifiable(listInst):
     """Drop instances whose label the tree built from them does not match.

     Returns:
         A list of the instances from ``listInst`` for which
         ``dtree.classify`` agrees with ``fLabel``, in the same order.
     """
     model = dtree.build_tree(listInst)

     def classified_correctly(inst):
         return dtree.classify(model, inst) == inst.fLabel

     return [inst for inst in listInst if classified_correctly(inst)]
예제 #20
0
파일: dttasks.py 프로젝트: dzhu/cs181
 def task(self):
     """Return ``serialize_tree`` of the tree for the clean instances."""
     clean = get_clean_insts()
     tree = dtree.build_tree(clean)
     serialized = serialize_tree(tree)
     return serialized