def test_build_tree_depth_limit(self):
    """Build a tree under a random depth cap and run the shared checks.

    The cap is drawn from 0..3 (inclusive), passed to ``dtree.build_tree``
    as ``cMaxLevel``, and then handed unchanged to ``self.check_dt``.
    """
    # NOTE(review): the meaning of 10 and 100 is defined by
    # build_consistent_generator -- presumably attribute count and number
    # of instances; confirm against that helper.
    make_instances = build_consistent_generator(10)
    instances = make_instances(100)
    depth_cap = random.randint(0, 3)
    tree = dtree.build_tree(instances, cMaxLevel=depth_cap)
    self.assert_dt_members(tree)
    self.check_dt(tree, depth_cap)
def test_build_tree_depth_limit(self):
    """Check a tree built with a randomly chosen ``cMaxLevel`` in 0..3.

    The generated instances come from ``build_consistent_generator``; the
    resulting tree is passed to ``assert_dt_members`` and then to
    ``check_dt`` together with the same level limit used to build it.
    """
    training_set = build_consistent_generator(10)(100)
    max_level = random.randint(0, 3)
    limited_tree = dtree.build_tree(training_set, cMaxLevel=max_level)
    self.assert_dt_members(limited_tree)
    self.check_dt(limited_tree, max_level)
def test_build_tree_no_gain(self):
    """A data set of one repeated instance must produce a single leaf.

    The list holds the *same* ``dtree.Instance`` object repeated 25 to 30
    times, so every entry shares one attribute list and one label. The
    built tree is expected to be a leaf whose label equals
    ``dtree.majority_label`` of that list.
    """
    listAttr = randlist(0, 5, 10)
    listInst = [dtree.Instance(listAttr, randbool())] * random.randint(25, 30)
    dt = dtree.build_tree(listInst)
    fMajorityLabel = dtree.majority_label(listInst)
    self.assertTrue(dt.is_leaf())
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest releases no longer provide.
    self.assertEqual(dt.fLabel, fMajorityLabel)
def task(self):
    """Return chart options comparing clean and noisy training accuracy.

    For each data set a ``dtree.TreeFold`` is created with the same list
    as both of its arguments, evaluated with
    ``dtree.evaluate_classification``, and reduced to
    ``correct / (correct + incorrect)``. The two ratios become the single
    column series of the returned dictionary.
    """
    series_names = ["Clean", "Noisy"]
    data_sets = [get_clean_insts(), get_noisy_insts()]
    accuracies = []
    for instances in data_sets:
        # NOTE(review): the tree built here is not used below; the
        # evaluation goes through the TreeFold. Confirm whether this call
        # is still needed.
        dtree.build_tree(instances)
        fold = dtree.TreeFold(instances, instances)
        outcome = dtree.evaluate_classification(fold)
        right, wrong = dtree.weight_correct_incorrect(outcome)
        total = right + wrong
        accuracies.append(right / total)
    return {
        "chart": {"defaultSeriesType": "column"},
        "title": {"text": "Clean vs. Noisy Training Set Accuracy"},
        "xAxis": {"categories": series_names},
        "yAxis": {"title": {"text": "Accuracy"}, "min": 0.0, "max": 1.0},
        "series": [{"name": "Training Set Accuracy", "data": accuracies}],
    }
def task(self):
    """Dump the clean instances to ``view.txt`` and return the serialized tree.

    Each instance is written as ``str(inst)`` on its own line. The tree is
    built from the same instances afterwards and passed through
    ``serialize_tree``.
    """
    listInst = get_clean_insts()
    # The context manager closes the dump file even if str(inst) or a
    # write raises part-way through; the explicit open()/close() pair did
    # not.
    with open('view.txt', 'w+') as f:
        for inst in listInst:
            f.write(str(inst) + '\n')
    dt = dtree.build_tree(listInst)
    return serialize_tree(dt)
def random_forest(data, T):
    """Return a list of ``T`` trees, each built from ``bagging(data)``.

    A fresh ``bagging(data)`` sample is drawn for every tree. When ``T``
    is zero or negative the result is an empty list and neither helper is
    called.
    """
    # NOTE(review): 5 and 1 are passed positionally to build_tree --
    # presumably depth/size limits; confirm against build_tree's signature.
    return [build_tree(bagging(data), 5, 1) for _ in range(T)]
def test_build_tree_gain_limit(self):
    """Build a tree with ``dblMinGain=0.55`` on single-attribute labels.

    Every instance's label is the truth value of one randomly chosen
    attribute. The test expects the root to be a node (not a leaf) and
    runs ``self.check_dt`` with a level argument of 1.
    """
    attr_count = random.randint(5, 10)
    deciding_index = random.randint(0, attr_count - 1)
    instance_count = random.randint(25, 150)
    instances = []
    for _ in xrange(instance_count):
        attrs = randlist(0, 1, attr_count)
        label = bool(attrs[deciding_index])
        instances.append(dtree.Instance(attrs, label))
    tree = dtree.build_tree(instances, dblMinGain=0.55)
    self.assertTrue(tree.is_node())
    self.check_dt(tree, 1)
def test_build_tree_gain_limit(self):
    """Check the tree built with a minimum-gain threshold of 0.55.

    Between 25 and 150 instances are generated over 5 to 10 attributes;
    each label is ``bool`` of the attribute at one randomly picked index.
    The root must report ``is_node()`` and the tree is then passed to
    ``check_dt`` with 1.
    """
    width = random.randint(5, 10)
    key_column = random.randint(0, width - 1)

    def make_instance():
        # Label depends on exactly one column of the freshly drawn row.
        row = randlist(0, 1, width)
        return dtree.Instance(row, bool(row[key_column]))

    sample_size = random.randint(25, 150)
    samples = [make_instance() for _ in xrange(sample_size)]
    root = dtree.build_tree(samples, dblMinGain=0.55)
    self.assertTrue(root.is_node())
    self.check_dt(root, 1)
def task(self):
    """Build the column-chart options for clean vs. noisy training accuracy.

    Accuracy for a data set is ``correct / (correct + incorrect)`` using
    the weights returned by ``dtree.weight_correct_incorrect`` for a
    ``dtree.TreeFold`` whose two arguments are both that data set.
    """
    clean = get_clean_insts()
    noisy = get_noisy_insts()
    labels = ["Clean", "Noisy"]

    def training_accuracy(instances):
        # NOTE(review): this tree is built but never read afterwards;
        # verify whether the call can be dropped.
        dtree.build_tree(instances)
        result = dtree.evaluate_classification(
            dtree.TreeFold(instances, instances))
        hits, misses = dtree.weight_correct_incorrect(result)
        return hits / (hits + misses)

    scores = []
    for instances in (clean, noisy):
        scores.append(training_accuracy(instances))

    options = {}
    options["chart"] = {"defaultSeriesType": "column"}
    options["title"] = {"text": "Clean vs. Noisy Training Set Accuracy"}
    options["xAxis"] = {"categories": labels}
    options["yAxis"] = {"title": {"text": "Accuracy"}, "min": 0.0, "max": 1.0}
    options["series"] = [{"name": "Training Set Accuracy", "data": scores}]
    return options
def test_build_tree(self):
    """Build a tree from a staircase data set and walk it branch by branch.

    For each index ``k`` the data set holds ``2 ** (cAttrs - k)`` instances
    whose attributes are ``k`` ones followed by zeros, labelled
    ``bool(k % 2)``. The test then expects each level ``k`` to split on
    attribute ``k``, with a leaf under child 0, and descends through
    child 1 until it reaches a final leaf.
    """
    # test case size grows exponentially in this
    attr_total = random.randint(1, 5)
    instances = []
    for ones in xrange(attr_total):
        zeros = attr_total - ones
        label = bool(ones % 2)
        for _ in xrange(2 ** zeros):
            # A new attribute list is created for every instance.
            instances.append(dtree.Instance([1] * ones + [0] * zeros, label))
    node = dtree.build_tree(instances)
    for depth in xrange(attr_total - 1):
        self.assertEqual(node.ixAttr, depth)
        zero_branch = node.dictChildren[0]
        self.assertTrue(zero_branch.is_leaf())
        self.assertEqual(zero_branch.fLabel, bool(depth % 2))
        node = node.dictChildren[1]
    self.assertTrue(node.is_leaf())
    self.assertEqual(node.fLabel, not (attr_total % 2))
def test_build_tree(self):
    """Verify the shape of a tree built from prefix-of-ones instances.

    Group ``k`` contributes ``2 ** (n - k)`` instances with ``k`` leading
    ones, zeros elsewhere, and label ``bool(k % 2)``. The assertions
    follow child 1 downwards: at step ``k`` the current node must split on
    attribute ``k`` and its child 0 must be a leaf labelled
    ``bool(k % 2)``. The last node reached must be a leaf labelled
    ``not (n % 2)``.
    """
    # test case size grows exponentially in this
    n = random.randint(1, 5)
    dataset = []
    for k in xrange(n):
        tail_len = n - k
        copies = 2 ** tail_len
        leading_ones = [1] * k
        # Concatenation yields a distinct list for each generated instance.
        dataset.extend(
            dtree.Instance(leading_ones + [0] * tail_len, bool(k % 2))
            for _ in xrange(copies)
        )
    current = dtree.build_tree(dataset)
    for k in xrange(n - 1):
        self.assertEqual(current.ixAttr, k)
        leaf_side = current.dictChildren[0]
        self.assertTrue(leaf_side.is_leaf())
        self.assertEqual(leaf_side.fLabel, bool(k % 2))
        current = current.dictChildren[1]
    self.assertTrue(current.is_leaf())
    self.assertEqual(current.fLabel, not (n % 2))
def task(self):
    """Return the serialized tree built from ``data.csv`` with ``cMaxLevel=1``."""
    csv_path = datadir("data.csv")
    instances = dtree.load_csv_dataset(csv_path)
    shallow_tree = dtree.build_tree(instances, cMaxLevel=1)
    return serialize_tree(shallow_tree)
def task(self):
    """Return the serialized tree after pruning it against the last 10 rows.

    The tree is built from every instance of ``data.csv`` except the final
    ten; those ten are then passed to ``dtree.prune_tree`` along with the
    tree before it is serialized.
    """
    instances = dtree.load_csv_dataset(datadir("data.csv"))
    training = instances[:-10]
    held_out = instances[-10:]
    tree = dtree.build_tree(training)
    dtree.prune_tree(tree, held_out)
    return serialize_tree(tree)
def task(self):
    """Return the serialized tree built from the clean instances."""
    return serialize_tree(dtree.build_tree(get_clean_insts()))
def filter_unclassifiable(listInst):
    """Return the instances that a tree built from ``listInst`` labels correctly.

    A tree is built from the full list; an instance is kept when
    ``dtree.classify`` on that tree returns the instance's own ``fLabel``.
    The original order is preserved.
    """
    tree = dtree.build_tree(listInst)
    kept = []
    for candidate in listInst:
        if dtree.classify(tree, candidate) == candidate.fLabel:
            kept.append(candidate)
    return kept
def filter_unclassifiable(listInst):
    """Drop instances whose label disagrees with the tree built from them.

    The tree is built once from ``listInst``; the result is a new list of
    the instances for which ``dtree.classify`` equals ``inst.fLabel``.
    """
    fitted = dtree.build_tree(listInst)

    def agrees(inst):
        # True when the tree's prediction matches the stored label.
        return dtree.classify(fitted, inst) == inst.fLabel

    return [inst for inst in listInst if agrees(inst)]