def task(self):
    """Train, prune and serialize a decision tree built from ``data.csv``.

    The dataset is loaded through ``dtree.load_csv_dataset``. Every
    instance except the last ten is used to build the tree; the last ten
    are passed to ``dtree.prune_tree`` as the validation set. The pruned
    tree is returned in the form produced by ``serialize_tree``.
    """
    instances = dtree.load_csv_dataset(datadir("data.csv"))
    training = instances[:-10]
    validation = instances[-10:]
    tree = dtree.build_tree(training)
    dtree.prune_tree(tree, validation)
    return serialize_tree(tree)
def test_prune_tree(self):
    """Test bottom-up pruning with a validation set.

    The test builds a random tree and picks a random root-to-leaf path.
    Working upward from the leaf's parent to the root, each node on that
    path is targeted in turn. To induce pruning at the targeted node, the
    test does the following:

    - set every default label (and every leaf's actual label) in the
      tree to F
    - set the default label of the targeted node to T, on a fresh copy
      of the tree
    - generate between 1 and 10 T instances whose attribute values
      follow the path to the targeted node
    - prune the copy against those instances
    - check that every ancestor on the path is still an internal node
      and that the targeted node has become a leaf
    """
    def set_labels(dtRoot, f):
        """Set all default labels, and all leaf labels, under dtRoot to f."""
        def down(dt):
            if dt.is_leaf():
                dt.fLabel = f
            dt.fDefaultLabel = f
            # Explicit loop rather than map(): map() is lazy on Python 3
            # and would never visit the children.
            for dtChild in dt.dictChildren.values():
                down(dtChild)
        down(dtRoot)

    def check_passes(dtRoot, dtCheck, inst):
        """Sanity-check the first split that inst takes from dtRoot."""
        def down(dt):
            assert not dt.is_leaf()
            assert len(dt.dictChildren) == cValue
            dt = dt.dictChildren[inst.listAttrs[dt.ixAttr]]
            # NOTE(review): there is no recursion here, so only the first
            # step from the root is verified, not the whole path to dtCheck.
            if dt == dtCheck:
                return
        down(dtRoot)

    cAttr = random.randint(2, 4)
    cValue = random.randint(2, 4)
    dtBase = build_random_tree(cAttr, cValue)
    listPath = []
    listAttrs = []
    fTargetValue = True  # randbool()
    set_labels(dtBase, not fTargetValue)

    # Record a random root-to-leaf path: the child keys taken and the
    # attribute tested at each internal node along the way.
    dt = dtBase
    while not dt.is_leaf():
        # list() keeps this working on Python 3, where keys() is a view.
        ixValue = random.choice(list(dt.dictChildren.keys()))
        listPath.append(ixValue)
        listAttrs.append(dt.ixAttr)
        dt = dt.dictChildren[ixValue]

    # Shorten the path one step at a time so the targeted node moves from
    # the leaf's parent up to the root.
    while listPath:
        listPath.pop()
        # Work on a copy so each iteration starts from the unpruned tree.
        dt = dtRoot = dtBase.copy()
        for ixValue in listPath:
            dt = dt.dictChildren[ixValue]
        assert dt.is_node()
        dt.fDefaultLabel = fTargetValue

        listInst = []
        for _ in range(random.randint(1, 10)):
            # Fixed prefix follows the path; the remainder is random.
            listValue = listPath + randlist(0, cValue - 1,
                                            cAttr - len(listPath))
            assert len(listValue) == cAttr
            listInstAttr = [None] * cAttr
            for ixValue, ixAttr in zip(listValue, listAttrs):
                listInstAttr[ixAttr] = ixValue
            inst = dtree.Instance(listInstAttr, fTargetValue)
            check_passes(dtRoot, dt, inst)
            listInst.append(inst)

        dtree.prune_tree(dtRoot, listInst)

        # Ancestors must survive as internal nodes; the target must now
        # be a leaf.
        dt = dtRoot
        for ix, ixValue in enumerate(listPath):
            assert dt.ixAttr == listAttrs[ix]
            self.assertTrue(dt.is_node(), str(dtRoot))
            self.assertTrue(ixValue in dt.dictChildren)
            dt = dt.dictChildren[ixValue]
        self.assertTrue(dt.is_leaf(), str(dt))
def test_prune_tree(self):
    """Test bottom-up pruning with a validation set.

    The test builds a random tree and picks a random root-to-leaf path.
    Working upward from the leaf's parent to the root, each node on that
    path is targeted in turn. To induce pruning at the targeted node, the
    test does the following:

    - set every default label (and every leaf's actual label) in the
      tree to F
    - set the default label of the targeted node to T, on a fresh copy
      of the tree
    - generate between 1 and 10 T instances whose attribute values
      follow the path to the targeted node
    - prune the copy against those instances
    - check that every ancestor on the path is still an internal node
      and that the targeted node has become a leaf
    """
    def set_labels(dtRoot, f):
        """Set all default labels, and all leaf labels, under dtRoot to f."""
        def down(dt):
            if dt.is_leaf():
                dt.fLabel = f
            dt.fDefaultLabel = f
            # Explicit loop rather than map(): map() is lazy on Python 3
            # and would never visit the children.
            for dtChild in dt.dictChildren.values():
                down(dtChild)
        down(dtRoot)

    def check_passes(dtRoot, dtCheck, inst):
        """Sanity-check the first split that inst takes from dtRoot."""
        def down(dt):
            assert not dt.is_leaf()
            assert len(dt.dictChildren) == cValue
            dt = dt.dictChildren[inst.listAttrs[dt.ixAttr]]
            # NOTE(review): there is no recursion here, so only the first
            # step from the root is verified, not the whole path to dtCheck.
            if dt == dtCheck:
                return
        down(dtRoot)

    cAttr = random.randint(2, 4)
    cValue = random.randint(2, 4)
    dtBase = build_random_tree(cAttr, cValue)
    listPath = []
    listAttrs = []
    fTargetValue = True  # randbool()
    set_labels(dtBase, not fTargetValue)

    # Record a random root-to-leaf path: the child keys taken and the
    # attribute tested at each internal node along the way.
    dt = dtBase
    while not dt.is_leaf():
        # list() keeps this working on Python 3, where keys() is a view.
        ixValue = random.choice(list(dt.dictChildren.keys()))
        listPath.append(ixValue)
        listAttrs.append(dt.ixAttr)
        dt = dt.dictChildren[ixValue]

    # Shorten the path one step at a time so the targeted node moves from
    # the leaf's parent up to the root.
    while listPath:
        listPath.pop()
        # Prune a copy, not dtBase itself: pruning mutates the tree, and
        # reusing dtBase would let one iteration's pruning and relabelling
        # leak into the next.
        dt = dtRoot = dtBase.copy()
        for ixValue in listPath:
            dt = dt.dictChildren[ixValue]
        assert dt.is_node()
        dt.fDefaultLabel = fTargetValue

        listInst = []
        for _ in range(random.randint(1, 10)):
            # Fixed prefix follows the path; the remainder is random.
            listValue = listPath + randlist(0, cValue - 1,
                                            cAttr - len(listPath))
            assert len(listValue) == cAttr
            listInstAttr = [None] * cAttr
            for ixValue, ixAttr in zip(listValue, listAttrs):
                listInstAttr[ixAttr] = ixValue
            inst = dtree.Instance(listInstAttr, fTargetValue)
            check_passes(dtRoot, dt, inst)
            listInst.append(inst)

        dtree.prune_tree(dtRoot, listInst)

        # Ancestors must survive as internal nodes; the target must now
        # be a leaf.
        dt = dtRoot
        for ix, ixValue in enumerate(listPath):
            assert dt.ixAttr == listAttrs[ix]
            self.assertTrue(dt.is_node(), str(dtRoot))
            self.assertTrue(ixValue in dt.dictChildren)
            dt = dt.dictChildren[ixValue]
        self.assertTrue(dt.is_leaf(), str(dt))