def setUpClass(cls):
    """Create the class-level fixtures used by the tests.

    Three fixtures are stored on ``cls``:

    * ``cls.model`` -- the result of calling a ``TreeLearner`` on
      ``cls.data``; the same object is exposed as ``cls.signal_data`` under
      the signal name ``"Tree"``.
    * ``cls.data_same_entropy`` -- the result of calling the same learner on
      the ``same_entropy.tab`` dataset.
    * ``cls.tree`` -- a ``TreeModel`` whose node hierarchy is assembled by
      hand below rather than induced from data.

    NOTE(review): ``cls.data`` is not assigned here; presumably it is set by
    ``super().setUpClass()`` or ``WidgetOutputsTestMixin.init`` -- confirm.
    """
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)

    learner = TreeLearner()

    # Model induced from the shared data; also used as the signal payload.
    cls.model = learner(cls.data)
    cls.model.instances = cls.data
    cls.signal_name = "Tree"
    cls.signal_data = cls.model

    # Load a dataset that contains two variables with the same entropy.
    # The file is looked up three directory levels above this module.
    base_dir = path.dirname(path.dirname(path.dirname(__file__)))
    same_entropy_file = path.join(
        base_dir, "tests", "datasets", "same_entropy.tab")
    same_entropy_table = Table(same_entropy_file)
    cls.data_same_entropy = learner(same_entropy_table)
    cls.data_same_entropy.instances = same_entropy_table

    def with_subset(node, subset):
        # Attach the row indices to a freshly built node and hand it back.
        node.subset = subset
        return node

    # --- Hand-built tree -------------------------------------------------
    # root (aaa)
    #   +- child0 (bbb) -> child00, child01
    #   +- child1
    #   +- child2 (ccc) -> child20, child21
    vara = DiscreteVariable("aaa", values=("e", "f", "g"))
    root = with_subset(
        DiscreteNode(vara, 0, np.array([42, 8])), np.arange(50))

    varb = DiscreteVariable("bbb", values=tuple("ijkl"))
    child0 = with_subset(
        MappedDiscreteNode(varb, 1, np.array([0, 1, 0, 0]), (38, 5)),
        np.arange(16))
    child1 = with_subset(Node(None, 0, (13, 3)), np.arange(16, 30))
    varc = ContinuousVariable("ccc")
    child2 = with_subset(
        NumericNode(varc, 2, 42, (78, 12)), np.arange(30, 50))
    root.children = (child0, child1, child2)

    child0.children = (
        with_subset(Node(None, 0, (15, 4)), np.arange(10)),
        with_subset(Node(None, 0, (10, 5)), np.arange(10, 16)),
    )
    child2.children = (
        with_subset(Node(None, 0, (90, 4)), np.arange(30, 35)),
        with_subset(Node(None, 0, (70, 9)), np.arange(35, 50)),
    )

    # --- Data backing the hand-built tree --------------------------------
    domain = Domain([vara, varb, varc], ContinuousVariable("y"))
    # Every (aaa, bbb, ccc) combination: 3 * 4 * 2 = 24 rows ...
    combos = []
    for i in range(3):
        for j in range(4):
            for k in (40, 44):
                combos.append([i, j, k])
    # ... repeated and cut to 50 rows, matching root.subset above.
    features = np.array((combos * 3)[:50])
    targets = np.arange(len(features))
    table = Table.from_numpy(domain, features, targets)
    cls.tree = TreeModel(table, root)
def fit_storage(self, data):
    """Build a tree on `data` and wrap it in a `TreeModel`.

    Only rows whose `data.Y` entry is not NaN are passed to
    `self.build_tree`. If `build_tree` returns ``None``, a single
    stand-in `Node` is used as the root instead.

    Args:
        data: table providing `domain.attributes` and `Y`.

    Returns:
        A `TreeModel` constructed from `data` and the root node.

    Raises:
        ValueError: if `self.binarize` is truthy and some discrete attribute
            has more than `self.MAX_BINARIZATION` values.
    """
    if self.binarize:
        for attr in data.domain.attributes:
            if attr.is_discrete \
                    and len(attr.values) > self.MAX_BINARIZATION:
                # No fallback in the script; widgets can prevent this error
                # by providing a fallback and issue a warning about doing so
                raise ValueError("Exhaustive binarization does not handle "
                                 "attributes with more than {} values".format(
                                     self.MAX_BINARIZATION))

    # Indices (int32) of the rows that have a known target value.
    has_target = ~np.isnan(data.Y)
    usable_rows = np.nonzero(has_target)[0].astype(np.int32)

    tree_root = self.build_tree(data, usable_rows)
    if tree_root is None:
        # NOTE(review): presumably a leaf with a zeroed value array --
        # confirm the meaning of the arguments against `Node`.
        tree_root = Node(None, 0, np.array([0., 0.]))
    tree_root.subset = usable_rows
    return TreeModel(data, tree_root)