コード例 #1
0
    def setUpClass(cls):
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        tree = TreeLearner()
        cls.model = tree(cls.data)
        cls.model.instances = cls.data

        cls.signal_name = "Tree"
        cls.signal_data = cls.model

        # Load a dataset that contains two variables with the same entropy
        data_same_entropy = Table(
            path.join(path.dirname(path.dirname(path.dirname(__file__))),
                      "tests", "datasets", "same_entropy.tab"))
        cls.data_same_entropy = tree(data_same_entropy)
        cls.data_same_entropy.instances = data_same_entropy

        vara = DiscreteVariable("aaa", values=("e", "f", "g"))
        root = DiscreteNode(vara, 0, np.array([42, 8]))
        root.subset = np.arange(50)

        varb = DiscreteVariable("bbb", values=tuple("ijkl"))
        child0 = MappedDiscreteNode(varb, 1, np.array([0, 1, 0, 0]), (38, 5))
        child0.subset = np.arange(16)
        child1 = Node(None, 0, (13, 3))
        child1.subset = np.arange(16, 30)
        varc = ContinuousVariable("ccc")
        child2 = NumericNode(varc, 2, 42, (78, 12))
        child2.subset = np.arange(30, 50)
        root.children = (child0, child1, child2)

        child00 = Node(None, 0, (15, 4))
        child00.subset = np.arange(10)
        child01 = Node(None, 0, (10, 5))
        child01.subset = np.arange(10, 16)
        child0.children = (child00, child01)

        child20 = Node(None, 0, (90, 4))
        child20.subset = np.arange(30, 35)
        child21 = Node(None, 0, (70, 9))
        child21.subset = np.arange(35, 50)
        child2.children = (child20, child21)

        domain = Domain([vara, varb, varc], ContinuousVariable("y"))
        t = [[i, j, k] for i in range(3) for j in range(4) for k in (40, 44)]
        x = np.array((t * 3)[:50])
        data = Table.from_numpy(domain, x, np.arange(len(x)))
        cls.tree = TreeModel(data, root)
コード例 #2
0
    def fit_storage(self, data):
        if self.binarize and any(
                attr.is_discrete and len(attr.values) > self.MAX_BINARIZATION
                for attr in data.domain.attributes):
            # No fallback in the script; widgets can prevent this error
            # by providing a fallback and issue a warning about doing so
            raise ValueError("Exhaustive binarization does not handle "
                             "attributes with more than {} values".format(
                                 self.MAX_BINARIZATION))

        active_inst = np.nonzero(~np.isnan(data.Y))[0].astype(np.int32)
        root = self.build_tree(data, active_inst)
        if root is None:
            root = Node(None, 0, np.array([0., 0.]))
        root.subset = active_inst
        model = TreeModel(data, root)
        return model