Beispiel #1
0
    def evolve(self, X, y, n_iter=7, min_size_prune=1):
        """Optimize the tree with repeated level-by-level sweeps.

        Each of the ``n_iter`` sweeps walks the depths of the tree from
        ``self.classification_tree.depth`` down to 0. Every node returned by
        ``ClassificationTree.get_nodes_at_depth`` for the current depth is
        handed to ``self.optimize_nodes``; afterwards the data-point indices
        are rebuilt for the whole tree starting from node 0. Once all sweeps
        are done, ``ClassificationTree.restore_tree`` is called on the tree.

        Args:
            X: Training samples; stored on ``self.X``.
            y: Training labels; stored on ``self.y``.
            n_iter: Number of full sweeps over the tree.
            min_size_prune: Currently unused -- the final pruning pass that
                would consume it is disabled.
        """
        self.X = X
        self.y = y
        for _ in range(n_iter):
            deepest = self.classification_tree.depth
            for level in range(deepest, -1, -1):
                tree = self.classification_tree
                level_nodes = ClassificationTree.get_nodes_at_depth(
                    level, tree)

                for current in level_nodes:
                    self.optimize_nodes(current)

                # Re-associate the data points with every node, since the
                # nodes of this level have just been re-optimized.
                self.classification_tree.build_idxs_of_subtree(
                    X, range(len(X)), tree.tree[0], oblique=tree.oblique)
        ClassificationTree.restore_tree(self.classification_tree)
Beispiel #2
0
    def evolve(self, X, y, alfa=0, max_iteration=1, tol=1e-01):
        """Improve the tree by optimizing its nodes one at a time.

        Every pass records the penalized loss (misclassification loss measured
        from node 0 plus ``alfa * (n_leaves - 1)``), then repeatedly picks a
        not-yet-visited node id, passes that node to
        ``self.optimize_node_parallel``, restores the tree, recomputes the
        complexity and rebuilds the data-point indices from node 0. The search
        stops after ``max_iteration`` passes, or as soon as a pass changes the
        penalized loss by less than ``tol``.

        Args:
            X: Training samples; stored on ``self.X``.
            y: Training labels; stored on ``self.y``.
            alfa: Weight of the complexity term ``n_leaves - 1``.
            max_iteration: Maximum number of passes over the tree.
            tol: Minimum absolute change of the penalized loss over one pass
                required to keep iterating. The default equals the threshold
                that used to be hard-coded.
        """
        ClassificationTree.restore_tree(self.classification_tree)
        complexity = self.classification_tree.n_leaves - 1
        T = self.classification_tree.tree
        self.X = X
        self.y = y

        def penalized_loss(current_complexity):
            # Loss evaluated from node 0 plus the complexity penalty.
            return ClassificationTree.misclassification_loss(
                T[0], X, y, T[0].data_idxs,
                self.classification_tree.oblique) + alfa * current_complexity

        i = 0
        while i < max_iteration:
            # Ids already visited in this pass; a set keeps the filtering of
            # the remaining candidates cheap.
            optimized = set()
            error_prev = penalized_loss(complexity)

            values = list(self.classification_tree.tree.keys())
            random.shuffle(values)
            # NOTE(review): `values` is rebuilt from a set difference after
            # every node, so the shuffle above only decides which node is
            # visited first -- confirm whether a fully random order is wanted.
            while values:
                node_id = values.pop()
                optimized.add(node_id)
                self.optimize_node_parallel(T[node_id], X, y, alfa, complexity)

                # The value returned by restore_tree is used as the collection
                # of node ids still to be considered after this update.
                ids = ClassificationTree.restore_tree(self.classification_tree)
                complexity = self.classification_tree.n_leaves - 1
                values = list(set(ids) - optimized)

                # Re-associate the data points with every node.
                self.classification_tree.build_idxs_of_subtree(
                    X, range(len(X)), T[0], self.classification_tree.oblique)

            # Only the loss at the end of the pass is compared, so it is
            # evaluated once here instead of after every single node.
            # NOTE(review): assumes misclassification_loss only evaluates the
            # tree and has no side effects -- confirm.
            error_curr = penalized_loss(complexity)

            i += 1
            if np.abs(error_curr - error_prev) < tol:
                break
Beispiel #3
0
    def prune(self, min_size=1):
        """Collapse pure subtrees and remove branches with a dead child.

        The tree is walked iteratively starting from node 0. Nodes that hold
        no data points, and leaves, are left untouched. For every other node:

        * if all labels reaching it are equal, it is turned into a leaf whose
          value is that label;
        * otherwise, if one child holds fewer than ``min_size`` data points,
          the node is replaced by the other child through
          ``ClassificationTree.replace_node`` and that child is visited next;
        * otherwise both children are visited.

        ``ClassificationTree.restore_tree`` is called after every visited
        node. Labels are read from ``self.y``, which must already be set and
        must accept indexing with ``data_idxs`` as a whole.

        Args:
            min_size: A child with fewer data points than this is treated as
                dead.
        """
        tree = self.classification_tree
        pending = [tree.tree[0]]
        while pending:
            node = pending.pop()
            if len(node.data_idxs) > 0 and not node.is_leaf:
                labels = self.y[node.data_idxs]
                first_label = self.y[node.data_idxs[0]]
                if all(label == first_label for label in labels):
                    # Pure node: turn it into a leaf predicting that label.
                    node.is_leaf = True
                    node.left_node = None
                    node.right_node = None
                    node.left_node_id = -1
                    node.right_node_id = -1
                    node.value = first_label
                else:
                    left, right = node.left_node, node.right_node
                    if len(left.data_idxs) < min_size:
                        # Dead left child: the right child takes the
                        # parent's place.
                        pending.append(right)
                        ClassificationTree.replace_node(node, right, tree)
                    elif len(right.data_idxs) < min_size:
                        # Dead right child: the left child takes the
                        # parent's place.
                        pending.append(left)
                        ClassificationTree.replace_node(node, left, tree)
                    else:
                        pending.append(right)
                        pending.append(left)
            ClassificationTree.restore_tree(tree)