def genetic_tree_optimization(n_trees, n_iter, depth, X, labels, oblique, X_valid, y_valid, CR=0.95, l=1):
    """Evolve a population of TAO-optimized CART trees with a simple genetic scheme.

    Seeds the population from a random forest's estimators, converts each to a
    ClassificationTree refined by TAO, then for `n_iter` generations mutates each
    individual and keeps the mutant if its regularized loss improves on the parent.

    Parameters
    ----------
    n_trees : int   -- population size (forest estimators).
    n_iter : int    -- number of evolution generations.
    depth : int     -- maximum tree depth.
    X, labels       -- training data and targets.
    oblique : bool  -- whether trees use oblique (multivariate) splits.
    X_valid, y_valid -- validation data used inside the regularized loss.
    CR : float      -- crossover/mutation rate passed to `mutation`.
    l : float       -- regularization weight forwarded to `regularized_loss`.

    Returns
    -------
    (best_tree, best_loss) : the best individual seen and its regularized loss.
    """
    # Seed population: one CART per forest estimator, each refined by TAO.
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=depth, random_state=0, min_samples_leaf=4)
    clf.fit(X, labels)
    random_trees = clf.estimators_

    trees = []
    for cart in random_trees:
        T = ClassificationTree(oblique=oblique)
        T.initialize_from_CART(X, labels, cart)
        tao_opt(T, X, labels)
        trees.append(T)
        ClassificationTree.build_idxs_of_subtree(X, range(len(labels)), T.tree[0], oblique)

    best_loss = np.inf
    best_tree = None
    for i in range(n_iter):
        print("Iter: ", i)
        for j, tree in enumerate(trees):
            trial = mutation(tree, trees, CR, X, labels, depth)
            tao_opt(trial, X, labels)
            trial_loss = regularized_loss(trial.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique, l=l)
            loss = regularized_loss(tree.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique, l=l)
            # BUG FIX: the original did `tree = trial`, which only rebinds the
            # loop variable and discards the winning mutant — the population
            # never evolved. Replace the individual in the list instead.
            if trial_loss < loss:
                trees[j] = trial
                loss = trial_loss
            # BUG FIX: best-tracking previously compared the parent's loss even
            # when the trial won; `loss` now holds the winner's loss.
            if loss < best_loss:
                best_loss = loss
                best_tree = trees[j]
            print("best loss: ", best_loss)
        # Per-generation report: accuracies (1 - misclassification) on train/valid.
        print("loss train best: ", 1 - ClassificationTree.misclassification_loss(best_tree.tree[0], X, labels, range(len(labels)), oblique))
        print("loss valid: ", 1 - ClassificationTree.misclassification_loss(best_tree.tree[0], X_valid, y_valid, range(len(y_valid)), oblique))

    print("ritorno loss train best: ", 1 - ClassificationTree.misclassification_loss(best_tree.tree[0], X, labels, range(len(labels)), oblique))
    print("ritono loss valid: ", 1 - ClassificationTree.misclassification_loss(best_tree.tree[0], X_valid, y_valid, range(len(y_valid)), oblique))
    return best_tree, best_loss
def optimize_evolution(tree, trees, X, labels, X_valid, y_valid, CR):
    """Produce one evolution step for a single individual.

    Copies `tree`, applies crossover against the population, refines the copy
    with TAO, and keeps whichever of parent/offspring has the lower regularized
    loss.

    Parameters
    ----------
    tree : ClassificationTree -- the parent individual.
    trees : list              -- the current population (crossover partners).
    X, labels                 -- training data and targets.
    X_valid, y_valid          -- validation data used inside the regularized loss.
    CR : float                -- crossover rate.

    Returns
    -------
    ClassificationTree : the winning individual (offspring if strictly better,
    otherwise the original parent).

    NOTE(review): `oblique` is read as a free (module-level) variable here —
    confirm it is defined at module scope, or thread it through as a parameter.
    """
    trial = ClassificationTree.copy_tree(tree)
    crossover(trial, trees, CR, X)
    ClassificationTree.build_idxs_of_subtree(X, range(len(labels)), trial.tree[0], oblique)
    tao_opt(trial, X, labels)

    trial_loss = regularized_loss(trial.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique)
    parent_loss = regularized_loss(tree.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique)

    # BUG FIX: the original did `tree = trial`, which only rebinds the local
    # parameter name — the result was lost when the function returned and the
    # function had no observable effect. Return the winner so the caller can
    # keep it (swap offspring for parent when strictly better).
    if trial_loss < parent_loss:
        return trial
    return tree