Ejemplo n.º 1
0
def genetic_tree_optimization(n_trees, n_iter, depth, X, labels, oblique, X_valid, y_valid, CR=0.95, l = 1):
    """Evolve a population of classification trees with a DE-style loop.

    Seeds the population from a RandomForest (one ClassificationTree per
    estimator), locally optimizes each seed with TAO, then for `n_iter`
    generations mutates each individual, re-optimizes the trial with TAO and
    keeps whichever of parent/trial has the lower regularized loss.

    Parameters:
        n_trees: population size (also the forest's n_estimators).
        n_iter: number of evolution generations.
        depth: maximum tree depth.
        X, labels: training data and targets.
        oblique: whether the trees use oblique (multivariate) splits.
        X_valid, y_valid: validation data used inside regularized_loss.
        CR: crossover/mutation rate passed to mutation().
        l: regularization weight forwarded to regularized_loss.

    Returns:
        (best_tree, best_loss): the best individual seen and its loss
        (best_tree is None if n_iter == 0).
    """
    # Seed population: one TAO-refined ClassificationTree per forest estimator.
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=depth, random_state=0, min_samples_leaf=4)
    clf.fit(X, labels)
    trees = []
    for cart in clf.estimators_:
        T = ClassificationTree(oblique=oblique)
        T.initialize_from_CART(X, labels, cart)
        tao_opt(T, X, labels)
        trees.append(T)
        ClassificationTree.build_idxs_of_subtree(X, range(len(labels)), T.tree[0], oblique)

    best_loss = np.inf
    best_tree = None
    for i in range(n_iter):
        print("Iter: ", i)
        for j, tree in enumerate(trees):
            trial = mutation(tree, trees, CR, X, labels, depth)
            tao_opt(trial, X, labels)
            trial_loss = regularized_loss(trial.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique, l=l)
            loss = regularized_loss(tree.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique, l=l)
            # BUG FIX: the original did `tree = trial`, which only rebound the
            # loop variable — the population was never updated. Write the
            # winner back into the list and track its loss for the best-check.
            if trial_loss < loss:
                trees[j] = trial
                tree = trial
                loss = trial_loss
            if loss < best_loss:
                best_loss = loss
                best_tree = tree
                print("best loss: ", best_loss)
                print("loss train best: ", 1-ClassificationTree.misclassification_loss(best_tree.tree[0], X, labels, range(len(labels)), oblique))
                print("loss valid: ", 1-ClassificationTree.misclassification_loss(best_tree.tree[0], X_valid, y_valid, range(len(y_valid)), oblique))

    # Guard: with n_iter == 0 no individual was ever evaluated.
    if best_tree is not None:
        print("ritorno loss train best: ", 1-ClassificationTree.misclassification_loss(best_tree.tree[0], X, labels, range(len(labels)), oblique))
        print("ritorno loss valid: ", 1-ClassificationTree.misclassification_loss(best_tree.tree[0], X_valid, y_valid, range(len(y_valid)), oblique))
    return best_tree, best_loss
Ejemplo n.º 2
0
def optimize_evolution(tree, trees, X, labels, X_valid, y_valid, CR):
    """Run one evolution step for `tree` within the population `trees`.

    Copies the parent, applies crossover against the population, re-optimizes
    the trial with TAO, and keeps whichever of parent/trial has the lower
    regularized loss. The winner replaces the parent inside `trees`.

    Parameters:
        tree: the parent ClassificationTree (a member of `trees`).
        trees: the whole population; mutated in place on improvement.
        X, labels: training data and targets.
        X_valid, y_valid: validation data used by regularized_loss.
        CR: crossover rate passed to crossover().

    Returns:
        The surviving individual (trial if it improved, else the parent).
    """
    trial = ClassificationTree.copy_tree(tree)
    # BUG FIX: `oblique` was a free, undefined name here (NameError at the
    # first use). NOTE(review): assumes ClassificationTree stores the flag
    # passed to its constructor as an attribute — confirm against the class.
    oblique = tree.oblique
    crossover(trial, trees, CR, X)
    ClassificationTree.build_idxs_of_subtree(X, range(len(labels)), trial.tree[0], oblique)
    tao_opt(trial, X, labels)
    trial_loss = regularized_loss(trial.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique)
    parent_loss = regularized_loss(tree.tree[0], X, labels, X_valid, y_valid, range(len(labels)), oblique)
    # If the new individual beats its parent, swap them.
    # BUG FIX: the original did `tree = trial`, a dead local rebind — the
    # caller never saw the improvement. Replace the parent in the population
    # and return the survivor (the original implicitly returned None, so
    # returning a value stays backward-compatible).
    if trial_loss < parent_loss:
        try:
            trees[trees.index(tree)] = trial
        except ValueError:
            # Parent not present in the population list: nothing to swap.
            pass
        return trial
    return tree