Example #1
    def DecisionTreeRegressor(self):

        clf = DecisionTreeRegressor(random_state=self.random_state)
        path = clf.cost_complexity_pruning_path(self.X_, self.y_)

        previous_nodes = -1
        best_nsc = 1
        best_model = None

        # For every possible pruning point, in reverse order
        for ccp_alpha in reversed(path.ccp_alphas):

            model = DecisionTreeRegressor(ccp_alpha=ccp_alpha,
                                          random_state=self.random_state)
            model.fit(self.X_, self.y_)

            # Skip if nothing has changed
            if model.tree_.node_count == previous_nodes:
                continue

            previous_nodes = model.tree_.node_count

            new_nsc = self.nescience_.nescience(model)

            if new_nsc < best_nsc:
                best_nsc = new_nsc
                best_model = model
            else:
                break

        return (best_nsc, best_model, None)
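
This example scores each pruned tree with a custom nescience measure (self.nescience_) that is not shown in the snippet. As a minimal, self-contained sketch of the same search pattern, one could stop on held-out MSE instead; the synthetic data, split, and scoring below are illustrative assumptions rather than part of the original class:

from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# illustrative data and hold-out split (assumptions for the sketch)
X, y = make_regression(n_samples=300, n_features=5, noise=10.0, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

path = DecisionTreeRegressor(random_state=0).cost_complexity_pruning_path(X_train, y_train)

best_score, best_model, previous_nodes = float("inf"), None, -1
for ccp_alpha in reversed(path.ccp_alphas):        # most heavily pruned tree first
    model = DecisionTreeRegressor(ccp_alpha=ccp_alpha, random_state=0)
    model.fit(X_train, y_train)
    if model.tree_.node_count == previous_nodes:   # this pruning point changed nothing
        continue
    previous_nodes = model.tree_.node_count
    score = mean_squared_error(y_val, model.predict(X_val))
    if score < best_score:
        best_score, best_model = score, model
    else:                                          # stop once the validation error worsens
        break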
Example #2
def item_E_prunning(train_x, train_y, val_x, val_y, plot_option=False):
    E_tree = DecisionTreeRegressor(random_state=0)
    parameters = E_tree.cost_complexity_pruning_path(train_x, train_y)
    ccp_alphas, impurities = parameters.ccp_alphas, parameters.impurities

    regressor_forest = []
    for ccp_alpha in ccp_alphas:
        regressor_tree = arbol_decision(DecisionTreeRegressor(random_state=0, ccp_alpha=ccp_alpha))
        regressor_tree.train_tree(train_x, train_y)
        regressor_forest.append(regressor_tree)
    
    nodo_per_tree = [arbol.tree.tree_.node_count for arbol in regressor_forest]
    max_depth = [arbol.tree.tree_.max_depth for arbol in regressor_forest]
    
    train_scores = [arbol.error(train_x, train_y) for arbol in regressor_forest]
    test_scores = [arbol.error(val_x, val_y) for arbol in regressor_forest]
    
    
    fig,ax = plt.subplots()
    ax.set_ylabel("error")
    ax.set_xlabel("ccp $\\alpha$")
    ax.plot(ccp_alphas[:-2], train_scores[:-2], marker='o', label="train",drawstyle="steps-post", color=cmap(1), alpha=0.7)
    ax.plot(ccp_alphas[:-2], test_scores[:-2], marker='s', label="val",drawstyle="steps-post", color=cmap(2), alpha=0.7)
    ax.legend(loc='center')
    
    ax2=ax.twinx()
    ax2.plot(ccp_alphas[:-2], max_depth[:-2], marker='^', label="depth", drawstyle="steps-post", color=cmap(3), alpha=0.7)
    ax2.set_ylabel("Depth")
    ax2.legend(loc='center right')
    
    
    E_tree_optimo = arbol_decision(DecisionTreeRegressor(max_depth=5, ccp_alpha=0.2))
    E_tree_optimo.train_tree(train_x, train_y)
    y_pred = E_tree_optimo.test_tree(val_x)

    if plot_option:
        E_tree_optimo.plot_save_tree(val_y, y_pred, "E_tree_optimo.pdf")
        
    plt.show()

    E_tree_optimo.acc_error(y_pred, train_x, train_y, val_x, val_y)
Example #3
# Adds a grid to the plot
plt.grid()
# Y-axis label
plt.ylabel('RMS error')
# X-axis label
plt.xlabel('Depth')
# Export the plot
plt.savefig('error.png')


## The code below prunes the decision tree

regr = DecisionTreeRegressor(max_depth=10)
X_train, X_test, y_train, y_test = process_input()
# This is the function which returns ccp alphas and impurity of leaves
path = regr.cost_complexity_pruning_path(X_train, y_train)

# This will store the alphas and their corresponding impurities
ccp_alphas, impurities = path.ccp_alphas, path.impurities

plt.figure(figsize=(10, 6))
plt.plot(ccp_alphas, impurities)
plt.xlabel("Effective alpha")
plt.ylabel("Total Impurity of Leaves")
plt.savefig('AlphavsImpurity.png')

regrs = []

# Build trees based on different CCP values
for ccp_alpha in ccp_alphas:
    regr = DecisionTreeRegressor(random_state=0, ccp_alpha=ccp_alpha, max_depth=10)
    regr.fit(X_train, y_train)
    regrs.append(regr)
Example #4
predictions_dt = dt.predict(x_test)

dt.score(x_test, y_test)
mse = mean_squared_error(y_test, predictions_dt)
rmse = mse**(1 / 2)
print(mse)
print(rmse)

# ---------- Checking the score by changing complexity parameter

from sklearn import tree
plt.figure(figsize=(7, 4))
tree.plot_tree(dt, filled=True)

path = dt.cost_complexity_pruning_path(x_train, y_train)
# The weakest link is characterized by an effective alpha; the nodes with the
# smallest effective alpha are pruned first.
ccp_alphas, impurities = path.ccp_alphas, path.impurities
dts = []
for ccp_alpha in ccp_alphas:
    dt = DecisionTreeRegressor(random_state=0, ccp_alpha=ccp_alpha)
    dt.fit(x_train, y_train)
    dts.append(dt)
print("Number of nodes in the last tree is :{} with ccp_alpha : {}".format(
    dts[-1].tree_.node_count, ccp_alphas[-1]))

train_scores = [dt.score(x_train, y_train) for dt in dts]
test_scores = [dt.score(x_test, y_test) for dt in dts]

fig, ax = plt.subplots()
ax.set_xlabel("alpha")
Example #5
        #select training Y data from Y-996
        Y_train = Y_996[index_996]

        #select the remaining X as testing data
        X_test = np.delete(X_996, index_996, axis=0)

        #select the remaining Y as testing data
        Y_test = np.delete(Y_996, index_996, axis=0)

        #assign to new variables
        X_resample = X_train
        Y_resample = Y_train

        #call function to find alpha values
        model = regr.cost_complexity_pruning_path(X_resample,
                                                  Y_resample.ravel())

        #find alpha and impurities
        ccp_alphas, impurities = model.ccp_alphas, model.impurities

        for i in range(0, len(ccp_alphas)):
            if ccp_alphas[i] < 0:
                #in rare cases an alpha value is negative due to numerical error; record it and clip it to zero
                ind_minus_alpha_row.append(m)
                ind_minus_alpha_column.append(i)
                minus_alpha.append(ccp_alphas[i])
                ccp_alphas[i] = 0

        #save alpha, shape = (tree_num, len(series))
        alpha_all.append(ccp_alphas)
Example #6
#visualize the tree
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
tree.plot_tree(dtr, filled=True)
plt.show()

# Another alternative is to prune the tree, which is controlled by setting $\alpha$. Reasonable ranges for $\alpha$ depend on the data and need to be tested when optimizing this hyperparameter (see the tuning sketch after the next cell). For instance, a value $\alpha = 0.05$ results in the following structure.

# In[5]:

#we may make use of tree pruning, which is controlled by ccp_alpha
dtr = DecisionTreeRegressor(ccp_alpha=0.05)

#fit the tree
dtr.fit(X, y)

path = dtr.cost_complexity_pruning_path(X, y)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

#visualize the tree
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
tree.plot_tree(dtr, filled=True)
plt.show()
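
# Since reasonable ranges for $\alpha$ depend on the data, one way to test them is a
# cross-validated grid search over the candidate alphas returned above; a minimal
# sketch (the 5-fold CV and MSE scoring are illustrative assumptions):

from sklearn.model_selection import GridSearchCV

search = GridSearchCV(
    DecisionTreeRegressor(random_state=0),
    param_grid={"ccp_alpha": ccp_alphas},   # candidate alphas from the pruning path
    scoring="neg_mean_squared_error",
    cv=5,
)
search.fit(X, y)
print("best ccp_alpha:", search.best_params_["ccp_alpha"])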

# ## Decision Trees
#
# So far, we examined a regression problem. How do we derive trees for classification problems? More or less in the same manner; however, we cannot use $RSS$ to evaluate model performance. Instead, we need a loss function suited to classification. Intuitively, we might want to minimize the classification error, but it has been found that this criterion is not sensitive enough to grow good tree structures. Instead, at each split either the **Gini index** or the **cross-entropy** is used to evaluate the quality of the split. The Gini index $G$ is given by:
#
# $$
# G = \sum_k \hat{p}_{lk} (1 - \hat{p}_{lk})
# $$
#
# where $\hat{p}_{lk}$ is the proportion of training observations in region $l$ that belong to class $k$.
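
# As a quick numeric illustration of the formula, the Gini index of a single node can be
# computed from its class counts; a small sketch with made-up counts:

import numpy as np

def gini_index(class_counts):
    # class counts -> proportions \hat{p}_{lk}, then G = sum_k p * (1 - p)
    p = np.asarray(class_counts, dtype=float)
    p = p / p.sum()
    return float(np.sum(p * (1.0 - p)))

# a pure node has G = 0, an evenly mixed two-class node has G = 0.5
print(gini_index([10, 0]))   # 0.0
print(gini_index([5, 5]))    # 0.5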
Example #7
model = DecisionTreeRegressor(random_state=1, max_depth=20, min_samples_split=6, min_samples_leaf=2)
model.fit(X_trspl, y_trspl)
y_pred = model.predict(X_tespl)

rmse = RMSE(y_tespl, y_pred)
print(rmse)

feat_importances = pd.Series(model.feature_importances_, index=X_trspl.columns)
feat_importances.nlargest(20).plot(kind='barh')
plt.title('Feature Importance based on Decision Tree Regressor')
plt.xlabel('feature scores')
plt.ylabel('feature names')
plt.show()

# Total impurity of leaves vs effective alphas of pruned tree
path = model.cost_complexity_pruning_path(X_trspl, y_trspl)
ccp_alphas, impurities = path.ccp_alphas, path.impurities
fig, ax = plt.subplots()
ax.plot(ccp_alphas[:-1], impurities[:-1], marker='o', drawstyle="steps-post")
ax.set_xlabel("effective alpha")
ax.set_ylabel("total impurity of leaves")
ax.set_title("Total Impurity vs effective alpha for training set")

models = []
for ccp_alpha in ccp_alphas:
    model = DecisionTreeRegressor(random_state=0, ccp_alpha=ccp_alpha)
    model.fit(X_trspl, y_trspl)
    models.append(model)
print("Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
      models[-1].tree_.node_count, ccp_alphas[-1]))
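
# A natural next step (a sketch reusing this example's X_tespl / y_tespl hold-out split,
# not shown in the snippet itself) is to score each pruned tree and keep the alpha that
# generalizes best:
import numpy as np

test_scores = [m.score(X_tespl, y_tespl) for m in models]   # R^2 on the hold-out split
best_idx = int(np.argmax(test_scores))
print("Best ccp_alpha: {:.5f} (R^2 = {:.3f}, {} nodes)".format(
    ccp_alphas[best_idx], test_scores[best_idx],
    models[best_idx].tree_.node_count))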