# Example #1
#%% DECISION TREE
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree using Gini impurity as the split criterion.
# random_state=0 makes feature tie-breaking deterministic, matching the
# pruned trees fitted later in this file (which also use random_state=0).
model = DecisionTreeClassifier(criterion='gini', random_state=0)

# Score the model on the training data (helper functions defined elsewhere
# in this project; presumably cross-validated scores — TODO confirm).
compute_score(model, X_selected_train, y_train)
compute_score_accuracy(model, X_selected_train, y_train)
compute_score_f1(model, X_selected_train, y_train)

# Fit on the training split, predict the held-out split, and build the
# confusion matrix for the test predictions. sklearn's fit() returns self,
# so fitting and predicting can be chained.
y_pred = model.fit(X_selected_train, y_train).predict(X_selected_test)
cm = confusion_matrix(y_test, y_pred)
# Tree pruning — minimal cost complexity pruning: recursively find the weakest
# link (the smallest "effective alpha"); nodes with the smallest effective
# alpha are pruned first. cost_complexity_pruning_path returns the effective
# alphas and the corresponding total leaf impurities at each pruning step.
# As alpha grows, more of the tree is pruned, which increases leaf impurity.
path = model.cost_complexity_pruning_path(X_selected_train, y_train)
# NOTE(review): the original assigned these twice; the duplicate was removed.
ccp_alphas, impurities = path.ccp_alphas, path.impurities
# Visualize how total leaf impurity grows as pruning strength (alpha) increases.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ccp_alphas, impurities, linewidth=1, color='black')
ax.set_xlabel("effective alpha")
ax.set_ylabel("total impurity of leaves")

# Fit one pruned tree per candidate alpha. fit() returns the estimator
# itself, so the whole sweep collapses to a single comprehension.
clfs = [
    DecisionTreeClassifier(random_state=0, ccp_alpha=alpha).fit(
        X_selected_train, y_train
    )
    for alpha in ccp_alphas
]
# Depth of each fitted tree — larger alphas prune harder, giving shallower trees.
tree_depths = [clf.tree_.max_depth for clf in clfs]