import pickle

from mrex import datasets
from mrex.tree import DecisionTreeClassifier
# NOTE: the exact helper module is assumed; in scikit-learn-style layouts
# ``assert_no_warnings`` lives under ``utils.testing``.
from mrex.utils.testing import assert_no_warnings


def test_pickle_version_warning_is_not_raised_with_matching_version():
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert b"version" in tree_pickle
    tree_restored = assert_no_warnings(pickle.loads, tree_pickle)

    # test that we can predict with the restored decision tree classifier
    score_of_original = tree.score(iris.data, iris.target)
    score_of_restored = tree_restored.score(iris.data, iris.target)
    assert score_of_original == score_of_restored
from mrex import datasets
from mrex.tree import DecisionTreeClassifier
from mrex.metrics import zero_one_loss
from mrex.ensemble import AdaBoostClassifier

n_estimators = 400
# A learning rate of 1. may not be optimal for both SAMME and SAMME.R
learning_rate = 1.

X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

X_test, y_test = X[2000:], y[2000:]
X_train, y_train = X[:2000], y[:2000]

dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
dt_stump.fit(X_train, y_train)
dt_stump_err = 1.0 - dt_stump.score(X_test, y_test)

dt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1)
dt.fit(X_train, y_train)
dt_err = 1.0 - dt.score(X_test, y_test)

ada_discrete = AdaBoostClassifier(
    base_estimator=dt_stump,
    learning_rate=learning_rate,
    n_estimators=n_estimators,
    algorithm="SAMME")
ada_discrete.fit(X_train, y_train)

ada_real = AdaBoostClassifier(
    base_estimator=dt_stump,
    learning_rate=learning_rate,
    n_estimators=n_estimators,
    algorithm="SAMME.R")
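# A hedged sketch of how the two boosted models could be compared, assuming
# the fragment above continues by fitting ``ada_real`` as well. It uses only
# names already imported or defined above: ``staged_predict`` is the standard
# AdaBoostClassifier method for per-iteration predictions, and
# ``zero_one_loss`` is imported but unused in the fragment. The error-list
# variable names below are illustrative, not taken from the original script.
ada_real.fit(X_train, y_train)

ada_discrete_err = [
    zero_one_loss(y_test, y_pred)
    for y_pred in ada_discrete.staged_predict(X_test)
]
ada_real_err = [
    zero_one_loss(y_test, y_pred)
    for y_pred in ada_real.staged_predict(X_test)
]

# The single-tree errors computed above (``dt_stump_err`` and ``dt_err``)
# serve as fixed baselines against which the boosted error curves can be read.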
ax[0].set_title("Number of nodes vs alpha")
ax[1].plot(ccp_alphas, depth, marker='o', drawstyle="steps-post")
ax[1].set_xlabel("alpha")
ax[1].set_ylabel("depth of tree")
ax[1].set_title("Depth vs alpha")
fig.tight_layout()

###############################################################################
# Accuracy vs alpha for training and testing sets
# ----------------------------------------------------
# With ``ccp_alpha`` set to zero and the other parameters of
# :class:`DecisionTreeClassifier` left at their defaults, the tree overfits,
# leading to 100% training accuracy and 88% testing accuracy. As alpha
# increases, more of the tree is pruned, creating a decision tree that
# generalizes better. In this example, setting ``ccp_alpha=0.015`` maximizes
# the testing accuracy.
train_scores = [clf.score(X_train, y_train) for clf in clfs]
test_scores = [clf.score(X_test, y_test) for clf in clfs]

fig, ax = plt.subplots()
ax.set_xlabel("alpha")
ax.set_ylabel("accuracy")
ax.set_title("Accuracy vs alpha for training and testing sets")
ax.plot(ccp_alphas, train_scores, marker='o', label="train",
        drawstyle="steps-post")
ax.plot(ccp_alphas, test_scores, marker='o', label="test",
        drawstyle="steps-post")
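# Close out the accuracy figure started above (legend + display); these two
# calls are an assumed continuation of the truncated fragment.
ax.legend()
plt.show()

###############################################################################
# Final pruned tree (illustrative sketch)
# ---------------------------------------
# A hedged sketch, not part of the original example: refit a single tree at
# the ``ccp_alpha=0.015`` value that the note above identifies as maximizing
# the testing accuracy. ``clf_pruned`` is an illustrative name, and
# ``random_state=0`` is assumed to match the classifiers fit earlier in the
# script; ``DecisionTreeClassifier`` and the train/test splits are likewise
# defined earlier in the script.
clf_pruned = DecisionTreeClassifier(random_state=0, ccp_alpha=0.015)
clf_pruned.fit(X_train, y_train)
print("Pruned tree test accuracy: {:.3f}".format(
    clf_pruned.score(X_test, y_test)))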