from sklearn.datasets import load_boston
from sklearn import tree


def _pruned_path_stats(scores):
    """Return leaf counts, mean scores and standard errors for a pruning path.

    Each entry of ``scores`` holds the scores of one pruning level. The
    standard error of every level is computed from that level's own number
    of scores, so a single level or levels of unequal length are handled.
    """
    import numpy as np

    means = np.array([np.mean(s) for s in scores])
    # Standard error of the mean: std / sqrt(number of scores at that level).
    stds = np.array([np.std(s) / np.sqrt(len(s)) for s in scores])
    # The first entry is drawn at len(scores) + 1 leaves, the last one at 2.
    x = list(range(len(scores) + 1, 1, -1))
    return x, means, stds


def plot_pruned_path(scores, with_std=True):
    """Plot the cross validated scores versus the number of leaves of trees.

    Parameters
    ----------
    scores : sequence of sequences of float
        One entry per pruning level, each holding the cross validated
        scores of that level. Entries are plotted from ``len(scores) + 1``
        leaves down to 2 leaves.
    with_std : bool, default True
        When true, also draw the mean plus and minus two standard errors
        as thin grey lines.

    Notes
    -----
    The curves are drawn on the current matplotlib axes; this function does
    not call ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    x, means, stds = _pruned_path_stats(scores)

    plt.plot(x, means)
    if with_std:
        plt.plot(x, means + 2 * stds, lw=1, c='0.7')
        plt.plot(x, means - 2 * stds, lw=1, c='0.7')

    plt.xlabel('Number of leaves')
    plt.ylabel('Cross validated score')


# Data set and the depth-limited regression tree whose pruning path is scored.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the targeted version.
boston = load_boston()
clf = tree.DecisionTreeRegressor(max_depth=8)

# Cross validated scores along the pruning path.
scores = tree.prune_path(
    clf,
    boston.data,
    boston.target,
    max_n_leaves=20,
    n_iterations=10,
    random_state=0,
)

plot_pruned_path(scores)
        plt.plot(x, means + 2 * stds, lw=1, c='0.7')
        plt.plot(x, means - 2 * stds, lw=1, c='0.7')

    plt.xlabel('Number of leaves')
    plt.ylabel('Cross validated score')


# Synthetic data: 80 sorted samples scaled by 5 with a sine target; every
# fifth target starting at index 1 is perturbed with uniform noise.
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[1::5] += 3 * (0.5 - rng.rand(16))


# Score the pruning path of a deep regression tree and plot it.
clf = tree.DecisionTreeRegressor(max_depth=20)
scores = tree.prune_path(
    clf,
    X,
    y,
    max_n_leaves=20,
    n_iterations=100,
    random_state=0,
)
plot_pruned_path(scores)

# Refit with a 15-leaf limit and build the column of evaluation points.
clf = tree.DecisionTreeRegressor(max_depth=20, n_leaves=15)
clf.fit(X, y)
X_test = np.arange(0.0, 5.0, 0.01).reshape(-1, 1)

# Predictions of the fitted tree, then after each successive pruning step.
y_15 = clf.predict(X_test)

clf = clf.prune(6)
y_7 = clf.predict(X_test)

clf = clf.prune(2)
y_2 = clf.predict(X_test)
        plt.plot(x, means - 2 * stds, lw=1, c='0.7')

    plt.xlabel('Number of leaves')
    plt.ylabel('Cross validated score')


# Build the noisy sine data set: 80 sorted inputs, with uniform noise added
# to every fifth target starting at index 1.
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[1::5] += 3 * (0.5 - rng.rand(16))

# Cross validated scores along the pruning path of a deep regression tree.
clf = tree.DecisionTreeRegressor(max_depth=20)
scores = tree.prune_path(clf, X, y,
                         max_n_leaves=20, n_iterations=100, random_state=0)
plot_pruned_path(scores)

# Fit a tree limited to 15 leaves; X_test is a single-column evaluation grid.
clf = tree.DecisionTreeRegressor(max_depth=20, n_leaves=15)
clf.fit(X, y)
X_test = np.arange(0.0, 5.0, 0.01).reshape(-1, 1)

# Predict with the fitted tree, then prune step by step.
y_15 = clf.predict(X_test)

clf = clf.prune(6)
y_7 = clf.predict(X_test)

clf = clf.prune(2)
    import matplotlib.pyplot as plt
    means = np.array([np.mean(s) for s in scores])
    stds = np.array([np.std(s) for s in scores]) / np.sqrt(len(scores[1]))

    x = range(len(scores) + 1, 1, -1)

    plt.plot(x, means)
    if with_std:
        plt.plot(x, means + 2 * stds, lw=1, c='0.7')
        plt.plot(x, means - 2 * stds, lw=1, c='0.7')

    plt.xlabel('Number of leaves')
    plt.ylabel('Cross validated score')

    plt.show()


# Load the training features and class labels from local pickle files.
# NOTE: pickle can execute arbitrary code while loading; only use these
# loaders on trusted files.
# The files are opened in binary mode (required by pickle on Python 3) and
# closed as soon as they have been read.
print("loading training dataset features")
with open("data/traindata-allfeatures.list", "rb") as features_file:
    traindata = np.asarray(pickle.load(features_file)).astype(float)
print("loading class labels for training dataset")
with open("data/target.list", "rb") as labels_file:
    target = np.asarray(pickle.load(labels_file)).astype(float)

clf = tree.DecisionTreeClassifier()

# Compute the cross validated scores along the pruning path.
scores = tree.prune_path(clf, traindata, target,
                         max_n_leaves=10, n_iterations=5,
                         random_state=0)

plot_pruned_path(scores)
def _pruned_path_stats(scores):
    """Return leaf counts, mean scores and standard errors for a pruning path.

    Each entry of ``scores`` holds the scores of one pruning level. The
    standard error of every level is computed from that level's own number
    of scores, so a single level or levels of unequal length are handled.
    """
    import numpy as np

    means = np.array([np.mean(s) for s in scores])
    # Standard error of the mean: std / sqrt(number of scores at that level).
    stds = np.array([np.std(s) / np.sqrt(len(s)) for s in scores])
    # The first entry is drawn at len(scores) + 1 leaves, the last one at 2.
    x = list(range(len(scores) + 1, 1, -1))
    return x, means, stds


def plot_pruned_path(scores, with_std=True):
    """Plot the cross validated scores versus the number of leaves of trees.

    Parameters
    ----------
    scores : sequence of sequences of float
        One entry per pruning level, each holding the cross validated
        scores of that level. Entries are plotted from ``len(scores) + 1``
        leaves down to 2 leaves.
    with_std : bool, default True
        When true, also draw the mean plus and minus two standard errors
        as thin grey lines.

    Notes
    -----
    The curves are drawn on the current matplotlib axes; this function does
    not call ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    x, means, stds = _pruned_path_stats(scores)

    plt.plot(x, means)
    if with_std:
        plt.plot(x, means + 2 * stds, lw=1, c='0.7')
        plt.plot(x, means - 2 * stds, lw=1, c='0.7')

    plt.xlabel('Number of leaves')
    plt.ylabel('Cross validated score')


# Load the data set and set up the depth-limited regression tree to prune.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the targeted version.
boston = load_boston()
clf = tree.DecisionTreeRegressor(max_depth=8)

# Cross validated scores along the pruning path.
scores = tree.prune_path(clf, boston.data, boston.target,
                         max_n_leaves=20, n_iterations=10, random_state=0)

plot_pruned_path(scores)