Example #1
def answer_five():
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.svm import SVC
    from adspy_shared_utilities import plot_decision_tree
    from adspy_shared_utilities import plot_feature_importances
    import matplotlib.pyplot as plt  # needed for the figure calls below

    # Your code here
    clf_dt = DecisionTreeClassifier(max_depth=5, min_samples_leaf=10,
                                    random_state=0).fit(X_train2, y_train2)
    clf_SVC = SVC(kernel='linear', C=10).fit(X_train2, y_train2)
    # plot_decision_tree(clf, X_train2.columns, y_train2.columns)  # broken as written: no 'clf' here, and a Series has no .columns
    print('Mushroom dataset: decision tree')
    print('Accuracy of DT classifier on training set: {:.2f}'.format(clf_dt.score(X_train2, y_train2)))
    print('Accuracy of DT classifier on test set: {:.2f}'.format(clf_dt.score(X_test2, y_test2)))

    print()

    print('Mushroom dataset: SVC')
    print('Accuracy of SVC classifier on training set: {:.2f}'.format(clf_SVC.score(X_train2, y_train2)))
    print('Accuracy of SVC classifier on test set: {:.2f}'.format(clf_SVC.score(X_test2, y_test2)))

    plt.figure(figsize=(10, 6), dpi=80)
    plot_feature_importances(clf_dt, X_train2.columns)
    plt.tight_layout()

    plt.show()


    return True  # Your answer here
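
# answer_five() above reads X_train2, X_test2, y_train2, y_test2 from the enclosing
# notebook rather than taking parameters. A minimal setup sketch, mirroring the
# mushroom-data preparation shown in Example #5 below (assumes a local "mushrooms.csv"):
import pandas as pd
from sklearn.model_selection import train_test_split

mush_df2 = pd.get_dummies(pd.read_csv('mushrooms.csv'))  # one-hot encode every categorical column
X_mush = mush_df2.iloc[:, 2:]   # features: all columns after the two class dummies
y_mush = mush_df2.iloc[:, 1]    # binary target: the second class dummy
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_mush, y_mush, random_state=0)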
Example #2
def show_dectree_info():
    """Display info on a decision tree classifier fit on the mushroom data."""
    clf = DecisionTreeClassifier(random_state=0).fit(X_train2, y_train2)
    fi = pd.DataFrame(data=clf.feature_importances_,
                      index=X_train2.columns,
                      columns=['feature importance'])
    fi.sort_values('feature importance', ascending=False, inplace=True)
    top_fi = fi.head(5)
    print('Feature importances:\n{}'.format(top_fi))

    plt.figure(figsize=(10, 4), dpi=80)
    plot_feature_importances(clf, X_train2.columns)  # the columns the tree was fit on
    plt.show()
def answer_five():
    import numpy as np
    from sklearn.tree import DecisionTreeClassifier
    from adspy_shared_utilities import plot_feature_importances

    clf = DecisionTreeClassifier(random_state=0).fit(X_train2, y_train2)
    plot_feature_importances(clf, X_train2.columns)
    importances = clf.feature_importances_
    
    # Sort feature importances in descending order
    indices = np.argsort(importances)[::-1]
    
    # Rearrange feature names so they match the sorted feature importances
    names = [X_train2.columns[i] for i in indices]

    return names[:5]
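
# An equivalent, shorter way to get the five most important features (a pandas
# sketch, assuming a fitted clf and the same X_train2 as above):
import pandas as pd

top5 = pd.Series(clf.feature_importances_, index=X_train2.columns).nlargest(5)
print(top5.index.tolist())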
def feature_importance():
    from adspy_shared_utilities import plot_feature_importances
    import matplotlib.pyplot as plt
    ##################################################
    ### Jupyter notebooks need this line
    #%matplotlib notebook
    ##################################################
    #plt.clf()
    plt.figure(figsize=(10, 4), dpi=80)
    plot_feature_importances(clf, feature_names_list)  # clf and feature_names_list are notebook globals
    #print('Feature importances: {}'.format(clf.feature_importances_))
    print(
        'Feature importances: {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}'
        .format(*clf.feature_importances_[:7]))
    plt.show()
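
# A variant that scales to any number of features (a sketch, not part of the
# original example): join the formatted values instead of hard-coding seven slots.
print('Feature importances: '
      + ', '.join('{:.3f}'.format(v) for v in clf.feature_importances_))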
Example #5
plot_decision_tree(clf, iris.feature_names, iris.target_names)

# #### Pre-pruned version (max_depth = 3)

# In[ ]:

plot_decision_tree(clf2, iris.feature_names, iris.target_names)

# #### Feature importance

# In[ ]:

from adspy_shared_utilities import plot_feature_importances

plt.figure(figsize=(10, 4), dpi=80)
plot_feature_importances(clf, iris.feature_names)
plt.show()

print('Feature importances: {}'.format(clf.feature_importances_))

# In[ ]:

from sklearn.tree import DecisionTreeClassifier
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot

X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                    iris.target,
                                                    random_state=0)
fig, subaxes = plt.subplots(6, 1, figsize=(6, 32))

pair_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
from adspy_shared_utilities import plot_decision_tree
from adspy_shared_utilities import plot_feature_importances
from sklearn import tree
mush_df = pd.read_csv("mushrooms.csv")
print(mush_df)
mush_df2 = pd.get_dummies(mush_df)
print(mush_df2)

X_mush = mush_df2.iloc[:, 2:]  # features: all columns after the two class dummies
y_mush = mush_df2.iloc[:, 1]   # binary target: the second class dummy
print(X_mush)
print(y_mush)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_mush, y_mush, random_state=0)

X_subset = X_test2
y_subset = y_test2

model = DecisionTreeClassifier(random_state=0)
clf = model.fit(X_train2, y_train2)
print(clf.score(X_train2, y_train2))
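# An unpruned tree can memorize its training data, so this training score is
# typically 1.0; it says nothing about accuracy on X_test2.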

plt.figure(figsize=(10, 20))
plot_feature_importances(clf, X_train2.columns)
# plt.show()
# print(-np.sort(-clf.feature_importances_))
# Indices of features, sorted by importance in descending order
sorted_idx = np.argsort(-clf.feature_importances_)
names = []
# Most important 5 features
for i in range(5):
    names.append(X_train2.columns[sorted_idx[i]])
print(names)
Example #7
# Visualizing decision trees (TODO: make it display):
plt.figure()
plot_decision_tree(clf, iris.feature_names, iris.target_names) # (Figure 22)
# Color intensity indicates the majority class in each node.
# The 'value' list shows how many training instances of each class reach the node.

# Visualizing (pre-pruned version max_depth = 3)
plt.figure()
plot_decision_tree(clf2, iris.feature_names, iris.target_names) # (Figure 23)

# Feature importance
from adspy_shared_utilities import plot_feature_importances

plt.figure(figsize=(10, 4), dpi=80)  # dpi = dots per inch
plot_feature_importances(clf, iris.feature_names) # (Figure 24)
print("Feature importances: {}\n".format(clf.feature_importances_)) # Inherent property of classifier, not user-defined property

#from sklearn.tree import DecisionTreeClassifier
#from adspy_shared_utilities import plot_class_regions_for_classifier_subplot as pcr
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)
fig, subaxes = plt.subplots(1, 6, figsize=(32, 6))

pair_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
tree_max_depth = 4

for pair, axis in zip(pair_list, subaxes):
    X = X_train[:, pair]
    y = y_train
    
    clf = DecisionTreeClassifier(max_depth=tree_max_depth).fit(X, y)
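    # The snippet ends here without plotting; Example #8 below completes the same
    # loop, roughly like this (needs the plot_class_regions_for_classifier_subplot
    # import that is commented out above):
    # title = 'Decision Tree, max_depth = {:d}'.format(tree_max_depth)
    # plot_class_regions_for_classifier_subplot(clf, X, y, None, None, title,
    #                                           axis, iris.target_names)
    # axis.set_xlabel(iris.feature_names[pair[0]])
    # axis.set_ylabel(iris.feature_names[pair[1]])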
Example #8
def decisiontree():

    from sklearn.datasets import load_iris, load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    import matplotlib.pyplot as plt
    from adspy_shared_utilities import (plot_decision_tree,
                                        plot_feature_importances,
                                        plot_class_regions_for_classifier_subplot)

    iris = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=3)
    clf = DecisionTreeClassifier().fit(X_train, y_train)

    print(
        'Accuracy of Decision Tree classifier on training set: {:.2f}'.format(
            clf.score(X_train, y_train)))
    print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(
        clf.score(X_test, y_test)))

    clf2 = DecisionTreeClassifier(max_depth=3).fit(X_train, y_train)
    print(
        'Accuracy of Decision Tree classifier on training set: {:.2f}'.format(
            clf2.score(X_train, y_train)))
    print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(
        clf2.score(X_test, y_test)))
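    # max_depth=3 pre-prunes the tree: training accuracy drops a little relative to
    # the unpruned clf, while test accuracy usually improves.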

    plot_decision_tree(clf, iris.feature_names, iris.target_names)
    plot_decision_tree(clf2, iris.feature_names, iris.target_names)

    plt.figure(figsize=(10, 4), dpi=80)
    plot_feature_importances(clf, iris.feature_names)
    plt.show()

    print('Feature importances: {}'.format(clf.feature_importances_))

    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=0)
    fig, subaxes = plt.subplots(6, 1, figsize=(6, 32))
    pair_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]
    tree_max_depth = 4

    for pair, axis in zip(pair_list, subaxes):
        X = X_train[:, pair]
        y = y_train
        clf = DecisionTreeClassifier(max_depth=tree_max_depth).fit(X, y)
        title = 'Decision Tree, max_depth = {:d}'.format(tree_max_depth)
        plot_class_regions_for_classifier_subplot(clf, X, y, None, None, title,
                                                  axis, iris.target_names)
        axis.set_xlabel(iris.feature_names[pair[0]])
        axis.set_ylabel(iris.feature_names[pair[1]])

    plt.tight_layout()
    plt.show()

    cancer = load_breast_cancer()
    X_cancer, y_cancer = cancer.data, cancer.target
    X_train, X_test, y_train, y_test = train_test_split(X_cancer,
                                                        y_cancer,
                                                        random_state=0)
    clf = DecisionTreeClassifier(max_depth=4,
                                 min_samples_leaf=8,
                                 random_state=0).fit(X_train, y_train)
    plot_decision_tree(clf, cancer.feature_names, cancer.target_names)

    print('Breast cancer dataset: decision tree')
    print('Accuracy of DT classifier on training set: {:.2f}'.format(
        clf.score(X_train, y_train)))
    print('Accuracy of DT classifier on test set: {:.2f}'.format(
        clf.score(X_test, y_test)))

    plt.figure(figsize=(10, 6), dpi=80)
    plot_feature_importances(clf, cancer.feature_names)
    plt.tight_layout()
    plt.show()