Example #1
def describe_model(model, X, Y, log, plot_all):
    predictor_names = X.columns.values.tolist()
    log += print_and_log("Model:")
    log += print_and_log(model)

    log += print_and_log("Predictor variables:")
    for p in predictor_names:
        log += print_and_log("\t{}".format(p))

    # Export tree SVG
    if plot_all:
        from dtreeviz.trees import dtreeviz

        log += print_and_log("\nExport tree to SVG:")
        viz = dtreeviz(
            model,
            X.values,
            Y.values.ravel(),
            target_name="perf",
            feature_names=predictor_names,
        )
        viz.save("trytree.svg")
        viz.view()

    return log
Example #2
def viz_digits(orientation="TD",
               max_depth=3,
               random_state=666,
               fancy=True,
               pickX=False):
    clf = tree.DecisionTreeClassifier(max_depth=max_depth,
                                      random_state=random_state)
    digits = load_digits()

    # "8x8 image of integer pixels in the range 0..16."
    columns = [f'pixel[{i},{j}]' for i in range(8) for j in range(8)]

    clf.fit(digits.data, digits.target)

    X = None
    if pickX:
        X = digits.data[np.random.randint(0, len(digits.data)), :]

    viz = dtreeviz(clf,
                   digits.data,
                   digits.target,
                   target_name='number',
                   feature_names=columns,
                   orientation=orientation,
                   class_names=[chr(c) for c in range(ord('0'),
                                                      ord('9') + 1)],
                   fancy=fancy,
                   histtype='bar',
                   X=X)
    return viz
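A minimal usage sketch for viz_digits (hypothetical; it assumes the snippet's implied imports: numpy as np, sklearn.tree as tree, load_digits and dtreeviz):

# Render the digits tree left-to-right and highlight one random observation
viz = viz_digits(orientation="LR", max_depth=3, pickX=True)
viz.view()  # opens the generated SVG in the default viewer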
Example #3
def viz_tree(clf, Xdata, Ydata, label, outfile, point=None):
    from dtreeviz.trees import dtreeviz
    from sklearn.tree import export_graphviz

    X      = Xdata.pd
    Y      = Ydata.pd
    X_headers = X.columns
    Y_headers = Y.columns

    # If "point" given as argument, find the closest datapoint to "point", will do treewalk for this observation
    if(point is not None):
        points = Xdata.vtk.points
        dist = points-point
        dist = np.sqrt(dist[:,0]**2.0 + dist[:,1]**2.0 + dist[:,2]**2.0)
        loc = np.argmin(dist)

        print('point = ', point)
        print('nearest point = ',points[loc,:])
        print('distance = ',dist[loc])

        datapoint = X.iloc[loc]
    else:
        datapoint = None

    # Extract the classifier object from the clf multilearn object
    index = Y_headers.to_list().index(label)
    clf = clf.classifiers_[index]

    # TODO: check if clf is a decision tree

    viz = dtreeviz(clf, X, Y[label],
                   feature_names=X_headers,
                   target_name=label, class_names=["False", "True"], X=datapoint)

    viz.save(outfile)
Example #4
def viz_boston(orientation="TD", max_depth=3, random_state=666, fancy=True):
    regr = tree.DecisionTreeRegressor(max_depth=max_depth,
                                      random_state=random_state)
    boston = load_boston()

    regr = regr.fit(boston.data, boston.target)

    X = boston.data[np.random.randint(0, len(boston.data)), :]

    print(boston.feature_names)
    features = np.array([
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
        'PTRATIO', 'B', 'LSTAT'
    ])
    viz = dtreeviz(regr,
                   boston.data,
                   boston.target,
                   target_name='price',
                   feature_names=features,
                   orientation=orientation,
                   fancy=fancy,
                   X=X)

    export_graphviz(regr,
                    out_file="/tmp/boston-scikit-tree.dot",
                    filled=True,
                    rounded=True,
                    special_characters=True)

    return viz
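A short, hypothetical call for viz_boston above (assumes the snippet's implied imports: numpy as np, sklearn.tree as tree, load_boston, dtreeviz and export_graphviz):

# Render a shallow regression tree and open the SVG; the graphviz .dot file
# is also written to /tmp/boston-scikit-tree.dot by the function itself
viz = viz_boston(orientation="LR", max_depth=2)
viz.view()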
Example #5
def viz_diabetes(orientation="TD",
                 max_depth=3,
                 random_state=666,
                 fancy=True,
                 pickX=False):
    diabetes = load_diabetes()

    regr = tree.DecisionTreeRegressor(max_depth=max_depth,
                                      random_state=random_state)
    regr.fit(diabetes.data, diabetes.target)

    X = None
    if pickX:
        X = diabetes.data[np.random.randint(0, len(diabetes.data)), :]

    viz = dtreeviz(regr,
                   diabetes.data,
                   diabetes.target,
                   target_name='progr',
                   feature_names=diabetes.feature_names,
                   orientation=orientation,
                   fancy=fancy,
                   X=X)

    return viz
Example #6
def viz_knowledge(orientation="TD", max_depth=3, random_state=666, fancy=True):
    # data from https://archive.ics.uci.edu/ml/datasets/User+Knowledge+Modeling
    clf = tree.DecisionTreeClassifier(max_depth=max_depth,
                                      random_state=random_state)
    know = pd.read_csv("data/knowledge.csv")
    target_names = ['very_low', 'Low', 'Middle', 'High']
    know['UNS'] = know['UNS'].map({n: i for i, n in enumerate(target_names)})

    X_train, y_train = know.drop('UNS', axis=1), know['UNS']
    clf = clf.fit(X_train[['PEG', 'LPR']], y_train)

    X = X_train.iloc[np.random.randint(0, len(know))]

    viz = dtreeviz(
        clf,
        X_train[['PEG', 'LPR']],
        y_train,
        target_name='UNS',
        feature_names=['PEG', 'LPR'],
        orientation=orientation,
        class_names=target_names,
        #                   show_node_labels=True,
        histtype='strip',
        fancy=fancy)
    return viz
Example #7
def visualize(clf):
    (train, target) = read_train_data()
    viz = dtreeviz(clf, train, target, target_name='HSV', feature_names=['Hue', 'Saturation', 'Value'],
                   class_names=['Yellow', 'Green',
                                'Red', 'Blue', 'Black', 'White']
                   )

    viz.view()
Example #8
    def export_tree_advanced(self,X,Y,feature_names,filename_out):

        viz = dtreeviz(self.model, X, Y, feature_names=feature_names,
                       colors={'classes': [None, None, ['#0080FF80', '#FF800080']]})
        viz.save(filename_out)
        ext = filename_out.split('.')[-1]
        name = filename_out.split('.' + ext)[0]
        if os.path.isfile(name):
            os.remove(name)

        return
# ----------------------------------------------------------------------------------------------------------------------
Example #9
    def interpret(
        self,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name=None,
        class_names=None,
        metric_name=None,
        ml_task=None,
        explain_level=2,
    ):
        super(DecisionTreeAlgorithm, self).interpret(
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            target_name,
            class_names,
            metric_name,
            ml_task,
            explain_level,
        )
        if explain_level == 0:
            return
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore")
            try:
                if len(class_names) > 10:
                    # dtreeviz does not support more than 10 classes
                    return
                viz = dtreeviz(
                    self.model,
                    X_train,
                    y_train,
                    target_name="target",
                    feature_names=X_train.columns,
                    class_names=class_names,
                )
                tree_file_plot = os.path.join(model_file_path,
                                              learner_name + "_tree.svg")
                viz.save(tree_file_plot)
            except Exception as e:
                logger.info(
                    f"Problem when visualizing decision tree. {str(e)}")

            save_rules(self.model, X_train.columns, class_names,
                       model_file_path, learner_name)
Example #10
def explain(skater_exp: Explanation, training_df, test_df, explanation_target,
            prefix_target):
    job = skater_exp.job
    model = joblib.load(job.predictive_model.model_path)
    model = model[0]

    features = list(training_df.drop(['trace_id', 'label'], 1).columns.values)
    interpreter = Interpretation(training_df, feature_names=features)
    X_train = training_df.drop(['trace_id', 'label'], 1)
    Y_train = training_df['label'].values

    model_inst = InMemoryModel(model.predict,
                               examples=X_train,
                               model_type=model._estimator_type,
                               unique_values=[1, 2],
                               feature_names=features,
                               target_names=['label'])
    surrogate_explainer = interpreter.tree_surrogate(model_inst, seed=5)

    surrogate_explainer.fit(X_train,
                            Y_train,
                            use_oracle=True,
                            prune='post',
                            scorer_type='default')
    surrogate_explainer.class_names = features

    viz = dtreeviz(surrogate_explainer.estimator_,
                   X_train,
                   Y_train,
                   target_name='label',
                   feature_names=features,
                   orientation="TD",
                   class_names=list(surrogate_explainer.class_names),
                   fancy=True,
                   X=None,
                   label_fontsize=12,
                   ticks_fontsize=8,
                   fontname="Arial")
    name = create_unique_name("skater_plot.svg")
    viz.save(name)
    if os.path.getsize(name) > 15000000:
        return 'The file size is too big'
    f = open(name, "r")
    response = f.read()
    os.remove(name)
    if os.path.isfile(name.split('.svg')[0]):
        os.remove(name.split('.svg')[0])

    return response
Example #11
def viz_iris(orientation="TD", max_depth=5, random_state=666, fancy=True):
    clf = tree.DecisionTreeClassifier(max_depth=max_depth,
                                      random_state=random_state)
    iris = load_iris()

    data = pd.DataFrame(iris.data)
    data.columns = iris.feature_names

    clf = clf.fit(data, iris.target)

    # for i in range(len(iris.data)):
    for i in [60]:
        x = data.iloc[i]
        pred = clf.predict([x.values])

        shadow_tree = ShadowDecTree(
            clf,
            iris.data,
            iris.target,
            feature_names=iris.feature_names,
            class_names=["setosa", "versicolor", "virginica"])

        pred2 = shadow_tree.predict(x.values)
        print(
            f'{x} -> {pred[0]} vs mine {pred2[0]}, path = {[f"node{p.feature_name()}" for p in pred2[1]]}'
        )
        path = [n.id for n in pred2[1]]
        if pred[0] != pred2[0]:
            print("MISMATCH!")

    features = list(data.columns)
    features = np.array([
        'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
        'petal width (cm)'
    ])
    st = dtreeviz(
        clf,
        iris.data,
        iris.target,
        target_name='variety',
        feature_names=features,
        orientation=orientation,
        class_names=["setosa", "versicolor", "virginica"],  # 0,1,2 targets
        #histtype='strip',
        fancy=fancy,
        X=x)

    return st
Example #12
def simple_dtree(df,
                 x_list,
                 y_var,
                 max_depth=3,
                 regressor=False,
                 min_samples_split=2,
                 test_size=0.3):
    """
    instant DecisionTree model
    """
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from dtreeviz.trees import dtreeviz

    X = df[x_list]
    y = df[y_var]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        random_state=123,
                                                        test_size=test_size)

    # build model, and fitting
    if regressor:
        model = DecisionTreeRegressor(max_depth=max_depth,
                                      random_state=123,
                                      min_samples_split=min_samples_split)
    else:
        model = DecisionTreeClassifier(max_depth=max_depth,
                                       random_state=123,
                                       min_samples_split=min_samples_split)
    model.fit(X_train, y_train)

    # plot
    print('Train score: {:.3f}'.format(model.score(X_train, y_train)))
    print('Test score: {:.3f}'.format(model.score(X_test, y_test)))
    viz = dtreeviz(
        model,
        X_train=X,
        y_train=y,
        target_name=y_var,
        feature_names=x_list,
        precision=2,
        class_names=None
        if model.classes_ is None else model.classes_.tolist(),
    )
    viz.view()
    return model
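A hypothetical usage sketch for simple_dtree, assuming the iris data is first loaded into a pandas DataFrame (pandas and load_iris are not part of the original snippet):

import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target

# Fit a shallow classification tree on two features; simple_dtree prints the
# train/test scores and calls viz.view() itself before returning the model.
model = simple_dtree(df,
                     x_list=['petal length (cm)', 'petal width (cm)'],
                     y_var='species',
                     max_depth=3)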
Example #13
def weird_binary_case():
    # See bug https://github.com/parrt/dtreeviz/issues/17
    import numpy as np
    from sklearn.tree import DecisionTreeClassifier
    from dtreeviz.trees import dtreeviz

    x = np.random.choice([-1, 1], size=(100, 2))
    y = np.random.choice([0, 1], size=100)

    viz = dtreeviz(tree_model=DecisionTreeClassifier(max_depth=1).fit(x, y),
                   X_train=x,
                   y_train=y,
                   feature_names=['a', 'b'],
                   target_name='y',
                   class_names=[1, 0])
    return viz
Example #14
    def interpret(
        self,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name=None,
        class_names=None,
        metric_name=None,
        ml_task=None,
        explain_level=2,
    ):
        super(DecisionTreeRegressorAlgorithm, self).interpret(
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            target_name,
            class_names,
            metric_name,
            ml_task,
            explain_level,
        )
        if explain_level == 0:
            return
        try:

            viz = dtreeviz(
                self.model,
                X_train,
                y_train,
                target_name="target",
                feature_names=X_train.columns,
            )
            tree_file_plot = os.path.join(model_file_path,
                                          learner_name + "_tree.svg")
            viz.save(tree_file_plot)
        except Exception as e:
            logger.info(
                f"Problem when visuzalizin decision tree regressor. {str(e)}")

        save_rules(self.model, X_train.columns, None, model_file_path,
                   learner_name)
Example #15
def explore_prediction():
    """Interactive interface for exploring how a random prediction is made."""
    x_sample = df.sample()
    display(x_sample)

    viz = dtreeviz.dtreeviz(
        clf,
        X,
        y,
        target_name='smoker',
        feature_names=list(X.columns),
        class_names=list(y_encoder.classes_),
        scale=1.0,
        X=x_sample[X.columns].iloc[0].values,
    )

    display(viz)
Example #16
    def interpret(self,
                  X,
                  y,
                  model_file_path,
                  learner_name,
                  target_name=None,
                  class_names=None):
        try:
            viz = dtreeviz(self.model,
                           X,
                           y,
                           target_name="target",
                           feature_names=X.columns)
            self._tree_file_plot = os.path.join(model_file_path,
                                                learner_name + "_tree.svg")
            viz.save(self._tree_file_plot)
        except Exception as e:
            self._tree_file_plot = None
Example #17
    def visualize_decision_tree(self,
                                X_train,
                                Y_train,
                                feature_names,
                                max_depth=3):
        X_train = self.sample_like(X_train, frac=self.sample_frac)
        Y_train = self.sample_like(Y_train, frac=self.sample_frac)

        Y_train = self.ravel_like(Y_train)
        clf = DecisionTreeClassifier(max_depth=max_depth)
        clf.fit(X_train, Y_train)
        for pred_col in self.pred_cols:
            viz = dtreeviz(clf,
                           X_train,
                           Y_train,
                           target_name=pred_col,
                           feature_names=feature_names,
                           class_names=list(set([str(i) for i in Y_train])))
            display(viz)
Example #18
def getDtreeVizImg(rand_state=42, max_depth=3):
    fname = BASE_IMG_PATH + "dtv" + str(rand_state) + '_' + str(
        max_depth) + '.png'
    if (not os.path.exists(fname)):
        X = df.drop(['Class'], axis=1)
        y = df['Class']

        # Split into training and test sets. Since the focus here is on
        # drawing the dtreeviz-style decision tree rather than on making
        # predictions, the test set is unused.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=rand_state)
        # initialise the decision tree model
        DecsTreeModel = DecisionTreeClassifier(criterion='entropy',
                                               max_depth=max_depth)
        # train the model with the training set
        DecsTreeModel.fit(X_train, y_train)

        # Creating the dtreeviz style decision tree
        viz = dtreeviz(DecsTreeModel,
                       X_train,
                       y_train,
                       feature_names=list(X.columns.values),
                       class_names=['0', '1'])

        # save the decision tree image in SVG format
        viz.save(BASE_IMG_PATH + "svgtempfile.svg")
        out = BytesIO()

        #convert the decision tree image from SVG to PNG format
        cairosvg.svg2png(url=BASE_IMG_PATH + "svgtempfile.svg", write_to=out)
        dtree = Image.open(out)
        dtree.save(fname)

    with open(fname, "rb") as image:
        img = image.read()
        # Base64 encoding is useful for serializing image data, so it can be
        # saved to persistent storage or transferred over a network.
        barray = base64.b64encode(img).decode('utf-8')

    return barray
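A minimal, illustrative sketch of how the returned Base64 string might be consumed, for example by embedding it in an HTML page (the globals df and BASE_IMG_PATH are assumed to exist as in the snippet above):

img_b64 = getDtreeVizImg(rand_state=42, max_depth=3)
html = '<img src="data:image/png;base64,{}"/>'.format(img_b64)
with open('tree_report.html', 'w') as f:
    f.write(html)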
Example #19
def run_single_tree(X_train, y_train, X_test, y_test, depth, plot_tree,
                    feature_cols):
    model = tree.DecisionTreeRegressor(max_depth=depth).fit(X_train, y_train)
    accuracy_train = model.score(X_train, y_train)
    accuracy_test = model.score(X_test, y_test)

    y_predictions = model.predict(X_test)
    y_predictions_series = pandas.Series(y_predictions, index=y_test.index)

    def resid(row):
        return row["true"] - row["pred"]

    resid_df = pandas.concat([y_test, y_predictions_series],
                             keys=["true", "pred"],
                             axis=1)
    resid_df["residual"] = resid_df.apply(resid, axis=1)

    plt.scatter(resid_df["true"][:round(len(resid_df) / 30)],
                resid_df["residual"][:round(len(resid_df) / 30)],
                alpha=0.5)
    plt.savefig(f"plots/residplot-depth-{depth}.png")
    plt.clf()

    print('Single tree depth: ', depth)
    print('Accuracy, Training Set: ', round(accuracy_train * 100, 5), '%')
    print('Accuracy, Test Set: ', round(accuracy_test * 100, 5), '%')
    print('RMSE Tree, Test Set: ',
          sqrt(mean_squared_error(y_test, y_predictions)))

    if plot_tree:
        viz = dtreeviz(model,
                       X_train,
                       y_train,
                       fancy=False,
                       target_name='chance',
                       feature_names=feature_cols)

        viz.save("plots/plot.svg")
    return accuracy_train, accuracy_test
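A hedged usage sketch for run_single_tree, using the diabetes data as a stand-in; it assumes the snippet's implied imports (pandas, matplotlib.pyplot as plt, sqrt, mean_squared_error, tree, dtreeviz) and that a "plots/" directory exists, since the function saves figures there:

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas

diabetes = load_diabetes()
X = pandas.DataFrame(diabetes.data, columns=diabetes.feature_names)
y = pandas.Series(diabetes.target)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

acc_train, acc_test = run_single_tree(X_tr, y_tr, X_te, y_te, depth=3,
                                      plot_tree=True,
                                      feature_cols=list(X.columns))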
Example #20
def produce_tree_visualization(tree, tree_index, x, y, target_name,
                               feature_names, class_names, model_uid):
    """
    Produces visualization of a decision tree from an ensemble.

    :param tree: tree model
    :param tree_index: index of the tree in the ensemble
    :param x: predictor matrix
    :param y: target series
    :param target_name: name of the target
    :param feature_names: list of feature names
    :param class_names: name of the target classes
    :param model_uid: name of the model
    """
    viz = dtreeviz(tree.estimators_[tree_index],
                   x,
                   y,
                   target_name=target_name,
                   feature_names=feature_names,
                   class_names=class_names)
    viz.save(
        os.path.join('modeling', model_uid, 'diagnostics', 'trees',
                     f'decision_tree_{tree_index}.svg'))
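A hypothetical invocation of the helper above, assuming a fitted scikit-learn ensemble (e.g. a RandomForestClassifier) and that the 'modeling/<model_uid>/diagnostics/trees' directory already exists:

produce_tree_visualization(tree=fitted_forest,  # any fitted ensemble exposing estimators_
                           tree_index=0,
                           x=X_train,
                           y=y_train,
                           target_name='label',
                           feature_names=list(X_train.columns),
                           class_names=['negative', 'positive'],
                           model_uid='rf_v1')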
Example #21
def create_tree(df, max_depth, target='admitted', **kwargs):

    X = df.drop(columns=[target])
    y = df[target]

    tree = DecisionTreeClassifier(
        max_depth=max_depth,
        **kwargs,
    )

    tree.fit(X, y)

    rep = report(y, tree.predict(X))

    viz = dtreeviz(
        tree,
        X,
        y,
        target_name='admitted',
        feature_names=X.columns,
        class_names=['no', 'yes'],
    )

    return tree, rep, viz
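A hypothetical call for create_tree, assuming df is a pandas DataFrame with an 'admitted' column and that report, DecisionTreeClassifier and dtreeviz are imported as the surrounding module implies:

admission_tree, rep, viz = create_tree(df, max_depth=3, min_samples_leaf=5)
print(rep)
viz.view()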
Example #22
def describe_hpo(gs, X, Y, log, plot_all):
    predictor_names = X.columns.values.tolist()
    log += print_and_log(
        "\n----------------------------------------------------------------------------"
    )
    log += print_and_log("Available predictor variables:")
    for p in predictor_names:
        log += print_and_log("\t{}".format(p))

    log += print_and_log("\nBest parameters set found on development set:")
    for bestpar_name, bestpar_value in gs.best_params_.items():
        log += print_and_log("\t{}: {}".format(bestpar_name, bestpar_value))

    log += print_and_log("\nBest estimator:")
    best_estimator = gs.best_estimator_._final_estimator
    log += print_and_log(best_estimator)
    log += print_and_log(
        "----------------------------------------------------------------------------"
    )

    # Export tree SVG
    if plot_all:
        from dtreeviz.trees import dtreeviz

        log += print_and_log("\nExport tree to SVG:")
        viz = dtreeviz(
            best_estimator,
            X.values,
            Y.values.ravel(),
            target_name="perf",
            feature_names=predictor_names,
        )
        viz.save("trytree.svg")
        viz.view()

    return log
Example #23
def viz_wine(orientation="TD",
             max_depth=3,
             random_state=666,
             fancy=True,
             pickX=False):
    clf = tree.DecisionTreeClassifier(max_depth=max_depth)
    wine = load_wine()

    X_train = wine.data
    y_train = wine.target
    clf.fit(X_train, y_train)

    X = None
    if pickX:
        X = X_train[np.random.randint(0, len(X_train)), :]

    viz = dtreeviz(clf,
                   wine.data,
                   wine.target,
                   target_name='wine',
                   feature_names=wine.feature_names,
                   class_names=list(wine.target_names),
                   X=X)  # pass the test observation
    return viz
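A minimal usage sketch for viz_wine (hypothetical; assumes the implied imports numpy as np, sklearn.tree as tree, load_wine and dtreeviz):

viz = viz_wine(pickX=True)  # highlight one random wine sample in the tree
viz.save("wine_tree.svg")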
Example #24
    def interpret(self,
                  X,
                  y,
                  model_file_path,
                  learner_name,
                  target_name=None,
                  class_names=None):
        try:
            if len(class_names) > 10:
                # dtreeviz does not support more than 10 classes
                return
            viz = dtreeviz(
                self.model,
                X,
                y,
                target_name="target",
                feature_names=X.columns,
                class_names=class_names,
            )
            self._tree_file_plot = os.path.join(model_file_path,
                                                learner_name + "_tree.svg")
            viz.save(self._tree_file_plot)
        except Exception as e:
            self._tree_file_plot = None
Example #25
from dtreeviz.trees import dtreeviz
from sklearn import tree
from sklearn.datasets import load_wine

wine = load_wine()
classifier = tree.DecisionTreeClassifier(max_depth=2)
classifier.fit(wine.data, wine.target)

vis = dtreeviz(
    classifier,
    wine.data,
    wine.target,
    target_name="wine_type",
    feature_names=wine.feature_names,
)

vis.view()
Example #26
#     .set_axis(['X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11',
#                'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20',
#                'X21', 'X22', 'X23', 'Y'], axis=1)\
#     .to_csv('csv/accomodation_data.csv')

# 4 Build a model with a decision tree --------------------------------------------------------

# Build the model
# --- Decision tree classifier
clf = DecisionTreeClassifier(max_depth=2)
clf = clf.fit(X=features, y=data_o)

# Inspect the fitted model
vars(clf)

# 5 Visualize the decision tree ---------------------------------------------------------------

# Extract the index
time_index = df_info.resample('M').count().index

# Draw the decision tree
viz = dtreeviz(
    clf,
    features,
    data_o,
    target_name='Class',
    feature_names=time_index,
    class_names=['False', 'True'],
)
viz
Example #27
# Print a text representation of the tree
text_representation = tree.export_text(clf)
print(text_representation)
# Save it to a file
with open('decisiontree1.log', 'w') as fout:
    fout.write(text_representation)

# Plot the tree
tree.plot_tree(decision_tree=clf)

fig = plt.figure(figsize=(10, 8))
_ = tree.plot_tree(clf, feature_names=iris.feature_names, class_names=iris.target_names, filled=True)  # see plot

import graphviz
dot_data = tree.export_graphviz(decision_tree=clf, out_file=None, feature_names = iris.feature_names, class_names = iris.target_names, filled=True)
graph = graphviz.Source(dot_data, format='png')
# If the dot executable is not found, add the Graphviz binaries to PATH
import os
os.environ["PATH"] += os.pathsep + 'c:/Program Files (x86)/Graphviz2.38/bin/'
graph = graphviz.Source(dot_data, format='png')
graph
graph.render('DecisionTree')  # format='png' appends the .png extension


# %% Plot DT with dtreeviz
# pip install dtreeviz
from dtreeviz.trees import dtreeviz
viz = dtreeviz(clf, X, y, target_name='target',
               feature_names=iris.feature_names,
               class_names=list(iris.target_names))
viz
viz.save('dt2.svg')

Example #28
import pandas as pd
from sys import argv
from dtreeviz.trees import dtreeviz
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier

data = pd.read_csv(argv[1])
cols = list(data.columns)
full = cols[1:-4]  # the first one is the date and the last four are the labels
labels = cols[-4]  # these are the raw labels (0, 1, 2)
names = []
for f in full:
    if f in argv or len(argv) < 3:
        names.append(f)
X = data[names]
y = data[labels]
print(names)

dt = DecisionTreeClassifier()
model = dt.fit(X, y)
v = dtreeviz(model,
             X,
             y,
             target_name='trend',
             feature_names=names,
             class_names=['decrease', 'stable', 'increase'])
v.save('dt.svg')
print(f1_score(y, dt.predict(X), average='weighted'))
# Compute the accuracy
pre1 = clf_result.predict(X_train)
ac_score1 = metrics.accuracy_score(y_train, pre1)
print("Training data accuracy = ", ac_score1)
pre2 = clf_result.predict(X_test)
ac_score2 = metrics.accuracy_score(y_test, pre2)
print("Test data accuracy = ", ac_score2)

# Visualize feature importances
features = list(X.columns)

n_features = X.shape[1]
plt.barh(range(n_features), clf_result.feature_importances_, align="center")
plt.yticks(np.arange(n_features), features)
plt.xlabel("importance")
plt.ylabel("Feature value")
plt.savefig("barh.png")

# Visualize the decision tree
viz = dtreeviz(clf_result,
               X.values,
               y.values,
               target_name="variety",
               feature_names=features,
               class_names=["0", "1", "2", "3"])

viz.view()
# DOT data
dot_data = tree.export_graphviz(clf, out_file=None, 
                                feature_names=iris.feature_names,  
                                class_names=iris.target_names,
                                filled=True)

# Draw graph
graph = graphviz.Source(dot_data, format="png") 
graph
#graph.render("decision_tree_graphivz")

# Plot Decision Tree with dtreeviz Package
from dtreeviz.trees import dtreeviz # remember to load the package

viz = dtreeviz(clf, X, y,
                target_name="target",
                feature_names=iris.feature_names,
                class_names=list(iris.target_names))

viz


################## REGRESSION TASK #######################

# Visualizing the Decision Tree in Regression Task
from sklearn import datasets
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree

# Prepare the data
boston = datasets.load_boston()
X = boston.data