# Train a decision tree on the 'prognosis' target, report its accuracy,
# list the most important features and export the fitted tree as Graphviz.
y = df['prognosis']

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42)

print("DecisionTree")
dt = DecisionTreeClassifier()
clf_dt = dt.fit(x_train, y_train)
print("Acurracy: ", clf_dt.score(x_test, y_test))

print("cross result========")
# NOTE(review): cross-validation is run on the held-out test split rather
# than on the training data -- confirm this is intended.
scores = cross_val_score(dt, x_test, y_test, cv=3)
print(scores)
print(scores.mean())

# Full tree, written in Graphviz .dot format.
export_graphviz(dt, out_file='diseaseData/tree.dot', feature_names=cols)

# Feature indices ordered from most to least important.
importances = dt.feature_importances_
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")
features = cols
# Slicing (instead of a fixed range(5)) avoids an IndexError when the data
# has fewer than five features.
for rank, idx in enumerate(indices[:5], start=1):
    print("%d. feature %d - %s (%f)"
          % (rank, idx, features[idx], importances[idx]))

# Same tree, truncated to depth 5 so the rendering stays readable.
export_graphviz(dt, out_file='diseaseData/tree-top5.dot',
                feature_names=cols, max_depth=5)

# NOTE(review): nothing here renders the .dot file to PNG, and the image is
# read from the working directory rather than 'diseaseData/' -- presumably it
# is produced by an external `dot -Tpng` step; verify the path.
Image(filename='tree-top5.png')