def display_decision_tree(train_x, valid_x, train_y, valid_y):
    """Fit a depth-4 decision tree, visualize it, and print evaluation metrics.

    Trains a ``DecisionTreeClassifier(max_depth=4, random_state=1)`` on the
    training split, plots the tree, prints precision/recall/F1 on the
    validation split and accuracy on both splits, then prints the feature
    importances sorted most-important-first.

    Parameters
    ----------
    train_x, valid_x : pd.DataFrame
        Predictor matrices for the training and validation splits.
    train_y, valid_y : array-like
        Target vectors aligned with ``train_x`` / ``valid_x``.
        Assumed binary — the un-averaged precision/recall/F1 calls below
        require it (TODO confirm against caller).
    """
    print('(9) Display Decision Tree\n')
    fullClassTree = DecisionTreeClassifier(max_depth=4, random_state=1)
    fullClassTree.fit(train_x, train_y)
    plotDecisionTree(fullClassTree, feature_names=train_x.columns)

    # Score the fitted tree on both splits.
    prediction_train = fullClassTree.predict(train_x)
    prediction_valid = fullClassTree.predict(valid_x)

    print('precision on test is:', precision_score(valid_y, prediction_valid))
    print('recall on test is:', recall_score(valid_y, prediction_valid))
    print('f1 on test is:', f1_score(valid_y, prediction_valid))
    # BUG FIX: these labels previously read "Logistic Regression" although the
    # accuracies belong to the decision tree fitted above.
    print('Decision Tree: Accuracy on train is:',
          accuracy_score(train_y, prediction_train))
    print('Decision Tree: Accuracy on test is:',
          accuracy_score(valid_y, prediction_valid), '\n')

    # Feature importances, most important first.
    importances = fullClassTree.feature_importances_
    important_df = pd.DataFrame({
        'feature': train_x.columns,
        'importance': importances,
    })
    important_df = important_df.sort_values('importance', ascending=False)
    print(important_df)
# Logistic-regression metrics on both splits.
# NOTE(review): lr_prediction_train / lr_prediction_valid are defined before
# this chunk — presumably predictions from a fitted LogisticRegression; confirm upstream.
print("Precision_score train is:",precision_score(train_y,lr_prediction_train))
print("Precision_score on test is:",precision_score(valid_y,lr_prediction_valid))
print("Recall_score on train is:",recall_score(train_y,lr_prediction_train))
print("Recall_score on test is:",recall_score(valid_y,lr_prediction_valid))
print("f1_score on train is:",f1_score(train_y,lr_prediction_train))
print("f1_score on test is:",f1_score(valid_y,lr_prediction_valid))

""" Decision Tree """

# Fit a depth-limited decision tree on the training split and visualize it.
DecisionTree = DecisionTreeClassifier(max_depth = 4)
DecisionTree.fit(train_X, train_y)
plotDecisionTree(DecisionTree, feature_names=train_X.columns)

# Feature importances from the fitted tree, sorted most-important-first.
importances = DecisionTree.feature_importances_
im = pd.DataFrame({'feature': train_X.columns, 'importance': importances})
im = im.sort_values('importance',ascending=False)
print(im)

# Score the tree on both splits.
dt_prediction_train = DecisionTree.predict(train_X)
dt_prediction_valid = DecisionTree.predict(valid_X)
print("Accuracy score on train is:",accuracy_score(train_y,dt_prediction_train))
print("Accuracy score on test is:",accuracy_score(valid_y,dt_prediction_valid))
print("Precision score on train is:",precision_score(train_y,dt_prediction_train))
print("Precision score on test is:",precision_score(valid_y,dt_prediction_valid))
print("Recall score on train is:",recall_score(train_y,dt_prediction_train))
# The package _scikit-learn_ has the class `DecisionTreeClassifier` to build a decision tree model. The function `plotDecisionTree` from the _dmba_ package can be used to visualize the tree. loan3000 = pd.read_csv(LOAN3000_CSV) predictors = ['borrower_score', 'payment_inc_ratio'] outcome = 'outcome' X = loan3000[predictors] y = loan3000[outcome] loan_tree = DecisionTreeClassifier(random_state=1, criterion='entropy', min_impurity_decrease=0.003) loan_tree.fit(X, y) plotDecisionTree(loan_tree, feature_names=predictors, class_names=loan_tree.classes_) print(textDecisionTree(loan_tree)) ### The Recursive Partitioning Algorithm fig, ax = plt.subplots(figsize=(6, 4)) loan3000.loc[loan3000.outcome == 'paid off'].plot(x='borrower_score', y='payment_inc_ratio', style='.', markerfacecolor='none', markeredgecolor='C1', ax=ax) loan3000.loc[loan3000.outcome == 'default'].plot(x='borrower_score',
# NOTE: continues a param_grid literal opened in the previous chunk.
18, 20, ], }

# Exhaustive 5-fold grid search over the regression-tree hyperparameters,
# using all available cores.
gridSearch = GridSearchCV(DecisionTreeRegressor(), param_grid, cv=5, n_jobs=-1)
gridSearch.fit(train_X, train_y)
print('Improved parameters: ', gridSearch.best_params_)

# Evaluate the best tree on both splits.
regTree = gridSearch.best_estimator_
regressionSummary(train_y, regTree.predict(train_X))
regressionSummary(valid_y, regTree.predict(valid_X))

# Plot the regression tree (normal and rotated orientation).
plotDecisionTree(regTree, feature_names=train_X.columns)
plotDecisionTree(regTree, feature_names=train_X.columns, rotate=True)

# In[48]:

# Classification Tree: load the retail data and one-hot encode the predictors.
retail = pd.read_csv('retailsales1.csv')

predictors = ['inventorygrowth', 'populationgrowth']
outcome = 'yoygtenp'

X = pd.get_dummies(retail[predictors], drop_first=True)
y = retail[outcome]

# NOTE: this call is continued in the next chunk of the file.
train_X, valid_X, train_y, valid_y = train_test_split(X, y,