y_train=Y_train).predict(x_test=X_test.iloc[:, selected_features]) print("Goodness of Model (Adjusted R-Squared Score):", regressor.r_score()) # In[] 使用「均方差」(Mean Squared Error, MSE)比較兩個模型的好壞。MSE 小者勝 # 均方差定義 = SUM(Yi-Yh)^2/n # 亦有人將「均方差」再開根號,得到「均方差根」(Root of MSE, RMSE)後,才來比較 from HappyML.performance import rmse rmse_linear = rmse(Y_test, Y_pred_simple) rmse_multi = rmse(Y_test, Y_predict) if rmse_linear < rmse_multi: print( "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!" .format(rmse_linear, rmse_multi)) else: print( "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!" .format(rmse_linear, rmse_multi)) # In[] Check for Assumption of Regression from HappyML.criteria import AssumptionChecker checker = AssumptionChecker(X_train.iloc[:, selected_features], X_test.iloc[:, selected_features], Y_train, Y_test, Y_predict) checker.y_lim = (-4, 4) checker.heatmap = True checker.check_all()
# Ask for the candidate's requested monthly salary, then estimate a fair salary
# from the fitted polynomial regression (reg_poly) at `work_years` seniority.
request_salary = float(input("請輸入對方要求的月薪:"))

# Point estimate at the (possibly fractional) years of experience, plus a
# bracket from the two surrounding integer years.
salary_pred = reg_poly.predict(pd.DataFrame([[work_years]])).iloc[0, 0]
bound_floor = reg_poly.predict(pd.DataFrame([[math.floor(work_years)]])).iloc[0, 0]
bound_ceil = reg_poly.predict(pd.DataFrame([[math.ceil(work_years)]])).iloc[0, 0]
# Fix: a fitted polynomial is not guaranteed to be increasing, so the floor-year
# prediction can exceed the ceil-year one; order the pair before printing the
# "lower~upper" range so it always reads low-to-high.
salary_lower, salary_upper = sorted((bound_floor, bound_ceil))

print("合理的月薪推測在 {:.2f} 左右".format(salary_pred))
print("範圍落在 {:.2f}~{:.2f} 之間".format(salary_lower, salary_upper))

# In[] Comparing the two models
from HappyML.performance import rmse

rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_poly)

if rmse_linear < rmse_poly:
    print(
        "RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!".
        format(rmse_linear, rmse_poly))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!"
        .format(rmse_linear, rmse_poly))

# In[] Check for Assumption of Linear Regression
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X, X, Y, Y, Y_poly)
checker.check_all()
title="訓練集樣本點 vs. 預測模型", font="DFKai-sb") md.sample_model(sample_data=(X_test, Y_test), model_data=(X_test, Y_pred), title="測試集樣本點 vs. 預測模型", font="DFKai-sb") # In[] Test for Linearity of Features #from HappyML import model_drawer as md # #for i in range(X_train.shape[1]): # md.sample_model(sample_data=(X_train[:, i], Y_train), model_data=None, title="Linearity of Column {}".format(i)) from HappyML.criteria import AssumptionChecker checker = AssumptionChecker(X_train, X_test, Y_train, Y_test, Y_pred) checker.sample_linearity() # In[] Test for Normal Distribution of Residuals #import scipy.stats as stats #import matplotlib.pyplot as plt # #residuals = (Y_test - Y_pred).ravel() #stats.probplot(residuals, plot=plt) #plt.show() checker.residuals_normality() # In[] Check for Errpr Uncorrelated #from statsmodels.stats.stattools import durbin_watson #
# Predict with the backward-eliminated OLS model: add the intercept column,
# keep only the surviving feature columns, then predict.
X_test = smtools.add_constant(X_test)
X_opttest = X_test.iloc[:, features]
Y_predmulti = regressor_OLS.predict(X_opttest)

# In[] Compare simple vs. multi-linear model by RMSE (smaller wins).
from sklearn.metrics import mean_squared_error
import numpy as np

rmse_linear = np.sqrt(mean_squared_error(Y_test, Y_pred_simple))
rmse_multi = np.sqrt(mean_squared_error(Y_test, Y_predmulti))

# Select the winner's message, then print once.
if rmse_linear < rmse_multi:
    verdict = "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!"
else:
    verdict = "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!"
print(verdict.format(rmse_linear, rmse_multi))

# In[] Regression-assumption diagnostics on the selected features.
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X_train.iloc[:, features],
                            X_test.iloc[:, features],
                            Y_train, Y_test, Y_predmulti)
checker.y_lim = (-4, 4)
checker.heatmap = True
checker.check_all()
# Manual alternative kept for reference: sklearn's cross_val_score with
# scoring="recall" / "precision" / "f1" on classifier.classifier yields the
# same per-metric k-fold means as the HappyML wrapper below.
from HappyML.performance import KFoldClassificationPerformance

K = 10
kfp = KFoldClassificationPerformance(x_ary=X,
                                     y_ary=Y,
                                     classifier=classifier.classifier,
                                     k_fold=K,
                                     verbose=False)

print("{} Folds Mean Accuracy: {}".format(K, kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, kfp.precision()))
print("{} Folds Mean F1-Score: {}".format(K, kfp.f_score()))

# In[] Visualization
import HappyML.model_drawer as md

md.classify_result(x=X_train, y=Y_train, classifier=classifier,
                   title="訓練集 vs. 模型", font='DFKai-sb')
md.classify_result(x=X_test, y=Y_test, classifier=classifier,
                   title="測試集 vs. 模型", font='DFKai-sb')

# In[] Check for Variables Independence
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(x_train=X_train, x_test=X_test,
                            y_train=Y_train, y_test=Y_test,
                            y_pred=Y_pred)
checker.features_correlation(heatmap=True)
# Baseline run: Gaussian Naive Bayes on all (scaled) features.
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)
X_train, X_test = pp.feature_scaling(fit_ary=X_train,
                                     transform_arys=(X_train, X_test))

classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold performance of the baseline model.
K = 10
Kfp = KFoldClassificationPerformance(x_ary=X,
                                     y_ary=Y,
                                     classifier=classifier.classifier,
                                     k_fold=K)
print("{} Folds Mean Accuracy: {}".format(K, Kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, Kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, Kfp.precision()))
print("{} Folds Mean F1-Score: {}".format(K, Kfp.f_score()))

# Inspect feature correlations before selection.
checker = AssumptionChecker(X_train, X_test, Y_train, Y_test, Y_pred)
checker.features_correlation()

# Second run: keep the 2 best features, rescale, refit, and visualize.
selector = pp.KBestSelector(best_k=2)
X = selector.fit(X, Y, False, True).transform(X)
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

# NOTE(review): X_train/X_test were already standardized above, so this is a
# second scaling pass over the selected columns — presumably intentional
# (re-standardizing keeps them standardized); confirm against the lesson plan.
X_train, X_test = pp.feature_scaling(fit_ary=X_train,
                                     transform_arys=(X_train, X_test))

classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

md.classify_result(x=X_train, y=Y_train, classifier=classifier,
                   title="訓練集 vs. 模型", font="Microsoft JhengHei")
md.classify_result(x=X_test, y=Y_test, classifier=classifier,
                   title="測試集 vs. 模型", font="Microsoft JhengHei")