# Multi-linear regression cell: pick the significant feature columns via
# backward elimination, refit on only those columns, and score the model.
# NOTE(review): `regressor`, `X_train`/`X_test`, `Y_train`/`Y_test` and
# `Y_pred_simple` come from earlier cells not visible here — confirm they are
# defined before this cell runs.
selected_features = regressor.backward_elimination(x_train=X_train, y_train=Y_train, verbose=True)
Y_predict = regressor.fit(
    x_train=X_train.iloc[:, selected_features],
    y_train=Y_train).predict(x_test=X_test.iloc[:, selected_features])
print("Goodness of Model (Adjusted R-Squared Score):", regressor.r_score())

# In[] Use Mean Squared Error (MSE) to decide which of the two models is
# better; the one with the smaller MSE wins.
# MSE definition = SUM(Yi-Yh)^2/n
# Some practitioners take the square root of MSE first, obtaining the
# Root Mean Squared Error (RMSE), and compare that instead.
from HappyML.performance import rmse

rmse_linear = rmse(Y_test, Y_pred_simple)
rmse_multi = rmse(Y_test, Y_predict)

if rmse_linear < rmse_multi:
    print(
        "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))

# In[] Check for Assumption of Regression
from HappyML.criteria import AssumptionChecker

# NOTE(review): this statement is truncated in the chunk under review — the
# remaining AssumptionChecker constructor arguments continue past the end of
# this view.
checker = AssumptionChecker(X_train.iloc[:, selected_features],
@author: henry """ import HappyML.preprocessor as pp from HappyML.regression import PolynomialRegressor import pandas as pd import HappyML.model_drawer as md from HappyML.performance import rmse dataset = pp.dataset("Device_Failure.csv") X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1]) X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y, train_size=0.75) reg_poly = PolynomialRegressor() reg_poly.best_degree(x_train=X_train, y_train=Y_train, x_test=X_test, y_test=Y_test) Y_poly = reg_poly.fit(x_train=X, y_train=Y).predict(x_test=X) years = float(input("請輸入設備已使用年份:")) hours_pred = reg_poly.predict(pd.DataFrame([[years]])).iloc[0, 0] print("您的設備預測總失效時間 =", "{:.4f}".format(hours_pred), "小時") print("平均每年失效時間 =", "{:.4f}".format(hours_pred / years), "小時/年") md.sample_model(sample_data=(X, Y), model_data=(X, Y_poly)) print("Degree =", reg_poly.degree, "RMSE =", "{:.4f}".format(rmse(Y, Y_poly)))
# Salary-estimation cell: predict a reasonable monthly salary from seniority
# using the polynomial regressor fitted in an earlier cell (`reg_poly`).
import math  # FIX: math.floor/math.ceil are used below but `math` was never
             # imported in this cell; importing it here is harmless even if an
             # earlier (unseen) cell also imports it.
import pandas as pd

# Ask for the candidate's seniority (years) and requested monthly salary.
# NOTE(review): request_salary is read but never used in this chunk —
# presumably it should be compared against salary_pred; confirm with the
# surrounding cells.
work_years = float(input("請輸入對方的年資:"))
request_salary = float(input("請輸入對方要求的月薪:"))

# Point estimate at the exact (possibly fractional) seniority, plus a bracket
# formed by the predictions at the surrounding whole years.
salary_pred = reg_poly.predict(pd.DataFrame([[work_years]])).iloc[0, 0]
salary_lower = reg_poly.predict(pd.DataFrame([[math.floor(work_years)]
                                              ])).iloc[0, 0]
salary_upper = reg_poly.predict(pd.DataFrame([[math.ceil(work_years)]
                                              ])).iloc[0, 0]
print("合理的月薪推測在 {:.2f} 左右".format(salary_pred))
print("範圍落在 {:.2f}~{:.2f} 之間".format(salary_lower, salary_upper))

# In[] Comparing the two models — the model with the smaller RMSE wins.
from HappyML.performance import rmse

rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_poly)

if rmse_linear < rmse_poly:
    print(
        "RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!".
        format(rmse_linear, rmse_poly))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!"
        .format(rmse_linear, rmse_poly))

# In[] Check for Assumption of Linear Regression
from HappyML.criteria import AssumptionChecker
# In[] Expand the single feature into polynomial terms of a fixed degree.
from sklearn.preprocessing import PolynomialFeatures

# NOTE(review): degree 12 is very high and may overfit — confirm this is the
# intended demonstration value.
deg = 12
poly_features = PolynomialFeatures(degree=deg)
X_poly = poly_features.fit_transform(X)

# In[] Fit a plain linear regressor on the expanded feature matrix and plot
# the resulting curve against the samples.
import pandas as pd

regressor = SimpleRegressor()
regressor.fit(X_poly, Y)
Y_predict = regressor.predict(x_test=pd.DataFrame(X_poly))
md.sample_model(sample_data=(X, Y), model_data=(X, Y_predict))

# In[] Report the fit quality of the manual polynomial model.
from HappyML.performance import rmse

print("Degree: {} RMSE:{:.4f}".format(deg, rmse(Y, Y_predict)))

# In[] Compare the plain linear model against the polynomial one; the model
# with the smaller RMSE wins.  (rmse was already imported above, so the
# original's duplicate import is dropped.)
rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_predict)

if rmse_linear < rmse_poly:
    print("RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!"
          .format(rmse_linear, rmse_poly))
else:
    print("RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!"
          .format(rmse_linear, rmse_poly))