# Run backward elimination on the training data (verbose output enabled)
# and keep whatever column selection it returns.
selected_features = regressor.backward_elimination(
    x_train=X_train, y_train=Y_train, verbose=True)

# Restrict both splits to the selected columns, addressed by position.
X_train_selected = X_train.iloc[:, selected_features]
X_test_selected = X_test.iloc[:, selected_features]

# Refit on the reduced training set, then predict from the reduced test set.
# predict() is called on whatever fit() returns, exactly as a chained call.
fitted_regressor = regressor.fit(x_train=X_train_selected, y_train=Y_train)
Y_predict = fitted_regressor.predict(x_test=X_test_selected)

print("Goodness of Model (Adjusted R-Squared Score):", regressor.r_score())

# In[] Compare the two models by Mean Squared Error (MSE): the smaller wins.
# MSE definition = SUM(Yi - Yh)^2 / n
# Some people take the square root of the MSE to get the Root of MSE (RMSE)
# and compare on that value instead.
from HappyML.performance import rmse

rmse_linear = rmse(Y_test, Y_pred_simple)
rmse_multi = rmse(Y_test, Y_predict)

if rmse_linear < rmse_multi:
    print(
        "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))
elif rmse_linear == rmse_multi:
    # Equal scores: reporting ">" and declaring a winner would be false.
    print(
        "RMSE Linear:{:.4f} = RMSE Multi-Linear:{:.4f}...Tie, no winner."
        .format(rmse_linear, rmse_multi))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))

# In[] Check for Assumption of Regression
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X_train.iloc[:, selected_features],
Example #2
0
@author: henry
"""

import HappyML.preprocessor as pp
from HappyML.regression import PolynomialRegressor
import pandas as pd
import HappyML.model_drawer as md
from HappyML.performance import rmse

# Load the CSV and split it by column position: column 0 -> X, column 1 -> Y.
dataset = pp.dataset("Device_Failure.csv")
X, Y = pp.decomposition(dataset, x_columns=[0], y_columns=[1])

# Split into train/test parts with train_size=0.75.
split_parts = pp.split_train_test(x_ary=X, y_ary=Y, train_size=0.75)
X_train, X_test, Y_train, Y_test = split_parts

# Run the regressor's degree search on the train/test split.
# NOTE(review): presumably this sets reg_poly.degree -- confirm in HappyML.
reg_poly = PolynomialRegressor()
reg_poly.best_degree(
    x_train=X_train, y_train=Y_train, x_test=X_test, y_test=Y_test)

# Refit on the full data set and predict over the same X.
fitted_poly = reg_poly.fit(x_train=X, y_train=Y)
Y_poly = fitted_poly.predict(x_test=X)

# Ask for the device age and predict from a one-row, one-column frame;
# the prediction is read back from the first cell of the result.
years = float(input("請輸入設備已使用年份:"))
hours_pred = reg_poly.predict(pd.DataFrame([[years]])).iloc[0, 0]
print("您的設備預測總失效時間 =", "{:.4f}".format(hours_pred), "小時")
if years != 0:
    print("平均每年失效時間 =", "{:.4f}".format(hours_pred / years), "小時/年")
else:
    # A per-year average is undefined for zero years of use; dividing would
    # raise ZeroDivisionError or print inf/nan depending on the numeric type.
    print("平均每年失效時間 = 無法計算(使用年份為 0)")

# Plot the samples against the fitted curve over the same X values.
md.sample_model(sample_data=(X, Y), model_data=(X, Y_poly))

# Report the regressor's degree with the RMSE of the full-data fit.
full_fit_rmse = rmse(Y, Y_poly)
print("Degree =", reg_poly.degree, "RMSE =", "{:.4f}".format(full_fit_rmse))
Example #3
0
# math.floor / math.ceil are used below; import math in this cell so it does
# not depend on an import that may or may not exist earlier in the file.
import math

import pandas as pd

work_years = float(input("請輸入對方的年資:"))
# Read from the user but not used anywhere in the visible part of the script.
request_salary = float(input("請輸入對方要求的月薪:"))

# Predict at the exact seniority entered, and at the whole years just below
# and just above it to give a surrounding range. Each prediction is made
# from a one-row, one-column frame and read back from its first cell.
salary_pred = reg_poly.predict(pd.DataFrame([[work_years]])).iloc[0, 0]
salary_lower = reg_poly.predict(
    pd.DataFrame([[math.floor(work_years)]])).iloc[0, 0]
salary_upper = reg_poly.predict(
    pd.DataFrame([[math.ceil(work_years)]])).iloc[0, 0]
print("合理的月薪推測在 {:.2f} 左右".format(salary_pred))
print("範圍落在 {:.2f}~{:.2f} 之間".format(salary_lower, salary_upper))

# In[] Comparing the two models
# Both scores are computed against the same Y; the smaller RMSE wins.
from HappyML.performance import rmse

rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_poly)

if rmse_linear < rmse_poly:
    print(
        "RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!".
        format(rmse_linear, rmse_poly))
elif rmse_linear == rmse_poly:
    # Equal scores: reporting ">" and declaring a winner would be false.
    print(
        "RMSE Linear:{:.4f} = RMSE Polynomial:{:.4f}...Tie, no winner."
        .format(rmse_linear, rmse_poly))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!"
        .format(rmse_linear, rmse_poly))

# In[] Check for Assumption of Linear Regression

from HappyML.criteria import AssumptionChecker
Example #4
0
# In[]
# Expand X into polynomial feature columns with scikit-learn.
from sklearn.preprocessing import PolynomialFeatures

# Polynomial degree for the expansion; the same value is printed next to the
# RMSE further down.
deg = 12
poly_reg = PolynomialFeatures(degree=deg)
# Fit the transformer on X and keep the expanded feature matrix.
X_poly = poly_reg.fit_transform(X)

# In[]
import pandas as pd

# Fit a SimpleRegressor on the expanded polynomial features.
regressor = SimpleRegressor()
regressor.fit(X_poly, Y)

# Predict from the same features, wrapped in a DataFrame for predict().
poly_frame = pd.DataFrame(X_poly)
Y_predict = regressor.predict(x_test=poly_frame)

# Plot the samples against the model output over the original X.
md.sample_model(sample_data=(X, Y), model_data=(X, Y_predict))

# In[]
from HappyML.performance import rmse

# Report the configured degree with the RMSE of the polynomial predictions.
poly_fit_rmse = rmse(Y, Y_predict)
print("Degree: {} RMSE:{:.4f}".format(deg, poly_fit_rmse))

# In[]
# Compare the linear model with the polynomial-feature model on the same Y;
# the smaller RMSE wins.
from HappyML.performance import rmse

rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_predict)

if rmse_linear < rmse_poly:
    print("RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!".format(rmse_linear, rmse_poly))
elif rmse_linear == rmse_poly:
    # Equal scores: reporting ">" and declaring a winner would be false.
    print("RMSE Linear:{:.4f} = RMSE Polynomial:{:.4f}...Tie, no winner.".format(rmse_linear, rmse_poly))
else:
    print("RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!".format(rmse_linear, rmse_poly))