# scikit-learn imports used by the tuning cells in this section
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

def MELHOR_RESULTADO_MG():
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    param = {
        'learning_rate': [0.05],
        'max_depth': [50],
        'max_features': ['log2'],
        'min_samples_leaf': [11],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9)
    model = GradientBoostingRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 1, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.73465 Seed: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 11, 'n_estimators': 60, 'random_state': 0}
#Best Score 0.7346493872397788
#Best Seed 9
#R2 Test: 0.7641468991808511 MSE Test: 0.0037531633086605658
#R2 Train: 0.8748115698686528 MSE Train: 0.003076693813010081

MELHOR_RESULTADO_MG()
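pltResults is defined elsewhere in the notebook. As a reference, a minimal sketch of what it is assumed to do, reconstructed from the metrics recorded in the comments (R2 and MSE on both splits); the name pltResults_sketch, the meaning of the second argument (feature index used as the plot's x-axis), and the plotting details are assumptions, not the notebook's actual helper.

# Hypothetical sketch of the pltResults helper (assumed behavior, not the real implementation)
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_squared_error

def pltResults_sketch(model, feature_idx, X_train, X_test, y_train, y_test):
    # report the same metrics that appear in the result comments
    y_pred_test = model.predict(X_test)
    y_pred_train = model.predict(X_train)
    print("#R2 Test:", r2_score(y_test, y_pred_test), "MSE Test:", mean_squared_error(y_test, y_pred_test))
    print("#R2 Train:", r2_score(y_train, y_pred_train), "MSE Train:", mean_squared_error(y_train, y_pred_train))
    # plot observed vs. predicted targets against one feature (feature_idx is assumed to select it)
    plt.scatter(X_test[:, feature_idx], y_test, label='observed')
    plt.scatter(X_test[:, feature_idx], y_pred_test, label='predicted')
    plt.legend()
    plt.show()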
def MELHOR_RESULTADO_NA():
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    param = {
        'learning_rate': [0.05],
        'max_depth': [20],
        'max_features': ['log2'],
        'min_samples_leaf': [5],
        'n_estimators': [50],
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    model = GradientBoostingRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    pltResults(best_model, 1, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.73448 Seed: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 10, 'max_features': 2, 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.734480868500437

MELHOR_RESULTADO_NA()
def MELHOR_RESULTADO_K():
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    param = {
        'bootstrap': [True],
        'max_depth': [10],
        'max_features': ['log2'],
        'min_samples_leaf': [2],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
    model = RandomForestRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)
def MELHOR_RESULTADO_NA():
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    param = {
        'bootstrap': [True],
        'max_depth': [10],
        'max_features': ['log2'],
        'min_samples_leaf': [2],
        'n_estimators': [100]
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)
    model = RandomForestRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.61644 Seed: 2
#Best Params {'bootstrap': True, 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 2, 'n_estimators': 100}
#Best Score 0.6164423932855161
#Best Seed 2
#R2 Test: 0.6470743582563896 MSE Test: 0.011315230193482718
#R2 Train: 0.8844389141031836 MSE Train: 0.002520229138039719

MELHOR_RESULTADO_NA()
def MELHOR_RESULTADO_MG():
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    param = {
        'bootstrap': [True],
        'max_depth': [20],
        'max_features': ['auto'],
        'min_samples_leaf': [4],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9)
    model = RandomForestRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.73366 Seed: 9
#Best Params {'bootstrap': True, 'max_depth': 20, 'max_features': 'auto', 'min_samples_leaf': 4, 'n_estimators': 60}
#Best Score 0.7336590449066758
#Best Seed 9
#R2 Test: 0.7158820898963456 MSE Test: 0.00452120795457348
#R2 Train: 0.8717629515241512 MSE Train: 0.0031516181905177796

MELHOR_RESULTADO_MG()
def MELHOR_RESULTADO_K():
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    param = {'algorithm': ['brute'], 'n_neighbors': [8], 'weights': ['distance']}
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
    model = KNeighborsRegressor()
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.67596 Seed: 4
#Best Params {'algorithm': 'brute', 'n_neighbors': 8, 'weights': 'distance'}
#Best Score 0.6759638656720487
#Best Seed 4
#R2 Test: 0.8262489727826184 MSE Test: 0.0030635365836426814
#R2 Train: 0.9999999999999659 MSE Train: 8.640127461660114e-16

MELHOR_RESULTADO_K()
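The R2 Train of ~1.0 and MSE Train of ~0 in the KNN results here are expected rather than a sign of a good fit: with weights='distance', every training sample is its own nearest neighbor at distance zero, so the prediction reproduces the training target exactly. Only the cross-validation and test scores are informative for these cells. A toy illustration (synthetic data, not the notebook's dataSet):

# Illustration only: KNN with weights='distance' fits the training set exactly.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

rng = np.random.RandomState(0)
X_toy = rng.rand(100, 4)
y_toy = X_toy @ np.array([1.0, -2.0, 0.5, 3.0]) + 0.1 * rng.randn(100)

knn = KNeighborsRegressor(n_neighbors=6, weights='distance').fit(X_toy, y_toy)
print(knn.score(X_toy, y_toy))  # ~1.0: each training sample is its own zero-distance neighbor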
def MELHOR_RESULTADO_NA():
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    param = {'algorithm': ['brute'], 'n_neighbors': [6], 'weights': ['distance']}
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9)
    model = KNeighborsRegressor()
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.62096 Seed: 9
#Best Params {'algorithm': 'brute', 'n_neighbors': 6, 'weights': 'distance'}
#Best Score 0.6209552829789358
#Best Seed 9
#R2 Test: 0.7066316533521843 MSE Test: 0.009787010608152074
#R2 Train: 0.9999999999999105 MSE Train: 1.9219260943726335e-15

MELHOR_RESULTADO_NA()
def MELHOR_RESULTADO_MG():
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    param = {'algorithm': ['auto'], 'n_neighbors': [6], 'weights': ['distance']}
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
    model = KNeighborsRegressor()
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation R2 Score :", grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.69719 Seed: 4
#Best Params {'algorithm': 'auto', 'n_neighbors': 6, 'weights': 'distance'}
#Best Score 0.6971875382557432
#Best Seed 4
#R2 Test: 0.8477948730251221 MSE Test: 0.0026071842742281586
#R2 Train: 1.0 MSE Train: 0.0

MELHOR_RESULTADO_MG()
#dataSet = loadMainDataSet()

# Set features and target
y_column = 2
X = dataSet[:, 0:2]
y = dataSet[:, 2]

best_model, best_params, best_score, best_seed = findBalancedDataSet(range(1, 10), X, y, GridSearchCVKNeighborsRegressor)
print("#Best Params", best_params)
print("#Best Score", best_score)
print("#Best Seed", best_seed)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=best_seed)
pltResults(best_model, X.shape[1] - 1, X_train, X_test, y_train, y_test)

########## MG #################
#Done - Best Score: 0.69719 Seed: 4
#Best Params {'algorithm': 'auto', 'n_neighbors': 6, 'weights': 'distance'}
#Best Score 0.6971875382557432
#Best Seed 4
#R2 Test: 0.8477948730251221 MSE Test: 0.0026071842742281586
#R2 Train: 1.0 MSE Train: 0.0
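findBalancedDataSet and GridSearchCVKNeighborsRegressor are defined elsewhere in the notebook. Inferred from the call above and the "#Best Seed" output, a hypothetical sketch of the seed search it is assumed to perform; the real implementation may differ.

# Hypothetical sketch (assumption): iterate over split seeds, run the supplied
# grid-search routine on each training split, keep the seed with the best CV score.
def findBalancedDataSet_sketch(seeds, X, y, grid_search_fn):
    best_model, best_params, best_score, best_seed = None, None, float('-inf'), None
    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
        # grid_search_fn (e.g. GridSearchCVKNeighborsRegressor) is assumed to fit a
        # GridSearchCV for one model family and return the fitted grid object.
        grid = grid_search_fn(X_train, y_train)
        if grid.best_score_ > best_score:
            best_model, best_params, best_score, best_seed = grid.best_estimator_, grid.best_params_, grid.best_score_, seed
    return best_model, best_params, best_score, best_seed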
def MELHOR_RESULTADO_K():
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    param = {
        'learning_rate': [0.05],
        'max_depth': [30],
        'max_features': ['log2'],
        'min_samples_leaf': [4],
        'n_estimators': [90],
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
    model = GradientBoostingRegressor(random_state=0)
    grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2', iid=True)
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print(grid.best_score_)
    print(grid.best_params_)
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)

#Done - Best Score: 0.63825 Seed: 3
#Best Params {'learning_rate': 0.05, 'max_depth': 30, 'max_features': 'log2', 'min_samples_leaf': 4, 'n_estimators': 90, 'random_state': 0}
#Best Score 0.6382454854124988
#R2 Test: 0.6772838242726009 MSE Test: 0.0078083038481006
#R2 Train: 0.9892885578561049 MSE Train: 0.00025464662511601085

MELHOR_RESULTADO_K()

############### MG ##########################
#Done - Best Score: 0.65662 Seed: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 7, 'max_features': 'auto', 'min_samples_leaf': 5, 'n_estimators': 19, 'random_state': 0}
#Best Score 0.6566162097362611
#Best Seed 9
#R2 Test: 0.6698401069997604 MSE Test: 0.005253880453960908
#R2 Train: 0.7738415101819238 MSE Train: 0.0055581847751659365

#Done - Best Score: 0.73203 Seed: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.7320304172385002
#Best Seed 9
#R2 Test: 0.6928402471363506 MSE Test: 0.004887876014100292
#R2 Train: 0.9412048892640388 MSE Train: 0.0014449782080243406

#Done - Best Score: 0.73465 Seed: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 11, 'n_estimators': 60, 'random_state': 0}
#Best Score 0.7346493872397788
#Best Seed 9
#R2 Test: 0.7641468991808511 MSE Test: 0.0037531633086605658
#R2 Train: 0.8748115698686528 MSE Train: 0.003076693813010081

############### Na ##########################
#Done - Best Score: 0.59157 Seed: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5915734707430875
#Best Seed 2
#R2 Test: 0.6002369455399681 MSE Test: 0.012816895257928434
#R2 Train: 0.9122383051241635 MSE Train: 0.0019139624633444869

#Done - Best Score: 0.59157 Seed: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 20, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5915734707430875
#Best Seed 2
#R2 Test: 0.6002369455399681 MSE Test: 0.012816895257928434
#R2 Train: 0.9122383051241635 MSE Train: 0.0019139624633444869

#Done - Best Score: 0.59717 Seed: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5971743288694583
#Best Seed 2
#R2 Test: 0.6153244736541088 MSE Test: 0.012333170548047006
#R2 Train: 0.8709102998267071 MSE Train: 0.002815269701498251

############### K #############################
#Done - Best Score: 0.63825 Seed: 3
#Best Params {'learning_rate': 0.05, 'max_depth': 30, 'max_features': 'log2', 'min_samples_leaf': 4, 'n_estimators': 90, 'random_state': 0}
#Best Score 0.6382454854124988
#Best Seed 3
#R2 Test: 0.6772838242726009 MSE Test: 0.0078083038481006
#R2 Train: 0.9892885578561049 MSE Train: 0.00025464662511601085
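A portability note: the iid argument passed to GridSearchCV in these cells was deprecated in scikit-learn 0.22 and removed in 0.24, so they only run as-is on older releases. On current versions, construct the grid search without it:

# On recent scikit-learn releases, drop iid=True; everything else stays the same.
grid = GridSearchCV(model, param, cv=10, verbose=0, n_jobs=-1, scoring='r2')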