# std_scaler = StandardScaler()
# std_scaler.fit(x_array)
# x_array = std_scaler.transform(x_array)
x_train, x_test, y_train, y_test = train_test_split(x_array, y_array, train_size=0.75,
                                                    random_state=random_seed)
print(np.shape(x_train))
print(np.shape(y_train))

# Fit every candidate model, collect its test-set predictions, and report R^2, RMSE and runtime.
predict_, rmse_, r2_score_ = [], [], []
for num, model in enumerate(models):
    print('\nmodel name: ' + model_name[num])
    model_start = time.time()
    model.fit(x_train, y_train)
    prediction = np.ravel(model.predict(x_test))  # predict once and reuse for all metrics
    predict_.append(prediction)
    r2_score_.append(r2_score(y_test, prediction))
    rmse_.append(RMSE(y_test, prediction))
    model_end = time.time()
    model_tol = str_round(model_end - model_start)
    print('R^2 = ' + str_round(r2_score(y_test, prediction), 3))
    print('RMSE = ' + str_round(math.sqrt(mean_squared_error(y_test, prediction)), 3))
    print('model running time = ' + model_tol + 's')
# print(r2_score_)
# print(rmse_)

# Combine the individual predictions into a weighted-average ensemble and score it.
predict_ = np.array(predict_)
print(predict_[:, 6])
y_predict = np.average(predict_, weights=model_weight, axis=0)
print(y_predict)
print(y_test)
print(str_round(r2_score(y_test, y_predict), 3))
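
# The RMSE helper used throughout these excerpts is not defined here; a minimal sketch of how it is
# presumably implemented (consistent with the math.sqrt(mean_squared_error(...)) call above):
import math
from sklearn.metrics import mean_squared_error

def RMSE(y_true, y_pred):
    # Root-mean-square error of the predictions.
    return math.sqrt(mean_squared_error(y_true, y_pred))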
df.loc[i, 'soil_num'] = df.loc[i, 'soil_num'][0:8]

# Samples sharing the same 8-character soil number are duplicates: average them and put the
# averaged rows back into the frame.
d_rows = df[df['soil_num'].duplicated(keep=False)]
df.drop(d_rows.index, axis=0, inplace=True)
g_item = d_rows.groupby('soil_num').mean().reset_index()  # keep soil_num as a column for the sort below
df = df.append(g_item, sort=False)
df.sort_values(by='soil_num', inplace=True, ascending=False)
df.drop(['soil_num'], axis=1, inplace=True)
# print(df)

X_array = np.array(df.loc[:, 'point_1':'point_9'])
Y_array = np.array(df.loc[:, 'TOC':'WT'])
# print(X_array)

scaler = StandardScaler()
scaler.fit(X_array)
X_train = scaler.transform(X_array)

# Fit one random forest per target column and report the training-set fit.
for i in range(Y_array.shape[1]):
    Y_train = Y_array[:, i]
    rf = RandomForestRegressor(n_estimators=200, criterion='mse', max_depth=9, min_samples_split=2,
                               min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=1,
                               max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None,
                               bootstrap=True, oob_score=True, n_jobs=None, random_state=1, verbose=0,
                               warm_start=False)
    rf.fit(X_train, Y_train)
    print('R^2 =', rf.score(X_train, Y_train))
    print('RMSE =', RMSE(Y_train, rf.predict(X_train)))
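
# Note: DataFrame.append was removed in pandas 2.0; on a recent pandas the duplicate-averaging step
# above would be written with pd.concat instead (sketch under that assumption):
# df = pd.concat([df, g_item], sort=False)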
                       min_weight_fraction_leaf=0.0, max_features=1, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True,
                       oob_score=True, n_jobs=None, random_state=1, verbose=0, warm_start=False)
# forest = ExtraTreesRegressor(n_estimators=100)
# forest = MLPRegressor(
#     hidden_layer_sizes=(5, 3), activation='relu', solver='adam', alpha=0.0001, batch_size='auto',
#     learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=15000, shuffle=True,
#     random_state=1, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
#     early_stopping=False, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
forest.fit(x, y)
ax.scatter(x_test[:, 0], y_test, color='teal', s=10, label='origin')
ax.scatter(x_test[:, 0], forest.predict(x_test), color='chocolate', s=3, label='predict')
ax.set_xticks([])
ax.text(0.54, 0.80,
        'r^2=' + str(round(r2_score(y_test, forest.predict(x_test)), 3)) +
        '\nrmse=' + str(round(RMSE(y_test, forest.predict(x_test)), 3)),
        transform=ax.transAxes, fontsize=8,
        bbox=dict(boxstyle="round,pad=0.2", fc="w", ec="gray", lw=1, alpha=0.5))
ax.legend(fontsize=8, loc='upper left')
n = n + 1
# pca = PCA(n_components=2)
# pca = PCA(n_components='mle', copy=True, whiten=False)
# # n_components: number of components to keep ('mle' selects it automatically); copy: work on a copy
# # of the original training data; whiten: whitening, so every component has the same variance.
# pca.fit()
# pca.fit_transform()
# pca.transform()
# pca.inverse_transform()
# plt.savefig('D:\\Desktop\\PCA.jpg', dpi=400)
plt.show()
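
# A minimal sketch of how the commented-out PCA could actually be applied to the spectra, assuming
# x / x_test are the feature matrices used above (the variable reuse here is an assumption):
# from sklearn.decomposition import PCA
# pca = PCA(n_components='mle', copy=True, whiten=False)
# x_reduced = pca.fit_transform(x)          # fit the components on the training spectra and project them
# x_test_reduced = pca.transform(x_test)    # project the test spectra with the same components
# print(pca.explained_variance_ratio_)      # variance captured by each retained component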
]
plt.figure('Normalize', figsize=(9.6, 5.4), dpi=100)
# plt.suptitle('Normalize Methods', fontsize=30)


def str_round(text, decimal=3):
    return str(round(text, decimal))


for num, method in enumerate(methods):
    start_time = time.time()
    method.fit(x_train, y_train)
    prediction = method.predict(x_test)
    r2 = str_round(r2_score(y_test, prediction), 3)
    rmse = str_round(RMSE(y_test, prediction), 3)
    plt.subplot(2, 4, num + 1)
    plt.title(method_name[num], fontsize=15)
    plt.scatter(x_label, y_test, c='chocolate', s=10, label='origin')
    plt.scatter(x_label, prediction, c='teal', s=3, label='predict')
    plt.text(0.13, 37, 'r^2=' + r2 + '\nrmse=' + rmse, fontsize=8,
             bbox=dict(boxstyle="round,pad=0.2", fc="w", ec="gray", lw=1, alpha=0.8))
    plt.xticks([]), plt.yticks(fontsize=6)
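
# The methods / method_name lists are built above this excerpt. Purely as a hypothetical illustration
# of how such a 2x4 'Normalize Methods' comparison could be assembled (the estimator and scalers below
# are assumptions, not the author's configuration):
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
# from sklearn.ensemble import RandomForestRegressor
# method_name = ['StandardScaler', 'MinMaxScaler', 'MaxAbsScaler', 'RobustScaler']
# methods = [make_pipeline(s, RandomForestRegressor(n_estimators=200, random_state=1))
#            for s in (StandardScaler(), MinMaxScaler(), MaxAbsScaler(), RobustScaler())]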
p_x_test.append(nine_point_average(i))
p_x_test = np.array(p_x_test)

# Use independent copies of the estimator, otherwise the second fit overwrites the first and the
# 'origin' panel would be scored with a model trained on the transformed features.
from sklearn.base import clone
clf1 = clone(clf)
clf1.fit(x_train, y_train)
clf2 = clone(clf)
clf2.fit(p_x_train, y_train)

plt.figure(figsize=(9.6, 5.4), dpi=100)
plt.subplot(2, 1, 1)
plt.title('origin')
plt.scatter(x_test[:, 0], y_test, s=10, c='chocolate', label='origin')
plt.scatter(x_test[:, 0], clf1.predict(x_test), s=3, c='teal', label='predict')
plt.text(2.75, 33,
         'r^2=' + str(round(r2_score(y_test, clf1.predict(x_test)), 3)) +
         '\nrmse=' + str(round(RMSE(y_test, clf1.predict(x_test)), 3)))
plt.legend(fontsize=8)
plt.xticks([]), plt.yticks(fontsize=6)

plt.subplot(2, 1, 2)
plt.title('transform')
plt.scatter(p_x_test[:, 0], y_test, s=10, c='chocolate', label='origin')
plt.scatter(p_x_test[:, 0], clf2.predict(p_x_test), s=3, c='teal', label='predict')
plt.text(2.75, 33,
         'r^2=' + str(round(r2_score(y_test, clf2.predict(p_x_test)), 3)) +
         '\nrmse=' + str(round(RMSE(y_test, clf2.predict(p_x_test)), 3)))
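
# nine_point_average() is defined outside this excerpt; judging from its use on individual spectra it
# is presumably a smoothing/averaging transform. A purely hypothetical sketch (not the author's code):
# def nine_point_average(spectrum):
#     # Replace each band with the mean of a 9-band window centred on it.
#     kernel = np.ones(9) / 9.0
#     return np.convolve(np.asarray(spectrum, dtype=float), kernel, mode='same')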
'''
plt.figure('Outlier Test', figsize=(9.6, 3.8), dpi=200)
# plt.suptitle('Elliptic Envelope contamination', fontsize=20)

# Flag outliers jointly in (X, y) space at several contamination levels and compare a linear fit
# before and after removing them.
for num, i in enumerate([0.01, 0.03, 0.05, 0.1, 0.2, 0.5]):
    cov = EllipticEnvelope(random_state=1, contamination=i)
    cov.fit(np.hstack([X_array, Y_array.reshape(-1, 1)]))
    index = cov.predict(np.hstack([X_array, Y_array.reshape(-1, 1)]))
    X_valid, X_invalid = error_wipe(X_array, index)
    Y_valid, Y_invalid = error_wipe(Y_array, index)
    reg1 = LinearRegression()
    reg1.fit(X_valid, Y_valid)
    reg2 = LinearRegression()
    reg2.fit(X_array, Y_array)
    reg1.rmse = RMSE(Y_valid, reg1.predict(X_valid))
    reg2.rmse = RMSE(Y_array, reg2.predict(X_array))
    print('reg1', reg1.score(X_valid, Y_valid))
    print('reg2', reg2.score(X_array, Y_array))
    print(all_np(index))
    print(random.sample(range(100), 10))
    plt.subplot(2, 3, num + 1)
    plt.title('contamination = ' + str(i), fontsize=10)
    plt.scatter(X_valid[:, 48], Y_valid, color='chocolate', s=10)
    plt.scatter(X_invalid[:, 48], Y_invalid, color='teal', s=15)
    plt.xticks([]), plt.yticks(fontsize=6)
    plt.text(0.16, 38,
             'original R^2 = ' + str(round(reg2.score(X_array, Y_array), 3)) +
             '\nR^2 after outlier removal = ' + str(round(reg1.score(X_valid, Y_valid), 3)) +
             '\noriginal RMSE = ' + str(round(reg2.rmse, 3)) +
             '\nRMSE after outlier removal = ' + str(round(reg1.rmse, 3)),
             fontsize=6)
plt.savefig('D:\\Desktop\\hyper\\EllipticEnvelope contamination.jpeg', dpi=400)
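
# error_wipe() and all_np() are helpers defined outside this excerpt. From their use on the +1/-1
# labels returned by EllipticEnvelope.predict, they presumably split an array into inliers/outliers
# and count the label occurrences; a hypothetical sketch:
# def error_wipe(arr, labels):
#     # Split rows into (inliers, outliers) according to the +1 / -1 outlier labels.
#     arr = np.asarray(arr)
#     return arr[labels == 1], arr[labels == -1]
#
# def all_np(labels):
#     # Count how many samples received each label, e.g. {1: n_inliers, -1: n_outliers}.
#     values, counts = np.unique(labels, return_counts=True)
#     return dict(zip(values, counts))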
#     plt.subplot(2, 3, num + 1)
#     plt.title('nu = ' + str(i), fontsize=10)
#     plt.scatter(X_valid[:, 48], Y_valid, color='chocolate', s=10)
#     plt.scatter(X_invalid[:, 48], Y_invalid, color='teal', s=15)
#     plt.xticks([]), plt.yticks(fontsize=6)
#     plt.text(0.16, 38,
#              'original R^2 = ' + str(round(reg2.score(X_array, Y_array), 3)) +
#              '\nR^2 after outlier removal = ' + str(round(reg1.score(X_valid, Y_valid), 3)) +
#              '\noriginal RMSE = ' + str(round(reg2.rmse, 3)) +
#              '\nRMSE after outlier removal = ' + str(round(reg1.rmse, 3)),
#              fontsize=6)
#
# plt.savefig('D:\\Desktop\\hyper\\OneClassSVM contamination.jpeg', dpi=400)
# plt.show()

# Flag outliers with a One-Class SVM and refit a linear model on the inliers only.
one = OneClassSVM(kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=0.001, nu=0.3, shrinking=True,
                  cache_size=200, verbose=False, max_iter=-1, random_state=1)
one.fit(X_array)
index = one.predict(X_array)
print(all_np(index))
X_valid, X_invalid = error_wipe(X_array, index)
Y_valid, Y_invalid = error_wipe(Y_array, index)

reg = LinearRegression()
reg.fit(X_valid, Y_valid)
print(str(round(reg.score(X_valid, Y_valid), 3)))
print(str(round(RMSE(Y_valid, reg.predict(X_valid)), 3)))

plt.figure('OneClassSVM', figsize=(4.8, 5.4), dpi=100)
plt.title('OneClassSVM', fontsize=20)
plt.scatter(X_array[:, 48], Y_array, color='teal', s=10)
plt.scatter(X_valid[:, 48], Y_valid, color='chocolate', s=15)
plt.savefig('D:\\Desktop\\hyper\\OneClassSVM.jpg', dpi=100)
plt.show()
print(name)
try:
    regressor.fit(x_train, y_train)
except ValueError:
    # The estimator rejected a continuous target (e.g. a classifier), so fall back to a
    # discretised version of y.
    p_x_train, p_x_test, p_y_train, p_y_test = continuous_to_multiclass(
        x_train, x_test, y_train, y_test)
    regressor.fit(p_x_train, p_y_train)
    error = False
    try:
        score = regressor.score(p_x_train, p_y_train)
    except AttributeError as err:
        score = False
        print(err)
    try:
        rmse = RMSE(p_y_test, regressor.predict(p_x_test))
    except AttributeError as err:
        rmse = False
        print(err)
else:
    error = True
    try:
        score = regressor.score(x_train, y_train)
    except AttributeError as err:
        score = False
        print(err)
    try:
        rmse = RMSE(y_test, regressor.predict(x_test))
    except AttributeError:
        rmse = False
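
# continuous_to_multiclass() is defined outside this excerpt; since it is called when an estimator
# rejects a continuous target, it presumably bins y into discrete classes. A hypothetical sketch of
# such a helper (the binning strategy is an assumption):
# def continuous_to_multiclass(x_train, x_test, y_train, y_test, n_bins=5):
#     from sklearn.preprocessing import KBinsDiscretizer
#     # Discretise the continuous targets into quantile bins; the features pass through unchanged.
#     binner = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='quantile')
#     p_y_train = binner.fit_transform(np.asarray(y_train).reshape(-1, 1)).ravel().astype(int)
#     p_y_test = binner.transform(np.asarray(y_test).reshape(-1, 1)).ravel().astype(int)
#     return x_train, x_test, p_y_train, p_y_test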