def regtree(par, *data):
    """Objective function: test RMSE of SSRS.RegressionTree for one parameter set.

    Parameters
    ----------
    par : indexable with at least five entries, used in this order as
        ``max_features``, ``min_samples_split``, ``min_samples_leaf``,
        ``min_weight_fraction_leaf`` and ``max_leaf_nodes`` of the decision
        tree. Only ``max_leaf_nodes`` is cast to ``int``; the others are
        passed through unchanged.
    *data : exactly four items, ``X_train, X_test, Y_train, Y_test``.

    Returns
    -------
    The first value returned by ``SSRS.RMSECal(Yp, Y_test)``.

    Relies on the module-level names ``tree``, ``linear_model``, ``SSRS`` and
    ``Field``.
    """
    print(par)
    X_train, X_test, Y_train, Y_test = data

    treeReg = tree.DecisionTreeRegressor(
        max_features=par[0],
        min_samples_split=par[1],
        min_samples_leaf=par[2],
        min_weight_fraction_leaf=par[3],
        max_leaf_nodes=int(par[4]),
    )
    leafFit = linear_model.LinearRegression()

    # Only the test-set prediction (first output) is needed here; the five-way
    # unpacking is kept so an unexpected return arity still raises.
    Yp, _Yptrain, _treeOut, _fitList, _predind = SSRS.RegressionTree(
        X_train, X_test, Y_train, Y_test,
        treeReg, leafFit, Field,
        doFitSelection=0, doMultiBand=1,
    )

    testRmse, _bandRmse = SSRS.RMSECal(Yp, Y_test)
    print("RMSE=%f" % testRmse)
    return testRmse
def _fitRegTree(X_train, X_test, Y_train, Y_test, predSel, **treeArgs):
    """Run SSRS.RegressionTree once on a subset of predictor columns.

    A fresh ``tree.DecisionTreeRegressor(**treeArgs)`` and a fresh
    ``linear_model.LinearRegression()`` are created for every call. Both
    predictor matrices are restricted to the columns listed in ``predSel``
    and the matching names are taken from the module-level ``Field``.

    Returns
    -------
    (Yp, Yptrain) : the first two outputs of ``SSRS.RegressionTree``
        (presumably test-set and training-set predictions - confirm against
        SSRS). The other three outputs are discarded.
    """
    regTreeModel = tree.DecisionTreeRegressor(**treeArgs)
    fitModel = linear_model.LinearRegression()
    predName = [Field[jj] for jj in predSel]
    Yp, Yptrain, _, _, _ = SSRS.RegressionTree(
        X_train[:, predSel], X_test[:, predSel], Y_train, Y_test,
        regTreeModel, fitModel, predName,
        doFitSelection=0, doMultiBand=1)
    return Yp, Yptrain


def testModel(predListTest):
    """Evaluate several predictor subsets under four train/test schemes.

    Parameters
    ----------
    predListTest : sequence of predictor selections; each entry is a list of
        column indices into the module-level ``Xn`` / ``Field``.

    Returns
    -------
    tuple of eight arrays, in this order (``nmodel = len(predListTest)``):
        testErr            [nmodel, 5, 50]   test RMSE, random splits (test 1)
        trainErr1_huc2     [nmodel, 18]      train RMSE, leave-one-HUC2-out (test 3)
        testErr1_huc2      [nmodel, 18]      test RMSE, leave-one-HUC2-out (test 3)
        trainErr1_huc2_rt  [nmodel, 18]      train RMSE, random train set (test 2)
        testErr1_huc2_rt   [nmodel, 18]      test RMSE on one HUC2 (test 2)
        trainErr2_huc2     [nmodel, 18*17]   train RMSE, leave-two-HUC2-out (test 4)
        testErr2_huc2      [nmodel, 18*17]   test RMSE, leave-two-HUC2-out (test 4)
        hucTab             [18*17, 2]        zero-based (i, j) pair for each column
                                             of the test-4 arrays

    Relies on the module-level names ``np``, ``sio``, ``cross_validation``,
    ``tree``, ``linear_model``, ``SSRS``, ``Xn``, ``dist``, ``indvalid`` and
    ``Field``.
    """
    nmodel = len(predListTest)
    nit = 50      # number of random repetitions (used as random_state 0..nit-1)
    nhuc = 18     # HUC2 codes are compared against 1..nhuc below
    testsize = [0.2, 0.4, 0.5, 0.6, 0.7]

    # The HUC2 code of every valid sample is shared by tests 2-4. Load it once,
    # and up front so a missing file fails before the long-running test 1.
    IDhucfile = r"E:\work\SSRS\data\IDhuc_mb_4949.mat"
    mat = sio.loadmat(IDhucfile)
    IDhuc = mat["IDhuc"]
    huc2 = IDhuc[indvalid, 1]

    # test 1: different size of training and test
    testErr = np.ones([nmodel, len(testsize), nit])
    for k in range(nit):
        print(k)
        # The split depends only on (test size, k), not on the model, so it is
        # computed once per k instead of once per model. With an integer
        # random_state the split is deterministic, so results are unchanged.
        splits = [
            cross_validation.train_test_split(
                Xn, dist, test_size=ts, random_state=k)
            for ts in testsize
        ]
        for j in range(nmodel):
            for i, (X_train, X_test, Y_train, Y_test) in enumerate(splits):
                Yp, _ = _fitRegTree(
                    X_train, X_test, Y_train, Y_test, predListTest[j],
                    max_leaf_nodes=20, min_samples_leaf=20)
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr[j, i, k] = rmse

    # test 2: use 1 HUC2 as test
    # NOTE(review): the result arrays have no k axis, so every repetition
    # overwrites the previous one and only k = nit - 1 is returned. Kept as-is
    # to preserve the returned values; consider adding a k axis or averaging.
    # NOTE(review): the training set is a random 80% of all samples, so it can
    # contain samples of the HUC2 used for testing - confirm this is intended.
    testErr1_huc2_rt = np.ones([nmodel, nhuc])
    trainErr1_huc2_rt = np.ones([nmodel, nhuc])
    for k in range(nit):
        print(k)
        # The random training set does not depend on the HUC2 index: split once
        # per k and keep only the training part.
        X_train, _, Y_train, _ = cross_validation.train_test_split(
            Xn, dist, test_size=0.2, random_state=k)
        for i in range(nhuc):
            ind_test = np.where(huc2 == i + 1)[0]
            X_test = Xn[ind_test, :]
            Y_test = dist[ind_test, :]
            for j in range(nmodel):
                Yp, Yptrain = _fitRegTree(
                    X_train, X_test, Y_train, Y_test, predListTest[j],
                    max_leaf_nodes=20)
                rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
                trainErr1_huc2_rt[j, i] = rmse
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr1_huc2_rt[j, i] = rmse

    # test 3: leave out 1 HUC2 one time
    testErr1_huc2 = np.ones([nmodel, nhuc])
    trainErr1_huc2 = np.ones([nmodel, nhuc])
    for i in range(nhuc):
        ind_test = np.where(huc2 == i + 1)[0]
        ind_train = np.where(huc2 != i + 1)[0]
        X_train = Xn[ind_train, :]
        X_test = Xn[ind_test, :]
        Y_train = dist[ind_train, :]
        Y_test = dist[ind_test, :]
        for j in range(nmodel):
            Yp, Yptrain = _fitRegTree(
                X_train, X_test, Y_train, Y_test, predListTest[j],
                max_leaf_nodes=20)
            rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
            trainErr1_huc2[j, i] = rmse
            rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
            testErr1_huc2[j, i] = rmse

    # test 4: leave out 2 HUC2 one time
    # NOTE(review): ordered pairs are enumerated, so (i, j) and (j, i) use the
    # same train/test sets and every unordered pair is fitted twice. Kept to
    # preserve the shape of the returned arrays.
    npair = nhuc * (nhuc - 1)
    testErr2_huc2 = np.ones([nmodel, npair])
    trainErr2_huc2 = np.ones([nmodel, npair])
    hucTab = np.ones([npair, 2])
    n = -1
    for i in range(nhuc):
        print(i)
        for j in range(nhuc):
            if i == j:
                continue
            n = n + 1
            # hucTab stores the zero-based loop indices; the HUC2 codes
            # actually held out are i + 1 and j + 1.
            hucTab[n, 0] = i
            hucTab[n, 1] = j
            ind_test = np.where((huc2 == i + 1) | (huc2 == j + 1))[0]
            ind_train = np.where((huc2 != i + 1) & (huc2 != j + 1))[0]
            X_train = Xn[ind_train, :]
            X_test = Xn[ind_test, :]
            Y_train = dist[ind_train, :]
            Y_test = dist[ind_test, :]
            for k in range(nmodel):
                Yp, Yptrain = _fitRegTree(
                    X_train, X_test, Y_train, Y_test, predListTest[k],
                    max_leaf_nodes=20)
                rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
                trainErr2_huc2[k, n] = rmse
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr2_huc2[k, n] = rmse

    return testErr,trainErr1_huc2,testErr1_huc2,\
        trainErr1_huc2_rt,testErr1_huc2_rt,\
        trainErr2_huc2,testErr2_huc2,hucTab
"Y_test": Y_test })
###
# Single-model regression through SSRS.Regression.
# NOTE(review): regModel is assigned five times in a row; each assignment
# overwrites the previous one, so only the final DecisionTreeRegressor (with
# explicit hyper-parameters) reaches SSRS.Regression. The earlier lines look
# like alternatives left in while experimenting.
regModel = linear_model.LinearRegression()
#regModel=svm.SVC()
regModel = KNeighborsRegressor(n_neighbors=10)
regModel = tree.DecisionTreeRegressor()
# NOTE(review): if this is scikit-learn's GaussianNB it is a classifier, not a
# regressor - confirm before re-enabling it as the active model.
regModel = GaussianNB()
regModel=tree.DecisionTreeRegressor\
    (max_features=0.8,max_depth=10,min_samples_split=3,min_samples_leaf=5,
     min_weight_fraction_leaf=0.5,max_leaf_nodes=10)
Yp,Yptrain,regModelList=SSRS.Regression\
    (X_train,X_test,Y_train,Y_test,multiband=1,regModel=regModel,doplot=2)
# Print the first value returned by RMSECal for the test-set prediction.
rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
print(rmse)
## local regression tree
# Same data, now through SSRS.RegressionTree with a 20-leaf tree and a linear
# model; doMultiBand=0 here and doFitSelection is left at its default.
regTreeModel = tree.DecisionTreeRegressor(max_leaf_nodes=20)
fitModel = linear_model.LinearRegression()
Yp,Yptrain,regTreeModel,fitModelList,predind=\
    SSRS.RegressionTree(X_train,X_test,Y_train,Y_test,regTreeModel,fitModel,Field,doMultiBand=0)
# RMSE is printed for the training prediction first, then for the test prediction.
rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
print(rmse)
rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
print(rmse)
# Plot predicted vs. observed for train and test (doplot=2; meaning defined in SSRS).
SSRS.Regression_plot(Yptrain, Y_train, doplot=2)
SSRS.Regression_plot(Yp, Y_test, doplot=2)