Example #1
0
def regtree(par, *data):
    """Score one decision-tree hyper-parameter vector by its test RMSE.

    The ``(par, *data)`` signature looks like an objective function for an
    optimiser -- NOTE(review): confirm against the caller.

    Args:
        par: indexable with at least five entries, used in order as
            ``max_features``, ``min_samples_split``, ``min_samples_leaf``,
            ``min_weight_fraction_leaf`` and ``max_leaf_nodes`` (only the
            last one is cast to ``int``).
        *data: exactly four items: ``X_train, X_test, Y_train, Y_test``.

    Returns:
        The overall RMSE reported by ``SSRS.RMSECal`` for the test
        predictions.
    """
    print(par)
    train_x, test_x, train_y, test_y = data

    tree_options = {
        "max_features": par[0],
        "min_samples_split": par[1],
        "min_samples_leaf": par[2],
        "min_weight_fraction_leaf": par[3],
        "max_leaf_nodes": int(par[4]),
    }
    tree_model = tree.DecisionTreeRegressor(**tree_options)
    linear_fit = linear_model.LinearRegression()

    # Only the test-set predictions are needed; the other outputs of
    # SSRS.RegressionTree are unpacked (to keep the arity check) and ignored.
    test_pred, _train_pred, _fitted_tree, _fit_models, _pred_idx = \
        SSRS.RegressionTree(train_x, test_x, train_y, test_y,
                            tree_model, linear_fit, Field,
                            doFitSelection=0, doMultiBand=1)

    rmse, _rmse_per_band = SSRS.RMSECal(test_pred, test_y)
    print("RMSE=%f" % rmse)
    return rmse
Example #2
0
def _fit_tree_and_predict(X_train, X_test, Y_train, Y_test, predSel, **treeArgs):
    """Fit one SSRS regression tree on a subset of predictor columns.

    A fresh ``DecisionTreeRegressor`` (configured by ``treeArgs``) and a fresh
    ``LinearRegression`` are created for every call and passed to
    ``SSRS.RegressionTree`` together with the selected columns and the
    matching names from the module-level ``Field``.

    Args:
        X_train, X_test: predictor arrays; columns ``predSel`` are selected.
        Y_train, Y_test: target arrays, passed through unchanged.
        predSel: column indices into the predictor arrays and into ``Field``.
        **treeArgs: keyword arguments for ``tree.DecisionTreeRegressor``.

    Returns:
        ``(Yp, Yptrain)``: the first two outputs of ``SSRS.RegressionTree``.
    """
    regTreeModel = tree.DecisionTreeRegressor(**treeArgs)
    fitModel = linear_model.LinearRegression()
    predName = [Field[jj] for jj in predSel]
    Yp, Yptrain, regTreeModel, fitModelList, predind = \
        SSRS.RegressionTree(X_train[:, predSel], X_test[:, predSel],
                            Y_train, Y_test, regTreeModel, fitModel, predName,
                            doFitSelection=0, doMultiBand=1)
    return Yp, Yptrain


def testModel(predListTest):
    """Evaluate every predictor subset under four train/test schemes.

    Relies on module-level ``Xn``, ``dist``, ``nind``, ``indvalid`` and
    ``Field`` in addition to the imported libraries.

    Schemes:
        1. Random splits with five test-size fractions, ``nit`` seeds each.
        2. Random 80/20 split for training, one HUC2 region as test set.
        3. Leave one HUC2 region out.
        4. Leave two HUC2 regions out (all ordered pairs).

    Args:
        predListTest: sequence of predictor-index collections; each entry
            selects columns of ``Xn`` and names from ``Field``.

    Returns:
        Tuple ``(testErr, trainErr1_huc2, testErr1_huc2, trainErr1_huc2_rt,
        testErr1_huc2_rt, trainErr2_huc2, testErr2_huc2, hucTab)`` where
        ``testErr`` has shape ``[nmodel, len(testsize), nit]``, the
        ``*1_huc2*`` arrays have shape ``[nmodel, 18]``, the ``*2_huc2``
        arrays have shape ``[nmodel, 18 * 17]`` and ``hucTab`` (shape
        ``[18 * 17, 2]``) holds the zero-based ``(i, j)`` loop indices of the
        pair left out in each column of scheme 4.
    """
    nmodel = len(predListTest)
    nit = 50
    nhuc = 18  # number of HUC2 regions; region ids compared below are 1..nhuc
    testsize = [0.2, 0.4, 0.5, 0.6, 0.7]
    ind = range(0, nind)

    # Load the HUC table once, before the long-running loops, so that a
    # missing file is reported immediately instead of after test 1.
    IDhucfile = r"E:\work\SSRS\data\IDhuc_mb_4949.mat"
    mat = sio.loadmat(IDhucfile)
    IDhuc = mat["IDhuc"]
    huc2 = IDhuc[indvalid, 1]

    # test 1: different size of training and test
    # Shape follows testsize/nit so the array cannot drift out of sync with
    # the loops that fill it.
    testErr = np.ones([nmodel, len(testsize), nit])
    for k in range(0, nit):
        print(k)
        for j in range(0, nmodel):
            for i in range(0, len(testsize)):
                X_train, X_test, Y_train, Y_test, ind_train, ind_test = \
                    cross_validation.train_test_split(
                        Xn, dist, ind, test_size=testsize[i], random_state=k)
                Yp, Yptrain = _fit_tree_and_predict(
                    X_train, X_test, Y_train, Y_test, predListTest[j],
                    max_leaf_nodes=20, min_samples_leaf=20)
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr[j, i, k] = rmse

    # test 2: use 1 HUC2 as test
    # NOTE(review): the result arrays have no k axis, so every pass over k
    # overwrites the previous one and only the last seed's values are
    # returned. Kept as-is to preserve the original results.
    testErr1_huc2_rt = np.ones([nmodel, nhuc])
    trainErr1_huc2_rt = np.ones([nmodel, nhuc])
    for k in range(0, nit):
        print(k)
        for i in range(0, nhuc):
            # NOTE(review): this split does not depend on i; it is repeated
            # per region exactly as in the original. The training rows come
            # from the random split and are not filtered by HUC2, so they may
            # include samples from the region used as test set below.
            X_train, X_test, Y_train, Y_test, ind_train, ind_test = \
                cross_validation.train_test_split(
                    Xn, dist, ind, test_size=0.2, random_state=k)
            # Replace the random test part by all samples of region i + 1.
            ind_test = np.where(huc2 == i + 1)[0]
            X_test = Xn[ind_test, :]
            Y_test = dist[ind_test, :]
            for j in range(0, nmodel):
                Yp, Yptrain = _fit_tree_and_predict(
                    X_train, X_test, Y_train, Y_test, predListTest[j],
                    max_leaf_nodes=20)
                rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
                trainErr1_huc2_rt[j, i] = rmse
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr1_huc2_rt[j, i] = rmse

    # test 3: leave out 1 HUC2 one time
    testErr1_huc2 = np.ones([nmodel, nhuc])
    trainErr1_huc2 = np.ones([nmodel, nhuc])
    for i in range(0, nhuc):
        ind_test = np.where(huc2 == i + 1)[0]
        ind_train = np.where(huc2 != i + 1)[0]
        X_train = Xn[ind_train, :]
        X_test = Xn[ind_test, :]
        Y_train = dist[ind_train, :]
        Y_test = dist[ind_test, :]
        for j in range(0, nmodel):
            Yp, Yptrain = _fit_tree_and_predict(
                X_train, X_test, Y_train, Y_test, predListTest[j],
                max_leaf_nodes=20)
            rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
            trainErr1_huc2[j, i] = rmse
            rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
            testErr1_huc2[j, i] = rmse

    # test 4: leave out 2 HUC2 one time
    # All ordered pairs (i, j) with i != j are evaluated, so each unordered
    # pair of regions appears twice (once as (i, j), once as (j, i)).
    npair = nhuc * (nhuc - 1)
    testErr2_huc2 = np.ones([nmodel, npair])
    trainErr2_huc2 = np.ones([nmodel, npair])
    hucTab = np.ones([npair, 2])
    n = -1
    for i in range(0, nhuc):
        print(i)
        for j in range(0, nhuc):
            if i == j:
                continue
            n = n + 1
            # hucTab stores the zero-based loop indices; the matching region
            # ids used in the comparisons below are i + 1 and j + 1.
            hucTab[n, 0] = i
            hucTab[n, 1] = j
            ind_test = np.where((huc2 == i + 1) | (huc2 == j + 1))[0]
            ind_train = np.where((huc2 != i + 1) & (huc2 != j + 1))[0]
            X_train = Xn[ind_train, :]
            X_test = Xn[ind_test, :]
            Y_train = dist[ind_train, :]
            Y_test = dist[ind_test, :]
            for k in range(0, nmodel):
                Yp, Yptrain = _fit_tree_and_predict(
                    X_train, X_test, Y_train, Y_test, predListTest[k],
                    max_leaf_nodes=20)
                rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
                trainErr2_huc2[k, n] = rmse
                rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
                testErr2_huc2[k, n] = rmse
    return testErr,trainErr1_huc2,testErr1_huc2,\
           trainErr1_huc2_rt,testErr1_huc2_rt,\
           trainErr2_huc2,testErr2_huc2,hucTab
Example #3
0
    "Y_test": Y_test
})

###
# Candidate models are instantiated one after another; every assignment
# replaces the previous one, so only the final DecisionTreeRegressor is the
# model actually handed to SSRS.Regression below.
regModel = linear_model.LinearRegression()
# regModel = svm.SVC()
regModel = KNeighborsRegressor(n_neighbors=10)
regModel = tree.DecisionTreeRegressor()
regModel = GaussianNB()
regModel = tree.DecisionTreeRegressor(
    max_features=0.8,
    max_depth=10,
    min_samples_split=3,
    min_samples_leaf=5,
    min_weight_fraction_leaf=0.5,
    max_leaf_nodes=10
)

# Fit the chosen model and report the overall RMSE on the test set.
Yp, Yptrain, regModelList = SSRS.Regression(
    X_train, X_test, Y_train, Y_test,
    multiband=1, regModel=regModel, doplot=2
)
rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
print(rmse)

## local regression tree
# A 20-leaf decision tree combined with a linear regression, fitted through
# SSRS.RegressionTree with doMultiBand switched off.
regTreeModel = tree.DecisionTreeRegressor(max_leaf_nodes=20)
fitModel = linear_model.LinearRegression()
Yp, Yptrain, regTreeModel, fitModelList, predind = SSRS.RegressionTree(
    X_train, X_test, Y_train, Y_test,
    regTreeModel, fitModel, Field,
    doMultiBand=0
)

# Training error first, then test error.
rmse, rmse_band = SSRS.RMSECal(Yptrain, Y_train)
print(rmse)
rmse, rmse_band = SSRS.RMSECal(Yp, Y_test)
print(rmse)

# Plots for the training set and then the test set.
SSRS.Regression_plot(Yptrain, Y_train, doplot=2)
SSRS.Regression_plot(Yp, Y_test, doplot=2)