Exemple #1
0
def metrics(yp, yt, keyLst, var_s, output_path="./"):
    """Box-plot selected error statistics of a prediction.

    Args:
        yp: Predicted values, forwarded to ``statError``.
        yt: Target values, forwarded to ``statError``.
        keyLst: Names of the statistics to read from the ``statError``
            result; also passed to the plot as ``label1``.
        var_s: Not used by this function; kept so existing callers still work.
        output_path: Forwarded to ``plotBoxFigMulti`` as ``path_fig``.

    Returns:
        Whatever ``plotBoxFigMulti`` returns (the original code bound it to
        the name ``median_dict``).
    """
    stat_dicts = [statError(yp, yt)]
    # One group per requested statistic; each group holds one NaN-free array
    # per entry of stat_dicts.
    box_groups = []
    for stat_name in keyLst:
        raw_values = (stat_dict[stat_name] for stat_dict in stat_dicts)
        box_groups.append([v[~np.isnan(v)] for v in raw_values])
    return plotBoxFigMulti(
        box_groups,
        label1=keyLst,
        sharey=False,
        figsize=(12, 5),
        path_fig=output_path,
    )
Exemple #2
0
            temp = 1
        else:
            if temp != 0:
                temp = temp + 1
# Spot check: print the observation mask of one random row next to its
# maskDay row.
ind = np.random.randint(0, ngrid)
print(np.array([maskObs[ind, :], maskDay[ind, :]]))
maskObsDay = maskObs * maskDay
# Frequency table of the maskDay values, then each count divided by ngrid * nt.
unique, counts = np.unique(maskDay, return_counts=True)
print(np.asarray((unique, counts)).T)
print(counts / ngrid / nt)

fLst = [1, 2, 3]
statLstF = list()
statLstP = list()
# Keep only cells whose maskDay value is 1, 2 or 3.
# NOTE(review): presumably utils.fillNan blanks the entries outside maskF - confirm.
maskF = (maskDay >= 1) & (maskDay <= 3)
statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF))
statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF))

# plot map and time series
import importlib
importlib.reload(plot)
dataGrid = [statP['RMSE'] - statF['RMSE'], statP['Corr'] - statF['Corr']]
prcp = df.getDataTs('APCP_FORA').squeeze()
dataTs = [obs, yp, yf]
crd = df.getGeo()
t = df.getT()
mapNameLst = ['dRMSE', 'dR']
tsNameLst = ['obs', 'prj', 'fore']
plot.plotTsMap(
    dataGrid,
    dataTs,
Exemple #3
0
# Test error when training on different years: for every training period,
# the statistics are computed only on the time steps outside that period.
trLst = [[20150402, 20160401], [20160401, 20170401], [20170401, 20180401]]
statPLst = list()
statFLst = list()
# The full time axis is the same for every training period, so build it once
# instead of on each pass through the loop.
taAll = utils.time.tRange2Array([20150402, 20180401])
# Iterate over trLst itself so the loop cannot drift from the list length.
for k, trTrain in enumerate(trLst):
    taTrain = utils.time.tRange2Array(trTrain)
    # NOTE(review): indTrain is used as positions into taAll - confirm that
    # utils.time.intersect returns indices of its first argument first.
    indTrain, ind2 = utils.time.intersect(taAll, taTrain)
    indTest = np.delete(np.arange(len(taAll)), indTrain)
    tempYp = ypLst[k][:, indTest]
    tempYf = yfLst[k][:, indTest]
    tempMask = maskF[:, indTest]
    tempObs = obs[:, indTest]
    tempStatP = stat.statError(utils.fillNan(tempYp, tempMask),
                               utils.fillNan(tempObs, tempMask))
    tempStatF = stat.statError(utils.fillNan(tempYf, tempMask),
                               utils.fillNan(tempObs, tempMask))
    statPLst.append(tempStatP)
    statFLst.append(tempStatF)

# plot map and time series
import importlib
importlib.reload(plot)
dataGrid = [
    statPLst[0]['RMSE'] - statFLst[0]['RMSE'],
    statPLst[1]['RMSE'] - statFLst[1]['RMSE'],
    statPLst[2]['RMSE'] - statFLst[2]['RMSE']
]
prcp = df.getDataTs('APCP_FORA').squeeze()
dataTs = [[obs, ypLst[0], yfLst[0]], [obs, ypLst[1], yfLst[1]],
Exemple #4
0
    tRange = [20160501, 20170501]
    predLst = list()
    outLSTM = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1')
    df, pred, obs = master.test(
        outLSTM, tRange=tRange, subset=subset, batchSize=100)
    predLst.append(pred.squeeze())
    for out in outLst:
        df, pred, obs = master.test(
            out, tRange=tRange, subset=subset, batchSize=100)
        predLst.append(pred.squeeze())
    obs = obs.squeeze()

# plot box - latency
# if 'post' in doLst:
caseLst = ['Predict'] + ['Nowcast ' + str(nd) + 'd latency' for nd in dLst]
statLst1 = [stat.statError(x, obs) for x in predLst]
keyLst = list(statLst1[0].keys())
# dataBox[i][j]: NaN-free values of statistic keyLst[i] for case j.
dataBox = [
    [caseStat[statStr][~np.isnan(caseStat[statStr])] for caseStat in statLst1]
    for statStr in keyLst
]
fig = plot.plotBoxFig(dataBox, keyLst, caseLst, sharey=False)
fig.show()
fig.savefig(os.path.join(saveDir, 'box_latency'))

# figure out how many days observation lead
Exemple #5
0
    caseLst.append('All-90-95-DA' + str(nDay))
outLst = [os.path.join(pathCamels['Out'], save_path, x) for x in caseLst]
subset = 'All'
tRange = [19950101, 20000101]
predLst = list()
# Run the test for every experiment folder and collect the predictions.
for out in outLst:
    df, pred, obs = master.test(
        out, tRange=tRange, subset=subset, basinnorm=True, epoch=200)
    # pred=np.maximum(pred,0)
    predLst.append(pred)

# plot box
# Every prediction is scored against `obs` from the last test call above.
statDictLst = [stat.statError(x.squeeze(), obs.squeeze()) for x in predLst]
# keyLst = list(statDictLst[0].keys())
keyLst = ['Bias', 'RMSE', 'NSE']
dataBox = list()
for statStr in keyLst:
    # One NaN-free array per experiment for this statistic.
    temp = list()
    for statDict in statDictLst:
        data = statDict[statStr]
        temp.append(data[~np.isnan(data)])
    dataBox.append(temp)
# plt.style.use('classic')
plt.rcParams['font.size'] = 14
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams["legend.columnspacing"] = 0.1
Exemple #6
0
# Collect predictions of two saved models on the same input x:
# index 0 = 'LSTM', index 1 = 'LSTM-DA' (the latter is also given z=y).
ypLst = list()
modelName = 'LSTM'
model = train.loadModel(outFolder, nEpoch, modelName=modelName)
yp = train.testModel(model, x, batchSize=100).squeeze()
# NOTE(review): presumably transNorm(..., fromRaw=False) maps the normalized
# output back to SMAP_AM units - confirm against dbCsv.transNorm.
ypLst.append(
    dbCsv.transNorm(yp, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False))
modelName = 'LSTM-DA'
model = train.loadModel(outFolder, nEpoch, modelName=modelName)
yp = train.testModel(model, x, z=y, batchSize=100).squeeze()
ypLst.append(
    dbCsv.transNorm(yp, rootDB=rootDB, fieldName='SMAP_AM', fromRaw=False))

##

# Error statistics of each model against yt.
statErr1 = stat.statError(ypLst[0], yt)
statErr2 = stat.statError(ypLst[1], yt)
# Map 1: RMSE of the DA model; map 2: DA RMSE minus LSTM RMSE.
dataGrid = [statErr2['RMSE'], statErr2['RMSE'] - statErr1['RMSE']]
dataTs = [ypLst[0], ypLst[1], yt]
t = df.getT()
crd = df.getGeo()
mapNameLst = ['DA', 'DA-LSTM']
tsNameLst = ['LSTM', 'DA', 'SMAP']
colorMap = None
colorTs = None

plot.plotTsMap(
    dataGrid,
    dataTs,
    crd,
    t,
Exemple #7
0
    yp1 = train.testModel(model1, x2, c2)
    yp1 = camels.transNorm(yp1, 'usgsFlow', toNorm=False).squeeze()

    model2 = train.loadModel(outFolder, nEpoch, modelName='DA-1')
    yp2 = train.testModel(model2, xz1, c2)
    yp2 = camels.transNorm(yp2, 'usgsFlow', toNorm=False).squeeze()

    model3 = train.loadModel(outFolder, nEpoch, modelName='DA-7')
    yp3 = train.testModel(model3, xz2, c2)
    yp3 = camels.transNorm(yp3, 'usgsFlow', toNorm=False).squeeze()

    yLst = [yt2, yp1, yp2, yp3]

# plot box
# Score each of the three predictions against the same observations yt2.
statDictLst = [stat.statError(ypCase, yt2) for ypCase in (yp1, yp2, yp3)]
keyLst = list(statDictLst[0].keys())

# dataBox[i][j]: NaN-free values of statistic keyLst[i] for model j.
dataBox = [
    [caseStat[statStr][~np.isnan(caseStat[statStr])]
     for caseStat in statDictLst]
    for statStr in keyLst
]
fig = plot.plotBoxFig(dataBox, keyLst, ['LSTM', 'DA-1', 'DA-7'], sharey=False)
         predLst.append(pred) # the prediction list for all the models
         obsLst.append(obs)
         np.save(os.path.join(out, 'pred.npy'), pred)
         np.save(os.path.join(out, 'obs.npy'), obs)
         f = np.load(os.path.join(out, 'x.npy'))  # it has been saved previously in the out directory (forcings)
         T = (f[:, :, 3] + f[:, :, 4]) / 2    # mean air T for T_residual
         T_air = np.expand_dims(T, axis=2)
         pred_res = pred - T_air
         obs_res = obs - T_air
         predLst_res.append(pred_res)
         obsLst_res.append(obs_res)
     # calculate statistic metrics
        # statDict = stat.statError(pred.squeeze(), obs.squeeze())
       #  statDictLst.append([statDict])
 #    statDictLst1 = [stat.statError(x.squeeze(), obs.squeeze()) for x, y in predLst]
     statDictLst = [stat.statError(x.squeeze(), y.squeeze()) for (x, y) in zip(predLst, obsLst)]
     statDictLst_res = [stat.statError_res(x.squeeze(), y.squeeze(), z.squeeze(), w.squeeze()) for (x, y, z, w) in
                    zip(predLst, obsLst, predLst_res, obsLst_res)]
     ### save this file too
     # median and STD calculation
     count = 0
     mdstd = np.zeros([len(statDictLst_res[0]),3])
     for i in statDictLst_res[0].values():
         median = np.nanmedian((i))    # abs(i)
         STD = np.nanstd((i))        # abs(i)
         mean = np.nanmean((i))      #abs(i)
         k = np.array([[median,STD, mean]])
         mdstd[count] = k
         count = count +1
     mdstd = pd.DataFrame(mdstd, index=statDictLst_res[0].keys(), columns=['median', 'STD','mean'])
     if retrained==True:
latC, lonC = dfC.getGeo()

# case = '090303'
for case in caseLst:
    testName = subsetPattern.format(case, 3)
    errLst = list()
    for k in levLst:
        # Levels 0 and 1 use the 'ecoReg_..._L{k}' naming; every other level
        # follows subsetPattern.
        # if k in [-1]:
        subset = ('ecoReg_{}_L{}_v2f1'.format(case, k)
                  if k in [0, 1] else subsetPattern.format(case, k))
        outName = subset + '_Forcing'
        out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegionCase', outName)
        df, yp, yt = master.test(out, tRange=tRange, subset=testName)
        errLst.append(stat.statError(yp[:, :, 0], yt[:, :, 0]))

    # plot box
    cLst = 'ygbr'
    keyLst = ['RMSE', 'Corr']
    # One group per statistic, one entry per level in levLst.
    dataBox = [[err[key] for err in errLst] for key in keyLst]
    fig = plot.plotBoxFig(
        dataBox, '  ', figsize=(8, 6), colorLst=cLst, sharey=False)
optTrain = default.update(default.optTrainSMAP, nEpoch=100)
out = os.path.join(cDir, 'output', 'CONUSv4f1_multi')
masterDict = wrapMaster(out, optData, optModel, optLoss, optTrain)

# train
# train(masterDict)
# runTrain(masterDict, cudaID=2, screen='LSTM-multi')

# test
df, yp, yt, sigma = test(out, tRange=[20160401, 20170401], subset='CONUSv4f1')

# plot ts MAP
dataGrid = list()
dataTs = list()
# The first two slices along the last axis are scored and plotted separately.
for k in range(2):
    ypK, ytK = yp[:, :, k], yt[:, :, k]
    statErr = stat.statError(ypK, ytK)
    dataGrid.append(statErr['RMSE'])
    dataTs.append([ypK, ytK])
t = df.getT()
crd = df.getGeo()
mapNameLst = ['RMSE ', 'RMSE']
tsNameLst = ['LSTM', 'SMAP']
plot.plotTsMap(
    dataGrid,
    dataTs,
    lat=crd[0],
    lon=crd[1],
    t=t,
    mapNameLst=mapNameLst,
    tsNameLst=tsNameLst,
    multiTS=True,
    isGrid=True,
)
Exemple #11
0
daylen = xt.shape[1]
Pred = np.full(yt.shape, np.nan)
# Fit an independent linear regression for every gage on (x, y) and store its
# prediction for the test inputs xt in channel 0 of Pred.
for ii in range(ngage):
    regmodel = LinearRegression().fit(x[ii, :, :], y[ii, :, :])
    ypred = regmodel.predict(xt[ii, :, :])
    Pred[ii, :, 0] = ypred.squeeze()
# NOTE(review): toNorm=False presumably undoes the normalization - confirm
# against camels.transNorm / camels.basinNorm.
pred = camels.transNorm(Pred, 'usgsFlow', toNorm=False)
obs = camels.transNorm(yt, 'usgsFlow', toNorm=False)
gageid = 'All'
pred = camels.basinNorm(pred, gageid=gageid, toNorm=False)
obs = camels.basinNorm(obs, gageid=gageid, toNorm=False)
# plot box
statDictLst = [stat.statError(pred.squeeze(), obs.squeeze())]
keyLst = ['Bias', 'RMSE', 'NSE']
# dataBox[i][j]: NaN-free values of statistic keyLst[i] for entry j.
dataBox = [
    [caseStat[statStr][~np.isnan(caseStat[statStr])]
     for caseStat in statDictLst]
    for statStr in keyLst
]
# plt.style.use('classic')
plt.rcParams['font.size'] = 14
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams["legend.columnspacing"] = 0.1
plt.rcParams["legend.handletextpad"] = 0.2
Exemple #12
0
                             batchSize=100)
df2, yf2, obs2 = master.test(out,
                             tRange=[20160401, 20180401],
                             subset=subset,
                             batchSize=100)
out = os.path.join(pathSMAP['Out_L3_NA'], 'DA', 'CONUSv2f1_LSTM2015')
df1, yp1, obs1 = master.test(out,
                             tRange=[20150402, 20180401],
                             subset=subset,
                             batchSize=100)
df2, yp2, obs2 = master.test(out,
                             tRange=[20160401, 20180401],
                             subset=subset,
                             batchSize=100)

statF = stat.statError(yf2.squeeze(), obs2.squeeze())
statP = stat.statError(yp2.squeeze(), obs2.squeeze())
t = df1.getT()
lat, lon = df1.getGeo()
dataTS = [obs1.squeeze(), yp1.squeeze(), yf1.squeeze()]
tBar = np.datetime64('2016-04-01')


def funcMap():
    gridF, uy, ux = utils.grid.array2grid(statF['RMSE'], lat=lat, lon=lon)
    gridP, uy, ux = utils.grid.array2grid(statP['RMSE'], lat=lat, lon=lon)
    figM, axM = plt.subplots(1, 2, figsize=(10, 4))
    axplot.mapGrid(axM[0], uy, ux, gridF, vRange=[0, 0.1], cmap=plt.cm.jet)
    axM[0].set_title('Temporal Test RMSE of LSTM-DI')
    axplot.mapGrid(axM[1], uy, ux, gridP, vRange=[0, 0.1], cmap=plt.cm.jet)
    axM[1].set_title('Temporal Test RMSE of LSTM')
Exemple #13
0
# 1 where an observation exists, 0 where it is NaN.
maskObs = 1 * ~np.isnan(obs.squeeze())
maskDay = np.zeros(maskObs.shape, dtype=int)
ngrid, nt = maskObs.shape
# maskDay[j, i] = number of steps since the most recent observation strictly
# before step i in row j (1 means step i-1 was observed); it stays 0 until the
# first observation of the row has been passed.
for j in range(ngrid):
    temp = 0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        elif temp != 0:
            temp += 1
ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
# Cells whose maskDay value is between 1 and 3 inclusive.
maskF = (1 <= maskDay) & (maskDay <= 3)
statP = stat.statError(yp, obs)
statLst = [
    stat.statError(utils.fillNan(x, maskF), utils.fillNan(obs, maskF))
    for x in yfLst
]

# if 'post' in doLst:
caseLst = ['Predict'] + [str(nd) + 'd latency' for nd in dLst]
keyLst = list(statLst[0].keys())
dataBox = list()
for iS in range(len(keyLst)):
    key = keyLst[iS]
    temp = list()
    temp.append(statP[key])
    print(key, np.nanmedian(statP[key]))
    for k in range(len(statLst)):
    temp = 0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        else:
            if temp != 0:
                temp = temp + 1
ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
unique, counts = np.unique(maskObsDay, return_counts=True)
maskF = (maskDay >= 1) & (maskDay <= 3)
statPLst = list()
statFLst = list()
for k in range(3):
    statP = stat.statError(utils.fillNan(ypLst[k], maskF),
                           utils.fillNan(obs, maskF))
    statF = stat.statError(utils.fillNan(yfLst[k], maskF),
                           utils.fillNan(obs, maskF))
    statPLst.append(statP)
    statFLst.append(statF)

cropFile = r'/mnt/sdb/Data/Crop/cropRate_CONUSv2f1.csv'
# Read the header-less CSV as a float array. `np.float` was only an alias of
# the builtin `float`; it was deprecated in NumPy 1.20 and removed in 1.24,
# so the builtin is used to keep this line working on current NumPy.
cropRate = pd.read_csv(cropFile, dtype=float, header=None).values
# croprate - 0 corn, 4 soybean, 22 spring wheat, 23 winter wheat
dataGrid = [
    (statPLst[0]['RMSE'] - statFLst[0]['RMSE']) / statPLst[0]['RMSE'],
    (statPLst[1]['RMSE'] - statFLst[1]['RMSE']) / statPLst[1]['RMSE'],
    (statPLst[2]['RMSE'] - statFLst[2]['RMSE']) / statPLst[2]['RMSE'],
]
prcp = df.getDataTs('APCP_FORA').squeeze()
dataTs = [[obs, ypLst[0], yfLst[0]], [obs, ypLst[1], yfLst[1]],
Exemple #15
0
                                             subset='CONUSv4f1',
                                             tRange=[sd, ed])
        obs = df.getData(varT='SMAP_AM', doNorm=True, rmNan=False)
        modelName = 'LSTM-DA-' + str(k)
        model = train.loadModel(outFolder, nEpoch, modelName=modelName)
        yP = train.testModel(model, (x, obs), batchSize=100).squeeze()
        ypLst.append(
            dbCsv.transNorm(yP,
                            rootDB=rootDB,
                            fieldName='SMAP_AM',
                            fromRaw=False))

if 'post' in doLst:
    statDictLst = list()
    for k in range(0, len(ypLst)):
        statDictLst.append(stat.statError(ypLst[k], yT))
    keyLst = ['RMSE', 'ubRMSE', 'Bias', 'Corr']
    caseLst = ['LSTM']
    for k in dLst:
        caseLst.append('DA-' + str(k))

    # plot box
    dataBox = list()
    cmap = plt.cm.jet
    cLst = cmap(np.linspace(0, 1, len(caseLst)))
    for iS in range(len(keyLst)):
        statStr = keyLst[iS]
        temp = list()
        for k in range(len(statDictLst)):
            temp.append(statDictLst[k][statStr])
        dataBox.append(temp)
Exemple #16
0
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        else:
            if temp != 0:
                temp = temp + 1
ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
maskF = (maskDay >= 1) & (maskDay <= 3)

# figure out train and test time index
# For every entry of tRangeLst, split the positions of the full time axis tA0
# into those matched by the training range (train) and the rest (test).
tR0 = [20150402, 20180401]
tA0 = utils.time.tRange2Array(tR0)
nt = len(tA0)
tTrainLst = list()
tTestLst = list()
for k in range(len(yrLst)):
    tR = tRangeLst[k]
    tA = utils.time.tRange2Array(tR)
    ind0 = np.array(range(nt))
    # NOTE(review): ind1 is used as positions into tA0 - confirm that
    # utils.time.intersect returns indices of its first argument first.
    ind1, ind2 = utils.time.intersect(tA0, tA)
    tTestLst.append(np.delete(ind0, ind1))
    tTrainLst.append(ind1)

# calculate stat
for k in range(len(yrLst)):
    yfTemp = utils.fillNan(yfLst[k], maskF)
    yfTemp = yfTemp[:, tTestLst[k]]
    # NOTE(review): statP and statF are computed from exactly the same inputs
    # (yfTemp), so they are identical. statP was probably meant to use the
    # other prediction series - confirm.
    # NOTE(review): yfTemp is restricted to the test columns but obs is not;
    # confirm that statError is meant to receive arrays of different widths.
    statP = stat.statError(yfTemp, utils.fillNan(obs, maskF))
    statF = stat.statError(yfTemp, utils.fillNan(obs, maskF))
Exemple #17
0
    if iEns == 0:
        predLst = predtempLst
    else:
        for ii in range(len(outLst)):
            predLst[ii] = np.concatenate([predLst[ii], predtempLst[ii]],
                                         axis=2)
    # predLst: List of all experiments with shape: Ntime*Nbasin*Nensemble

# get the ensemble mean from simulations of different seeds
# (NaN-aware mean over axis 2, keeping that axis with length 1)
ensLst = [
    np.nanmean(predLst[ii], axis=2, keepdims=True)
    for ii in range(len(outLst))
]

# plot boxplots for different experiments
statDictLst = [stat.statError(x.squeeze(), obsAll.squeeze()) for x in ensLst]
keyLst = ["NSE", "KGE"]  # which metric to show
dataBox = list()
for statStr in keyLst:
    # One NaN-free array per experiment for this metric.
    temp = list()
    for statDict in statDictLst:
        data = statDict[statStr]
        temp.append(data[~np.isnan(data)])
    dataBox.append(temp)

plt.rcParams["font.size"] = 14
labelname = ["PUR", "PUR-FDC", "PUR-1/3FDC"]
xlabel = ["NSE", "KGE"]
fig = plot.plotBoxFig(
    dataBox, xlabel, labelname, sharey=False, figsize=(6, 5))
Exemple #18
0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        else:
            if temp != 0:
                temp = temp + 1
maskObsDay = maskObs * maskDay

# statLst[a][b]: error statistics of yfLst[b] restricted to the cells where
# maskDay equals fLst[a].
fLst = [1, 2, 3]
statLst = list()
for nf in fLst:
    maskF = maskDay == nf
    temp = list()
    for yf in yfLst:
        statErr = stat.statError(utils.fillNan(yf, maskF),
                                 utils.fillNan(obs, maskF))
        temp.append(statErr)
    statLst.append(temp)

# load result from RK
# NOTE(review): this is a raw string, so the doubled backslashes are kept
# literally in the path - confirm that is intended.
dirRK = r'D:\\data\\Koster17\\'
fileNameLst = ['rmse_lead_{}.dat'.format(x) for x in [1, 2, 3]]
tempLst = list()
for k in range(3):
    # lon lat are identical. Tested
    temp = np.loadtxt(os.path.join(dirRK, fileNameLst[k]))
    # Column 2 of each file is collected; columns 0 and 1 are read below.
    tempLst.append(temp[:, 2])
# Coordinates are taken from the last file loaded in the loop above.
RKlon = temp[:, 0]
RKlat = temp[:, 1]
lat, lon = df.getGeo()
errLst = list()
            temp = 1
        else:
            if temp != 0:
                temp = temp + 1
# Spot check one random row, then print the maskDay frequency table and the
# counts divided by ngrid * nt.
ind = np.random.randint(0, ngrid)
print(np.array([maskObs[ind, :], maskDay[ind, :]]))
maskObsDay = maskObs * maskDay
unique, counts = np.unique(maskDay, return_counts=True)
print(np.asarray((unique, counts)).T)
print(counts / ngrid / nt)

fLst = [1, 2, 3]
statLstF = list()
statLstP = list()
maskF = (1 <= maskDay) & (maskDay <= 3)
statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF))
statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF))
for nf in fLst:
    # Keep only the cells whose maskObsDay value equals nf; all others stay NaN.
    maskNf = maskObsDay == nf
    xp, xf, y = (np.full([ngrid, nt], np.nan) for _ in range(3))
    xf[maskNf] = yf[maskNf]
    xp[maskNf] = yp[maskNf]
    y[maskNf] = obs[maskNf]
    statLstF.append(stat.statError(xf, y))
    statLstP.append(stat.statError(xp, y))

# plot box - forecast
matplotlib.rcParams.update({'font.size': 11})
matplotlib.rcParams.update({'lines.linewidth': 2})
matplotlib.rcParams.update({'lines.markersize': 12})
Exemple #20
0
# maskDay[j, i] = number of steps since the most recent observation strictly
# before step i in row j (1 means step i-1 was observed); it stays 0 until the
# first observation of the row has been passed.
maskDay = np.zeros(maskObs.shape).astype(int)
ngrid, nt = maskObs.shape
for j in range(ngrid):
    temp = 0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        elif temp != 0:
            temp = temp + 1
ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
unique, counts = np.unique(maskObsDay, return_counts=True)
# Cells whose maskDay value is between 1 and 3 inclusive.
maskF = (maskDay >= 1) & (maskDay <= 3)
statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF))
statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF))

# Diagnostics. maskObsDay is already up to date here, so the second identical
# assignment that used to sit at this point was dropped.
print(np.array([maskObs[ind, :], maskDay[ind, :]]))
print(np.asarray((unique, counts)).T)
print(counts / ngrid / nt)

# see result for different seasons
tRangeLst = [[20160401, 20160701], [20160701, 20161001], [20161001, 20170101],
             [20170101, 20170401], [20170401, 20170701], [20170701, 20171001],
             [20171001, 20180101], [20180101, 20180401]]

tAllA = utils.time.tRange2Array(tAllR)
statPLst = list()
statFLst = list()
Exemple #21
0
# %%  load data and stat
kcLst = [7, 8, 13]
tRange = [20160401, 20180401]
statLst = list()
statRefLst = list()
for kc in kcLst:
    tempLst = list()
    # The test subset depends only on kc, not on the inner index k.
    testName = subsetLst[kc - 1]
    for k in range(1, 18):
        # k == kc selects the 'ecoRegion{kc}' run; any other k selects the
        # 'ecoRegion{kc}{k}' run.
        if k == kc:
            outName = 'ecoRegion{:02d}_v2f1_Forcing'.format(kc)
        else:
            outName = 'ecoRegion{:02d}{:02d}_v2f1_Forcing'.format(kc, k)
        out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegion', outName)
        df, yp, yt = master.test(out, tRange=tRange, subset=testName)
        temp = stat.statError(yp[:, :, 0], yt[:, :, 0])
        tempLst.append(temp)
        if k == kc:
            # Remember the k == kc result as the reference for this kc.
            statRefLst.append(temp)
    statLst.append(tempLst)

# %% plot box
keyLst = stat.keyLst
ecoLst = ['{:02d}'.format(x) for x in range(1, 18)]
caseLst = ['{:02d}'.format(x) for x in [7, 8, 13]]

for k in range(len(caseLst)):
    dataBox = list()
    key = 'RMSE'
    for ii in range(len(ecoLst)):
        temp = list()
        np.save(os.path.join(out, 'obs.npy'), obs)
        f = np.load(
            os.path.join(out, 'x.npy')
        )  # it has been saved previously in the out directory (forcings)
        T = (f[:, :, 3] + f[:, :, 4]) / 2  # mean air T for T_residual
        T_air = np.expand_dims(T, axis=2)
        pred_res = pred - T_air
        obs_res = obs - T_air
        predLst_res.append(pred_res)
        obsLst_res.append(obs_res)
    # calculate statistic metrics
    # statDict = stat.statError(pred.squeeze(), obs.squeeze())
    #  statDictLst.append([statDict])
#    statDictLst1 = [stat.statError(x.squeeze(), obs.squeeze()) for x, y in predLst]
    statDictLst = [
        stat.statError(x.squeeze(), y.squeeze())
        for (x, y) in zip(predLst, obsLst)
    ]
    statDictLst_res = [
        stat.statError_res(x.squeeze(), y.squeeze(), z.squeeze(), w.squeeze())
        for (x, y, z, w) in zip(predLst, obsLst, predLst_res, obsLst_res)
    ]

    # median and STD calculation
    count = 0
    mdstd = np.zeros([len(statDictLst_res[0]), 3])
    for i in statDictLst_res[0].values():
        median = np.nanmedian((i))  # abs(i)
        STD = np.nanstd((i))  # abs(i)
        mean = np.nanmean((i))  #abs(i)
        k = np.array([[median, STD, mean]])
Exemple #23
0
# Paths are resolved relative to the directory that contains this script.
cDir = os.path.dirname(os.path.abspath(__file__))

out = os.path.join(cDir, 'output', 'CONUSv4f1')
rootDB = os.path.join(cDir, 'data')
nEpoch = 100
tRange = [20160401, 20170401]

# load data
# Reuse tRange / nEpoch instead of repeating their literal values, so the
# settings above stay the single source of truth.
df, yp, yt = master.test(
    out, tRange=tRange, subset='CONUSv4f1', epoch=nEpoch, reTest=True)
yp = yp.squeeze()
yt = yt.squeeze()

# calculate stat
statErr = stat.statError(yp, yt)
dataGrid = [statErr['RMSE'], statErr['Corr']]
dataTs = [yp, yt]
t = df.getT()
crd = df.getGeo()
mapNameLst = ['RMSE', 'Correlation']
tsNameLst = ['LSTM', 'SMAP']
# plot map and time series
plot.plotTsMap(
    dataGrid,
    dataTs,
    lat=crd[0],
    lon=crd[1],
    t=t,
    mapNameLst=mapNameLst,
Exemple #24
0
dfC = dbCsv.DataframeCsv(rootDB=rootDB, subset='CONUSv2f1', tRange=tRange)
latC, lonC = dfC.getGeo()

# errLstAll[a][b]: error statistics for case caseLst[a] trained at level
# levLst[b]; every run of a case is tested on that case's level-3 subset name.
errLstAll = list()
for case in caseLst:
    testName = subsetPattern.format(case, 3)
    errLst = list()
    for k in levLst:
        # Levels 0 and 1 use the 'ecoReg_..._L{k}' naming; every other level
        # follows subsetPattern.
        subset = ('ecoReg_{}_L{}_v2f1'.format(case, k)
                  if k in [0, 1] else subsetPattern.format(case, k))
        outName = subset + '_Forcing'
        out = os.path.join(pathSMAP['Out_L3_NA'], 'ecoRegionCase', outName)
        df, yp, yt = master.test(out, tRange=tRange, subset=testName)
        err = stat.statError(yp[:, :, 0], yt[:, :, 0])
        errLst.append(err)
    errLstAll.append(errLst)

# plot box
cLst = 'ygbr'
keyLst = ['RMSE', 'Corr']
for key in keyLst:
    dataBox = list()
    for errLst in errLstAll:
        temp = list()
        for err in errLst:
            temp.append(err[key])
        dataBox.append(temp)
    fig = figplot.boxPlot(dataBox,
                          label1=caseLabLst,
Exemple #25
0
        model = train.loadModel(outFolder, nEpoch, modelName=modelName)
        yP = train.testModel(model, (x, obs), c, batchSize=100).squeeze()
        ypLstmLst.append(
            dbCsv.transNorm(yP,
                            rootDB=rootDB,
                            fieldName='SMAP_AM',
                            fromRaw=False))

if 'post' in doLst:
    # stat
    ypLst = [ypLstmLst, ypAnnLst]
    statDictLst = list()
    for i in range(0, len(ypLst)):
        tempLst = list()
        for j in range(0, len(ypLst[i])):
            tempLst.append(stat.statError(ypLst[i][j], yT))
        statDictLst.append(tempLst)
    keyLst = list(tempLst[0].keys())

    # plot box
    dataBox = list()
    caseLst1 = keyLst
    caseLst2 = ['LSTM', 'LSTM-DA']
    for iS in range(len(keyLst)):
        statStr = keyLst[iS]
        dataBox = list()
        for iS in range(len(keyLst)):
            statStr = keyLst[iS]
            temp = list()
            for k in range(len(statDictLst)):
                temp.append(statDictLst[k][statStr])
Exemple #26
0
# NOTE(review): this tRange starts later than it ends (20141227 > 20091227);
# it looks like a typo in the start date - confirm the intended range.
dfz2 = camels.DataframeCsv(subset='all', tRange=[20141227, 20091227])
z2 = dfz2.getDataObs(doNorm=True, rmNan=False)

# Second dataset (suffix 2): forcings, constant attributes and raw
# (doNorm=False) observations.
df2 = camels.DataframeCsv(subset='all', tRange=[20100101, 20150101])
x2 = df2.getDataTS(varLst=camels.forcingLst, doNorm=True, rmNan=True)
c2 = df2.getDataConst(varLst=camels.attrLstSel, doNorm=True, rmNan=True)
yt2 = df2.getDataObs(doNorm=False, rmNan=False).squeeze()

# Run the same saved model on the (x1, c1) and (x2, c2) inputs.
model = train.loadModel(outFolder, 100, modelName='test')
yp1 = train.testModel(model, x1, c1)
yp1 = camels.transNorm(yp1, 'usgsFlow', toNorm=False).squeeze()
yp2 = train.testModel(model, x2, c2)
yp2 = camels.transNorm(yp2, 'usgsFlow', toNorm=False).squeeze()

# NOTE(review): yp1 comes from the (x1, c1) inputs but is scored against yt2,
# the observations of the second dataset. Given the 'Train Corr' label below,
# the first dataset's observations were probably intended - confirm.
statErr1 = stat.statError(yp1, yt2)
statErr2 = stat.statError(yp2, yt2)
# Map 1: Corr of the second run; map 2: first-run Corr minus second-run Corr.
dataMap = [statErr2['Corr'], statErr1['Corr'] - statErr2['Corr']]
dataTs = [yt2, yp2]
t = df2.getT()
crd = df2.getGeo()
mapNameLst = ['Test Corr', 'Train Corr - Test Corr']
tsNameLst = ['USGS', 'LSTM']
colorMap = None
colorTs = None

# Reload the plot module so edits made to it during an interactive session
# take effect. `imp` has been deprecated since Python 3.4 and was removed in
# 3.12; importlib.reload is its replacement.
import importlib
importlib.reload(plot)
plot.plotTsMap(dataMap,
               dataTs,
               lat=crd[:, 0],