Esempio n. 1
0
            matP1[ind1, ic] = modelYC.predict(xT1)
            if len(ind2) > 0:
                xT2 = x2[ind2, :]
                matP1[ind2, ic] = modelYC.predict(xT2)
# Pass both prediction matrices through wqData.transOut, then compute the
# per-site error matrices on the matching subsets.
matO1, matO2 = (
    wqData.transOut(matP, statYC, varYC) for matP in (matP1, matP2))

errMatL1 = wqData.errBySiteC(matO1, varYC, subset=trainset)
errMatL2 = wqData.errBySiteC(matO2, varYC, subset=testset)

# box: one [errMatL1, errMatL2] pair per variable, using index 1 of the
# last axis of each error matrix
dataBox = [[errMatL1[:, k, 1], errMatL2[:, k, 1]] for k in range(nc)]
fig = figplot.boxPlot(dataBox)
fig.show()

# auto regression
# Element 0 of each data tuple is used as the input array and element 3 of
# the first tuple as the target array.
x1 = dataTup1[0]
yc1 = dataTup1[3]
x2 = dataTup2[0]

# Restrict everything to the first site: pick the rows of each info table
# that belong to it.
siteNo = siteNoLst[0]
ind1 = infoTrain[infoTrain['siteNo'] == siteNo].index
ind2 = infoTest[infoTest['siteNo'] == siteNo].index
xT1 = x1[:, ind1, :]
ycT1 = yc1[ind1, :]
# ind2 comes from infoTest, so it has to index the array loaded from
# dataTup2 (x2); slicing x1 with it would pick unrelated samples.
xT2 = x2[:, ind2, :]
for ic in range(nc):
    [xx, yy], iv = utils.rmNan([xT1, ycT1[:, ic]])
Esempio n. 2
0
    dataPlot = [yOut[:, k, :], d1.Y[:, k, :], d2.Y[:, k, :]]
    cLst = ['red', 'grey', 'black']
    fig, axes = figplot.multiTS(DF.t, dataPlot, labelLst=labelLst, cLst=cLst)
    fig.show()

# correlation matrix
# mat1[site, code, :] receives Bias, RMSE, NSE and Corr (in that order) of
# yOut against d2.Y. mat2 is allocated with the same shape but is not
# filled in this section.
nSite, nCode = len(siteNoLst), len(codeSel)
mat1 = np.ndarray([nSite, nCode, 4])
mat2 = np.ndarray([nSite, nCode, 4])
for indS, siteNo in enumerate(siteNoLst):
    for indC, code in enumerate(codeSel):
        stat = utils.stat.calStat(yOut[:, indS, indC], d2.Y[:, indS, indC])
        for iStat, key in enumerate(('Bias', 'RMSE', 'NSE', 'Corr')):
            mat1[indS, indC, iStat] = stat[key]

statStrLst = ['Bias', 'RMSE', 'NSE', 'Corr']
# Outer list: one entry per statistic; inner list: one array per code.
dataPlot = [
    [mat1[:, ic, k] for ic in range(len(codeSel))]
    for k in range(len(statStrLst))
]
boxOpt = dict(widths=0.5, figsize=(12, 4), sharey=False)
fig = figplot.boxPlot(
    dataPlot, label1=statStrLst, label2=codeSel, **boxOpt)
fig.show()
Esempio n. 3
0

# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
# Only two of the four columns of corrMat's last axis are drawn.
# Full variant: columns [2, 3, 0, 1] with
# labLst2 = ['WRTDS train', 'WRTDS test', 'LSTM train', 'LSTM test']
labLst2 = ['WRTDS test', 'LSTM test']
dataBox = list()
for k, code in enumerate(codeLst):
    dataBox.append([corrMat[:, k, i] for i in (3, 1)])
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2,
    widths=0.5, cLst='br', figsize=(12, 4), yRange=[0, 1])
# Alternative without a fixed y range:
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()

# p-values
# Empty result table: one row per "<code> <shortName>", one column per test.
testLst = ['p-value']
indLst = [[1, 3]]  # columns of corrMat's last axis compared by each test
codeStrLst = list()
for code in codeLst:
    shortName = usgs.codePdf.loc[code]['shortName']
    codeStrLst.append('{} {}'.format(code, shortName))
dfS = pd.DataFrame(index=codeStrLst, columns=testLst)
for (test, ind) in zip(testLst, indLst):
    for k, code in enumerate(codeLst):
        data = [corrMat[:, k, x] for x in ind]
        [a, b], _ = utils.rmNan(data)
        # s, p = scipy.stats.ttest_ind(a, b, equal_var=False)
Esempio n. 4
0
        p2, o2 = basins.testModel(outName, testSet, wqData=wqData)
        errMat1 = wqData.errBySite(p1, subset=trainSet)
        errMat2 = wqData.errBySite(p2, subset=testSet)
        pLst1.append(p1)
        pLst2.append(p2)
        errMatLst1.append(errMat1)
        errMatLst2.append(errMat2)

    codePdf = usgs.codePdf
    groupLst = codePdf.group.unique().tolist()
    for group in groupLst:
        codeLst = codePdf[codePdf.group == group].index.tolist()
        indLst = [wqData.varC.index(code) for code in codeLst]
        labLst1 = [codePdf.loc[code]['shortName'] +
                   '\n'+code for code in codeLst]
        labLst2 = ['opt1-train', 'opt2-train', 'opt1-test', 'opt2-test']
        dataBox = list()
        for ic in indLst:
            temp = list()
            for errMat in errMatLst1:
                temp.append(errMat[:, ic, 1])
            for errMat in errMatLst2:
                temp.append(errMat[:, ic, 1])
            dataBox.append(temp)
        title = 'correlation of {} group on {}'.format(group, yr)
        figName = 'box_{}_{}'.format(group, yr)
        fig = figplot.boxPlot(dataBox, label1=labLst1, label2=labLst2)
        fig.suptitle(title)
        fig.show()
        fig.savefig(os.path.join(figFolder, figName))
Esempio n. 5
0
statStrLst = ['Bias', 'RMSE', 'NSE', 'Corr']
dataPlot = list()
labelLst = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
# One figure per statistic; each code contributes a triple of arrays taken
# from mat1, mat2 and mat3 (legend order below follows that order).
for k, statStr in enumerate(statStrLst):
    temp = list()
    for ic, code in enumerate(codeLst):
        # presumably rmNan drops entries that are NaN in any of the three
        # arrays -- confirm against utils.rmNan
        cleaned, _ = utils.rmNan(
            [mat1[:, ic, k], mat2[:, ic, k], mat3[:, ic, k]])
        a, b, c = cleaned
        temp.append([a, b, c])
    # Bias and RMSE get independent y axes; NSE and Corr share one.
    sharey = statStr not in ['Bias', 'RMSE']
    fig, axes = figplot.boxPlot(
        temp,
        label1=labelLst,
        label2=['LSTM w/ Q', 'LSTM w/o Q', 'WRTDS'],
        widths=0.5,
        figsize=(12, 4),
        sharey=sharey)
    if statStr == 'Bias':
        # zero line as a visual reference for the bias panels
        for ax in axes:
            _ = ax.axhline(0)
    fig.show()

#
# DF2 = dbBasin.DataFrameBasin('G400')

# labelLst = [usgs.codePdf.loc[code]['shortName'] + code for code in codeLst]
# d1 = dbBasin.DataModelBasin(DF2, subset=trainSet, varY=codeLst)
# d2 = dbBasin.DataModelBasin(DF2, subset=testSet, varY=codeLst)
# k = 60
# dataPlot = [yW[:, k, :], d1.Y[:, k, :], d2.Y[:, k, :]]
Esempio n. 6
0
# Per-site errors on the test subset for the variables in master['varYC'].
errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=master['varYC'])
# The second error matrix is only computed when master['varY'] is set.
if master['varY'] is not None:
    errMatQ2 = wqData.errBySiteQ(yP2, subset=testset, varQ=master['varY'])

# box
# For each of the two error columns (k) and each plotted variable, collect
# the pair of arrays from the "1" and "2" error matrices.
dataBox = list()
for k in range(2):
    for var in plotVar:
        if var == '00060':
            # '00060' is read from the Q error matrices (single column 0)
            temp = [errMatQ1[:, 0, k], errMatQ2[:, 0, k]]
        else:
            ic = master['varYC'].index(var)
            temp = [errMatC1[:, ic, k], errMatC2[:, ic, k]]
        # Append for BOTH branches; with the append nested under `else`
        # the '00060' pair was built and then silently dropped.
        dataBox.append(temp)
fig = figplot.boxPlot(dataBox,
                      label1=['RMSE', 'Corr'],
                      label2=['train', 'test'],
                      sharey=False)
fig.show()

# seq test
# Run the sequence test for every distinct site listed in wqData.info.
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)

# time series map
# Gage coordinates of those sites plus the code lookup table.
crdVarLst = ['LAT_GAGE', 'LNG_GAGE']
dfCrd = gageII.readData(varLst=crdVarLst, siteNoLst=siteNoLst)
lat, lon = dfCrd['LAT_GAGE'].values, dfCrd['LNG_GAGE'].values
codePdf = usgs.codePdf


def funcMap():
Esempio n. 7
0
            o = wqData.c[ind, ic]
        for siteNo in dictSite[code[:5]]:
            iS = siteNoLst.index(siteNo)
            indS = info[info['siteNo'] == siteNo].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iCode, iLab] = corr
            rmseMat[iS, iCode, iLab] = rmse

# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
# Legend labels are the entries of labLst2 with a '-WRTDS' suffix.
labLst3 = ['{}-WRTDS'.format(lab) for lab in labLst2]
dataBox = list()
for k, code in enumerate(codeLst):
    # one array per entry of labelLst, read along corrMat's last axis
    dataBox.append([corrMat[:, k, i] for i in range(len(labelLst))])
boxOpt = dict(widths=0.5, cLst=cLst, figsize=(12, 4), yRange=[-1, 1])
fig = figplot.boxPlot(dataBox, label1=labLst1, label2=labLst3, **boxOpt)
# Alternative without a fixed y range:
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
Esempio n. 8
0
        indS2 = info2[info2['siteNo'] == siteNo].index.values
        for iC in range(nc):
            countMat[i, iC, 0] = np.count_nonzero(~np.isnan(ycT1[indS1, iC]))
            countMat[i, iC, 1] = np.count_nonzero(~np.isnan(ycT2[indS2, iC]))
# Count matrix paired with each entry of errMatLst2 (same order as labLst2).
countMatLst = [countMat1, countMat2, countMat1, countMat2]

# plot box
codePdf = usgs.codePdf
codeLst = ['00660', '00665', '00600', '00605', '00618', '71846', '00950']
# codeLst = codePdf[codePdf.group == group].index.tolist()
indLst = [wqData.varC.index(code) for code in codeLst]
labLst1 = ['\n'.join([codePdf.loc[code]['shortName'], code])
           for code in codeLst]
labLst2 = [
    'train all test all',
    'train all test rmFlag',
    'train rmFlag test all',
    'train rmFlag test rmFlag',
]
# Minimum count required in BOTH count columns for a site to be kept.
rho = 20
dataBox = list()
for ic in indLst:
    temp = list()
    for errMat, countMat in zip(errMatLst2, countMatLst):
        enough = (countMat[:, ic, 0] > rho) & (countMat[:, ic, 1] > rho)
        ind = np.where(enough)[0]
        temp.append(errMat[ind, ic, 1])
        # unfiltered variant: temp.append(errMat[:, ic, 1])
    dataBox.append(temp)
title = 'test correlation of referenced sites with >{} samples'.format(rho)
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2, figsize=(12, 6))
fig.suptitle(title)
fig.show()
Esempio n. 9
0
# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
labLst2 = ['LSTM vs WRTDS', 'LSTM vs Obs', 'WRTDS vs Obs']
dataBox = list()
for k, code in enumerate(codeLst):
    # columns 0..2 of corrMat's last axis, in the order of labLst2
    dataBox.append([corrMat[:, k, i] for i in (0, 1, 2)])
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2,
    widths=0.5, cLst='grb', figsize=(20, 5), yRange=[0, 1])
fig.show()

# plot 121
# Pick up any edits made to axplot since it was first imported.
importlib.reload(axplot)
# 20 codes, one per cell of the 5 x 4 grid created below.
codeLst2 = (
    '00095 00400 00405 00600 00605 00618 00660 00665 '
    '00681 00915 00925 00930 00935 00940 00945 00950 '
    '00955 70303 71846 80154'
).split()
fig, axes = plt.subplots(nrows=5, ncols=4)
ticks = [-0.5, 0, 0.5, 1]
for k, code in enumerate(codeLst2):
Esempio n. 10
0
    name = nameLst[kk]
    mat = matLst[kk]
    yRange = rangeLst[kk]
    label1 = ecoIdLst
    label2 = ['Local', 'CONUS']
    dataBox = list()
    for k in range(len(subsetLst)):
        temp = list()
        temp.append(mat[0][k])
        temp.append(mat[1][k])
        dataBox.append(temp)
    if kk == 0:
        label2 = ['Local', 'CONUS']
    else:
        label2 = None
    fig = figplot.boxPlot(dataBox, widths=0.5, cLst='rb', label1=label1,
                          label2=label2, figsize=(12, 4), yRange=yRange)
    saveFile = os.path.join(saveFolder, 'q_ecoR_{}'.format(name))
    fig.savefig(saveFile)
    fig.show()


# # significance test
# testLst = ['Q as target', 'Q as input']
# indLst = [[0, 2], [1, 2]]
# codeStrLst = ['{} {}'.format(
#     code, usgs.codePdf.loc[code]['shortName']) for code in codeLst]
# dfS = pd.DataFrame(index=codeStrLst, columns=testLst)
# for (test, ind) in zip(testLst, indLst):
#     for k, code in enumerate(codeLst):
#         data = [corrMat[:, k, x] for x in ind]
#         [a, b], _ = utils.rmNan(data)
Esempio n. 11
0
            indS = info[info['siteNo'] == siteNo].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iLab, iT] = corr
            # rmseMat[iS, iCode, iT*2] = rmse

# # WRTDS
# dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
# # dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS')
# file1 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'B10N5'))
# dfCorr1 = pd.read_csv(file1, dtype={'siteNo': str}).set_index('siteNo')
# file2 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'A10N5'))
# dfCorr2 = pd.read_csv(file2, dtype={'siteNo': str}).set_index('siteNo')
# for iCode, code in enumerate(codeLst):
#     indS = [siteNoLst.index(siteNo) for siteNo in dictSite[code]]
#     corrMat[indS, iCode, 4] = dfCorr1.iloc[indS][code].values
#     corrMat[indS, iCode, 5] = dfCorr2.iloc[indS][code].values

# plot box
dataBox = list()
for k, code in enumerate(codeLst):
    # NOTE(review): the slice below does not depend on k, so every code
    # receives the same set of arrays -- confirm this is intended.
    # (an alternative ordering of the columns was [2, 3, 0, 1])
    dataBox.append([corrMat[:, i, 1] for i in range(len(labelLst))])
fig = figplot.boxPlot(dataBox, widths=0.5, figsize=(12, 4), yRange=[0, 1])
# Labelled alternative:
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
Esempio n. 12
0
y = dfR1['corr'].values
# Entries of x below -900 are replaced by NaN.
x[x < -900] = np.nan

# density plot
# Group x by the value of y in bins of width 0.1; empty bins are skipped and
# each kept bin is labelled with its lower edge.
# NOTE(review): vLst ends at 0.9, so y >= 0.9 appears to fall in no bin --
# confirm this is intended.
vLst = np.arange(0, 1, 0.1)
dataBox = list()
labLst = list()
for v1, v2 in zip(vLst[:-1], vLst[1:]):
    ind = np.where((y >= v1) & (y < v2))[0]
    if ind.size > 0:
        dataBox.append(x[ind])
        labLst.append('{:.2f}'.format(v1))
vRange = [np.nanmin(x), np.nanmax(x)]
fig = figplot.boxPlot(dataBox, label1=labLst, widths=0.3, figsize=(8, 4))
plt.subplots_adjust(wspace=0)
fig.show()

# cum plot
# Reverse the ranking returned by utils.rankData, then plot the running
# mean of x (taken in that order) against the ranked y, skipping the first
# 10 points.
yr, ind = utils.rankData(y)
yr, ind = yr[::-1], ind[::-1]
nUsed = np.arange(1, len(ind) + 1)
xr = np.cumsum(x[ind]) / nUsed
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(yr[10:], xr[10:], '-')
fig.show()

# 121
# Scatter of x against y with star markers.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, y, '*')
Esempio n. 13
0
# For every subset: test the model named after that subset on the subset
# itself and store Nash, RMSE and correlation of output 0.
for subset in subsetLst:
    testSet = subset
    outName = '{}-{}-B10-gs'.format(dataName, subset)
    yP, ycP = basinFull.testModel(
        outName, DM=dm, batchSize=20, testSet=testSet, reTest=False)
    yO, ycO = basinFull.getObs(outName, testSet, DM=dm)
    # Scores use the part from index indT onward along the first axis;
    # switch both slices to [:indT] to score the part before it instead.
    pred = yP[indT:, :, 0]
    obs = yO[indT:, :, 0]
    nash2 = utils.stat.calNash(pred, obs)
    rmse2 = utils.stat.calRmse(pred, obs)
    corr2 = utils.stat.calCorr(pred, obs)
    nashLst2.append(nash2)
    rmseLst2.append(rmse2)
    corrLst2.append(corr2)

# plot box
# Pair up, subset by subset, the entries of the two result lists.
# (use matLst = [nashLst1, nashLst2] to plot Nash instead)
matLst = [corrLst1, corrLst2]
label1 = subsetLst
label2 = ['CONUS', 'Local']
dataBox = list()
for k in range(len(subsetLst)):
    dataBox.append([matLst[0][k], matLst[1][k]])
fig = figplot.boxPlot(
    dataBox, label1=label1, label2=label2,
    widths=0.5, cLst='brgk', figsize=(6, 4), yRange=[0, 1])
fig.show()
Esempio n. 14
0
# Larger fonts, lines and markers for all following figures.
matplotlib.rcParams.update({
    'font.size': 18,
    'lines.linewidth': 2,
    'lines.markersize': 12,
})
# # plot box
# labLst1 = [usgs.codePdf.loc[code]['shortName'] +
#            '\n'+code for code in codeLst]
label2 = ['train', 'test']
label1 = ['correlation', 'RMSE']
ic = 0
# Two groups (correlation, RMSE); each holds columns 0 and 1 of the LSTM
# matrix followed by columns 0 and 1 of the WRTDS matrix.
# LSTM-only variant:
# dataBox = [[corrLSTM[:, 0], corrLSTM[:, 1],],
#            [rmseLSTM[:, 0], rmseLSTM[:, 1]]]
dataBox = [
    [mat[:, j] for mat in (corrLSTM, corrWRTDS) for j in (0, 1)],
    [mat[:, j] for mat in (rmseLSTM, rmseWRTDS) for j in (0, 1)],
]
fig = figplot.boxPlot(
    dataBox, label1=label1, label2=label2,
    widths=0.5, cLst='brgb', figsize=(8, 5), sharey=False)
fig.show()


# map
# Plot column 1 of corrLSTM at the gage coordinates of the sites listed for
# the current code.
figM, axM = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
siteNoLstCode = dictSite[code]
# rows of corrLSTM that belong to those sites
indS = list(map(siteNoLst.index, siteNoLstCode))
crdVarLst = ['LAT_GAGE', 'LNG_GAGE']
dfCrd = gageII.readData(varLst=crdVarLst, siteNoLst=siteNoLstCode)
lat, lon = dfCrd['LAT_GAGE'].values, dfCrd['LNG_GAGE'].values
shortName = usgs.codePdf.loc[code]['shortName']
matMap = corrLSTM[indS, 1]
axplot.mapPoint(axM, lat, lon, matMap, s=24)
Esempio n. 15
0
# Read the two WRTDS correlation tables (site numbers kept as strings and
# used as index) and copy them into columns 0 and 1 of corrWRTDS.
dfCorr1 = pd.read_csv(file1, dtype={'siteNo': str})
dfCorr1 = dfCorr1.set_index('siteNo')
file2 = os.path.join(dirWrtds, '{}-{}-corr'.format('B10N5', 'A10N5'))
dfCorr2 = pd.read_csv(file2, dtype={'siteNo': str})
dfCorr2 = dfCorr2.set_index('siteNo')
for iCode, code in enumerate(codeLst):
    # positions (within siteNoLst) of the sites listed for this code
    indS = [siteNoLst.index(siteNo) for siteNo in dictSite[code]]
    for iT, dfCorr in enumerate((dfCorr1, dfCorr2)):
        corrWRTDS[indS, iCode, iT] = dfCorr.iloc[indS][code].values

# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
dataBox = list()
for k, code in enumerate(codeLst):
    # one array per entry of labelLst, then column 1 of corrWRTDS last
    temp = [corrMat[:, k, i] for i in range(len(labelLst))]
    temp.append(corrWRTDS[:, k, 1])
    dataBox.append(temp)
boxOpt = dict(widths=0.5, cLst=cLst, figsize=(12, 4), yRange=[0, 1])
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2 + ['WRTDS'], **boxOpt)
# Alternative without a fixed y range:
# fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5,
#                       label2=labLst2, figsize=(12, 4), sharey=False)
fig.show()
Esempio n. 16
0
# One legend-free box plot per entry of nameLst / matLst / rangeLst, saved
# under saveFolder in the default format and as .eps.
for kk in range(3):
    name, mat, yRange = nameLst[kk], matLst[kk], rangeLst[kk]
    label1 = ecoIdLst
    label2 = ['Local', 'CONUS']
    # pair mat[0][k] with mat[1][k] for every subset
    dataBox = list()
    for k in range(len(subsetLst)):
        dataBox.append([mat[0][k], mat[1][k]])
    fig = figplot.boxPlot(
        dataBox, label1=label1, label2=None,
        widths=0.5, cLst='rb', figsize=(12, 4), yRange=yRange)
    saveFile = os.path.join(saveFolder, 'q_ref_ecoR_{}'.format(name))
    for ext in ('', '.eps'):
        fig.savefig(saveFile + ext)
    fig.show()
# Separate figure requested with legOnly=True, carrying the two labels that
# were left off the plots above.
legendOpt = dict(widths=0.5, cLst='rb', legOnly=True)
fig = figplot.boxPlot(
    dataBox, label1=label1, label2=['Local', 'CONUS'], **legendOpt)
saveFile = os.path.join(saveFolder, 'q_ecoR_legend')
fig.savefig(saveFile)
Esempio n. 17
0
# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
labLst2 = ['LSTM w/o Q', 'LSTM w/ Q', 'WRTDS']
dataBox = list()
for k, code in enumerate(codeLst):
    # columns 0..2 of corrMat's last axis, in the order of labLst2
    dataBox.append([corrMat[:, k, i] for i in (0, 1, 2)])
fig = figplot.boxPlot(
    dataBox, label1=labLst1, label2=labLst2,
    widths=0.5, cLst='bgr', figsize=(20, 5), yRange=[0, 1])
fig.show()

# plot box
labLst1 = [
    usgs.codePdf.loc[code]['shortName'] + '\n' + code for code in codeLst
]
labLst2 = ['LSTM w/o Q', 'LSTM w/ Q', 'WRTDS']
dataBox = list()
for k in range(len(codeLst)):
    code = codeLst[k]
    temp = list()
    for i in [0, 1, 2]:
Esempio n. 18
0
saveFolder = r'C:\Users\geofk\work\paper\SMAP-regional'

# Cases to plot, in plotting order. A hand-picked alternative was
# tempLst = ['080401', '080305', '080304', '090203', '080301', '050301']
tempLst = caseLst
rangeLst = [[0, 1], [0.4, 1], [0.0, 1]]
# Position of each plotted case inside caseLst (identical for every figure).
caseInd = [caseLst.index(x) for x in tempLst]
for kk in range(3):
    name = nameLst[kk]
    mat = [matLst[kk][i] for i in caseInd]
    yRange = rangeLst[kk]
    lab1 = [labLst[i] for i in caseInd]
    # only the first figure carries the legend labels
    label2 = ['lev II', 'lev I', 'CONUS'] if kk == 0 else None
    fig = figplot.boxPlot(
        mat, label1=lab1, label2=label2,
        widths=0.5, cLst='ygbr', figsize=(12, 4), yRange=yRange)
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    saveFile = os.path.join(saveFolder, 'q_sim_{}'.format(name))
    # fig.savefig(saveFile)
    fig.show()


# # another
# tempLst = ['090402', '090403']
# rangeLst = [[0, 1], [0.0, 1], [-0.4, 1]]
# for kk in range(3):
#     name = nameLst[kk]
#     mat = [matLst[kk][caseLst.index(x)] for x in tempLst]
#     yRange = rangeLst[kk]
#     lab1 = [labLst[caseLst.index(x)] for x in tempLst]
Esempio n. 19
0
testLst = ['rmYr5', 'pkR20', 'pkL20', 'pkRT20', 'A10']
# Table with one row per variable and one column per training subset.
df = pd.DataFrame(index=DF.varC, columns=trainLst)
# Extract DF.c for every (train, test) subset pair, keeping the two sides
# in parallel lists.
aLst, bLst = list(), list()
for trainSet, testSet in zip(trainLst, testLst):
    aLst.append(DF.extractSubset(DF.c, trainSet))
    bLst.append(DF.extractSubset(DF.c, testSet))

# For every variable and every train/test pair: count the non-NaN entries
# along the first axis, keep the columns with more than 160 in the first
# array and more than 40 in the second, and record how many were kept.
dataBox = list()
for code in DF.varC:
    indC = DF.varC.index(code)
    temp = list()
    for trainSet, a, b in zip(trainLst, aLst, bLst):
        x = ~np.isnan(a[:, :, indC])
        y = ~np.isnan(b[:, :, indC])
        n1 = x.sum(axis=0)
        n2 = y.sum(axis=0)
        keep = (n1 > 160) & (n2 > 40)
        indS = np.where(keep)[0]
        # NOTE(review): both the n2/n1 ratio and the raw n1 counts go into
        # the same box group -- confirm that mix is intended.
        temp.append(n2[indS] / n1[indS])
        df.at[code, trainSet] = len(indS)
        temp.append(n1[indS])
    dataBox.append(temp)
labLst1 = list()
for code in DF.varC:
    shortName = usgs.codePdf.loc[code]['shortName']
    labLst1.append('{}\n{}'.format(shortName, code))
fig, ax = figplot.boxPlot(dataBox, label1=labLst1, figsize=(6, 4))
fig.show()
Esempio n. 20
0
# Test on the test subset and compute the per-site error matrices; the Q
# error matrix is only computed when master['varY'] is set.
yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=master['varYC'])
if master['varY'] is not None:
    errMatQ2 = wqData.errBySiteQ(yP2, subset=testset, varQ=master['varY'])

# box
# For each of the two error columns (k) and each plotted variable, collect
# the pair of arrays from the "1" and "2" error matrices.
dataBox = list()
for k in range(2):
    for var in plotVar:
        if var == '00060':
            # '00060' is read from the Q error matrices (single column 0)
            errFirst, errSecond, col = errMatQ1, errMatQ2, 0
        else:
            ic = master['varYC'].index(var)
            errFirst, errSecond, col = errMatC1, errMatC2, ic
        temp = [errFirst[:, col, k], errSecond[:, col, k]]
        dataBox.append(temp)
fig = figplot.boxPlot(dataBox, sharey=False)
fig.show()

# seq test
# Run the sequence test for every distinct site listed in wqData.info.
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)

# time series map
# Gage coordinates of those sites plus the code lookup table.
crdVarLst = ['LAT_GAGE', 'LNG_GAGE']
dfCrd = gageII.readData(varLst=crdVarLst, siteNoLst=siteNoLst)
lat, lon = dfCrd['LAT_GAGE'].values, dfCrd['LNG_GAGE'].values
codePdf = usgs.codePdf


def funcMap():
    nM = len(plotVar)
Esempio n. 21
0
# Split the sites into five bins by percentile of one attribute column and
# compare columns 1 and 2 of corrMat for a single code within each bin.
cVar = 'STOR_NID_2009'
cMat = dfG[cVar].values
# cMat = np.log(cMat+1)
# cR = [np.nanpercentile(cMat, 10), np.nanpercentile(cMat, 90)]
cR = [np.nanmin(cMat), np.nanmax(cMat)]
code = '00618'
pcLst = np.arange(0, 101, 20)
nBox = len(pcLst) - 1
ic = codeLst.index(code)
labelLst = list()
dataBox = list()
for k in range(nBox):
    v1 = np.nanpercentile(cMat, pcLst[k])
    v2 = np.nanpercentile(cMat, pcLst[k + 1])
    labelLst.append('{:.2f}\n{:.2f}'.format(v1, v2))
    # the first bin is closed on the left so the minimum is not lost;
    # every later bin is left-open to avoid counting an edge twice
    aboveLow = (cMat >= v1) if k == 0 else (cMat > v1)
    ind = np.where(aboveLow & (cMat <= v2))[0]
    temp = [corrMat[ind, ic, 1], corrMat[ind, ic, 2]]
    dataBox.append(temp)
fig = figplot.boxPlot(
    dataBox, label1=labelLst, label2=['LSTM', 'WRTDS'],
    widths=0.5, figsize=(12, 4), yRange=[0, 1])
fig.show()
Esempio n. 22
0
# For every model output: test on the train and test subsets and keep the
# error slice of the selected code from each.
errMatLst1 = list()
errMatLst2 = list()
for outName in outLst:
    master = basins.loadMaster(outName)
    dataName = master['dataName']
    varYC = master['varYC']
    # wqData = waterQuality.DataModelWQ(dataName)
    # point test
    yP1, ycP1 = basins.testModel(outName, trainset, wqData=wqData)
    errMatC1 = wqData.errBySiteC(ycP1, subset=trainset, varC=varYC)
    yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
    errMatC2 = wqData.errBySiteC(ycP2, subset=testset, varC=varYC)
    # position of the selected code among this model's outputs
    ic = varYC.index(code)
    errMatLst1.append(errMatC1[:, ic, :])
    errMatLst2.append(errMatC2[:, ic, :])

# box
# One figure per error column: column 0 is titled 'RMSE', column 1
# 'Correlation'. Each figure has two groups, one per error list, with one
# array per model in each group.
for k in range(2):
    dataBox = [
        [errMat[:, k] for errMat in errMatLst]
        for errMatLst in [errMatLst1, errMatLst2]
    ]
    label1 = ['B2000', 'A2000']
    label2 = [
        'all C, Q in',
        'all C, Q out',
        'all C, Q in, messed',
        'all C, Q out, messed',
    ]
    fig = figplot.boxPlot(dataBox, label1=label1, label2=label2, sharey=True)
    fig.suptitle('RMSE' if k == 0 else 'Correlation')
    fig.show()
Esempio n. 23
0
        errLst.append(err)
    errLstAll.append(errLst)

# plot box
# One figure per error key; every entry of errLstAll becomes a group and
# every error record inside it contributes one array.
cLst = 'ygbr'
keyLst = ['RMSE', 'Corr']
for key in keyLst:
    dataBox = [[err[key] for err in errLst] for errLst in errLstAll]
    fig = figplot.boxPlot(
        dataBox, label1=caseLabLst, cLst=cLst,
        figsize=(12, 4), sharey=True)
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    fig.show()
    # saved twice: default format and .eps
    saveFile = os.path.join(saveFolder, 'sm_sim_{}'.format(key))
    for ext in ('', '.eps'):
        fig.savefig(saveFile + ext)
# Separate figure requested with legOnly=True for the four cases, saved in
# the default format and as .eps.
label2 = ['local', 'local+close', 'local+far', 'local+dissimilar']
fig = figplot.boxPlot(dataBox, label2=label2, cLst=cLst, legOnly=True)
saveFile = os.path.join(saveFolder, 'sm_sim_legend')
for ext in ('', '.eps'):
    fig.savefig(saveFile + ext)
fig.show()
Esempio n. 24
0
import matplotlib.pyplot as plt
import pandas as pd
import os
import json

# load WRTDS results
dirRoot1 = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS_weekly')
dirRoot2 = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS_weekly_rmq')

# Result tables of one code from both folders, indexed by site number
# (read as string).
code = '00955'
readOpt = dict(dtype={'siteNo': str})
dfRes1 = pd.read_csv(os.path.join(dirRoot1, 'result', code), **readOpt)
dfRes1 = dfRes1.set_index('siteNo')
dfRes2 = pd.read_csv(os.path.join(dirRoot2, 'result', code), **readOpt)
dfRes2 = dfRes2.set_index('siteNo')


dirInv = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInv, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
countMatW = np.load(os.path.join(dirInv, 'matCountWeekly.npy'))
codeLst = sorted(usgs.codeLst)
ic = codeLst.index(code)
# Sum the count matrix over the last ny entries of its second axis.
ny = 3
count = countMatW[:, -ny:, ic].sum(axis=1)
nsLst = np.arange(5, 20) * ny
# First box: rows whose own 'count' column exceeds 10; then one box per
# threshold in nsLst.
# NOTE(review): `count >= ns` is used as a row mask on dfRes1 -- presumably
# countMatW rows line up with dfRes1 rows; confirm.
dataBox = [dfRes1[dfRes1['count'] > 10]['corr'].values]
for ns in nsLst:
    dataBox.append(dfRes1[count >= ns]['corr'].values)
fig = figplot.boxPlot(dataBox, figsize=(12, 4), yRange=[0, 1])
fig.show()
Esempio n. 25
0
        ind = wqData.subset[testSet]
        info = wqData.info.iloc[ind].reset_index()
        ic = wqData.varC.index(code)
        if len(wqData.c.shape) == 3:
            p = yP[-1, :, master['varY'].index(code)]
            o = wqData.c[-1, ind, ic]
        elif len(wqData.c.shape) == 2:
            p = ycP[:, master['varYC'].index(code)]
            o = wqData.c[ind, ic]
        for siteNo in dictSite[code]:
            iS = siteNoLst.index(siteNo)
            indS = info[info['siteNo'] == siteNo].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iCode, k] = corr
            rmseMat[iS, iCode, k] = rmse

# plot box
# Tick labels: short name on the first line, parameter code on the second.
labLst1 = [usgs.codePdf.loc[code]['shortName'] +
           '\n'+code for code in codeLst]
dataBox = list()
for k in range(len(codeLst)):
    code = codeLst[k]
    temp = list()
    # one array per entry of dataLst, read along corrMat's last axis
    for i in range(len(dataLst)):
        temp.append(corrMat[:, k, i])
    dataBox.append(temp)
# Two separate legend labels to go with the two colours in cLst; the single
# fused string 'weekly,daily' supplied only one label.
# NOTE(review): assumes dataLst holds the weekly and daily datasets in that
# order -- confirm.
fig = figplot.boxPlot(dataBox, label1=labLst1, widths=0.5, cLst='rb',
                      label2=['weekly', 'daily'], figsize=(12, 4),
                      yRange=[0, 1])
fig.show()

Esempio n. 26
0
            [
                '00915', '00925', '00935', '00930', '00940', '00945', '00955',
                '00410', '00405', '00300', '00950', '00440'
            ]]
strLst = ['physical and nutrient variables', 'inorganics variables']
# One figure per code group; each code gets four arrays, one per error
# matrix, in the order of labLst2.
for k in range(2):
    codeLst = groupLst[k]
    indLst = [wqData.varC.index(code) for code in codeLst]
    labLst1 = [
        '\n'.join([codePdf.loc[code]['shortName'], code])
        for code in codeLst
    ]
    labLst2 = ['train LSTM', 'test LSTM', 'train WRTDS', 'test WRTDS']
    dataBox = list()
    for ic in indLst:
        # sites with more than 20 in both count columns; the same rows are
        # used for all four error matrices
        enough = (countMat[:, ic, 0] > 20) & (countMat[:, ic, 1] > 20)
        ind = np.where(enough)[0]
        temp = [
            errMat[ind, ic, 1]
            for errMat in (errMatC1, errMatC2, errMatC3, errMatC4)
        ]
        dataBox.append(temp)
    fig = figplot.boxPlot(
        dataBox, label1=labLst1, label2=labLst2,
        widths=0.4, figsize=(16, 4), yRange=[0, 1])
    title = 'correlation of {}'.format(strLst[k])
    fig.suptitle(title)
    fig.show()
    # fig.savefig(os.path.join(saveDir, 'box_group{}'.format(k)))
Esempio n. 27
0
        yOut[:, :, indC] = yP[:, :, indC] * s + m
    for indC, code in enumerate(codeLst):
        indS = [
            siteNoLst.index(siteNo) for siteNo in dictSite[code]
            if siteNo in siteNoLst
        ]
        corr = utils.stat.calCorr(yOut[:, indS, indC], d2.Y[:, indS, indC])
        corrMat[indS, indC, iEp] = corr

# plot
# Tick labels: short name on the first line, parameter code on the second.
labelLst = [
    '\n'.join([usgs.codePdf.loc[code]['shortName'], code])
    for code in codeLst
]
# For every code, one array per entry of epLst read along corrMat's last
# axis; all boxes are drawn in the same colour.
dataBox = list()
for ic, code in enumerate(codeLst):
    dataBox.append([corrMat[:, ic, iEp] for iEp in range(len(epLst))])
boxOpt = dict(widths=0.5, figsize=(12, 4), sharey=True, cLst='rrrrrrrrrr')
fig, axes = figplot.boxPlot(dataBox, label1=labelLst, **boxOpt)
# Optional zero reference line:
# for ax in axes:
#     ax.axhline(0)
fig.show()