Exemple #1
0
# Data model for the 'HBN' data set; figures for this script are grouped
# under <dirWQ>/HBN/years.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN', 'years')

# For each decade label, evaluate the opt1 and opt2 models on the
# '<decade>-rm' subset (trainSet) and on the '<decade>' subset (testSet).
yrLst = ['80s', '90s', '00s', '10s']
for yr in yrLst:
    outLst = [
        template.format(yr)
        for template in ('HBN-{}-rm-opt1', 'HBN-{}-rm-opt2')
    ]
    trainSet = '{}-rm'.format(yr)
    testSet = yr
    # outLst = ['HBN-opt1', 'HBN-opt2',
    #           'HBN-opt3', 'HBN-opt4']
    # trainSet = 'first80'
    # testSet = 'last20'

    # Suffix 1 -> results on trainSet, suffix 2 -> results on testSet;
    # one entry per model in outLst.
    pLst1, pLst2, errMatLst1, errMatLst2 = [], [], [], []
    for outName in outLst:
        p1, o1 = basins.testModel(outName, trainSet, wqData=wqData)
        p2, o2 = basins.testModel(outName, testSet, wqData=wqData)
        errMat1 = wqData.errBySite(p1, subset=trainSet)
        errMat2 = wqData.errBySite(p2, subset=testSet)
        errMatLst1.append(errMat1)
        errMatLst2.append(errMat2)
        pLst1.append(p1)
        pLst2.append(p2)

    # Walk the variable codes group by group, as listed in usgs.codePdf.
    codePdf = usgs.codePdf
    groupLst = codePdf['group'].unique().tolist()
    for group in groupLst:
        codeLst = codePdf.loc[codePdf['group'] == group].index.tolist()
        # position of every code of this group inside wqData.varC
        indLst = list(map(wqData.varC.index, codeLst))
        # two-line label: short name on top, code underneath
        labLst1 = [
            codePdf.loc[code]['shortName'] + '\n' + code
            for code in codeLst
        ]
Exemple #2
0
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 'basinRef' data model, built with rmFlag=True.
wqData = waterQuality.DataModelWQ('basinRef', rmFlag=True)

# Model under test and the two subsets it is evaluated on.
outName = 'basinRef-Yeven-opt2'
trainSet, testSet = 'Yeven', 'Yodd'
siteNoLst = wqData.info['siteNo'].unique().tolist()

# Evaluate the model at epoch `ep` on both subsets.
master = basins.loadMaster(outName)
ep = 300
yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData, ep=ep)
yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData, ep=ep)

# Per-site errors of the ycP outputs, for the variables listed in the
# model's master record under 'varYC'.
errMatC1 = wqData.errBySiteC(
    ycP1, varC=master['varYC'], subset=trainSet, rmExt=True)
errMatC2 = wqData.errBySiteC(
    ycP2, varC=master['varYC'], subset=testSet, rmExt=True)

# Correlation table stored under <dirWQ>/modelStat/WRTDS-F; the first
# column of the file is used as the index.
dirWrtds = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-F')
dfCorr1 = pd.read_csv(
    os.path.join(dirWrtds, '{}-{}-corr'.format(trainSet, trainSet)),
    index_col=0)
dfCorr2 = pd.read_csv(os.path.join(dirWrtds,
Exemple #3
0
siteNoLst = wqData.info.siteNo.unique()
nSite = len(siteNoLst)

# Only samples dated on or after this day are scored.  The value never
# changes, so it is built once here instead of once per code/subset/site.
sd = np.datetime64('1980-01-01')

# single
# Result matrices are indexed [site, code, slot] and start out as NaN.
# The loop below fills slot 0 (trainSet) and slot 1 (testSet); slots 2-3
# are not written here.
corrMat = np.full([nSite, len(codeLst), 4], np.nan)
rmseMat = np.full([nSite, len(codeLst), 4], np.nan)
for iCode, code in enumerate(codeLst):
    # one model per code, named after the data set, the code and trainSet
    trainSet = '{}-Y1'.format(code)
    testSet = '{}-Y2'.format(code)
    outName = '{}-{}-{}-{}'.format(dataName, code, 'ntnS', trainSet)
    master = basins.loadMaster(outName)
    ic = wqData.varC.index(code)
    for iT, subset in enumerate([trainSet, testSet]):
        yP, ycP = basins.testModel(outName,
                                   subset,
                                   wqData=wqData,
                                   ep=ep,
                                   reTest=reTest)
        ind = wqData.subset[subset]
        # reset_index() renumbers the rows 0..n-1, so the index values of
        # `info` can be used directly as positions into `o` and `p`.
        info = wqData.info.iloc[ind].reset_index()
        o = wqData.c[-1, ind, ic]
        # NOTE(review): assumes yP[-1, :, 1] lines up row-for-row with `ind`
        # and that column 1 is the prediction for `code` - confirm.
        p = yP[-1, :, 1]
        for iS, siteNo in enumerate(siteNoLst):
            infoS = info[info['siteNo'] == siteNo]
            indS = infoS[infoS['date'] >= sd].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iCode, iT] = corr
            rmseMat[iS, iCode, iT] = rmse

# plot box
Exemple #4
0
# Evaluate the two chloride models on the 'Yodd' (trainSet) and 'Yeven'
# (testSet) subsets of the 'chloride' data model.
dataName = 'chloride'
wqData = waterQuality.DataModelWQ(dataName)
outLst = ['chloride-Yodd-ntn', 'chloride-Yodd']
# outLst = ['sulfateNE-Yodd-ntn-silica', 'sulfateNE-Yodd-silica']
trainSet, testSet = 'Yodd', 'Yeven'
# outLst = ['sulfateNE-Yeven-ntn', 'sulfateNE-Yeven']
# trainSet = 'Yeven'
# testSet = 'Yodd'

errMatLst1, errMatLst2, ypLst1, ypLst2 = [], [], [], []
for outName in outLst:
    master = basins.loadMaster(outName)
    # epoch 100, with reTest=True, for both subsets
    yP1, ycP1 = basins.testModel(
        outName, trainSet, wqData=wqData, ep=100, reTest=True)
    yP2, ycP2 = basins.testModel(
        outName, testSet, wqData=wqData, ep=100, reTest=True)
    # keep the ycP outputs: list 1 for trainSet, list 2 for testSet
    ypLst1.append(ycP1)
    ypLst2.append(ycP2)

# Wherever the first model's output is NaN, blank the second model's
# output as well (in place), so both are NaN at the same positions there.
ypLst1[1][np.isnan(ypLst1[0])] = np.nan
ypLst2[1][np.isnan(ypLst2[0])] = np.nan
for k in range(2):
    errMatC1 = wqData.errBySiteC(ypLst1[k],
                                 subset=trainSet,
Exemple #5
0
import torch
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# 'HBN' data model, figure folder, and the codes that each have their own
# 'HBN-first50-<code>' model.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN')
codeLst = ['00955', '00915', '00405']
outLst = list(map('HBN-first50-{}'.format, codeLst))
trainSet, testSet = 'first50', 'last50'

# Evaluate the 'HBN-first50-opt1' model on both subsets and compute its
# per-site errors.
outName = 'HBN-first50-opt1'
p1, o1 = basins.testModel(outName, trainSet)
p2, o2 = basins.testModel(outName, testSet)
errMat1 = wqData.errBySite(p1, subset=trainSet)
errMat2 = wqData.errBySite(p2, subset=testSet)

# containers filled further down
dataBox1, dataBox2 = [], []
for code in codeLst:
    # Evaluate the model dedicated to this code on both subsets.  This
    # rebinds p1/o1/p2/o2; errMat2 still holds the value computed before
    # the loop.
    outName = 'HBN-first50-{}'.format(code)
    p1, o1 = basins.testModel(outName, trainSet)
    p2, o2 = basins.testModel(outName, testSet)
    # per-site errors restricted to this one code
    varC = [code]
    err1 = wqData.errBySite(p1, subset=trainSet, varC=varC)
    err2 = wqData.errBySite(p2, subset=testSet, varC=varC)
    # start this code's list with the matching column of errMat2
    # (position of the code in wqData.varC, last-axis index 1)
    ic = wqData.varC.index(code)
    temp = [errMat2[:, ic, 1]]
Exemple #6
0
# Models to compare, the variable code of interest, and the two subsets.
outLst = [
    'Silica64-Y8090-opt1',
    'Silica64-Y8090-opt2',
    'Silica64Mess-Y8090-opt1',
    'Silica64Mess-Y8090-opt2',
]
code = '00955'
trainset, testset = 'Y8090', 'Y0010'

# One [site, stat] slice per model: list 1 for trainset, list 2 for testset.
errMatLst1, errMatLst2 = [], []
for outName in outLst:
    master = basins.loadMaster(outName)
    dataName = master['dataName']
    # wqData = waterQuality.DataModelWQ(dataName)
    # point test
    yP1, ycP1 = basins.testModel(outName, trainset, wqData=wqData)
    errMatC1 = wqData.errBySiteC(
        ycP1, subset=trainset, varC=master['varYC'])
    yP2, ycP2 = basins.testModel(outName, testset, wqData=wqData)
    errMatC2 = wqData.errBySiteC(
        ycP2, subset=testset, varC=master['varYC'])
    # keep only the slice belonging to `code`
    ic = master['varYC'].index(code)
    errMatLst1.append(errMatC1[:, ic, :])
    errMatLst2.append(errMatC2[:, ic, :])

# box
# Build the box-plot data once per statistic: k selects the column of each
# per-model error slice collected in errMatLst1 / errMatLst2.
# NOTE(review): which statistic k=0 and k=1 stand for is decided by
# wqData.errBySiteC - confirm there.
for k in range(2):
    # dataBox[0] comes from errMatLst1, dataBox[1] from errMatLst2; each
    # holds one array per model.
    dataBox = list()
    for errMatLst in [errMatLst1, errMatLst2]:
        temp = [errMat[:, k] for errMat in errMatLst]
        dataBox.append(temp)
    # presumably one label per entry of dataBox - confirm against the plot call
    label1 = ['B2000', 'A2000']
    label2 = [
Exemple #7
0
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Two models, each scored against two versions of the 'basinRef' data
# model: the default one and one built with rmFlag=True.
outLst = ['basinRef-Y8090-opt1', 'basinRef-Y8090-rmF-opt1']
trainSet, testSet = 'Y8090', 'Y0010'
errMatLst1, errMatLst2 = [], []
wqData1 = waterQuality.DataModelWQ('basinRef')
wqData2 = waterQuality.DataModelWQ('basinRef', rmFlag=True)

for outName in outLst:
    master = basins.loadMaster(outName)
    yP1, ycP1 = basins.testModel(outName, trainSet)
    yP2, ycP2 = basins.testModel(outName, testSet)
    # Entries are appended model by model and, within a model, in the
    # order wqData1 then wqData2.
    for wqData in [wqData1, wqData2]:
        errMatC1 = wqData.errBySiteC(
            ycP1, subset=trainSet, varC=master['varYC'])
        errMatC2 = wqData.errBySiteC(
            ycP2, subset=testSet, varC=master['varYC'])
        errMatLst1.append(errMatC1)
        errMatLst2.append(errMatC2)

# Count the number of samples: one integer cell per (site, column of ycP1,
# slot), all starting at zero.
siteNoLst = wqData1.info['siteNo'].unique().tolist()
nc = ycP1.shape[1]
countMat1 = np.zeros([len(siteNoLst), nc, 2], dtype=int)
Exemple #8
0
    dfO1 = dfO[yr % 2 == 1]
    dfP2 = dfP[yr % 2 == 0]
    dfO2 = dfO[yr % 2 == 0]
    rmse1, corr1 = utils.stat.calErr(dfP1[code].values, dfO1[code].values)
    rmse2, corr2 = utils.stat.calErr(dfP2[code].values, dfO2[code].values)
    rmseMat[k, :] = [rmse1, rmse2]
    corrMat[k, :] = [corr1, corr2]

# Per-site results, one column per subset (0 = trainSet, 1 = testSet).
# Filled with NaN up front: np.ndarray(shape) only allocates memory, so any
# site the loop below skips (no samples) would otherwise keep arbitrary
# leftover values instead of a recognisable "missing" marker.
rmseMat2 = np.full([len(siteNoLst), 2], np.nan)
corrMat2 = np.full([len(siteNoLst), 2], np.nan)
# subset names derived from the variable code
trainSet = '{}-Y1'.format(code)
testSet = '{}-Y2'.format(code)
master = basins.loadMaster(outName)
# position of the code inside wqData.varC
ic = wqData.varC.index(code)
# Score the model on the training subset (iT = 0) and the test subset
# (iT = 1), one site at a time.
for iT, subset in enumerate([trainSet, testSet]):
    yP, ycP = basins.testModel(outName, subset, wqData=wqData)
    ind = wqData.subset[subset]
    # reset_index() renumbers the rows 0..n-1, so index values of `info`
    # can be used as positions into `o` and `p` below.
    info = wqData.info.iloc[ind].reset_index()
    # Observation `o` and prediction `p` are picked differently per data set.
    # NOTE(review): for any other dataName neither branch runs, leaving
    # `o`/`p` undefined (or stale from the previous pass) - confirm that
    # only 'sbWT' and 'sbW' can reach this point.
    if dataName == 'sbWT':
        o = wqData.c[-1, ind, ic]
        p = yP[-1, :, 1]
    elif dataName == 'sbW':
        o = wqData.c[ind, ic]
        p = ycP[:, 0]
    for iS, siteNo in enumerate(siteNoLst):
        indS = info[info['siteNo'] == siteNo].index.values
        # sites without any sample in this subset are skipped
        if len(indS) > 0:
            # presumably rmNan drops the pairs where either value is NaN
            # and returns the kept positions as indV - TODO confirm
            [a, b], indV = utils.rmNan([o[indS], p[indS]])
            corr = np.corrcoef(a, b)[0, 1]
            rmse = np.sqrt(np.nanmean((a - b)**2))
            corrMat2[iS, iT] = corr
Exemple #9
0
# Variable lists used to configure the model.
# varT is defined here but is not part of varX below.
varT = ['sinT', 'cosT']
varF = gridMET.varLst

# varX / varXC and varY / varYC are the variable lists of the run; of these,
# varX, varY and varYC are handed to wrapMaster below.
varX = varQ + varF + varP
varXC = gageII.varLst
varY = varQ
varYC = usgs.newC

# Register a run called 'test' on subset 'comb-A10'; wrapMaster returns the
# name that the train/test calls below take.
saveName = 'test'
trainSet = 'comb-A10'
outName = basins.wrapMaster(dataName=dataName,
                            trainName=trainSet,
                            batchSize=[None, 500],
                            outName=saveName,
                            varX=varX,
                            varY=varY,
                            varYC=varYC,
                            crit='SigmaLoss',
                            nEpoch=10,
                            saveEpoch=10)

# NOTE(review): the data model is loaded under the literal name 'test',
# not via the dataName variable passed to wrapMaster - confirm they match.
wqData = waterQuality.DataModelWQ('test')
basins.trainModelTS(outName)

# Re-import basins before testing - presumably so edits made to the module
# during an interactive session are picked up.
importlib.reload(basins)
# Test at epoch 10, the same number given as nEpoch and saveEpoch above.
# Four values are unpacked here; presumably the s* outputs are the sigma
# terms that go with crit='SigmaLoss' - TODO confirm.
yp, sp, ycp, scp = basins.testModel(outName,
                                    trainSet,
                                    wqData=wqData,
                                    ep=10,
                                    reTest=True)
Exemple #10
0
import torch
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# 'HBN' data model and the folder used for its figures.
wqData = waterQuality.DataModelWQ('HBN')
figFolder = os.path.join(kPath.dirWQ, 'HBN')

# Compare the opt1 and opt2 models on subsets 'Y8090' and 'Y0010'.
outLst = ['HBN-Y8090-opt1', 'HBN-Y8090-opt2']
trainSet, testSet = 'Y8090', 'Y0010'

# errMatLst ends up ordered: trainSet/model 1, testSet/model 1,
# trainSet/model 2, testSet/model 2.
errMatLst = []
for outName in outLst:
    yp1, ycp1 = basins.testModel(outName, trainSet, wqData=wqData)
    yp2, ycp2 = basins.testModel(outName, testSet, wqData=wqData)
    errMat1 = wqData.errBySiteC(ycp1, wqData.varC, subset=trainSet)
    errMat2 = wqData.errBySiteC(ycp2, wqData.varC, subset=testSet)
    errMatLst.extend([errMat1, errMat2])

# Walk the variable codes group by group, as listed in usgs.codePdf.
codePdf = usgs.codePdf
groupLst = codePdf['group'].unique().tolist()
for group in groupLst:
    codeLst = codePdf.loc[codePdf['group'] == group].index.tolist()
    # position of every code of this group inside wqData.varC
    indLst = list(map(wqData.varC.index, codeLst))
    # two-line label: short name on top, code underneath
    labLst1 = [
        codePdf.loc[code]['shortName'] + '\n' + code
        for code in codeLst
    ]
    labLst2 = ['train opt1', 'test opt1', 'train opt2', 'test opt2']
    dataBox = []
Exemple #11
0
# 'HBN' data model shared by every call below.
wqData = waterQuality.DataModelWQ('HBN')

doLst = ['subset']

dataName = 'HBN'
# outLst = ['HBN-00618-00955-all-Y8090-opt2', 'HBN-00618-00955-all-Y8090-opt4']
# testset = '00618-00955-all-Y0010'
outLst = ['HBN-Y8090-opt1', 'HBN-Y8090-opt4']
testset = 'Y0010'
siteNoLst = wqData.info['siteNo'].unique().tolist()

# For each model: run the sequence test over all sites, then the subset
# test on `testset`, and keep the per-site error for variable '00060'.
errMatLst = []
for out in outLst:
    basins.testModelSeq(out, siteNoLst, wqData=wqData)
    yP2, ycP2 = basins.testModel(out, testset, wqData=wqData)
    errMat = wqData.errBySiteQ(yP2, ['00060'], subset=testset)
    errMatLst.append(errMat)

# # calculate error - adhoc
# siteNo = siteNoLst[0]

# tB = np.datetime64('2000-01-01')
# dfPred1, dfObs1 = basins.loadSeq(outLst[0], siteNo)
# a1 = dfPred1['00060']
# dfPred2, dfObs2 = basins.loadSeq(outLst[1], siteNo)
# b = dfPred2['00060']
# obs = dfObs1['00060']


# first model, first variable, last-axis index 1, for every site
a = errMatLst[0][:, 0, 1]