def getObs(outName, testset, wqData=None):
    """Return the last two extracted arrays of a subset for a saved model.

    The master record of ``outName`` supplies the four variable groups
    (``varX``, ``varXC``, ``varY``, ``varYC``); these are passed to
    ``wqData.extractData`` together with ``testset``.

    Args:
        outName: name of the model output whose master record is loaded.
        testset: subset name forwarded to ``wqData.extractData``.
        wqData: optional data model; when ``None`` it is created from
            ``master['dataName']``.

    Returns:
        Tuple ``(yT, ycT)``: entries 2 and 3 of the extracted tuple, i.e. the
        positions that ``varY`` and ``varYC`` occupy in ``varTup``
        (presumably the observations of those groups -- confirm against
        ``extractData``).
    """
    cfg = loadMaster(outName)
    varGroups = (cfg['varX'], cfg['varXC'], cfg['varY'], cfg['varYC'])
    if wqData is None:
        wqData = waterQuality.DataModelWQ(cfg['dataName'])
    extracted = wqData.extractData(varTup=varGroups, subset=testset)
    # The tail of the tuple must hold exactly two items, as in the unpacking.
    yT, ycT = extracted[2:]
    return yT, ycT
def modelLinear(outName, testset, trainset=None, wqData=None):
    """Fit one linear regression per site and target, predict train and test.

    For every site present in the test subset and every variable in
    ``master['varYC']``, a ``LinearRegression`` is fitted on the training
    samples of that site and then used to predict both the training and the
    test samples of the same site.

    Args:
        outName: name of the model output; its master record and saved
            statistics are loaded.
        testset: name of the subset used for testing.
        trainset: name of the training subset; defaults to
            ``master['trainName']``.
        wqData: optional data model; created from ``master['dataName']``
            when ``None``.

    Returns:
        Tuple ``(matO1, matO2)``: predictions for the training and the test
        subset after ``wqData.transOut``. Entries stay NaN where no model
        could be fitted.
    """
    master = loadMaster(outName)
    dataName = master['dataName']
    if wqData is None:
        wqData = waterQuality.DataModelWQ(dataName)
    if trainset is None:
        trainset = master['trainName']
    infoTrain = wqData.info.iloc[wqData.subset[trainset]].reset_index()
    infoTest = wqData.info.iloc[wqData.subset[testset]].reset_index()

    # linear reg data
    statTup = loadStat(outName)
    varTup = (master['varX'], master['varXC'], master['varY'], master['varYC'])
    dataTup1 = wqData.transIn(subset=trainset, varTup=varTup, statTup=statTup)
    dataTup2 = wqData.transIn(subset=testset, varTup=varTup, statTup=statTup)
    dataTup1 = trainTS.dealNaN(dataTup1, master['optNaN'])
    dataTup2 = trainTS.dealNaN(dataTup2, master['optNaN'])
    varYC = varTup[3]
    statYC = statTup[3]
    # Only the last entry along the first axis of the x arrays is used as
    # regressor input.
    x1 = dataTup1[0][-1, :, :]
    yc1 = dataTup1[3]
    x2 = dataTup2[0][-1, :, :]

    # point test l2 - linear
    nc = len(varYC)
    matP1 = np.full([len(infoTrain), nc], np.nan)
    matP2 = np.full([len(infoTest), nc], np.nan)
    siteNoLst = infoTest['siteNo'].unique().tolist()
    for siteNo in siteNoLst:
        ind1 = infoTrain[infoTrain['siteNo'] == siteNo].index
        ind2 = infoTest[infoTest['siteNo'] == siteNo].index
        xT1 = x1[ind1, :]
        ycT1 = yc1[ind1, :]
        # The test inputs of a site do not depend on the target column.
        xT2 = x2[ind2, :] if len(ind2) > 0 else None
        for ic in range(nc):
            [xx, yy], iv = utils.rmNan([xT1, ycT1[:, ic]])
            # Fit only when rmNan reports a non-empty index.
            if len(iv) > 0:
                modelYC = LinearRegression().fit(xx, yy)
                matP1[ind1, ic] = modelYC.predict(xT1)
                if xT2 is not None:
                    # Fix: test predictions belong in matP2. They used to be
                    # written into matP1 at test-row positions, leaving matP2
                    # entirely NaN.
                    matP2[ind2, ic] = modelYC.predict(xT2)
    matO1 = wqData.transOut(matP1, statYC, varYC)
    matO2 = wqData.transOut(matP2, statYC, varYC)
    return matO1, matO2
def trainModelTS(outName):
    """Train the model described by the master record of ``outName``.

    Steps: load the training subset and its statistics (the statistics are
    stored through ``wrapStat``), build the loss and the network named in the
    master record, then train in chunks of ``saveEpoch`` epochs, saving the
    model after each chunk. The accumulated losses are written to
    ``loss.csv`` in the output folder.

    Args:
        outName: name of the model output folder / master record.

    Raises:
        RuntimeError: when the loss, model or optimizer name in the master
            record is not one of the supported values.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    rmFlag = False
    if 'rmFlag' in dictP:
        rmFlag = dictP['rmFlag']
    wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'],
                                      varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    nx, nxc, ny, nyc, nt, ns = trainTS.getSize(dataTup)

    # define loss
    if dictP['crit'] == 'RmseLoss':
        lossFun = crit.RmseLoss()
    elif dictP['crit'] == 'RmseLoss2D':
        lossFun = crit.RmseLoss2D()
    elif dictP['crit'] == 'SigmaLoss':
        lossFun = crit.SigmaLoss()
        # the sigma loss needs two output channels per target
        ny, nyc = ny * 2, nyc * 2
    else:
        raise RuntimeError('loss function not specified')

    # define model
    nIn = nx + nxc
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nIn, ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(nx=nIn, ny=ny + nyc,
                              hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'AgeLSTM':
        model = rnn.AgeLSTM2(nx=nIn, ny=ny, nyc=nyc, rho=365,
                             nh=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] != 'AdaDelta':
        raise RuntimeError('optimizor function not specified')
    optim = torch.optim.Adadelta(model.parameters())

    lossHistory = []
    totalEp = dictP['nEpoch']
    chunkEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start every run with a fresh log
    if os.path.exists(logFile):
        os.remove(logFile)
    for startEp in range(0, totalEp, chunkEp):
        model, optim, lossEp = trainTS.trainModel(
            dataTup, model, lossFun, optim,
            batchSize=dictP['batchSize'],
            nEp=chunkEp, cEp=startEp, logFile=logFile)
        # save model
        saveModel(outName, startEp + chunkEp, model, optim=optim)
        lossHistory = lossHistory + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossHistory).to_csv(lossFile, index=False, header=False)
from hydroDL import kPath, utils
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt
from hydroDL.model import rnn, crit, trainTS
import torch

# Scratch script: rebuild the training inputs of one saved case by hand.
# The case name is assembled as <dataName>-<code>-<label>-<trainSet>.
dataName = 'ssW'
wqData = waterQuality.DataModelWQ(dataName)
code = '00945'
label = 'plain'
trainSet = '{}-Y1'.format(code)
testSet = '{}-Y2'.format(code)
outName = '{}-{}-{}-{}'.format(dataName, code, label, trainSet)

outFolder = basins.nameFolder(outName)
dictP = basins.loadMaster(outName)

# load data
# rmFlag falls back to False when the master record has no such key.
rmFlag = dictP['rmFlag'] if 'rmFlag' in dictP else False
# wqData is rebuilt here from the master record, replacing the instance
# created above.
wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
# wrapStat(outName, statTup)
[nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hydroDL.model import rnn, crit, trainTS
import time

# Scratch script: load model inputs and raw series for a single site.
# NOTE(review): waterQuality and gageII are used below but are not imported
# in the lines visible here -- confirm they are imported elsewhere.
siteNo = '07060710'
codeLst = ['00660', '00600']
# codeLst = ['00915', '00955']
nh = 256
batchSize = [365, 50]
# if not waterQuality.exist(siteNo):
#     wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo, rmFlag=False)

# Variable groups: inputs come from the data model, targets are the first
# entry of varQ plus the codes listed above.
varX = wqData.varF
varXC = wqData.varG
varY = [wqData.varQ[0]]
varYC = codeLst
varTup = (varX, varXC, varY, varYC)
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
[nx, nxc, ny, nyc, nt, ns] = sizeLst

# Site attributes and per-site series read directly for the same variables.
tabG = gageII.readData(varLst=varXC, siteNoLst=[siteNo])
tabG = gageII.updateCode(tabG)
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dfY = waterQuality.readSiteY(siteNo, varY)
dfYC = waterQuality.readSiteY(siteNo, varYC)
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn
import numpy as np
import pandas as pd
import json
import os

# Scratch comparison of two data models, 'sbWT' and 'sbWTQ'.
d1 = waterQuality.DataModelWQ('sbWT')
d2 = waterQuality.DataModelWQ('sbWTQ')

# The statements below are bare expressions: their values are discarded when
# the file runs as a script (presumably they were inspected interactively).
d1.q.shape
d2.q.shape
np.sum(d1.c - d2.c)
len(d2.varC)
d2.c.shape
len(d2.varF)
d2.f.shape
# d1.f is compared without its last three entries along the third axis.
np.nansum(d1.f[:, :, :-3] - d2.f)
from hydroDL.master import basins from hydroDL.app import waterQuality from hydroDL import kPath from hydroDL.model import trainTS from hydroDL.data import gageII, usgs from hydroDL.post import axplot, figplot import torch import os import json import numpy as np import matplotlib.pyplot as plt wqData = waterQuality.DataModelWQ('HBN') figFolder = os.path.join(kPath.dirWQ, 'HBN', 'years') # compare of opt1-4 yrLst = ['80s', '90s', '00s', '10s'] for yr in yrLst: outLst = ['HBN-{}-rm-opt1'.format(yr), 'HBN-{}-rm-opt2'.format(yr)] trainSet = '{}-rm'.format(yr) testSet = yr # outLst = ['HBN-opt1', 'HBN-opt2', # 'HBN-opt3', 'HBN-opt4'] # trainSet = 'first80' # testSet = 'last20' pLst1, pLst2, errMatLst1, errMatLst2 = [list() for x in range(4)] for outName in outLst: p1, o1 = basins.testModel(outName, trainSet, wqData=wqData) p2, o2 = basins.testModel(outName, testSet, wqData=wqData) errMat1 = wqData.errBySite(p1, subset=trainSet)
from hydroDL import kPath
from hydroDL.model import trainTS
from hydroDL.data import gageII, usgs
from hydroDL.post import axplot, figplot
import torch
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): waterQuality and basins are used below but are not imported
# in the lines visible here -- confirm they are imported elsewhere.

# test
outName = 'Silica64-00955-Y8090-opt1'
wqData = waterQuality.DataModelWQ('Silica64')
code = '00955'
trainset = 'Y8090'
testset = 'Y0010'
master = basins.loadMaster(outName)

# seq test
# Run the sequence test for every site in the data model, then load the
# result of the first site.
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)
siteNo = siteNoLst[0]
dfPred, dfObs = basins.loadSeq(outName, siteNo)

# Scatter column '00955' against log of column '00060': predictions on the
# top axis, observations on the bottom axis.
fig, axes = plt.subplots(2, 1)
axes[0].plot(np.log(dfPred['00060']), dfPred['00955'], '*')
axes[1].plot(np.log(dfObs['00060']), dfObs['00955'], '*')
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run the saved model over the time series of each site and cache it.

    Predictions are written as one CSV per site (file name = site number) in
    ``<outDir>/seq-<sd>-<ed>-ep<ep>``. For models trained with ``SigmaLoss``
    a second file ``<siteNo>_sigma`` is written as well. Sites that already
    have a file in that folder are skipped unless ``retest`` is ``True``.

    Args:
        outName: name of the model output (master record, stats, weights).
        siteNoLst: list of site numbers; a single non-list value is wrapped
            into a one-element list.
        wqData: optional data model; created from ``master['dataName']``
            when needed and not given.
        ep: epoch of the saved model to load; defaults to
            ``master['nEpoch']``.
        returnOut: when true, read the CSV files back and return them.
        retest: when ``True``, recompute all sites even if cached.
        sd: start date of the sequence.
        ed: end date of the sequence.

    Returns:
        ``None`` unless ``returnOut`` is true; then a dict mapping each site
        number (and ``<siteNo>_sigma`` for sigma models) to the DataFrame
        read from its CSV file.
    """
    # run sequence test for all sites, default to be from first date to last date
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    if not os.path.exists(saveDir):
        os.mkdir(saveDir)
    # A set gives constant-time lookups when filtering the cached sites.
    siteSaveLst = set(os.listdir(saveDir))
    if retest is True:
        sitePredLst = siteNoLst
    else:
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaveLst
        ]
    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)
        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            freq = wqData.freq
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            # dfX = waterQuality.readSiteX(
            #     siteNo, varX, sd=sd, ed=ed, area=area, nFill=5)
            xA = np.expand_dims(dfX.values, axis=1)
            # Fix: np.float was removed in NumPy 1.24; the builtin float is
            # the same dtype.
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)
            # transfer out
            nt = len(dfX)
            ny = len(varY) if varY is not None else 0
            nyc = len(varYC) if varYC is not None else 0
            if doSigma:
                # Sigma models interleave two channels per target: even
                # positions are read as the prediction, odd positions go
                # through sqrt(exp(.)) (presumably a log-variance -- confirm).
                yP = np.full([nt, ny + nyc], np.nan)
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
            # save output
            t = dfX.index.values.astype('datetime64[D]')
            colY = [] if varY is None else varY
            colYC = [] if varYC is None else varYC
            dfOut = pd.DataFrame(data=yP, columns=[colY + colYC], index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP,
                                      columns=[colY + colYC],
                                      index=t)
                dfOutS.index.name = 'date'
                # Fix: reset the index of the sigma frame itself. The old
                # code reset dfOut, so the prediction table was saved under
                # the '_sigma' file name.
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)
    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            # print('loading {} from {} to {}'.format(siteNo, sdS, edS))
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
from hydroDL.master import basins
from hydroDL.app import waterQuality
from hydroDL.master import slurm
from hydroDL.data import gageII, usgs, gridMET

# wqData = waterQuality.DataModelWQ('basinAll')
# ind1 = wqData.indByRatio(0.8)
# ind2 = wqData.indByRatio(0.8, first=False)
# wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# divide into 8090 and 0010
# Two subsets are saved from indYr with year lists [1979, 2000] and
# [2000, 2020]; only the first element of each indYr result is used.
wqData = waterQuality.DataModelWQ('basinAll')
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
wqData.saveSubset('Y0010', indYr2)

# Write one master record per subset and remember the returned case names.
caseLst = list()
subsetLst = ['Y8090', 'Y0010']
for subset in subsetLst:
    saveName = 'basinAll-{}-opt1'.format(subset)
    caseName = basins.wrapMaster(dataName='basinAll',
                                 trainName=subset,
                                 saveEpoch=50,
                                 batchSize=[None, 2000],
                                 outName=saveName)
    caseLst.append(caseName)

# Submit one GPU job per case; the command template contains a hard-coded
# absolute path to cmdTrain.py.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=48, nM=64)
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hydroDL.model import rnn, crit, trainTS
import time

# Scratch script: load model inputs and raw series for a single site.
# NOTE(review): waterQuality and gageII are used below but are not imported
# in the lines visible here -- confirm they are imported elsewhere.
siteNo = '401733105392404'
codeLst = ['00915', '00940', '00955']
# codeLst = ['00915', '00955']
nh = 256
batchSize = [365, 50]
# Create the single-site data model first when it does not exist yet; it is
# then (re)loaded by name in either case.
if not waterQuality.exist(siteNo):
    wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo)

# Variable groups: inputs come from the data model, targets are the first
# entry of varQ plus the codes listed above.
varX = wqData.varF
varXC = wqData.varG
varY = [wqData.varQ[0]]
varYC = codeLst
varTup = (varX, varXC, varY, varYC)
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
[nx, nxc, ny, nyc, nt, ns] = sizeLst

# Site attributes and per-site series read directly for the same variables.
tabG = gageII.readData(varLst=varXC, siteNoLst=[siteNo])
tabG = gageII.updateCode(tabG)
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dfY = waterQuality.readSiteY(siteNo, varY)
dfYC = waterQuality.readSiteY(siteNo, varYC)
# NOTE(review): varQ, varP, dataName and the modules used below (gridMET,
# gageII, usgs, basins, waterQuality, importlib) are not defined in the lines
# visible here -- confirm they are set up earlier in the file.
varT = ['sinT', 'cosT']
varF = gridMET.varLst
varX = varQ + varF + varP
varXC = gageII.varLst
varY = varQ
varYC = usgs.newC

# Write a short (10 epoch) SigmaLoss test case, train it and test it on the
# training subset.
saveName = 'test'
trainSet = 'comb-A10'
outName = basins.wrapMaster(dataName=dataName,
                            trainName=trainSet,
                            batchSize=[None, 500],
                            outName=saveName,
                            varX=varX,
                            varY=varY,
                            varYC=varYC,
                            crit='SigmaLoss',
                            nEpoch=10,
                            saveEpoch=10)
wqData = waterQuality.DataModelWQ('test')
basins.trainModelTS(outName)

importlib.reload(basins)
# NOTE(review): if basins.testModel returns (yP, ycP, sP, scP) for sigma
# models, the names below are out of order (sp would receive ycP and ycp
# would receive sP) -- confirm against the testModel in use.
yp, sp, ycp, scp = basins.testModel(outName,
                                    trainSet,
                                    wqData=wqData,
                                    ep=10,
                                    reTest=True)
from hydroDL.app import waterQuality
from hydroDL.data import gageII
from hydroDL.master import basins
import pandas as pd
import numpy as np
import os
import time

# NOTE(review): kPath is used below but is not imported in the lines visible
# here -- confirm it is imported elsewhere.

# all gages
fileSiteNo = os.path.join(kPath.dirData, 'USGS', 'inventory', 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
# Keep only the HBN sites that also appear in the full site list.
dfHBN = pd.read_csv(os.path.join(kPath.dirData, 'USGS', 'inventory', 'HBN.csv'), dtype={
                    'siteNo': str}).set_index('siteNo')
siteNoHBN = [siteNo for siteNo in dfHBN.index.tolist()
             if siteNo in siteNoLstAll]

# wrap up data
caseName = 'HBN-30d'
if waterQuality.exist(caseName):
    wqData = waterQuality.DataModelWQ(caseName)
else:
    wqData = waterQuality.DataModelWQ.new(caseName, siteNoHBN, rho=30)
    # NOTE(review): the original indentation was lost; the subset creation
    # is placed in this branch (only for a freshly built data model) --
    # confirm it was not meant to run unconditionally.
    ind1 = wqData.indByRatio(0.8)
    ind2 = wqData.indByRatio(0.2, first=False)
    wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# NOTE(review): this call passes a data name, a subset and several keyword
# options; confirm it matches the signature of basins.trainModelTS in use.
basins.trainModelTS('HBN', 'first80', batchSize=[None, 500], saveName='HBN_opt1', optQ=1)

a, b = [200, None]
import importlib from hydroDL.master import basins from hydroDL.app import waterQuality from hydroDL import kPath from hydroDL.model import trainTS from hydroDL.data import gageII, usgs from hydroDL.post import axplot, figplot import torch import os import json import numpy as np import matplotlib.pyplot as plt wqData = waterQuality.DataModelWQ('loadRef', rmFlag=True) wqData.c = wqData.c / wqData.q[-1, :, 0:1] outName = 'loadRef-Yodd' trainSet = 'Yodd' testSet = 'Yeven' # outLst = ['basinRef-Yodd-opt1', 'basinRef-Yodd-opt2'] # trainSet = 'Yodd' # testSet = 'Yeven' master = basins.loadMaster(outName) yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData) yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData) ycP1 = ycP1 / yP1[-1, :, :] ycP2 = ycP2 / yP2[-1, :, :] errMatC1 = wqData.errBySiteC(ycP1,
from hydroDL.data import gageII, usgs, transform
from hydroDL.post import axplot, figplot
import torch
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# NOTE(review): waterQuality, basins and trainTS are used below but are not
# imported in the lines visible here -- confirm they are imported elsewhere.

# input
outName = 'Silica64-Y8090-opt1'
testset = 'Y0010'
wqData = waterQuality.DataModelWQ('Silica64')

master = basins.loadMaster(outName)
dataName = master['dataName']
# wqData was assigned just above, so this fallback only runs if that call
# returned None.
if wqData is None:
    wqData = waterQuality.DataModelWQ(dataName)
trainset = master['trainName']
# Sample info rows of the two subsets, re-indexed from 0.
infoTrain = wqData.info.iloc[wqData.subset[trainset]].reset_index()
infoTest = wqData.info.iloc[wqData.subset[testset]].reset_index()

# linear reg data
# Both subsets are transformed with the statistics saved for this model.
statTup = basins.loadStat(outName)
varTup = (master['varX'], master['varXC'], master['varY'], master['varYC'])
dataTup1 = wqData.transIn(subset=trainset, varTup=varTup, statTup=statTup)
dataTup2 = wqData.transIn(subset=testset, varTup=varTup, statTup=statTup)
dataTup1 = trainTS.dealNaN(dataTup1, master['optNaN'])
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Predict a subset with a saved model, caching the result on disk.

    The result is stored as ``testP-<testset>-Ep<ep>.npz`` in the output
    folder of ``outName``. When that file exists and ``reTest`` is ``False``
    it is loaded instead of running the model.

    Args:
        outName: name of the model output (master record, stats, weights).
        testset: name of the subset to predict.
        wqData: optional data model; created from ``master['dataName']``
            when the model has to be run and none is given.
        ep: epoch of the saved model; defaults to ``master['nEpoch']``.
        reTest: set to anything other than ``False`` to ignore the cache.

    Returns:
        ``(yP, ycP)`` for regular models, ``(yP, ycP, sP, scP)`` when the
        master record uses ``SigmaLoss``.
    """
    # load master
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testset, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        # Fix: np.load on an .npz keeps the archive open until closed; the
        # context manager releases the file handle once the arrays are read.
        # NOTE(review): allow_pickle=True unpickles file content; acceptable
        # only because the file is written by this function.
        with np.load(testFile, allow_pickle=True) as npz:
            yP = npz['yP']
            ycP = npz['ycP']
            if doSigma:
                sP = npz['sP']
                scP = npz['scP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        varTup = (master['varX'], master['varXC'], master['varY'],
                  master['varYC'])
        testDataLst = wqData.transIn(subset=testset,
                                     statTup=statTup,
                                     varTup=varTup)
        sizeLst = trainTS.getSize(testDataLst)
        # The NaN option [2, 2, 0, 0] is replaced by [0, 0, 0, 0] for
        # testing (only in this local copy of the master record).
        if master['optNaN'] == [2, 2, 0, 0]:
            master['optNaN'] = [0, 0, 0, 0]
        testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
        x = testDataLst[0]
        xc = testDataLst[1]
        ny = sizeLst[2]
        if not doSigma:
            # test model - point by point
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(yOut, statTup[2], master['varY'])
            ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP)
        else:
            print('sigma model')
            # Sigma models emit two interleaved channels per target: even
            # positions are read as the prediction, odd positions go through
            # sqrt(exp(.)) (presumably a log-variance -- confirm).
            ny = ny * 2
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(yOut[:, :, ::2], statTup[2], master['varY'])
            sP = wqData.transOut(np.sqrt(np.exp(yOut[:, :, 1::2])),
                                 statTup[2], master['varY'])
            ycP = wqData.transOut(ycOut[:, ::2], statTup[3], master['varYC'])
            scP = wqData.transOut(np.sqrt(np.exp(ycOut[:, 1::2])),
                                  statTup[3], master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP, sP=sP, scP=scP)
    if doSigma:
        return yP, ycP, sP, scP
    else:
        return yP, ycP
from hydroDL.master import basins from hydroDL.app import waterQuality from hydroDL import kPath from hydroDL.model import trainTS from hydroDL.data import gageII, usgs from hydroDL.post import axplot, figplot import torch import os import json import pandas as pd import numpy as np import matplotlib.pyplot as plt import time wqData = waterQuality.DataModelWQ('sbWT') siteNoLst = wqData.info['siteNo'].unique().tolist() # trainSetLst = ['Y1', 'Y2'] trainSet = 'Y1' dfCorrLst = [ pd.DataFrame(index=siteNoLst, columns=usgs.varC) for x in range(2) ] dfRmseLst = [ pd.DataFrame(index=siteNoLst, columns=usgs.varC) for x in range(2) ] t0 = time.time() for kk, siteNo in enumerate(siteNoLst): print('{}/{} {:.2f}'.format(kk, len(siteNoLst), time.time() - t0)) outFolder = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-F')
from hydroDL.post import axplot, figplot
from sklearn.linear_model import LinearRegression
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
import torch
import os
import json
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

# NOTE(review): waterQuality and kPath are used below but are not imported
# in the lines visible here -- confirm they are imported elsewhere.

# Fix: pd.datetime (an alias of datetime.datetime) was deprecated in pandas
# 1.0 and removed in 2.0. pd.Timestamp is a datetime subclass and accepts
# the same (year, month, day) arguments.
startDate = pd.Timestamp(1979, 1, 1)
endDate = pd.Timestamp(2020, 1, 1)
sn = 1
wqData = waterQuality.DataModelWQ('nbW')
siteNoLst = wqData.siteNoLst
t0 = time.time()

# addF selects which output folder is used ('WRTDS-F' when True, 'WRTDS'
# otherwise).
addF = False
# for addF in [True, False]:
if addF is True:
    saveFolder = os.path.join(
        kPath.dirWQ, 'modelStat', 'WRTDS-F', 'B16')
else:
    saveFolder = os.path.join(
        kPath.dirWQ, 'modelStat', 'WRTDS', 'B16')
if not os.path.exists(saveFolder):
    os.mkdir(saveFolder)

for kk, siteNo in enumerate(siteNoLst):
    # progress: index / total and seconds elapsed since t0
    print('{}/{} {:.2f}'.format(
        kk, len(siteNoLst), time.time()-t0))
from hydroDL.master import basins
from hydroDL.app import waterQuality
from hydroDL import kPath, utils
from hydroDL.model import trainTS
from hydroDL.data import gageII, usgs
from hydroDL.post import axplot, figplot
import torch
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

wqData = waterQuality.DataModelWQ('rbWN5')
siteNoLst = wqData.siteNoLst
trainSet = 'B10N5'
testSet = 'A10N5'

# Empty site-by-variable template (rows: site numbers, columns: usgs.newC).
df = pd.DataFrame(index=siteNoLst, columns=usgs.newC)
df.index.name = 'siteNo'

# Folders under <dirWQ>/modelStat/WRTDS-W/B10.
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
dirOut = os.path.join(dirWRTDS, 'output')
dirPar = os.path.join(dirWRTDS, 'params')

# Independent copies of the template, one per statistic and split
# (presumably 1 = train, 2 = test -- confirm where they are filled).
dfCorr1 = df.copy()
dfCorr2 = df.copy()
dfRmse1 = df.copy()
dfRmse2 = df.copy()
from hydroDL.master import basins from hydroDL.app import waterQuality from hydroDL import kPath, utils from hydroDL.model import trainTS from hydroDL.data import gageII, usgs from hydroDL.post import axplot, figplot import torch import os import json import numpy as np import matplotlib.pyplot as plt dataName = 'chloride' wqData = waterQuality.DataModelWQ(dataName) outLst = ['chloride-Yodd-ntn', 'chloride-Yodd'] # outLst = ['sulfateNE-Yodd-ntn-silica', 'sulfateNE-Yodd-silica'] trainSet = 'Yodd' testSet = 'Yeven' # outLst = ['sulfateNE-Yeven-ntn', 'sulfateNE-Yeven'] # trainSet = 'Yeven' # testSet = 'Yodd' errMatLst1, errMatLst2, ypLst1, ypLst2 = [list() for x in range(4)] for outName in outLst: master = basins.loadMaster(outName) yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData, ep=100, reTest=True) yP2, ycP2 = basins.testModel(outName,
from hydroDL.master import basins
from hydroDL.app import waterQuality
from hydroDL import kPath
from hydroDL.model import trainTS
from hydroDL.data import gageII, usgs
from hydroDL.post import axplot, figplot
import torch
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Compare two saved cases on the same train / test subsets.
wqData = waterQuality.DataModelWQ('basinRef')
outLst = ['basinRef-first50-opt1', 'basinRef-first50-opt2']
trainSet = 'first50'
testSet = 'last50'

# Four independent lists: predictions and per-site errors, for the training
# (1) and test (2) subset; one entry is appended per case in outLst.
pLst1, pLst2, errMatLst1, errMatLst2 = [list() for x in range(4)]
for outName in outLst:
    master = basins.loadMaster(outName)
    yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData)
    yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData)
    # Errors are computed from the second array returned by testModel,
    # using the varYC list of the case.
    errMatC1 = wqData.errBySiteC(ycP1, subset=trainSet, varC=master['varYC'])
    errMatC2 = wqData.errBySiteC(ycP2, subset=testSet, varC=master['varYC'])
    pLst1.append(ycP1)
    pLst2.append(ycP2)
    errMatLst1.append(errMatC1)
    errMatLst2.append(errMatC2)
import importlib from hydroDL.master import basins from hydroDL.app import waterQuality from hydroDL import kPath from hydroDL.model import trainTS from hydroDL.data import gageII, usgs from hydroDL.post import axplot, figplot import torch import os import json import pandas as pd import numpy as np import matplotlib.pyplot as plt wqData = waterQuality.DataModelWQ('basinRef', rmFlag=True) outName = 'basinRef-Yeven-opt2' trainSet = 'Yeven' testSet = 'Yodd' siteNoLst = wqData.info['siteNo'].unique().tolist() master = basins.loadMaster(outName) ep = 300 yP1, ycP1 = basins.testModel(outName, trainSet, wqData=wqData, ep=ep) yP2, ycP2 = basins.testModel(outName, testSet, wqData=wqData, ep=ep) errMatC1 = wqData.errBySiteC(ycP1, varC=master['varYC'], subset=trainSet, rmExt=True) errMatC2 = wqData.errBySiteC(ycP2,
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

# NOTE(review): waterQuality is used below but is not imported in the lines
# visible here -- confirm it is imported elsewhere.

# Steps to run are selected by adding their names to doLst; with every
# append commented out, neither of the blocks below executes.
doLst = list()
# doLst.append('data')
# doLst.append('subset')
# doLst.append('train')

if 'data' in doLst:
    # only look at the 5 sites with the most 00955 obs
    # ['11264500', '07083000', '01466500', '04063700', '10343500']
    dataName = 'HBN'
    codeLst = ['00618', '00955']
    wqData = waterQuality.DataModelWQ(dataName)
    icLst = [wqData.varC.index(code) for code in codeLst]
    # `~` is applied after `.all(axis=1)`, so this keeps rows where NOT all
    # selected columns are NaN, i.e. at least one of them has a value.
    # NOTE(review): confirm this matches what the name indAll intends.
    indAll = np.where(~np.isnan(wqData.c[:, icLst]).all(axis=1))[0]
    siteNoHBN = wqData.info['siteNo'].unique()
    info = wqData.info.iloc[indAll]
    tabCount = info.groupby('siteNo').count()
    # The five sites with the largest count in the 'date' column.
    siteNoLst = tabCount.nlargest(5, 'date').index.tolist()
    wqData = waterQuality.DataModelWQ.new('HBN5', siteNoLst)

if 'subset' in doLst:
    wqData = waterQuality.DataModelWQ('HBN5')
    codeLst = ['00618', '00955']
    icLst = [wqData.varC.index(code) for code in codeLst]
    # As above: rows where not all selected columns are NaN.
    indAll = np.where(~np.isnan(wqData.c[:, icLst]).all(axis=1))[0]
    # `~` is applied after `.any(axis=1)`: rows where none of the selected
    # columns is NaN.
    # NOTE(review): confirm this matches what the name indAny intends.
    indAny = np.where(~np.isnan(wqData.c[:, icLst]).any(axis=1))[0]
    # Subset name: sorted codes joined by '-' with the suffix 'all'.
    wqData.saveSubset('-'.join(sorted(codeLst) + ['all']), indAll)