def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run a trained model over a continuous date range, one site at a time.

    For every site that still needs a prediction, the input time series
    between ``sd`` and ``ed`` is read, transformed with the statistics
    stored alongside the model, passed through the model, transformed back
    and written to ``<outDir>/seq-<sd>-<ed>-ep<ep>/<siteNo>`` as CSV.  When
    the master file has ``crit == 'SigmaLoss'`` a second CSV named
    ``<siteNo>_sigma`` is written next to it.

    Args:
        outName: name of the trained model output; used to locate the
            master file, statistics, model weights and output folder.
        siteNoLst: a list of site numbers; any non-list value is wrapped
            into a one-element list.
        wqData: data model providing ``freq`` and ``transOut``.  Loaded
            from ``master['dataName']`` when None and a prediction is
            actually required.
        ep: epoch of the model to load; defaults to ``master['nEpoch']``.
        returnOut: when True, read the saved CSV files back and return
            them.
        retest: when exactly ``True``, re-predict every site even if a
            file already exists in the output folder.
        sd: first date of the sequence.
        ed: last date of the sequence.

    Returns:
        When ``returnOut`` is true, a dict mapping ``siteNo`` to the
        DataFrame read from its CSV (plus ``siteNo + '_sigma'`` entries
        for SigmaLoss models); otherwise None.

    NOTE: earlier revisions wrote the prediction table into the
    ``_sigma`` file by mistake.  Cached sigma files produced by such a
    revision are not refreshed automatically; call with ``retest=True``
    to regenerate them.
    """
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']

    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(saveDir, exist_ok=True)

    if retest is True:
        sitePredLst = siteNoLst
    else:
        # A file named exactly after the site marks it as already predicted.
        savedSet = set(os.listdir(saveDir))
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in savedSet
        ]

    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)

        # Loop-invariant pieces: transform methods, output sizes, columns.
        mtdX = waterQuality.extractVarMtd(varX)
        mtdXC = waterQuality.extractVarMtd(varXC)
        ny = len(varY) if varY is not None else 0
        nyc = len(varYC) if varYC is not None else 0
        colY = [] if varY is None else varY
        colYC = [] if varYC is None else varYC
        # Flat list of names; wrapping it in another list would create a
        # single-level MultiIndex for the columns.
        colLst = colY + colYC
        freq = wqData.freq

        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None

            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            # Insert a length-1 axis so the site acts as a single sample:
            # time series -> [nt, 1, nx], constants -> [1, nxc].
            xA = np.expand_dims(dfX.values, axis=1)
            # Builtin float replaces the np.float alias removed in
            # NumPy 1.24.
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)

            # transfer out
            nt = len(dfX)
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # The last axis is read in interleaved pairs: even
                # positions are used as predictions, odd positions are
                # turned into sigma via sqrt(exp(.)) -- presumably a
                # log-variance; confirm against the loss definition.
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC,
                    varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)

            # save output
            t = dfX.index.values.astype('datetime64[D]')
            dfOut = pd.DataFrame(data=yP, columns=colLst, index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=colLst, index=t)
                dfOutS.index.name = 'date'
                # Reset the sigma frame itself (not dfOut) so the _sigma
                # file really contains sigma values.
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)

    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch.nn as nn

from hydroDL.model import rnn, crit

# Single-site experiment: read forcing (X) and target (Y) series for one
# site, transform both, and split them at 2000-01-01.
siteNo = '01434025'
# siteNo = '01364959'
codeLst = ['00915', '00940', '00955']
varX = gridMET.varLst
varY = ['00060']

# Raw series for the chosen site.
dfX = waterQuality.readSiteX(siteNo, varX)
dfY = waterQuality.readSiteY(siteNo, varY)

# Transform inputs; keep the original index/columns on the result.
mtdX = waterQuality.extractVarMtd(varX)
normX, statX = transform.transInAll(dfX.values, mtdX)
dfXN = pd.DataFrame(normX, index=dfX.index, columns=dfX.columns)

# Same treatment for the target.
mtdY = waterQuality.extractVarMtd(varY)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfYN = pd.DataFrame(normY, index=dfY.index, columns=dfY.columns)

# Period 1: rows strictly before the split date; period 2: on or after it.
splitDate = np.datetime64('2000-01-01')
matX1 = dfXN.loc[dfXN.index < splitDate].values
matY1 = dfYN.loc[dfYN.index < splitDate].values
matX2 = dfXN.loc[dfXN.index >= splitDate].values
matY2 = dfYN.loc[dfYN.index >= splitDate].values

# Full-period matrices and variable counts.
matX = dfXN.values
matY = dfYN.values
nx = len(varX)
ny = len(varY)
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Return the predicted series of a per-site ARMA or LR model.

    Predictions are cached as a CSV named ``siteNo`` inside
    ``<kPath.dirWQ>/modelStat/<model>/<folder>``, where ``<folder>`` encodes
    ``optX``, ``optT``, ``varY`` and, for ARMA, ``order``.  If the file
    exists it is read and returned; otherwise the model is trained, the
    prediction is saved together with ``<siteNo>_stat.json`` (the
    transform statistics of X and Y) and then returned.

    Args:
        siteNo: site number; also the cache file name.
        varY: a single target variable (wrapped into a list internally).
        model: ``'ARMA'`` or ``'LR'``.
        optX: ``'F'`` uses ``gridMET.varLst`` as inputs; ``'QF'`` prepends
            ``'00060'`` to it.
        optT: training period; ``'Y8090'`` keeps rows before 2000-01-01,
            ``'Y0010'`` keeps rows on or after that date.
        order: passed to ``trainARMA`` and encoded in the folder name;
            not used for ``'LR'``.

    Returns:
        DataFrame of predictions.

    Raises:
        ValueError: if ``model`` is invalid, or -- only when no cached
            file exists -- if ``optX`` or ``optT`` is invalid.
    """
    if model == 'ARMA':
        strOrder = '-'.join(str(k) for k in order)
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
    elif model == 'LR':
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
    else:
        raise ValueError('model {} invalid!'.format(model))
    # model is exactly 'ARMA' or 'LR' here, matching the sub-folder names.
    saveFolder = os.path.join(kPath.dirWQ, 'modelStat', model,
                              saveFolderName)
    predFile = os.path.join(saveFolder, siteNo)
    # makedirs also creates a missing modelStat/<model> parent and does not
    # fail if another process created the folder in the meantime.
    os.makedirs(saveFolder, exist_ok=True)

    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        return utils.time.datePdf(dfP)

    if optX == 'F':
        varX = gridMET.varLst
    elif optX == 'QF':
        varX = ['00060'] + gridMET.varLst
    else:
        raise ValueError('optX {} invalid!'.format(optX))
    dfX = waterQuality.readSiteX(siteNo, varX)
    dfY = waterQuality.readSiteY(siteNo, [varY])

    # normalize
    mtdX = waterQuality.extractVarMtd(varX)
    normX, statX = transform.transInAll(dfX.values, mtdX)
    dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
    mtdY = waterQuality.extractVarMtd([varY])
    normY, statY = transform.transInAll(dfY.values, mtdY)
    dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

    # Select the training rows; prediction below runs on the full series.
    splitDate = np.datetime64('2000-01-01')
    if optT == 'Y8090':
        dfXT = dfXN[dfXN.index < splitDate]
        dfYT = dfYN[dfYN.index < splitDate]
    elif optT == 'Y0010':
        dfXT = dfXN[dfXN.index >= splitDate]
        dfYT = dfYN[dfYN.index >= splitDate]
    else:
        raise ValueError('optT {} invalid!'.format(optT))

    # train and test; model was validated above, so anything not ARMA is LR
    if model == 'ARMA':
        # The second return value (fit result) is not persisted.
        dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
    else:
        dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
    yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
    dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

    # save result and stat
    dfP.reset_index().to_csv(predFile, index=False)
    statFile = os.path.join(saveFolder, siteNo + '_stat.json')
    with open(statFile, 'w') as fp:
        json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt

# Compare, site by site, the sorted raw and transformed values of one
# water-quality code.
codeLst = sorted(usgs.newC)

# dataName = 'nbWT'
dataName = 'nbW'
wqData = waterQuality.DataModelWQ(dataName)
siteNoLst = wqData.info.siteNo.unique()

# Pick the columns of wqData.c that correspond to usgs.newC, then
# transform them.
codeLst = usgs.newC
icLst = [wqData.varC.index(c) for c in codeLst]
data = wqData.c[:, np.array(icLst)]
mtdLst = waterQuality.extractVarMtd(codeLst)
dataNorm, stat = transform.transInAll(data, mtdLst)
info = wqData.info

code = '00660'
ic = codeLst.index(code)
fig, axes = plt.subplots(2, 1, figsize=(6, 8))
for siteNo in siteNoLst:
    # Row labels of the samples that belong to this site.
    indS = info.index[info['siteNo'] == siteNo].values
    yr = utils.sortData(data[indS, ic])
    yn = utils.sortData(dataNorm[indS, ic])
    # Position within the sorted sample, scaled to [0, 1).
    x = np.arange(len(yr)) / len(yr)
    # Top panel: raw values; bottom panel: transformed values.
    _ = axes[0].plot(x, yr, 'k-', alpha=0.2)
    _ = axes[1].plot(x, yn, 'k-', alpha=0.2)
shortName = usgs.codePdf.loc[code]['shortName']
axes[1].set_ylim([-0.2, 1.2])
matR[kk, :, :] = dfP.values - dfC.values matC[kk, :, :] = dfC.values codeLst2 = [ '00095', '00400', '00405', '00600', '00605', '00618', '00660', '00665', '00681', '00915', '00925', '00930', '00935', '00940', '00945', '00950', '00955', '70303', '71846', '80154' ] # plot hist importlib.reload(axplot) importlib.reload(transform) importlib.reload(usgs) varRLst = [code + '-R' for code in usgs.newC] mtdLst = waterQuality.extractVarMtd(varRLst) matRN, stat = transform.transInAll(matR, mtdLst) matRN2 = transform.transOutAll(matRN, mtdLst, stat) fig, axes = plt.subplots(5, 4) ticks = [-0.5, 0, 0.5, 1] for k, code in enumerate(codeLst2): j, i = utils.index2d(k, 5, 4) ax = axes[j, i] siteNoCode = dictSite[code] indS = [siteNoLst.index(siteNo) for siteNo in siteNoCode] ic = usgs.newC.index(code) data = matRN2[indS, :, ic] x1 = utils.flatData(data) x2 = utils.rmExt(x1, p=5)