def funcPoint(iP, axP):
    """Plot LSTM, linear-regression and observed series for one site.

    Args:
        iP: position of the site inside the module-level ``siteNoLst``.
        axP: indexable collection of axes; ``axP[0]`` receives the
            ``'00060'`` series and ``axP[1]`` the series of ``codeLst[0]``.

    Relies on module-level state (``outName``, ``infoTrain``, ``xL1``,
    ``yL1``, ``ycL1``, ``varX``, ``mtdX`` and the ``stat*``/``var*``
    settings) being defined before the call.
    """
    site = siteNoLst[iP]
    lstmDf, obsDf = basins.loadSeq(outName, site)
    dates = lstmDf['date'].values.astype(np.datetime64)
    splitDate = np.datetime64('2000-01-01')

    # linear model: fit on the training rows that belong to this site
    rows = infoTrain[infoTrain['siteNo'] == site].index
    [xFit, yFit, ycFit], _ = utils.rmNan(
        [xL1[rows, :], yL1[rows, :], ycL1[rows, :]])
    regY, regYC = (
        LinearRegression().fit(xFit, target) for target in (yFit, ycFit))

    # predict from the site forcing read over the fixed 1979-2020 window
    startDate = np.datetime64('1979-01-01')
    endDate = np.datetime64('2020-01-01')
    forcing = waterQuality.readSiteX(site, startDate, endDate, varX)
    xAll = transform.transInAll(forcing.values, mtdX, statLst=statX)
    lrY = wqData.transOut(regY.predict(xAll), statY, varY)
    lrYC = wqData.transOut(regYC.predict(xAll), statYC, varYC)

    firstCode = codeLst[0]
    panels = (
        (0, '00060', lrY, '---'),
        (1, firstCode, lrYC, '--*'),
    )
    for axIdx, column, lrSeries, lineStyles in panels:
        axplot.plotTS(
            axP[axIdx], dates,
            [lstmDf[column], lrSeries, obsDf[column]],
            tBar=splitDate, legLst=['lstm', 'lr', 'obs'],
            styLst=lineStyles, cLst='bgr')
def funcPoint(iP, axP):
    """Plot streamflow, observations of '00955' and their lagged correlation.

    For every lag ``k`` in ``range(nt)`` the rows of the site forcing dated
    ``k`` days before each observation are correlated, column by column,
    with the observed '00955' values.

    Args:
        iP: position of the site inside the module-level ``siteNoLst``.
        axP: indexable collection of at least three axes.

    Relies on module-level ``siteNoLst``, ``varX``, ``nt`` and ``nx``.
    """
    siteNo = siteNoLst[iP]
    dfY = waterQuality.readSiteY(siteNo, ['00955'])
    dfY = dfY.dropna()
    dfX = waterQuality.readSiteX(siteNo, varX)
    t = dfY.index
    y = dfY['00955'].values

    # Entries that cannot be computed stay NaN.
    corrMat = np.full([nt, nx], np.nan)
    for k in range(nt):
        x = dfX.loc[t.values - np.timedelta64(k, 'D')].values
        for i in range(nx):
            # Mask NaNs per column. A mask taken over the whole 2-D array
            # repeats each row index once per valid column and keeps rows
            # whose column i is NaN, which corrupts the correlation.
            valid = ~np.isnan(x[:, i])
            if valid.sum() >= 2:
                corrMat[k, i] = np.corrcoef(x[valid, i], y[valid])[0, 1]

    axP[0].plot(dfX['00060'], '-b', label='streamflow')
    axP[1].plot(dfY, '-*r', label='silica')
    # Only column 1 of the correlation matrix is shown.
    axP[2].plot(np.arange(nt), corrMat[:, 1].T, '-*')
    axP[2].set_ylabel('correlation')
    axP[2].set_xlabel('lag day')
import torch
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
from hydroDL.model import rnn, crit
import os

# Site under study; the alternative id is kept for quick switching.
siteNo = '01434025'
# siteNo = '01364959'
codeLst = ['00915', '00940', '00955']

# Input variables come from gridMET; the single target is '00060'.
varX = gridMET.varLst
varY = ['00060']
dfX, dfY = (
    waterQuality.readSiteX(siteNo, varX),
    waterQuality.readSiteY(siteNo, varY),
)

# Transform inputs and targets, keeping the returned statistics, and wrap
# the results back into frames that share the original index and columns.
mtdX = waterQuality.extractVarMtd(varX)
normX, statX = transform.transInAll(dfX.values, mtdX)
dfXN = pd.DataFrame(normX, index=dfX.index, columns=dfX.columns)
mtdY = waterQuality.extractVarMtd(varY)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfYN = pd.DataFrame(normY, index=dfY.index, columns=dfY.columns)

# Period 1 is everything before 2000-01-01, period 2 everything from then on.
matX1, matY1 = (
    df[df.index < np.datetime64('2000-01-01')].values for df in (dfXN, dfYN))
matX2, matY2 = (
    df[df.index >= np.datetime64('2000-01-01')].values for df in (dfXN, dfYN))
matX, matY = dfXN.values, dfYN.values
# if not waterQuality.exist(siteNo): # wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo]) wqData = waterQuality.DataModelWQ(siteNo, rmFlag=False) varX = wqData.varF varXC = wqData.varG varY = [wqData.varQ[0]] varYC = codeLst varTup = (varX, varXC, varY, varYC) dataTup, statTup = wqData.transIn(varTup=varTup) dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0]) sizeLst = trainTS.getSize(dataTup) [nx, nxc, ny, nyc, nt, ns] = sizeLst tabG = gageII.readData(varLst=varXC, siteNoLst=[siteNo]) tabG = gageII.updateCode(tabG) dfX = waterQuality.readSiteX(siteNo, varX, nFill=5) dfY = waterQuality.readSiteY(siteNo, varY) dfYC = waterQuality.readSiteY(siteNo, varYC) importlib.reload(rnn) model = rnn.AgeLSTM(nx=nx + nxc, ny=ny, nyc=nyc, nh=nh) optim = torch.optim.Adadelta(model.parameters()) lossFun = crit.RmseMix() if torch.cuda.is_available(): lossFun = lossFun.cuda() model = model.cuda() # train model.train() model.zero_grad() for k in range(500):
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Load, or train and cache, the predicted series of one site.

    When a prediction file for the site already exists in the model's save
    folder it is read back; otherwise the requested model is trained, its
    prediction is written to that file and the transform statistics are
    dumped next to it as ``<siteNo>_stat.json``.

    Args:
        siteNo: site identifier, also used as the prediction file name.
        varY: name of the target variable.
        model: ``'ARMA'`` or ``'LR'``.
        optX: ``'F'`` for ``gridMET.varLst`` only, ``'QF'`` to prepend
            ``'00060'`` to it.
        optT: training period, ``'Y8090'`` (before 2000-01-01) or
            ``'Y0010'`` (from 2000-01-01 on).
        order: ARMA order; only used when ``model == 'ARMA'``.

    Returns:
        DataFrame with the predicted values.

    Raises:
        ValueError: if ``model``, ``optX`` or ``optT`` is not recognised.
    """
    if model == 'ARMA':
        strOrder = '-'.join(str(k) for k in order)
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
    elif model == 'LR':
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
    else:
        raise ValueError('model {} invalid!'.format(model))
    saveFolder = os.path.join(
        kPath.dirWQ, 'modelStat', model, saveFolderName)
    predFile = os.path.join(saveFolder, siteNo)

    # makedirs also creates missing parent folders and, with exist_ok,
    # does not fail when another process created the folder first.
    os.makedirs(saveFolder, exist_ok=True)

    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        return utils.time.datePdf(dfP)

    if optX == 'F':
        varX = gridMET.varLst
    elif optX == 'QF':
        varX = ['00060'] + gridMET.varLst
    else:
        raise ValueError('optX {} invalid!'.format(optX))
    dfX = waterQuality.readSiteX(siteNo, varX)
    dfY = waterQuality.readSiteY(siteNo, [varY])

    # normalize
    mtdX = waterQuality.extractVarMtd(varX)
    normX, statX = transform.transInAll(dfX.values, mtdX)
    dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
    mtdY = waterQuality.extractVarMtd([varY])
    normY, statY = transform.transInAll(dfY.values, mtdY)
    dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

    # select the training period
    splitDate = np.datetime64('2000-01-01')
    if optT == 'Y8090':
        dfXT = dfXN[dfXN.index < splitDate]
        dfYT = dfYN[dfYN.index < splitDate]
    elif optT == 'Y0010':
        dfXT = dfXN[dfXN.index >= splitDate]
        dfYT = dfYN[dfYN.index >= splitDate]
    else:
        raise ValueError('optT {} invalid!'.format(optT))

    # train on the selected period, predict over the full record
    if model == 'ARMA':
        # The fitted result object is currently not persisted.
        dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
    else:  # model == 'LR', validated above
        dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
    yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
    dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

    # save result and stat
    dfP.reset_index().to_csv(predFile, index=False)
    statFile = os.path.join(saveFolder, siteNo + '_stat.json')
    with open(statFile, 'w') as fp:
        json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
# Variable lists, transform statistics and trained model for this run.
(varX, varXC, varY, varYC) = (
    master['varX'], master['varXC'], master['varY'], master['varYC'])
(statX, statXC, statY, statYC) = basins.loadStat(outName)
model = basins.loadModel(outName, ep=ep)
tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
tabG = gageII.updateCode(tabG)

# Only the first prediction site is processed here.
# for siteNo in sitePredLst:
siteNo = sitePredLst[0]
if 'DRAIN_SQKM' in varXC:
    area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
else:
    area = None

# test model
print('testing {} from {} to {}'.format(siteNo, sdS, edS))
dfX = waterQuality.readSiteX(
    siteNo, varX, sd=sd, ed=ed, area=area, nFill=5)
# Add a length-1 axis so the inputs carry a single-site dimension.
xA = np.expand_dims(dfX.values, axis=1)
# np.float was deprecated in NumPy 1.20 and removed in 1.24; it was an
# alias of the builtin float, so the resulting dtype is unchanged.
xcA = np.expand_dims(
    tabG.loc[siteNo].values.astype(float), axis=0)
mtdX = wqData.extractVarMtd(varX)
x = transform.transInAll(xA, mtdX, statLst=statX)
mtdXC = wqData.extractVarMtd(varXC)
xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
[x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
yOut = trainTS.testModel(model, x, xc)

# transfer out
nt = len(dfX)
ny = len(varY) if varY is not None else 0
nyc = len(varYC) if varYC is not None else 0
# Output table: the first ny columns hold varY, the remaining nyc columns
# are left as NaN at this point.
yP = np.full([nt, ny+nyc], np.nan)
# Index 0 on the second axis presumably selects the single site - confirm
# against trainTS.testModel.
yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)