Example #1
0
def funcPoint(iP, axP):
    """Plot LSTM, linear-regression and observed series for one site.

    iP indexes into the module-level ``siteNoLst``; axP is a sequence of
    at least two axes. The first axis receives the '00060' series, the
    second the series of the first entry of ``codeLst``.
    """
    site = siteNoLst[iP]
    dfPred, dfObs = basins.loadSeq(outName, site)
    dates = dfPred['date'].values.astype(np.datetime64)

    # Fit two per-site linear baselines on the rows of infoTrain that
    # belong to this site, after utils.rmNan has filtered the arrays.
    rows = infoTrain[infoTrain['siteNo'] == site].index
    [xFit, yFit, ycFit], _ = utils.rmNan(
        [xL1[rows, :], yL1[rows, :], ycL1[rows, :]])
    regY = LinearRegression().fit(xFit, yFit)
    regYC = LinearRegression().fit(xFit, ycFit)

    # Apply both regressions to the forcing read for 1979-01-01..2020-01-01,
    # normalized with the stored statistics, then map back with transOut.
    forcing = waterQuality.readSiteX(
        site, np.datetime64('1979-01-01'), np.datetime64('2020-01-01'), varX)
    xAll = transform.transInAll(forcing.values, mtdX, statLst=statX)
    lrY = wqData.transOut(regY.predict(xAll), statY, varY)
    lrYC = wqData.transOut(regYC.predict(xAll), statYC, varYC)

    # One panel per target: (axis, column name, LR series, line styles).
    panels = [
        (axP[0], '00060', lrY, '---'),
        (axP[1], codeLst[0], lrYC, '--*'),
    ]
    for ax, col, lrSeries, styles in panels:
        axplot.plotTS(ax,
                      dates, [dfPred[col], lrSeries, dfObs[col]],
                      tBar=np.datetime64('2000-01-01'),
                      legLst=['lstm', 'lr', 'obs'],
                      styLst=styles,
                      cLst='bgr')
Example #2
0
def funcPoint(iP, axP):
    """Plot streamflow, observed '00955' and their lagged correlation.

    iP indexes into the module-level ``siteNoLst``; axP is a sequence of
    at least three axes. For every lag k in ``range(nt)`` (days) and every
    column i of the forcing frame, the Pearson correlation between the
    forcing shifted back by k days and the observed '00955' values is
    stored in ``corrMat[k, i]``. Column 1 of that matrix is drawn on the
    third axis.

    Entries with fewer than two valid (non-NaN) forcing samples are left
    as NaN.
    """
    siteNo = siteNoLst[iP]
    dfY = waterQuality.readSiteY(siteNo, ['00955'])
    dfY = dfY.dropna()
    dfX = waterQuality.readSiteX(siteNo, varX)
    t = dfY.index
    y = dfY['00955'].values
    corrMat = np.full([nt, nx], np.nan)
    for k in range(nt):
        # forcing rows on the observation dates shifted back by k days
        x = dfX.loc[t.values - np.timedelta64(k, 'D')].values
        for i in range(nx):
            # Mask per column: a NaN in another column must not drop this
            # sample, and a NaN in this column must not be kept.
            valid = ~np.isnan(x[:, i])
            if valid.sum() > 1:
                corrMat[k, i] = np.corrcoef(x[valid, i], y[valid])[0, 1]
    axP[0].plot(dfX['00060'], '-b', label='streamflow')
    axP[1].plot(dfY, '-*r', label='silica')
    axP[2].plot(np.arange(nt), corrMat[:, 1], '-*')
    axP[2].set_ylabel('correlation')
    axP[2].set_xlabel('lag day')
Example #3
0
import torch
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
from hydroDL.model import rnn, crit
import os

siteNo = '01434025'
# alternative site: '01364959'
codeLst = ['00915', '00940', '00955']

# Inputs: read the gridMET variables for the site and normalize them.
varX = gridMET.varLst
dfX = waterQuality.readSiteX(siteNo, varX)
mtdX = waterQuality.extractVarMtd(varX)
normX, statX = transform.transInAll(dfX.values, mtdX)
dfXN = pd.DataFrame(normX, index=dfX.index, columns=dfX.columns)

# Target: read '00060' for the site and normalize it the same way.
varY = ['00060']
dfY = waterQuality.readSiteY(siteNo, varY)
mtdY = waterQuality.extractVarMtd(varY)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfYN = pd.DataFrame(normY, index=dfY.index, columns=dfY.columns)

# Split the normalized frames at 2000-01-01: suffix 1 holds rows strictly
# before that date, suffix 2 holds rows on or after it.
splitDate = np.datetime64('2000-01-01')
matX1 = dfXN.loc[dfXN.index < splitDate].values
matY1 = dfYN.loc[dfYN.index < splitDate].values
matX2 = dfXN.loc[dfXN.index >= splitDate].values
matY2 = dfYN.loc[dfYN.index >= splitDate].values

# Full-record arrays.
matX, matY = dfXN.values, dfYN.values
Example #4
0
# The dataset for this site is opened directly. A disabled earlier step
# created it when missing:
#     if not waterQuality.exist(siteNo):
#         wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo, rmFlag=False)

# Variable groups: varF and varG as inputs, first entry of varQ plus the
# codes in codeLst as targets.
varTup = (wqData.varF, wqData.varG, [wqData.varQ[0]], codeLst)
varX, varXC, varY, varYC = varTup
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
nx, nxc, ny, nyc, nt, ns = sizeLst

# Per-site tables read straight from the source readers.
tabG = gageII.updateCode(
    gageII.readData(varLst=varXC, siteNoLst=[siteNo]))
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dfY = waterQuality.readSiteY(siteNo, varY)
dfYC = waterQuality.readSiteY(siteNo, varYC)

# Reload rnn before building the model so the current source of the
# module is the one that gets used.
importlib.reload(rnn)
model = rnn.AgeLSTM(nx=nx + nxc, ny=ny, nyc=nyc, nh=nh)
optim = torch.optim.Adadelta(model.parameters())
lossFun = crit.RmseMix()
if torch.cuda.is_available():
    # move both the loss module and the network to the GPU
    lossFun, model = lossFun.cuda(), model.cuda()

# train
model.train()
model.zero_grad()
for k in range(500):
Example #5
0
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Return the predicted series of one variable at one site, cached on disk.

    The prediction is stored as a CSV file named after the site inside
    ``<kPath.dirWQ>/modelStat/<model>/<optX>-<optT>-<varY>[-<order>]``.
    If that file exists it is read back; otherwise the model is fitted,
    the prediction is written together with a ``<siteNo>_stat.json`` file
    holding the normalization statistics, and the prediction is returned.

    Args:
        siteNo: site identifier, also used as the cache file name.
        varY: name of the single target variable.
        model: 'ARMA' or 'LR'.
        optX: 'F' uses ``gridMET.varLst`` as inputs; 'QF' prepends '00060'.
        optT: 'Y8090' fits on rows before 2000-01-01; 'Y0010' fits on rows
            on or after that date.
        order: sequence of integers passed to ``trainARMA`` and appended to
            the cache folder name; only used when model is 'ARMA'.

    Returns:
        DataFrame with the prediction for the whole record.

    Raises:
        ValueError: if model, optX or optT is not one of the values above.
    """
    if model == 'ARMA':
        strOrder = '-'.join(str(k) for k in order)
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
    elif model == 'LR':
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
    else:
        raise ValueError('model {} invalid!'.format(model))
    # model is exactly 'ARMA' or 'LR' here, so it doubles as the subfolder.
    saveFolder = os.path.join(kPath.dirWQ, 'modelStat', model, saveFolderName)
    predFile = os.path.join(saveFolder, siteNo)
    # makedirs creates missing parents and tolerates a concurrent creation.
    os.makedirs(saveFolder, exist_ok=True)

    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        return utils.time.datePdf(dfP)

    if optX == 'F':
        varX = gridMET.varLst
    elif optX == 'QF':
        varX = ['00060'] + gridMET.varLst
    else:
        raise ValueError('optX {} invalid!'.format(optX))
    dfX = waterQuality.readSiteX(siteNo, varX)
    dfY = waterQuality.readSiteY(siteNo, [varY])

    # normalize
    mtdX = waterQuality.extractVarMtd(varX)
    normX, statX = transform.transInAll(dfX.values, mtdX)
    dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
    mtdY = waterQuality.extractVarMtd([varY])
    normY, statY = transform.transInAll(dfY.values, mtdY)
    dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

    # select the fitting period
    tSplit = np.datetime64('2000-01-01')
    if optT == 'Y8090':
        dfXT = dfXN[dfXN.index < tSplit]
        dfYT = dfYN[dfYN.index < tSplit]
    elif optT == 'Y0010':
        dfXT = dfXN[dfXN.index >= tSplit]
        dfYT = dfYN[dfYN.index >= tSplit]
    else:
        raise ValueError('optT {} invalid!'.format(optT))

    # Fit on the selected period, predict on the full record.
    # NOTE: the second value returned by trainARMA is not persisted.
    if model == 'ARMA':
        dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
    else:
        dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
    yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
    dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

    # save result and normalization statistics
    dfP.reset_index().to_csv(predFile, index=False)
    statFile = os.path.join(saveFolder, siteNo + '_stat.json')
    with open(statFile, 'w') as fp:
        json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
Example #6
0
# Variable groups, normalization statistics and trained model for outName.
(varX, varXC, varY, varYC) = (
    master['varX'], master['varXC'], master['varY'], master['varYC'])
(statX, statXC, statY, statYC) = basins.loadStat(outName)
model = basins.loadModel(outName, ep=ep)
tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
tabG = gageII.updateCode(tabG)

# The per-site loop (for siteNo in sitePredLst) is disabled; only the first
# site is processed, so the statements below run at top level.
siteNo = sitePredLst[0]
if 'DRAIN_SQKM' in varXC:
    area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
else:
    area = None

# test model
print('testing {} from {} to {}'.format(siteNo, sdS, edS))
dfX = waterQuality.readSiteX(
    siteNo, varX, sd=sd, ed=ed, area=area, nFill=5)
# add a length-1 axis so the single site looks like a batch of one
xA = np.expand_dims(dfX.values, axis=1)
# builtin float replaces the np.float alias removed in NumPy 1.24
xcA = np.expand_dims(
    tabG.loc[siteNo].values.astype(float), axis=0)
mtdX = wqData.extractVarMtd(varX)
x = transform.transInAll(xA, mtdX, statLst=statX)
mtdXC = wqData.extractVarMtd(varXC)
xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
[x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
yOut = trainTS.testModel(model, x, xc)

# transfer out: the first ny columns of yP hold the de-normalized varY output
nt = len(dfX)
ny = len(varY) if varY is not None else 0
nyc = len(varYC) if varYC is not None else 0
yP = np.full([nt, ny+nyc], np.nan)
yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)