Exemple #1
0
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run a trained model over the whole time series of each site.

    Predictions are cached as one CSV per site (file name = site number)
    in ``<outDir>/seq-<sd>-<ed>-ep<ep>``. When the model was trained with
    the 'SigmaLoss' criterion a second file ``<siteNo>_sigma`` is written
    next to it. Sites that already have a cached file are skipped unless
    ``retest`` is True.

    Args:
        outName: name of the trained model output; forwarded to
            loadMaster / loadStat / loadModel / nameFolder.
        siteNoLst: one site number (str) or an iterable of site numbers.
        wqData: data model providing ``freq`` and ``transOut``. Built from
            ``master['dataName']`` when None and at least one site has to
            be tested.
        ep: epoch of the model to load; defaults to ``master['nEpoch']``.
        returnOut: when True, read the cached CSVs back and return them.
        retest: when True, re-run every site even if a cached file exists.
        sd: first date of the tested sequence.
        ed: last date of the tested sequence.

    Returns:
        When ``returnOut`` is True, a dict mapping each site number (and
        ``siteNo + '_sigma'`` for SigmaLoss models) to the DataFrame read
        from its CSV; otherwise None.

    Note:
        ``_sigma`` files written before the sigma frame was saved
        correctly contain a copy of the predictions; re-run those sites
        with ``retest=True`` to refresh them.
    """
    # accept a single site number as well as any iterable of site numbers
    if isinstance(siteNoLst, str):
        siteNoLst = [siteNoLst]
    elif not isinstance(siteNoLst, list):
        siteNoLst = list(siteNoLst)

    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']

    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(saveDir, exist_ok=True)

    if retest is True:
        sitePredLst = siteNoLst
    else:
        # a site counts as done as soon as its prediction file exists
        savedSet = set(os.listdir(saveDir))
        sitePredLst = [s for s in siteNoLst if s not in savedSet]

    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)

        # everything below is identical for every site, so compute it once
        freq = wqData.freq
        mtdX = waterQuality.extractVarMtd(varX)
        mtdXC = waterQuality.extractVarMtd(varXC)
        hasArea = 'DRAIN_SQKM' in varXC
        ny = len(varY) if varY is not None else 0
        nyc = len(varYC) if varYC is not None else 0
        colLst = ([] if varY is None else list(varY)) + \
            ([] if varYC is None else list(varYC))

        for siteNo in sitePredLst:
            if hasArea:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            # time series input gets a singleton site axis, constant input
            # a singleton leading axis
            xA = np.expand_dims(dfX.values, axis=1)
            # builtin float: the np.float alias was removed in NumPy 1.24
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)

            # transfer out
            nt = len(dfX)
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # output channels are interleaved: even = value, odd = the
                # quantity that sqrt(exp(.)) turns into sigma
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)

            # save output
            t = dfX.index.values.astype('datetime64[D]')
            # flat column list: a nested list would build a MultiIndex
            dfOut = pd.DataFrame(data=yP, columns=colLst, index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=colLst, index=t)
                dfOutS.index.name = 'date'
                # reset the sigma frame itself, not the prediction frame
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)

    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dictOut[siteNo] = pd.read_csv(os.path.join(saveDir, siteNo))
            if doSigma:
                dictOut[siteNo + '_sigma'] = pd.read_csv(
                    os.path.join(saveDir, siteNo + '_sigma'))
        return dictOut
Exemple #2
0
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
from hydroDL.model import rnn, crit
import os

# --- single-site setup -----------------------------------------------------
siteNo = '01434025'
# siteNo = '01364959'
codeLst = ['00915', '00940', '00955']

# predictors are the gridMET variables, the target is variable '00060'
varX, varY = gridMET.varLst, ['00060']
dfX = waterQuality.readSiteX(siteNo, varX)
dfY = waterQuality.readSiteY(siteNo, varY)

# --- normalisation: keep the stats so the transform can be undone later ----
mtdX = waterQuality.extractVarMtd(varX)
normX, statX = transform.transInAll(dfX.values, mtdX)
dfXN = pd.DataFrame(normX, index=dfX.index, columns=dfX.columns)
mtdY = waterQuality.extractVarMtd(varY)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfYN = pd.DataFrame(normY, index=dfY.index, columns=dfY.columns)

# --- split at 2000-01-01: *1 = before, *2 = on/after, no suffix = all ------
splitDate = np.datetime64('2000-01-01')
matX1 = dfXN.loc[dfXN.index < splitDate].values
matY1 = dfYN.loc[dfYN.index < splitDate].values
matX2 = dfXN.loc[dfXN.index >= splitDate].values
matY2 = dfYN.loc[dfYN.index >= splitDate].values
matX, matY = dfXN.values, dfYN.values

# number of input / output variables
nx, ny = len(varX), len(varY)
Exemple #3
0
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Load, or train and cache, a baseline prediction series for one site.

    The prediction is cached as a CSV named after the site in
    ``<kPath.dirWQ>/modelStat/<model>/<optX>-<optT>-<varY>[-<order>]``.
    When the file exists it is read back; otherwise the model is trained
    on the period selected by ``optT``, applied to the full record, and the
    de-normalised prediction plus the normalisation statistics
    (``<siteNo>_stat.json``) are written to that folder.

    Args:
        siteNo: site number, also used as the cache file name.
        varY: the single target variable code.
        model: 'ARMA' or 'LR'.
        optX: 'F' for the gridMET variables only, 'QF' to prepend '00060'.
        optT: training period, 'Y8090' (before 2000-01-01) or 'Y0010'
            (from 2000-01-01 on).
        order: order tuple handed to trainARMA; only used (and only part
            of the folder name) when ``model`` is 'ARMA'.

    Returns:
        DataFrame with the predicted series.

    Raises:
        ValueError: if ``model`` is invalid, or if ``optX`` / ``optT`` is
            invalid and no cached prediction exists.
    """
    if model == 'ARMA':
        strOrder = '-'.join(str(k) for k in order)
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
    elif model == 'LR':
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
    else:
        raise ValueError('model {} invalid!'.format(model))
    # the model name doubles as the sub-folder name
    saveFolder = os.path.join(kPath.dirWQ, 'modelStat', model,
                              saveFolderName)
    predFile = os.path.join(saveFolder, siteNo)

    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        return utils.time.datePdf(dfP)

    # validate the options before touching the disk or reading any data
    if optX == 'F':
        varX = gridMET.varLst
    elif optX == 'QF':
        varX = ['00060'] + gridMET.varLst
    else:
        raise ValueError('optX {} invalid!'.format(optX))
    if optT not in ('Y8090', 'Y0010'):
        raise ValueError('optT {} invalid!'.format(optT))

    dfX = waterQuality.readSiteX(siteNo, varX)
    dfY = waterQuality.readSiteY(siteNo, [varY])

    # normalize
    mtdX = waterQuality.extractVarMtd(varX)
    normX, statX = transform.transInAll(dfX.values, mtdX)
    dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
    mtdY = waterQuality.extractVarMtd([varY])
    normY, statY = transform.transInAll(dfY.values, mtdY)
    dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

    # training subset
    tSplit = np.datetime64('2000-01-01')
    if optT == 'Y8090':
        dfXT = dfXN[dfXN.index < tSplit]
        dfYT = dfYN[dfYN.index < tSplit]
    else:  # 'Y0010', validated above
        dfXT = dfXN[dfXN.index >= tSplit]
        dfYT = dfYN[dfYN.index >= tSplit]

    # train on the subset, predict on the full record
    if model == 'ARMA':
        # the fitted result object is not persisted, only the prediction
        dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
    else:  # 'LR', validated above
        dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
    yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
    dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

    # save result and stat; makedirs also creates missing parent folders
    # and tolerates another process having created the folder meanwhile
    os.makedirs(saveFolder, exist_ok=True)
    dfP.reset_index().to_csv(predFile, index=False)
    statFile = os.path.join(saveFolder, siteNo + '_stat.json')
    with open(statFile, 'w') as fp:
        json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
Exemple #4
0
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt

# NOTE: this sorted list is replaced by the unsorted usgs.newC further down
codeLst = sorted(usgs.newC)
# dataName = 'nbWT'
dataName = 'nbW'
wqData = waterQuality.DataModelWQ(dataName)
siteNoLst = wqData.info.siteNo.unique()

# pick the columns of wqData.c that belong to the codes in usgs.newC and
# normalise them with the per-code method
codeLst = usgs.newC
icLst = list(map(wqData.varC.index, codeLst))
data = wqData.c[:, np.asarray(icLst)]
mtdLst = waterQuality.extractVarMtd(codeLst)
dataNorm, stat = transform.transInAll(data, mtdLst)
info = wqData.info

# one sorted-value curve per site: raw values on top, normalised below
code = '00660'
ic = codeLst.index(code)
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 8))
for siteNo in siteNoLst:
    # rows of `info` (and hence of `data`) that belong to this site
    indS = info.loc[info['siteNo'] == siteNo].index.values
    yr = utils.sortData(data[indS, ic])
    yn = utils.sortData(dataNorm[indS, ic])
    # x runs over [0, 1): rank of each sorted sample divided by the count
    nSample = len(yr)
    x = np.arange(nSample) / nSample
    _ = axes[0].plot(x, yr, 'k-', alpha=0.2)
    _ = axes[1].plot(x, yn, 'k-', alpha=0.2)
shortName = usgs.codePdf.loc[code]['shortName']
axes[1].set_ylim(-0.2, 1.2)
Exemple #5
0
    matR[kk, :, :] = dfP.values - dfC.values
    matC[kk, :, :] = dfC.values

# 20 variable codes to plot, one per panel of the 5 x 4 figure below
codeLst2 = [
    '00095', '00400', '00405', '00600', '00605', '00618', '00660', '00665',
    '00681', '00915', '00925', '00930', '00935', '00940', '00945', '00950',
    '00955', '70303', '71846', '80154'
]

# plot hist
# reload the project modules so edits made during an interactive session
# are picked up without restarting the interpreter
importlib.reload(axplot)
importlib.reload(transform)
importlib.reload(usgs)

# transform methods are looked up under '<code>-R' names; matR holds
# dfP.values - dfC.values per site (filled in the loop above)
varRLst = [code + '-R' for code in usgs.newC]
mtdLst = waterQuality.extractVarMtd(varRLst)
# forward transform followed by the inverse with the same methods and stats
# NOTE(review): presumably a round-trip check of the transform - confirm
matRN, stat = transform.transInAll(matR, mtdLst)
matRN2 = transform.transOutAll(matRN, mtdLst, stat)

fig, axes = plt.subplots(5, 4)
ticks = [-0.5, 0, 0.5, 1]
for k, code in enumerate(codeLst2):
    # map the flat panel number k to a (row, column) pair of the 5 x 4 grid
    j, i = utils.index2d(k, 5, 4)
    ax = axes[j, i]
    # sites registered for this code, as positions along axis 0 of matRN2
    siteNoCode = dictSite[code]
    indS = [siteNoLst.index(siteNo) for siteNo in siteNoCode]
    # position of this code along the last axis of matRN2
    ic = usgs.newC.index(code)
    data = matRN2[indS, :, ic]
    x1 = utils.flatData(data)
    # NOTE(review): p=5 presumably trims extreme values - confirm in utils
    x2 = utils.rmExt(x1, p=5)