Exemple #1
0
 def transIn(self, statTup=None, subset=None, varTup=None):
     """Normalize the arrays returned by ``self.extractData``.

     Every non-None array is passed through ``transform.transInAll``
     together with the method list from ``self.extractVarMtd``; None
     entries are kept as None. The elapsed time is printed.

     Args:
         statTup: statistics to reuse, iterated in lockstep with the
             data. When None, the statistics returned by
             ``transform.transInAll`` are collected instead.
         subset: forwarded to ``self.extractData``.
         varTup: variable groups; forwarded to ``self.extractData`` and
             iterated in lockstep with the extracted data.

     Returns:
         ``(dataLst, statLst)`` when ``statTup`` is None, otherwise only
         ``dataLst``.
     """
     rawTup = self.extractData(varTup=varTup, subset=subset)
     tStart = time.time()
     normLst = []
     if statTup is not None:
         # reuse the supplied statistics
         for raw, var, stat in zip(rawTup, varTup, statTup):
             normed = None
             if raw is not None:
                 varMtd = self.extractVarMtd(var)
                 normed = transform.transInAll(raw, varMtd, statLst=stat)
             normLst.append(normed)
         print('transform time {:.3f}'.format(time.time() - tStart))
         return normLst
     # no statistics supplied: keep the ones computed for each group
     newStatLst = []
     for raw, var in zip(rawTup, varTup):
         if raw is None:
             normed, newStat = None, None
         else:
             varMtd = self.extractVarMtd(var)
             normed, newStat = transform.transInAll(raw, varMtd)
         normLst.append(normed)
         newStatLst.append(newStat)
     print('transform time {:.3f}'.format(time.time() - tStart))
     return normLst, newStatLst
def funcPoint(iP, axP):
    """Plot LSTM, linear-regression and observed series for one site.

    The site is ``siteNoLst[iP]``. Two ``LinearRegression`` models are
    fitted on that site's rows of the module-level training arrays and
    applied to the normalized output of ``waterQuality.readSiteX``.
    ``axP[0]`` shows column '00060' and ``axP[1]`` shows ``codeLst[0]``.

    Args:
        iP: index into the module-level ``siteNoLst``.
        axP: indexable of axes; elements 0 and 1 are drawn on.
    """
    site = siteNoLst[iP]
    predDf, obsDf = basins.loadSeq(outName, site)
    dates = predDf['date'].values.astype(np.datetime64)
    barDate = np.datetime64('2000-01-01')
    # linear model fitted on this site's training rows
    rowTrain = infoTrain[infoTrain['siteNo'] == site].index
    trainArrs, _ = utils.rmNan(
        [xL1[rowTrain, :], yL1[rowTrain, :], ycL1[rowTrain, :]])
    xFit, yFit, ycFit = trainArrs
    lrY = LinearRegression().fit(xFit, yFit)
    lrYC = LinearRegression().fit(xFit, ycFit)
    dateStart = np.datetime64('1979-01-01')
    dateEnd = np.datetime64('2020-01-01')
    forcing = waterQuality.readSiteX(site, dateStart, dateEnd, varX)
    xAll = transform.transInAll(forcing.values, mtdX, statLst=statX)
    yNorm = lrY.predict(xAll)
    ycNorm = lrYC.predict(xAll)
    yLin = wqData.transOut(yNorm, statY, varY)
    ycLin = wqData.transOut(ycNorm, statYC, varYC)
    # one panel per entry: (column, linear-regression series, line styles)
    panels = (('00060', yLin, '---'), (codeLst[0], ycLin, '--*'))
    for k, (col, linSeries, sty) in enumerate(panels):
        axplot.plotTS(axP[k],
                      dates, [predDf[col], linSeries, obsDf[col]],
                      tBar=barDate,
                      legLst=['lstm', 'lr', 'obs'],
                      styLst=sty,
                      cLst='bgr')
Exemple #3
0
 def transIn(self, dataTup, varTup, statTup=None):
     """Normalize each array of ``dataTup`` with ``transform.transInAll``.

     The method list of every variable group comes from
     ``io.extractVarMtd``. None entries of ``dataTup`` are kept as None.

     Args:
         dataTup: arrays (or None) to normalize.
         varTup: variable groups, iterated in lockstep with ``dataTup``.
         statTup: statistics to reuse. When None, the statistics
             returned by ``transform.transInAll`` are collected instead.

     Returns:
         ``(dataLst, statLst)`` when ``statTup`` is None, otherwise only
         ``dataLst``.
     """
     normLst = []
     if statTup is not None:
         # reuse the supplied statistics
         for raw, var, stat in zip(dataTup, varTup, statTup):
             normed = None
             if raw is not None:
                 varMtd = io.extractVarMtd(var)
                 normed = transform.transInAll(raw, varMtd, statLst=stat)
             normLst.append(normed)
         return normLst
     # no statistics supplied: keep the ones computed for each group
     newStatLst = []
     for raw, var in zip(dataTup, varTup):
         if raw is None:
             normed, newStat = None, None
         else:
             varMtd = io.extractVarMtd(var)
             normed, newStat = transform.transInAll(raw, varMtd)
         normLst.append(normed)
         newStatLst.append(newStat)
     return normLst, newStatLst
Exemple #4
0
def runModel(dfX, dfG):
    """Run the module-level ``model`` on one input table and return predictions.

    The inputs are normalized with the module-level statistics
    (``statX``, ``statXC``), fed to ``trainTS.testModel`` and transformed
    back with ``wqData.transOut`` using ``statY`` / ``statYC``.

    Args:
        dfX: DataFrame of time-varying inputs; its index supplies the
            output dates.
        dfG: table of constant inputs; its values are cast to float.

    Returns:
        DataFrame with a 'date' column followed by one column per entry
        of ``varY`` and ``varYC``.
    """
    # test model
    xA = np.expand_dims(dfX.values, axis=1)
    # builtin float: the np.float alias was removed in NumPy 1.24
    xcA = np.expand_dims(dfG.values.astype(float), axis=0)
    mtdX = wqData.extractVarMtd(varX)
    x = transform.transInAll(xA, mtdX, statLst=statX)
    mtdXC = wqData.extractVarMtd(varXC)
    xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
    yOut = trainTS.testModel(model, x, xc)
    # transfer out: first ny columns belong to varY, the rest to varYC
    nt = len(dfX)
    ny = len(varY) if varY is not None else 0
    nyc = len(varYC) if varYC is not None else 0
    yP = np.full([nt, ny + nyc], np.nan)
    yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
    yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
    # save output
    t = dfX.index.values.astype('datetime64[D]')
    colY = [] if varY is None else varY
    colYC = [] if varYC is None else varYC
    dfOut = pd.DataFrame(data=yP, columns=colY + colYC, index=t)
    dfOut.index.name = 'date'
    dfOut = dfOut.reset_index()
    return dfOut
Exemple #5
0
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run the trained model on full sequences for a list of sites.

    Per-site predictions are written as CSV files named by site number
    under ``<outDir>/seq-<sd>-<ed>-ep<ep>``. A site whose file already
    exists is skipped unless ``retest`` is True. When the model was
    trained with 'SigmaLoss', a second file ``<siteNo>_sigma`` is written
    next to it.

    Args:
        outName: name of the trained model output.
        siteNoLst: a site number or a list of site numbers.
        wqData: data model providing ``freq`` and ``transOut``; loaded
            from ``master['dataName']`` when None and needed.
        ep: epoch to load; defaults to ``master['nEpoch']``.
        returnOut: when True, read the saved CSV files back and return
            them.
        retest: when True, re-run every site even if already saved.
        sd: start date of the sequence.
        ed: end date of the sequence.

    Returns:
        When ``returnOut`` is True, a dict mapping each site number (and
        ``siteNo + '_sigma'`` for SigmaLoss models) to a DataFrame;
        otherwise None.
    """
    # run sequence test for all sites, default to be from first date to last date
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    if not os.path.exists(saveDir):
        os.mkdir(saveDir)
    siteSaveLst = os.listdir(saveDir)
    if retest is True:
        sitePredLst = siteNoLst
    else:
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaveLst
        ]
    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)
        colY = [] if varY is None else varY
        colYC = [] if varYC is None else varYC
        # flat list of names: a nested list would build a MultiIndex
        colOut = colY + colYC
        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            freq = wqData.freq
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            xA = np.expand_dims(dfX.values, axis=1)
            # builtin float: the np.float alias was removed in NumPy 1.24
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)
            # transfer out
            nt = len(dfX)
            ny = len(varY) if varY is not None else 0
            nyc = len(varYC) if varYC is not None else 0
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # the output interleaves (value, log-variance) per variable:
                # even positions are values, odd positions are sigma terms
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
            # save output
            t = dfX.index.values.astype('datetime64[D]')
            dfOut = pd.DataFrame(data=yP, columns=colOut, index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=colOut, index=t)
                dfOutS.index.name = 'date'
                # reset the sigma frame itself; resetting dfOut here used to
                # write the predictions into the '_sigma' file. Files saved
                # before this fix need retest=True to be regenerated.
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)
    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
Exemple #6
0
import matplotlib.pyplot as plt
import torch.nn as nn
from hydroDL.model import rnn, crit
import os

# Single-site setup: read inputs/targets, normalize them and split at 2000.
siteNo = '01434025'
# siteNo = '01364959'
codeLst = ['00915', '00940', '00955']

# inputs are the gridMET variables, the target is column '00060'
varX = gridMET.varLst
varY = ['00060']
dfX = waterQuality.readSiteX(siteNo, varX)
dfY = waterQuality.readSiteY(siteNo, varY)

# normalize; without statLst, transInAll returns (normalized data, stats)
mtdX = waterQuality.extractVarMtd(varX)
normX, statX = transform.transInAll(dfX.values, mtdX)
dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
mtdY = waterQuality.extractVarMtd(varY)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

# *1: rows before 2000-01-01, *2: rows on or after it, no suffix: all rows
matX1 = dfXN[dfXN.index < np.datetime64('2000-01-01')].values
matY1 = dfYN[dfYN.index < np.datetime64('2000-01-01')].values
matX2 = dfXN[dfXN.index >= np.datetime64('2000-01-01')].values
matY2 = dfYN[dfYN.index >= np.datetime64('2000-01-01')].values
matX = dfXN.values
matY = dfYN.values

nx = len(varX)
ny = len(varY)
# row indices of the pre-2000 targets that are not NaN
ind1 = np.where(~np.isnan(matY1))[0]
Exemple #7
0
            print(
                'first iteration failed again for CUDNN_STATUS_EXECUTION_FAILED '
            )
    yP, ycP = model(xT)
    loss = lossFun(yP, ycP, yT[:, :, :ny], yT[-1, :, ny:])
    loss.backward()
    optim.step()
    model.zero_grad()
    print('{} {:.3f} {:.3f}'.format(k, loss, time.time() - t0))

# test
# normalize inputs and targets with the statistics stored in statTup
statX, statXC, statY, statYC = statTup
xA = np.expand_dims(dfX.values, axis=1)
# builtin float: the np.float alias was removed in NumPy 1.24
xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float), axis=0)
mtdX = wqData.extractVarMtd(varX)
x = transform.transInAll(xA, mtdX, statLst=statX)
mtdXC = wqData.extractVarMtd(varXC)
xc = transform.transInAll(xcA, mtdXC, statLst=statXC)

yA = np.expand_dims(dfY.values, axis=1)
ycA = np.expand_dims(dfYC.values, axis=1)
mtdY = wqData.extractVarMtd(varY)
y = transform.transInAll(yA, mtdY, statLst=statY)
mtdYC = wqData.extractVarMtd(varYC)
yc = transform.transInAll(ycA, mtdYC, statLst=statYC)

(x, xc) = trainTS.dealNaN((x, xc), [1, 1])
nt = x.shape[0]
# repeat the constant inputs nt times along axis 0 and append them to x
xT = torch.from_numpy(np.concatenate([x, np.tile(xc, [nt, 1, 1])],
                                     axis=-1)).float()
if torch.cuda.is_available():
Exemple #8
0
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt

# Compare per-site sorted curves of raw vs. normalized values for one code.
# NOTE(review): this sorted list is discarded; codeLst is reassigned below.
codeLst = sorted(usgs.newC)
# dataName = 'nbWT'
dataName = 'nbW'
wqData = waterQuality.DataModelWQ(dataName)
siteNoLst = wqData.info.siteNo.unique()

codeLst = usgs.newC
# column positions of the selected codes inside wqData.c
icLst = [wqData.varC.index(code) for code in codeLst]
data = wqData.c[:, np.array(icLst)]
mtdLst = waterQuality.extractVarMtd(codeLst)
# without statLst, transInAll returns (normalized data, stats)
dataNorm, stat = transform.transInAll(data, mtdLst)
info = wqData.info

code = '00660'
ic = codeLst.index(code)
# top axes: raw values, bottom axes: normalized values
fig, axes = plt.subplots(2, 1, figsize=(6, 8))
for siteNo in siteNoLst:
    # rows of this site
    indS = info[info['siteNo'] == siteNo].index.values
    yr = utils.sortData(data[indS, ic])
    yn = utils.sortData(dataNorm[indS, ic])
    # x runs from 0 towards 1 in steps of 1/len(yr)
    x = np.arange(len(yr)) / len(yr)
    _ = axes[0].plot(x, yr, 'k-', alpha=0.2)
    _ = axes[1].plot(x, yn, 'k-', alpha=0.2)
shortName = usgs.codePdf.loc[code]['shortName']
axes[1].set_ylim([-0.2, 1.2])
axes[0].set_title('{} {} CDFs '.format(code, shortName))
Exemple #9
0
# keep only the first channel of the last axis (slice keeps the axis)
yT2 = obsLst2[2][:, :, 0:1]

# per-site errors for the varYC predictions on the train and test subsets
errMatC1 = wqData.errBySiteC(ycP1, subset=trainSet, varC=master['varYC'])
errMatC2 = wqData.errBySiteC(ycP2, subset=testSet, varC=master['varYC'])
# errMatQ1 = wqData.errBySiteQ(
#     yP1, subset=trainSet, varQ=master['varY'])
# errMatQ2 = wqData.errBySiteQ(
#     yP2, subset=testSet, varQ=master['varY'])

# bare expressions: the values are computed but not stored
# (presumably meant to be inspected in an interactive session)
# np.nanmean(errMatQ2[:, 0, 1])
np.nanmean(errMatC1[:, 0, 1])
np.nanmean(errMatC2[:, 0, 1])

# transfer - validate if training error is correct
# predictions and targets are normalized again with the stored statistics:
# statTup[3] for varYC, statTup[2] for varY
mtd = wqData.extractVarMtd(master['varYC'])
xcP = transform.transInAll(ycP2, mtd, statLst=statTup[3])
xcT = transform.transInAll(ycT2, mtd, statLst=statTup[3])
mtd = wqData.extractVarMtd(master['varY'])
xP = transform.transInAll(yP2, mtd, statLst=statTup[2])
xT = transform.transInAll(yT2, mtd, statLst=statTup[2])

# NaN-ignoring RMSE on the normalized values, then the mean of the two RMSEs
np.sqrt(np.nanmean((xT - xP)**2))
np.sqrt(np.nanmean((xcT - xcP)**2))
(np.sqrt(np.nanmean((xT - xP)**2)) + np.sqrt(np.nanmean((xcT - xcP)**2))) / 2

# see correlation
info = wqData.subsetInfo(testSet)
siteNoLst = info.siteNo.unique()
# one row per site, two columns, NaN until filled
corrMat = np.full([len(siteNoLst), 2], np.nan)
for i, siteNo in enumerate(siteNoLst):
    # rows of this site
    indS = info[info['siteNo'] == siteNo].index.values
Exemple #10
0
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Return the predicted series of one site, training the model if needed.

    Predictions are cached as a CSV file named ``siteNo`` under
    ``<kPath.dirWQ>/modelStat/<model>/<folder>``. If the file exists it is
    read back. Otherwise the data are read, normalized, a model is
    trained on the period chosen by ``optT``, and the predictions and the
    normalization statistics (``<siteNo>_stat.json``) are written.

    Args:
        siteNo: site number; also the name of the cached file.
        varY: name of the single target variable.
        model: 'ARMA' or 'LR'.
        optX: 'F' uses ``gridMET.varLst`` as inputs, 'QF' prepends
            '00060'.
        optT: 'Y8090' trains on rows before 2000-01-01, 'Y0010' on rows
            on or after it.
        order: ARMA order; only used when ``model`` is 'ARMA'.

    Returns:
        DataFrame of predictions.

    Raises:
        Exception: if ``model`` is invalid, or, when nothing is cached
            yet, if ``optX`` or ``optT`` is invalid.
    """
    if model == 'ARMA':
        dirAR = os.path.join(kPath.dirWQ, 'modelStat', 'ARMA')
        strOrder = '-'.join([str(k) for k in order])
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
        saveFolder = os.path.join(dirAR, saveFolderName)
    elif model == 'LR':
        dirLR = os.path.join(kPath.dirWQ, 'modelStat', 'LR')
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
        saveFolder = os.path.join(dirLR, saveFolderName)
    else:
        raise Exception('model {} invalid!'.format(model))
    predFile = os.path.join(saveFolder, siteNo)
    # makedirs also creates a missing 'modelStat/<model>' parent and does
    # not fail if another process created the folder in the meantime
    os.makedirs(saveFolder, exist_ok=True)

    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        dfP = utils.time.datePdf(dfP)
    else:
        if optX == 'F':
            varX = gridMET.varLst
        elif optX == 'QF':
            varX = ['00060'] + gridMET.varLst
        else:
            raise Exception('optX {} invalid!'.format(optX))
        dfX = waterQuality.readSiteX(siteNo, varX)
        dfY = waterQuality.readSiteY(siteNo, [varY])
        # normalize
        mtdX = waterQuality.extractVarMtd(varX)
        normX, statX = transform.transInAll(dfX.values, mtdX)
        dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
        mtdY = waterQuality.extractVarMtd([varY])
        normY, statY = transform.transInAll(dfY.values, mtdY)
        dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)
        splitDate = np.datetime64('2000-01-01')
        if optT == 'Y8090':
            dfXT = dfXN[dfXN.index < splitDate]
            dfYT = dfYN[dfYN.index < splitDate]
        elif optT == 'Y0010':
            dfXT = dfXN[dfXN.index >= splitDate]
            dfYT = dfYN[dfYN.index >= splitDate]
        else:
            raise Exception('optT {} invalid!'.format(optT))

        # train and test; model was validated above, so it is ARMA or LR
        if model == 'ARMA':
            dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
        else:
            dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
        yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
        dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

        # save result and stat (the fitted model itself is not saved)
        dfP.reset_index().to_csv(predFile, index=False)
        statFile = os.path.join(saveFolder, siteNo + '_stat.json')
        with open(statFile, 'w') as fp:
            json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
Exemple #11
0
# training rows: years in [yrTrain[0], yrTrain[1])
yrTrain = [2000, 2005]
yr = df.index.year.values
indTrain = np.where((yr >= yrTrain[0]) & (yr < yrTrain[1]))[0]

# data
# offset added to Y before taking the log below
sn = 1
# varX = varF
varX = ['pr']
varY = ['runoff']
nx = len(varX)
ny = len(varY)
X = df[varX].values
Y = df[varY].values
mtdX = waterQuality.extractVarMtd(varX)
# mtdY = waterQuality.extractVarMtd(varY)
# inputs are normalized with transInAll; the target uses log(Y + sn) instead
x, statX = transform.transInAll(X, mtdX)
# y, statY = transform.transInAll(Y, mtdY)
# y = np.log(Y+sn)
# x[np.isnan(x)] = -1
y = np.log(Y+sn)
xx = x[indTrain, :]
yy = y[indTrain, :]

# conv
# NOTE(review): nbatch, rho, m and nd are consumed outside this excerpt;
# their exact meaning should be confirmed there
nt = len(indTrain)
nbatch = 100
rho = 1000
# exp of 0, 0.1, ..., 1.9
aLst = np.exp(np.arange(0, 2, 0.1))
m = 30
nq = len(aLst)
nd = 365
Exemple #12
0
    matC[kk, :, :] = dfC.values

# codes shown in the 5 x 4 grid below, one per panel
codeLst2 = [
    '00095', '00400', '00405', '00600', '00605', '00618', '00660', '00665',
    '00681', '00915', '00925', '00930', '00935', '00940', '00945', '00950',
    '00955', '70303', '71846', '80154'
]

# plot hist
# reload the project modules so that local edits are picked up
importlib.reload(axplot)
importlib.reload(transform)
importlib.reload(usgs)

varRLst = [code + '-R' for code in usgs.newC]
mtdLst = waterQuality.extractVarMtd(varRLst)
# normalize matR, then apply transOutAll with the same methods and stats
matRN, stat = transform.transInAll(matR, mtdLst)
matRN2 = transform.transOutAll(matRN, mtdLst, stat)

fig, axes = plt.subplots(5, 4)
ticks = [-0.5, 0, 0.5, 1]
for k, code in enumerate(codeLst2):
    j, i = utils.index2d(k, 5, 4)
    ax = axes[j, i]
    # positions (in siteNoLst) of the sites listed for this code
    siteNoCode = dictSite[code]
    indS = [siteNoLst.index(siteNo) for siteNo in siteNoCode]
    ic = usgs.newC.index(code)
    data = matRN2[indS, :, ic]
    x1 = utils.flatData(data)
    x2 = utils.rmExt(x1, p=5)

    # NOTE(review): this evaluates (x2 / std) - mean; if a standardized
    # sample was intended it should be (x2 - mean) / std. Confirm.
    s, p = scipy.stats.kstest(x2 / np.std(x2) - np.mean(x2), 'laplace')
Exemple #13
0
# runoff is the '00060' column divided by area, scaled by unitConv
dfQ['runoff'] = dfQ['00060'] / area * unitConv
# NOTE(review): because of the elif, dfQ is joined to dfY only when
# neither '00060' nor 'runoff' is in varX. Confirm this is intended.
if '00060' in varX or 'runoff' in varX:
    dfX = dfX.join(dfQ)
elif '00060' in varY or 'runoff' in varY:
    dfY = dfY.join(dfQ)
dfX = dfX.join(dfF)
dfY = dfY.join(dfC)
# keep only the requested columns, in the requested order
dfX = dfX[varX]
dfY = dfY[varY + varYC]

# normalize concat input data
# fill gaps of at most nFill consecutive values, in both directions
dfX = dfX.interpolate(limit=nFill, limit_direction='both')
xA = np.expand_dims(dfX.values, axis=1)
# builtin float: the np.float alias was removed in NumPy 1.24
xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float), axis=0)
mtdX = wqData.extractVarMtd(varX)
x = transform.transInAll(xA, mtdX, statLst=statX)
mtdXC = wqData.extractVarMtd(varXC)
xc = transform.transInAll(xcA, mtdXC, statLst=statXC)

yP = trainTS.testModel(model, x, xc)

# # test
# nt = len(dfX)
# x, xc = trainTS.dealNaN((x, xc), dictP['optNaN'][:2])
# xx = np.concatenate([x, np.tile(xc[0, :], [1, nt, 1])], axis=-1).swapaxes(0, 1)
# xT = torch.from_numpy(xx).float()
# if torch.cuda.is_available():
#     xT = xT.cuda()
# # if i == 0 and ind1 == 0:
# #     try:
# #         yT = model(xT)
Exemple #14
0
# training / testing
# training rows: years in [yrTrain[0], yrTrain[1])
yrTrain = [2000, 2005]
yr = df.index.year.values
indTrain = np.where((yr >= yrTrain[0]) & (yr < yrTrain[1]))[0]

# data
# varX = varF
varX = ['pr']
varY = ['00060']
nx = len(varX)
ny = len(varY)
X = df[varX].values
Y = df[varY].values
mtdX = waterQuality.extractVarMtd(varX)
mtdY = waterQuality.extractVarMtd(varY)
# without statLst, transInAll returns (normalized data, stats)
x, statX = transform.transInAll(X, mtdX)
y, statY = transform.transInAll(Y, mtdY)
# NaNs in the normalized inputs are replaced by -1
x[np.isnan(x)] = -1

xx = x[indTrain, :]
yy = y[indTrain, :]

# model and loss are moved to the GPU unconditionally (.cuda())
model = rnn.LstmModel(nx=nx, ny=ny, hiddenSize=256).cuda()
lossFun = crit.RmseLoss().cuda()
optim = torch.optim.Adadelta(model.parameters())

# NOTE(review): nbatch and rho are consumed by the training loop outside
# this excerpt; confirm their meaning there
nt = len(indTrain)
nbatch = 100
rho = 1000
# train
nEp = 500