Example #1
0
def _wrtdsTimeTerms(t):
    """Build the time regressors used by testWRTDS.

    Returns a tuple (yr, tDec, sinT, cosT): the calendar year, the year plus
    day-of-year / 365, and the sine / cosine of 2 * pi * tDec.
    """
    tt = pd.to_datetime(t)
    yr = tt.year.values
    tDec = yr + tt.dayofyear.values / 365
    return yr, tDec, np.sin(2 * np.pi * tDec), np.cos(2 * np.pi * tDec)


def _wrtdsLogFlow(q):
    """Return log(q + sn) computed on a copy of q with negatives set to 0."""
    q = q.copy()
    q[q < 0] = 0
    # sn is a module-level offset defined outside this function
    return np.log(q + sn)


def testWRTDS(dataName, trainSet, testSet, codeLst):
    """Fit per-point weighted regressions on trainSet and predict on testSet.

    For every site and every code in codeLst, each usable test time step gets
    its own weighted least-squares fit (sm.WLS) on the training samples, with
    weights returned by calWeight from three distances between the test step
    and the training samples: time, log-flow and within-year phase.

    Args:
        dataName: name passed to dbBasin.DataFrameBasin.
        trainSet: subset name used to build the training data model.
        testSet: subset name used to build the test data model. When it is
            'all', test steps are selected only on non-NaN regressors;
            otherwise the test target must be non-NaN as well.
        codeLst: target variable codes (used as varY).

    Returns:
        Array of shape [len(d2.t), len(d2.siteNoLst), len(codeLst)] holding
        the predictions; entries that were never predicted stay NaN.
    """
    DF = dbBasin.DataFrameBasin(dataName)
    # Calculate WRTDS from train and test set
    varX = ['00060']
    varY = codeLst
    d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varX=varX, varY=varY)
    d2 = dbBasin.DataModelBasin(DF, subset=testSet, varX=varX, varY=varY)
    yr1, t1, sinT1, cosT1 = _wrtdsTimeTerms(d1.t)
    yr2, t2, sinT2, cosT2 = _wrtdsTimeTerms(d2.t)

    yOut = np.full([len(d2.t), len(d2.siteNoLst), len(varY)], np.nan)
    t0 = time.time()
    for indS, siteNo in enumerate(d2.siteNoLst):
        # log-flow depends on the site only, so compute it once per site
        logq1 = _wrtdsLogFlow(d1.X[:, indS, 0])
        logq2 = _wrtdsLogFlow(d2.X[:, indS, 0])
        for indC, code in enumerate(varY):
            print('{} {} {} {}'.format(indS, siteNo, code, time.time() - t0))
            y1 = d1.Y[:, indS, indC].copy()
            # NOTE(review): the design matrix has no constant column and uses
            # the integer year (yr1), not the decimal year - confirm intended.
            x1 = np.stack([logq1, yr1, sinT1, cosT1]).T
            [xx1, yy1], ind1 = utils.rmNan([x1, y1])
            # too few training samples: skip before touching the test set
            if len(ind1) < 40:
                continue
            x2 = np.stack([logq2, yr2, sinT2, cosT2]).T
            if testSet == 'all':
                _, ind2 = utils.rmNan([x2])
            else:
                y2 = d2.Y[:, indS, indC].copy()
                _, ind2 = utils.rmNan([x2, y2])
            # training-side values are the same for every test step
            t1Sel = t1[ind1]
            logq1Sel = logq1[ind1]
            for k in ind2:
                dY = np.abs(t2[k] - t1Sel)
                dQ = np.abs(logq2[k] - logq1Sel)
                # distance to the nearest whole number of years
                dS = np.min(np.stack(
                    [abs(np.ceil(dY) - dY),
                     abs(dY - np.floor(dY))]),
                            axis=0)
                d = np.stack([dY, dQ, dS])
                ww, ind = calWeight(d)
                model = sm.WLS(yy1[ind], xx1[ind], weights=ww).fit()
                yOut[k, indS, indC] = model.predict(x2[k, :])[0]
    return yOut
Example #2
0
def trainModel(outName):
    """Train the model described by the master file of outName.

    Loads the master dictionary, builds and transforms the training data,
    creates the loss / model / optimizer, then trains in chunks of
    dictP['saveEpoch'] epochs, saving the model state after every chunk.
    The per-chunk losses are concatenated and written to 'loss.csv' in the
    output folder.

    Args:
        outName: name of the experiment; resolved to a folder by nameFolder
            and to a master dictionary by loadMaster.

    Raises:
        RuntimeError: if dictP['optim'] is not 'AdaDelta'.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    DF = dbBasin.DataFrameBasin(dictP['dataName'])
    dictVar = {k: dictP[k]
               for k in ('varX', 'varXC', 'varY', 'varYC')}
    DM = dbBasin.DataModelBasin(DF, subset=dictP['trainSet'], **dictVar)
    if dictP['borrowStat'] is not None:
        DM.loadStat(dictP['borrowStat'])
    DM.trans(mtdX=dictP['mtdX'], mtdXC=dictP['mtdXC'],
             mtdY=dictP['mtdY'], mtdYC=dictP['mtdYC'])
    DM.saveStat(outFolder)
    dataTup = DM.getData()
    dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])

    # define loss
    lossFun = getattr(crit, dictP['crit'])()
    # define model
    model = defineModel(dataTup, dictP)

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizor function not specified')

    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start from a clean log for this run
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        # Clamp the last chunk so that the total never exceeds nEp and the
        # final saved state is labelled nEp (the epoch testModel loads by
        # default), even when nEp is not a multiple of sEp.
        nEpChunk = min(sEp, nEp - k)
        model, optim, lossEp = trainBasin.trainModel(
            dataTup, model, lossFun, optim, batchSize=dictP['batchSize'],
            nEp=nEpChunk, cEp=k, logFile=logFile,
            optBatch=dictP['optBatch'], nIterEp=dictP['nIterEp'])
        # save model
        saveModelState(outName, k + nEpChunk, model, optim=optim)
        lossLst = lossLst+lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
Example #3
0
def _unwrapSavedArray(arr):
    """Return the Python object held by a 0-d object array, else arr itself.

    np.savez stores a non-array value such as None as a 0-d object array;
    unwrapping it makes the cached result match what was originally saved.
    """
    if arr.dtype == object and arr.ndim == 0:
        return arr.item()
    return arr


def testModel(outName,  DF=None, testSet='all', ep=None, reTest=False, batchSize=20):
    """Run (or reload) the predictions of a trained model on a subset.

    Results are cached in '<outFolder>/testP-<testSet>-Ep<ep>.npz'. If that
    file exists and reTest is False it is loaded; otherwise the model state
    of epoch ep is loaded, the test is run and the cache is (re)written.

    Args:
        outName: experiment name, resolved by loadMaster / nameFolder.
        DF: optional already-loaded DataFrameBasin; when None it is built
            from dictP['dataName'].
        testSet: subset name used to build the test data model.
        ep: epoch whose saved state is tested; defaults to dictP['nEpoch'].
        reTest: when not False, ignore an existing cache file and recompute.
        batchSize: forwarded to trainBasin.testModel.

    Returns:
        Tuple (yP, ycP): outputs of DM.transOutY and DM.transOutYC.
    """
    # load master
    dictP = loadMaster(outName)
    if ep is None:
        ep = dictP['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testSet, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        # NOTE: allow_pickle=True unpickles file content; only use on cache
        # files written by this function.
        # The context manager closes the underlying zip file handle.
        with np.load(testFile, allow_pickle=True) as npz:
            yP = _unwrapSavedArray(npz['yP'])
            ycP = _unwrapSavedArray(npz['ycP'])
    else:
        # load test data
        if DF is None:
            DF = dbBasin.DataFrameBasin(dictP['dataName'])
        dictVar = {k: dictP[k]
                   for k in ('varX', 'varXC', 'varY', 'varYC')}
        DM = dbBasin.DataModelBasin(DF, subset=testSet, **dictVar)
        # reuse the statistics saved in the output folder at training time
        DM.loadStat(outFolder)
        dataTup = DM.getData()
        dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])

        model = defineModel(dataTup, dictP)
        model = loadModelState(outName, ep, model)
        # test
        x = dataTup[0]
        xc = dataTup[1]
        ny = np.shape(dataTup[2])[2]
        # test model - point by point
        yOut, ycOut = trainBasin.testModel(
            model, x, xc, ny, batchSize=batchSize)
        yP = DM.transOutY(yOut)
        ycP = DM.transOutYC(ycOut)
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
Example #4
0
# Script fragment: create paired "picked" / "removed" date subsets on DF with
# three selection rules, then plot two subsets for one site.
# NOTE(review): DF is not defined in the visible lines and must already exist.

# pick by year
yrIn = np.arange(1985, 2020, 5).tolist()  # 1985, 1990, ..., 2015
t1 = dbBasin.func.pickByYear(DF.t, yrIn)
t2 = dbBasin.func.pickByYear(DF.t, yrIn, pick=False)
DF.createSubset('pkYr5', dateLst=t1)
DF.createSubset('rmYr5', dateLst=t2)

# pick by day
t1 = dbBasin.func.pickByDay(DF.t, dBase=5, dSel=1)
t2 = dbBasin.func.pickByDay(DF.t, dBase=5, dSel=1, pick=False)
DF.createSubset('pkD5', dateLst=t1)
DF.createSubset('rmD5', dateLst=t2)

# pick by random
t1 = dbBasin.func.pickRandT(DF.t, 0.2)
t2 = dbBasin.func.pickRandT(DF.t, 0.2, pick=False)
DF.createSubset('pkRT20', dateLst=t1)
DF.createSubset('rmRT20', dateLst=t2)

# plot
# NOTE(review): the subsets read here are 'pkR20' / 'rmR20', while the ones
# created just above are 'pkRT20' / 'rmRT20' - confirm that 'pkR20' / 'rmR20'
# exist from elsewhere, or that this is not a typo.
codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955']
d1 = dbBasin.DataModelBasin(DF, subset='pkR20', varY=codeSel)
d2 = dbBasin.DataModelBasin(DF, subset='rmR20', varY=codeSel)

k = 0  # index along the second axis of Y (the site selected for plotting)
# both subsets are drawn against d2.t
fig, axes = figplot.multiTS(d2.t, [d2.Y[:, k, :], d1.Y[:, k, :]],
                            cLst='br',
                            styLst='..')
fig.show()
Example #5
0
# Script fragment: rescale two prediction arrays code by code, load saved
# WRTDS predictions, then compute statistics per site / code.
# NOTE(review): yP1, yP2, codeLst, DF, dataName, trainSet and testSet are not
# defined in the visible lines and must already exist.

# Allocate NaN-filled outputs: np.ndarray(shape) would leave uninitialised
# memory in any [:, :, k] slice that the loop below does not assign.
yOut1 = np.full(yP1.shape, np.nan)
yOut2 = np.full(yP2.shape, np.nan)
for k, code in enumerate(codeLst):
    # columns '<code>-M' and '<code>-S' of DF.g
    # (presumably per-site mean and std - TODO confirm)
    m = DF.g[:, DF.varG.index(code + '-M')]
    s = DF.g[:, DF.varG.index(code + '-S')]
    yOut1[:, :, k] = yP1[:, :, k] * s + m
    yOut2[:, :, k] = yP2[:, :, k] * s + m

# WRTDS
# yW = WRTDS.testWRTDS(dataName, trainSet, testSet, codeLst)
# load the saved array instead of recomputing it with the call above
dirRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-dbBasin')
fileName = '{}-{}-{}'.format(dataName, trainSet, testSet)
yW = np.load(os.path.join(dirRoot, fileName) + '.npz')['arr_0']

# correlation matrix
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varY=codeLst)
d2 = dbBasin.DataModelBasin(DF, subset=testSet, varY=codeLst)
siteNoLst = DF.siteNoLst
mat1 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
mat2 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
mat3 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
for indS, siteNo in enumerate(siteNoLst):
    for indC, code in enumerate(codeLst):
        # number of non-NaN targets in the train (n1) and test (n2) subsets
        n1 = np.sum(~np.isnan(d1.Y[:, indS, indC]), axis=0)
        n2 = np.sum(~np.isnan(d2.Y[:, indS, indC]), axis=0)
        if n1 >= 160 and n2 >= 40:
            # NOTE(review): stat1-3 are computed but not stored into mat1-3
            # in the visible lines.
            stat1 = utils.stat.calStat(yOut1[:, indS, indC], d2.Y[:, indS,
                                                                  indC])
            stat2 = utils.stat.calStat(yOut2[:, indS, indC], d2.Y[:, indS,
                                                                  indC])
            stat3 = utils.stat.calStat(yW[:, indS, indC], d2.Y[:, indS, indC])
Example #6
0
# Script fragment: re-save a fully saved model as a state dict, then rebuild
# the network from the master file and load that state dict into it.
outName, ep = 'weathering-FPR2QC-t365-B10', 100

# --- convert: full model file -> state-dict file ---
outFolder = basinFull.nameFolder(outName)
modelFile, modelStateFile = (
    os.path.join(outFolder, stem.format(ep))
    for stem in ('model_ep{}', 'modelState_ep{}'))
model = torch.load(modelFile)
torch.save(model.state_dict(), modelStateFile)

# --- rebuild: same data pipeline as in testing, then a fresh network ---
dictP = basinFull.loadMaster(outName)
DF = dbBasin.DataFrameBasin(dictP['dataName'])
dictVar = {
    'varX': dictP['varX'],
    'varXC': dictP['varXC'],
    'varY': dictP['varY'],
    'varYC': dictP['varYC'],
}
DM = dbBasin.DataModelBasin(DF, subset='A10', **dictVar)
DM.loadStat(outFolder)
dataTup = DM.getData()
# sizes are read before NaN handling replaces dataTup
nx, nxc, ny, nyc, nt, ns = trainBasin.getSize(dataTup)
dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])

# only the two known architectures can be rebuilt
if dictP['modelName'] not in ('CudnnLSTM', 'LstmModel'):
    raise RuntimeError('Model not specified')
if dictP['modelName'] == 'CudnnLSTM':
    model = rnn.CudnnLstmModel(
        nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
else:
    model = rnn.LstmModel(
        nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
model.load_state_dict(torch.load(modelStateFile))
Example #7
0
# Script fragment: load a site dictionary, normalise the 'weathering' data
# frame on the training subset and prepare one grid of axes per data split.
# NOTE(review): dirSel is not defined in the visible lines.
dictSiteName = 'dictWeathering.json'
with open(os.path.join(dirSel, dictSiteName)) as f:
    dictSite = json.load(f)
siteNoLst = dictSite['k12']  # not used again in the visible lines

dataName = 'weathering'
DF = dbBasin.DataFrameBasin(dataName)
trainSet = 'rmYr5'
testSet = 'pkYr5'

# input
codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955']
subset = trainSet
# DF is replaced by the result of localNorm computed with the training subset
DF = dbBasin.localNorm(DF, subset=trainSet)

DM = dbBasin.DataModelBasin(DF)

# plot
code = '00915'
nfy, nfx = (4, 3)  # rows, columns of the subplot grid
bins = 20
data0 = DF.c
data1 = DF.extractSubset(DF.c, subsetName=trainSet)
data2 = DF.extractSubset(DF.c, subsetName=testSet)
dataLst = [data0, data1, data2]
titleLst = ['all', 'train', 'test']
# one figure per split; one axis per site
for data, title in zip(dataLst, titleLst):
    fig, axes = plt.subplots(nfy, nfx)
    for k, siteNo in enumerate(DF.siteNoLst):
        # convert the flat site index into a (row, column) position
        j, i = utils.index2d(k, nfy, nfx)
        # NOTE(review): the fragment ends here; nothing is drawn on ax in the
        # visible lines, and code / bins / data / title are not used yet.
        ax = axes[j, i]
Example #8
0
# Script fragment: derive the transform methods for each variable group,
# build and transform the training data model, and create model / loss /
# optimizer.
# NOTE(review): varX, codeSel and DF are not defined in the visible lines.
mtdX = dbBasin.io.extractVarMtd(varX)
varXC = None
mtdXC = dbBasin.io.extractVarMtd(varXC)
# varY = ['runoff']+codeSel
varY = codeSel
# mtdY = ['QT']
mtdY = dbBasin.io.extractVarMtd(varY)

varYC = None
mtdYC = dbBasin.io.extractVarMtd(varYC)
trainSet = 'rmYr5'
testSet = 'pkYr5'  # not used in the visible lines

d1 = dbBasin.DataModelBasin(DF,
                            subset=trainSet,
                            varX=varX,
                            varY=varY,
                            varXC=varXC,
                            varYC=varYC)
d1.trans(mtdX=mtdX, mtdXC=mtdXC, mtdY=mtdY, mtdYC=mtdYC)
dataLst = d1.getData()
# second argument is the NaN option list passed to dealNaN
dataLst = trainBasin.dealNaN(dataLst, [1, 1, 0, 0])

# train
# reload the module so that edits to test.LSTM are picked up
importlib.reload(test)
sizeLst = trainBasin.getSize(dataLst)
[nx, nxc, ny, nyc, nt, ns] = sizeLst
# .cuda() is called unconditionally: this fragment requires a CUDA device
model = test.LSTM(nx + nxc, ny + nyc, 256).cuda()
lossFun = crit.RmseLoss().cuda()
optim = torch.optim.Adadelta(model.parameters())

rho = 365
Example #9
0
# Script fragment: create subsets on the 'weathering' data frame, build two
# data models and feed one input column through several transform methods.
dataName = 'weathering'
freq = 'D'  # only referenced by the commented-out constructor call below
# DM = dbBasin.DataFrameBasin.new(
#     dataName, siteNoLst, sdStr=sd, edStr=ed, freq=freq)
DF = dbBasin.DataFrameBasin(dataName)

# split by date: up to the end of 2009 ('B10') and from 2010 on ('A10')
DF.saveSubset('B10', ed='2009-12-31')
DF.saveSubset('A10', sd='2010-01-01')
yrIn = np.arange(1985, 2020, 5).tolist()  # 1985, 1990, ..., 2015
t1 = dbBasin.func.pickByYear(DF.t, yrIn)
t2 = dbBasin.func.pickByYear(DF.t, yrIn, pick=False)
DF.createSubset('pkYr5', dateLst=t1)
DF.createSubset('rmYr5', dateLst=t2)

codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955']
d1 = dbBasin.DataModelBasin(DF, subset='rmYr5', varY=codeSel)
d2 = dbBasin.DataModelBasin(DF, subset='pkYr5', varY=codeSel)
# NOTE(review): DF was built with DataFrameBasin above but is compared with
# DataModelBasin here - confirm which class this check is meant to test.
print(type(DF) is hydroDL.data.dbBasin.DataModelBasin)

tempFolder = os.path.join(kPath.dirCode, 'temp')
mtdX = dbBasin.io.extractVarMtd(d1.v)

# take input column 9 and repeat it 6 times along a new last axis,
# one copy per entry of mtdLst
x = d1.X[:, :, 9]
dataIn = np.repeat(x[:, :, None], 6, axis=2)
mtdLst = ['norm', 'log-norm', 'stan', 'log-stan', 'QT', 'log-QT']

q = d1.X[:, :, -1]  # last input column; not used in the visible lines

# transIn
# work on a copy so that dataIn keeps the untransformed values
a = dataIn.copy()
b, dictTran = transform.transIn(a, mtdLst=mtdLst)
Example #10
0
# Script fragment: transform the targets of one subset, save its statistics,
# load them into a second data model, back-transform and plot one site.
importlib.reload(figplot)

dm = dbBasin.DataFrameBasin('weathering')

# subset
dm.saveSubset('B10', ed='2009-12-31')
dm.saveSubset('A10', sd='2010-01-01')

yrIn = np.arange(1985, 2020, 5).tolist()  # 1985, 1990, ..., 2015
# here t1 is the "removed" selection (pick=False) and t2 the "picked" one
t1 = dbBasin.func.pickByYear(dm.t, yrIn, pick=False)
t2 = dbBasin.func.pickByYear(dm.t, yrIn)
dm.createSubset('rmYr5', dateLst=t1)
dm.createSubset('pkYr5', dateLst=t2)

codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955']
d1 = dbBasin.DataModelBasin(dm, varY=codeSel, subset='rmYr5')
d2 = dbBasin.DataModelBasin(dm, varY=codeSel, subset='pkYr5')

# the same method ('QT') for every target code
mtdY = ['QT' for var in codeSel]
d1.trans(mtdY=mtdY)
# statistics go from d1 to d2 through the 'temp' location
d1.saveStat('temp')
# d2.borrowStat(d1)
d2.loadStat('temp')
yy = d2.y
yP = d2.transOutY(yy)
yO = d2.Y

# TS
indS = 1  # index along the second axis of Y (the site selected for plotting)
# NOTE(review): yO and yP come from d2 but the time axis passed is d1.t -
# confirm that both data models share the same t.
fig, axes = figplot.multiTS(d1.t, [yO[:, indS, :], yP[:, indS, :]])
fig.show()