Beispiel #1
0
def trainModelTS(outName):
    """Train the model configured under ``outName`` and save checkpoints.

    The configuration dict is read with ``loadMaster(outName)``. Training
    data is built from ``waterQuality.DataModelWQ`` and the statistics
    returned by ``transIn`` are stored with ``wrapStat``. Training is run in
    chunks of ``saveEpoch``; after every chunk the model and optimizer are
    written with ``saveModel``. The losses returned by every chunk are
    concatenated and written to ``loss.csv`` in the output folder.

    Args:
        outName: name of the experiment; passed to ``nameFolder``,
            ``loadMaster``, ``wrapStat`` and ``saveModel``.

    Raises:
        RuntimeError: if ``dictP['modelName']`` is not a supported model.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    wqData = waterQuality.DataModelWQ(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    else:
        # Fail here with a clear message instead of hitting an unbound
        # ``model`` a few lines further down.
        raise RuntimeError('Model not specified')
    lossFun = crit.RmseLoss()
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    optim = torch.optim.Adadelta(model.parameters())
    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    # start from a fresh log file for this run
    logFile = os.path.join(outFolder, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)
    # NOTE(review): checkpoints are labelled k + sEp, so the last label
    # exceeds nEpoch whenever nEpoch is not a multiple of saveEpoch.
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainTS.trainModel(dataTup,
                                                  model,
                                                  lossFun,
                                                  optim,
                                                  batchSize=dictP['batchSize'],
                                                  nEp=sEp,
                                                  cEp=k,
                                                  logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
Beispiel #2
0
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Return predictions ``(yP, ycP)`` of a saved model on ``testset``.

    Results are cached in ``testP-<testset>-Ep<ep>.npz`` inside the output
    folder of ``outName``. The cache is used when the file exists and
    ``reTest`` is ``False``; otherwise the model is evaluated and the cache
    file is (re)written.

    Args:
        outName: experiment name used to locate master, stats and model.
        testset: subset name passed to ``wqData.transIn``.
        wqData: optional data object; built from ``master['dataName']``
            when ``None``.
        ep: epoch of the checkpoint to load; defaults to ``master['nEpoch']``.
        reTest: pass anything other than ``False`` to ignore the cache.
    """
    master = loadMaster(outName)
    if ep is None:
        ep = master['nEpoch']
    testFile = os.path.join(nameFolder(outName),
                            'testP-{}-Ep{}.npz'.format(testset, ep))

    # cache hit: hand back the stored arrays
    if reTest is False and os.path.exists(testFile):
        print('load saved test result')
        cached = np.load(testFile, allow_pickle=True)
        return cached['yP'], cached['ycP']

    statTup = loadStat(outName)
    model = loadModel(outName, ep=ep)
    if wqData is None:
        wqData = waterQuality.DataModelWQ(master['dataName'])
    varTup = (master['varX'], master['varXC'], master['varY'],
              master['varYC'])
    testDataLst = wqData.transIn(subset=testset,
                                 statTup=statTup,
                                 varTup=varTup)
    # sizes are taken before NaN handling, as in the training code path
    ny = trainTS.getSize(testDataLst)[2]
    testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
    x, xc = testDataLst[0], testDataLst[1]

    # run the model and map outputs back with the stored statistics
    yOut, ycOut = trainTS.testModel(model, x, xc, ny)
    yP = wqData.transOut(yOut, statTup[2], master['varY'])
    ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
    np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
Beispiel #3
0
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Return predictions of a saved model on ``testset``.

    Returns ``(yP, ycP)``, or ``(yP, ycP, sP, scP)`` when the master file
    declares ``crit == 'SigmaLoss'``. Results are cached in
    ``testP-<testset>-Ep<ep>.npz`` in the output folder; the cache is used
    when the file exists and ``reTest`` is ``False``.

    Args:
        outName: experiment name used to locate master, stats and model.
        testset: subset name passed to ``wqData.transIn``.
        wqData: optional data object; built from ``master['dataName']``
            when ``None``.
        ep: epoch of the checkpoint to load; defaults to ``master['nEpoch']``.
        reTest: pass anything other than ``False`` to ignore the cache.
    """
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'

    if ep is None:
        ep = master['nEpoch']
    testFile = os.path.join(nameFolder(outName),
                            'testP-{}-Ep{}.npz'.format(testset, ep))

    # cache hit: hand back the stored arrays
    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        cached = np.load(testFile, allow_pickle=True)
        yP = cached['yP']
        ycP = cached['ycP']
        if doSigma:
            return yP, ycP, cached['sP'], cached['scP']
        return yP, ycP

    statTup = loadStat(outName)
    model = loadModel(outName, ep=ep)
    if wqData is None:
        wqData = waterQuality.DataModelWQ(master['dataName'])
    varTup = (master['varX'], master['varXC'], master['varY'],
              master['varYC'])
    testDataLst = wqData.transIn(subset=testset,
                                 statTup=statTup,
                                 varTup=varTup)
    ny = trainTS.getSize(testDataLst)[2]
    # the NaN option [2, 2, 0, 0] is replaced by [0, 0, 0, 0] for testing
    if master['optNaN'] == [2, 2, 0, 0]:
        master['optNaN'] = [0, 0, 0, 0]
    testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
    x, xc = testDataLst[0], testDataLst[1]

    if doSigma:
        print('sigma model')
        # the model emits two channels per target: even indices are read as
        # the prediction, odd indices go through sqrt(exp(.)) below
        ny = ny * 2
        yOut, ycOut = trainTS.testModel(model, x, xc, ny)
        yP = wqData.transOut(yOut[:, :, ::2], statTup[2], master['varY'])
        sP = wqData.transOut(np.sqrt(np.exp(yOut[:, :, 1::2])), statTup[2],
                             master['varY'])
        ycP = wqData.transOut(ycOut[:, ::2], statTup[3], master['varYC'])
        scP = wqData.transOut(np.sqrt(np.exp(ycOut[:, 1::2])), statTup[3],
                              master['varYC'])
        np.savez(testFile, yP=yP, ycP=ycP, sP=sP, scP=scP)
        return yP, ycP, sP, scP

    # test model - point by point
    yOut, ycOut = trainTS.testModel(model, x, xc, ny)
    yP = wqData.transOut(yOut, statTup[2], master['varY'])
    ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
    np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
Beispiel #4
0
def trainModelTS(outName):
    """Train the model configured under ``outName`` and save checkpoints.

    The configuration is read with ``loadMaster(outName)``; it selects the
    loss (``crit``), the network (``modelName``) and the optimizer
    (``optim``). Training runs in chunks of ``saveEpoch``; the model and
    optimizer are saved after every chunk and the collected losses are
    written to ``loss.csv`` in the output folder.

    Args:
        outName: name of the experiment; passed to ``nameFolder``,
            ``loadMaster``, ``wrapStat`` and ``saveModel``.

    Raises:
        RuntimeError: if the loss, model or optimizer named in the
            configuration is not one of the supported values.
    """
    outFolder = nameFolder(outName)
    cfg = loadMaster(outName)

    # load data; configurations without an 'rmFlag' entry fall back to False
    rmFlag = False
    if 'rmFlag' in cfg:
        rmFlag = cfg['rmFlag']
    wqData = waterQuality.DataModelWQ(cfg['dataName'], rmFlag)
    varTup = (cfg['varX'], cfg['varXC'], cfg['varY'], cfg['varYC'])
    dataTup, statTup = wqData.transIn(subset=cfg['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, cfg['optNaN'])
    wrapStat(outName, statTup)

    nx, nxc, ny, nyc, _nt, _ns = trainTS.getSize(dataTup)

    # pick the loss; SigmaLoss doubles the number of output channels
    if cfg['crit'] == 'RmseLoss':
        lossFun = crit.RmseLoss()
    elif cfg['crit'] == 'RmseLoss2D':
        lossFun = crit.RmseLoss2D()
    elif cfg['crit'] == 'SigmaLoss':
        lossFun = crit.SigmaLoss()
        ny, nyc = ny * 2, nyc * 2
    else:
        raise RuntimeError('loss function not specified')

    # pick the network
    modelName = cfg['modelName']
    if modelName == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=cfg['hiddenSize'])
    elif modelName == 'LstmModel':
        model = rnn.LstmModel(nx=nx + nxc,
                              ny=ny + nyc,
                              hiddenSize=cfg['hiddenSize'])
    elif modelName == 'AgeLSTM':
        model = rnn.AgeLSTM2(nx=nx + nxc,
                             ny=ny,
                             nyc=nyc,
                             rho=365,
                             nh=cfg['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    # pick the optimizer
    if cfg['optim'] != 'AdaDelta':
        raise RuntimeError('optimizor function not specified')
    optim = torch.optim.Adadelta(model.parameters())

    lossLst = []
    nEp = cfg['nEpoch']
    sEp = cfg['saveEpoch']
    # start from a fresh log file for this run
    logFile = os.path.join(outFolder, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)

    for startEp in range(0, nEp, sEp):
        model, optim, lossEp = trainTS.trainModel(dataTup,
                                                  model,
                                                  lossFun,
                                                  optim,
                                                  batchSize=cfg['batchSize'],
                                                  nEp=sEp,
                                                  cEp=startEp,
                                                  logFile=logFile)
        # checkpoint after every chunk
        saveModel(outName, startEp + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    pd.DataFrame(lossLst).to_csv(os.path.join(outFolder, 'loss.csv'),
                                 index=False,
                                 header=False)
Beispiel #5
0
# Script fragment: load a saved model and run a single forward pass on one
# batch drawn from the training subset.
# NOTE(review): `code` and `dataName` are not defined in this fragment;
# presumably they are set earlier in the script - confirm.
label = 'plain'
trainSet = '{}-Y1'.format(code)
testSet = '{}-Y2'.format(code)
outName = '{}-{}-{}-{}'.format(dataName, code, label, trainSet)

outFolder = basins.nameFolder(outName)
dictP = basins.loadMaster(outName)

# load data (same steps as the training function, but the statistics are
# not written back - see the disabled wrapStat call below)
rmFlag = dictP['rmFlag'] if 'rmFlag' in dictP else False
wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
# wrapStat(outName, statTup)
[nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
# checkpoint epoch is hard-coded to 500 here
model = basins.loadModel(outName, ep=500)

# NOTE(review): .cuda() is called unconditionally, so this fragment requires
# a CUDA device (no torch.cuda.is_available() guard).
lossFun = crit.RmseLoss()
lossFun = lossFun.cuda()
model = model.cuda()

# training parts
dataLst = dataTup
sizeLst = trainTS.getSize(dataLst)
[nx, nxc, ny, nyc, nt, ns] = sizeLst
# only nbatch is kept from the configured batch size; rho is overwritten
# with nt on the next line
rho, nbatch = dictP['batchSize']
rho = nt
batchSize = [rho, nbatch]
xT, yT = trainTS.subsetRandom(dataLst, batchSize, sizeLst)
# forward pass on the sampled batch
yP = model(xT)
Beispiel #6
0
# Script fragment: build the data for a single site and set up an AgeLSTM
# model, optimizer and loss.
siteNo = '07060710'
codeLst = ['00660', '00600']
# codeLst = ['00915', '00955']
nh = 256
# NOTE(review): batchSize is defined here but not used in this fragment.
batchSize = [365, 50]
# if not waterQuality.exist(siteNo):
#     wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo, rmFlag=False)
# inputs come from the data object's varF / varG lists; targets are the
# first entry of varQ plus the codes in codeLst
varX = wqData.varF
varXC = wqData.varG
varY = [wqData.varQ[0]]
varYC = codeLst
varTup = (varX, varXC, varY, varYC)
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
[nx, nxc, ny, nyc, nt, ns] = sizeLst

# per-site tables read directly from the source readers; they are not used
# further in this fragment
tabG = gageII.readData(varLst=varXC, siteNoLst=[siteNo])
tabG = gageII.updateCode(tabG)
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dfY = waterQuality.readSiteY(siteNo, varY)
dfYC = waterQuality.readSiteY(siteNo, varYC)

# re-import rnn before constructing the model (picks up edits made to the
# module during an interactive session)
importlib.reload(rnn)
model = rnn.AgeLSTM(nx=nx + nxc, ny=ny, nyc=nyc, nh=nh)
optim = torch.optim.Adadelta(model.parameters())
lossFun = crit.RmseMix()
if torch.cuda.is_available():
    lossFun = lossFun.cuda()
    model = model.cuda()
Beispiel #7
0
# Script fragment: build one dense input tensor per site and set up a
# CudnnLstmModel.
# NOTE(review): `varX`, `varXC`, `varY` and `wqData` are not defined in this
# fragment; presumably they are set earlier in the script - confirm.
varYC = None

varTup = (varX, varXC, varY, varYC)

# dataTup = wqData.extractData(varTup=varTup)
# xR, xcR, yR, ycR = dataTup
# mtdX = ['log-norm', 'norm', 'norm', 'norm', 'norm', 'norm', 'norm']
# x, statX = transform.transInAll(xR, mtdX)

dataTup, statTup = wqData.transIn(varTup=varTup)
# x, xc, y, yc keep the arrays as returned by transIn, before NaN handling
(x, xc, y, yc) = dataTup
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
(statX, statXC, statY, statYC) = statTup

# concatenate all data
[nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
xx = np.zeros([ns, nt, nx + nxc])
for k in range(ns):
    xTemp = dataTup[0][:, k, :]
    xcTemp = dataTup[1][k, :]
    # repeat the per-site constants along the time axis; the repeat count
    # follows nt (the time length xx is allocated with) instead of a
    # hard-coded 365
    temp = np.concatenate([xTemp, np.tile(xcTemp, [nt, 1])], axis=-1)
    xx[k, :, :] = temp
# NOTE(review): .cuda() is called unconditionally, so a CUDA device is
# required here.
xT = torch.from_numpy(xx).float().cuda()
# move the site axis first so yy lines up with xx
yy = np.swapaxes(dataTup[2], 0, 1)
yT = torch.from_numpy(yy).float().cuda()

# xT = xT[0:1, :, :]
# yT = yT[0:1, :, :]

# train model
model = rnn.CudnnLstmModel(nx=nx + nxc, ny=ny + nyc, hiddenSize=128)