def trainModelTS(outName):
    """Train the time-series model configured for experiment ``outName``.

    The master dictionary returned by ``loadMaster(outName)`` supplies the
    data set name, variable lists, NaN options, model name, hidden size,
    batch size and the epoch settings.  Training runs in chunks of
    ``saveEpoch`` epochs; after each chunk the model and optimizer are saved
    with ``saveModel``.  The accumulated loss list is written to ``loss.csv``
    in the output folder.

    Args:
        outName: experiment name, passed to ``nameFolder``, ``loadMaster``,
            ``wrapStat`` and ``saveModel``.

    Raises:
        RuntimeError: if ``dictP['modelName']`` is not ``'CudnnLSTM'``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    wqData = waterQuality.DataModelWQ(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(
        subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(
            nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
    else:
        # Fail early with a clear message instead of a NameError on the
        # first use of the unbound ``model`` below.
        raise RuntimeError('Model not specified')
    lossFun = crit.RmseLoss()
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    optim = torch.optim.Adadelta(model.parameters())

    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    # start from a fresh log file for this run
    logFile = os.path.join(outFolder, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)
    # NOTE(review): every chunk trains sEp epochs, so when nEp is not a
    # multiple of sEp the last checkpoint is saved at an epoch > nEp.
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainTS.trainModel(
            dataTup, model, lossFun, optim, batchSize=dictP['batchSize'],
            nEp=sEp, cEp=k, logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Return predictions of a trained model on the subset ``testset``.

    Results are cached in ``testP-<testset>-Ep<ep>.npz`` inside the output
    folder of ``outName``.  If that file exists and ``reTest`` is ``False``
    the cached arrays are returned; otherwise the model saved at epoch
    ``ep`` is loaded, evaluated, and the result is written to the cache.

    Args:
        outName: experiment name, passed to ``loadMaster``, ``nameFolder``,
            ``loadStat`` and ``loadModel``.
        testset: subset name passed to ``wqData.transIn``.
        wqData: optional data model; when ``None`` it is created from
            ``master['dataName']``.
        ep: epoch of the saved model; defaults to ``master['nEpoch']``.
        reTest: when not ``False``, ignore an existing cache file.

    Returns:
        Tuple ``(yP, ycP)`` of predictions after ``wqData.transOut``.
    """
    # load master
    master = loadMaster(outName)
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testset, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        # The context manager closes the underlying file handle; the arrays
        # are read into memory on item access, so they stay valid afterwards.
        with np.load(testFile, allow_pickle=True) as npz:
            yP = npz['yP']
            ycP = npz['ycP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        varTup = (master['varX'], master['varXC'],
                  master['varY'], master['varYC'])
        testDataLst = wqData.transIn(
            subset=testset, statTup=statTup, varTup=varTup)
        # sizes are taken before NaN handling, as in the original code
        sizeLst = trainTS.getSize(testDataLst)
        testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
        x = testDataLst[0]
        xc = testDataLst[1]
        ny = sizeLst[2]
        # test model - point by point
        yOut, ycOut = trainTS.testModel(model, x, xc, ny)
        yP = wqData.transOut(yOut, statTup[2], master['varY'])
        ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Return predictions (and sigmas for a SigmaLoss model) on ``testset``.

    Results are cached in ``testP-<testset>-Ep<ep>.npz`` inside the output
    folder of ``outName``.  If that file exists and ``reTest`` is ``False``
    the cached arrays are returned; otherwise the model saved at epoch
    ``ep`` is loaded, evaluated, and the result is written to the cache.

    When ``master['crit'] == 'SigmaLoss'`` the model output is read as
    interleaved channels: even indices are transformed back as predictions,
    and odd indices are passed through ``sqrt(exp(.))`` before the same
    back-transform.

    Args:
        outName: experiment name, passed to ``loadMaster``, ``nameFolder``,
            ``loadStat`` and ``loadModel``.
        testset: subset name passed to ``wqData.transIn``.
        wqData: optional data model; when ``None`` it is created from
            ``master['dataName']``.
        ep: epoch of the saved model; defaults to ``master['nEpoch']``.
        reTest: when not ``False``, ignore an existing cache file.

    Returns:
        ``(yP, ycP, sP, scP)`` for a SigmaLoss model, otherwise
        ``(yP, ycP)``.
    """
    # load master
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testset, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        # The context manager closes the underlying file handle; the arrays
        # are read into memory on item access, so they stay valid afterwards.
        with np.load(testFile, allow_pickle=True) as npz:
            yP = npz['yP']
            ycP = npz['ycP']
            if doSigma:
                sP = npz['sP']
                scP = npz['scP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        varTup = (master['varX'], master['varXC'],
                  master['varY'], master['varYC'])
        testDataLst = wqData.transIn(
            subset=testset, statTup=statTup, varTup=varTup)
        sizeLst = trainTS.getSize(testDataLst)
        # At test time the [2, 2, 0, 0] NaN option is replaced by
        # [0, 0, 0, 0]; a local is used so the master dict is not mutated.
        optNaN = master['optNaN']
        if optNaN == [2, 2, 0, 0]:
            optNaN = [0, 0, 0, 0]
        testDataLst = trainTS.dealNaN(testDataLst, optNaN)
        x = testDataLst[0]
        xc = testDataLst[1]
        ny = sizeLst[2]
        if not doSigma:
            # test model - point by point
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(yOut, statTup[2], master['varY'])
            ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP)
        else:
            print('sigma model')
            # the sigma model emits two channels per target variable
            ny = ny * 2
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(
                yOut[:, :, ::2], statTup[2], master['varY'])
            sP = wqData.transOut(
                np.sqrt(np.exp(yOut[:, :, 1::2])), statTup[2],
                master['varY'])
            ycP = wqData.transOut(
                ycOut[:, ::2], statTup[3], master['varYC'])
            scP = wqData.transOut(
                np.sqrt(np.exp(ycOut[:, 1::2])), statTup[3],
                master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP, sP=sP, scP=scP)

    if doSigma:
        return yP, ycP, sP, scP
    else:
        return yP, ycP
def trainModelTS(outName):
    """Train the model described by the master file of ``outName``.

    Reads the experiment settings via ``loadMaster``, builds the training
    data, selects the loss, network and optimizer named in the settings,
    then trains in chunks of ``saveEpoch`` epochs.  The model is saved after
    every chunk and the collected losses are written to ``loss.csv`` in the
    output folder.

    Raises:
        RuntimeError: when the configured loss, model or optimizer name is
            not one of the supported values.
    """
    saveDir = nameFolder(outName)
    opt = loadMaster(outName)

    # ---- data ----
    if 'rmFlag' in opt:
        rmFlag = opt['rmFlag']
    else:
        rmFlag = False
    wqData = waterQuality.DataModelWQ(opt['dataName'], rmFlag)
    varTup = (opt['varX'], opt['varXC'], opt['varY'], opt['varYC'])
    dataTup, statTup = wqData.transIn(subset=opt['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, opt['optNaN'])
    wrapStat(outName, statTup)

    nx, nxc, ny, nyc, nt, ns = trainTS.getSize(dataTup)

    # ---- loss ----
    critName = opt['crit']
    if critName == 'SigmaLoss':
        lossFun = crit.SigmaLoss()
        # the sigma loss needs two output channels per target variable
        ny, nyc = ny * 2, nyc * 2
    elif critName == 'RmseLoss2D':
        lossFun = crit.RmseLoss2D()
    elif critName == 'RmseLoss':
        lossFun = crit.RmseLoss()
    else:
        raise RuntimeError('loss function not specified')

    # ---- network ----
    nInput = nx + nxc
    modelName = opt['modelName']
    if modelName == 'AgeLSTM':
        model = rnn.AgeLSTM2(
            nx=nInput, ny=ny, nyc=nyc, rho=365, nh=opt['hiddenSize'])
    elif modelName == 'LstmModel':
        model = rnn.LstmModel(
            nx=nInput, ny=ny + nyc, hiddenSize=opt['hiddenSize'])
    elif modelName == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(
            nx=nInput, ny=ny + nyc, hiddenSize=opt['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    # ---- optimizer ----
    if opt['optim'] != 'AdaDelta':
        raise RuntimeError('optimizor function not specified')
    optim = torch.optim.Adadelta(model.parameters())

    # ---- training, saving every `chunk` epochs ----
    history = list()
    total = opt['nEpoch']
    chunk = opt['saveEpoch']
    logFile = os.path.join(saveDir, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)
    for start in range(0, total, chunk):
        model, optim, chunkLoss = trainTS.trainModel(
            dataTup, model, lossFun, optim,
            batchSize=opt['batchSize'], nEp=chunk, cEp=start,
            logFile=logFile)
        saveModel(outName, start + chunk, model, optim=optim)
        history = history + chunkLoss

    lossFile = os.path.join(saveDir, 'loss.csv')
    pd.DataFrame(history).to_csv(lossFile, index=False, header=False)
# Script fragment: load the 'plain' model trained on the first subset of
# `code`, rebuild its training data, and run one forward pass on a random
# batch.  `code` and `dataName` are expected to be defined earlier in the
# script.
label = 'plain'
trainSet = '{}-Y1'.format(code)
testSet = '{}-Y2'.format(code)
outName = '{}-{}-{}-{}'.format(dataName, code, label, trainSet)
outFolder = basins.nameFolder(outName)
dictP = basins.loadMaster(outName)

# load data
rmFlag = dictP['rmFlag'] if 'rmFlag' in dictP else False
wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
# wrapStat(outName, statTup)

# Sizes are computed once; the original queried getSize twice on the same
# data (dataLst is only an alias of dataTup).
dataLst = dataTup
sizeLst = trainTS.getSize(dataLst)
[nx, nxc, ny, nyc, nt, ns] = sizeLst

model = basins.loadModel(outName, ep=500)
lossFun = crit.RmseLoss()
# Move to the GPU only when one is present, so the script does not fail on
# CPU-only machines.
if torch.cuda.is_available():
    lossFun = lossFun.cuda()
    model = model.cuda()

# training parts
rho, nbatch = dictP['batchSize']
# override the configured window length with the full series length
rho = nt
batchSize = [rho, nbatch]
xT, yT = trainTS.subsetRandom(dataLst, batchSize, sizeLst)
yP = model(xT)
# Script fragment: prepare data for a single site and build an AgeLSTM
# model together with its optimizer and loss function.

# ---- settings ----
siteNo = '07060710'
codeLst = ['00660', '00600']
# codeLst = ['00915', '00955']
nh = 256
batchSize = [365, 50]

# ---- data model ----
# if not waterQuality.exist(siteNo):
#     wqData = waterQuality.DataModelWQ.new(siteNo, [siteNo])
wqData = waterQuality.DataModelWQ(siteNo, rmFlag=False)
varX, varXC = wqData.varF, wqData.varG
varY, varYC = [wqData.varQ[0]], codeLst
varTup = (varX, varXC, varY, varYC)
dataTup, statTup = wqData.transIn(varTup=varTup)
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
sizeLst = trainTS.getSize(dataTup)
nx, nxc, ny, nyc, nt, ns = sizeLst

# ---- raw tables for the same site ----
tabG = gageII.updateCode(gageII.readData(varLst=varXC, siteNoLst=[siteNo]))
dfX = waterQuality.readSiteX(siteNo, varX, nFill=5)
dfY = waterQuality.readSiteY(siteNo, varY)
dfYC = waterQuality.readSiteY(siteNo, varYC)

# ---- model, optimizer, loss ----
# reload so that edits made to the rnn module are picked up
importlib.reload(rnn)
model = rnn.AgeLSTM(nx=nx + nxc, ny=ny, nyc=nyc, nh=nh)
optim = torch.optim.Adadelta(model.parameters())
lossFun = crit.RmseMix()
if torch.cuda.is_available():
    lossFun = lossFun.cuda()
    model = model.cuda()
# Script fragment: transform the data without constant targets, stack the
# time-varying and constant inputs of every site into one array, and create
# the tensors and the LSTM model.  `wqData`, `varX`, `varXC` and `varY` are
# expected to be defined earlier in the script.
varYC = None
varTup = (varX, varXC, varY, varYC)
# dataTup = wqData.extractData(varTup=varTup)
# xR, xcR, yR, ycR = dataTup
# mtdX = ['log-norm', 'norm', 'norm', 'norm', 'norm', 'norm', 'norm']
# x, statX = transform.transInAll(xR, mtdX)
dataTup, statTup = wqData.transIn(varTup=varTup)
(x, xc, y, yc) = dataTup
dataTup = trainTS.dealNaN(dataTup, [1, 1, 0, 0])
(statX, statXC, statY, statYC) = statTup

# concatenate all data
[nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
xx = np.zeros([ns, nt, nx + nxc])
for k in range(ns):
    xTemp = dataTup[0][:, k, :]
    xcTemp = dataTup[1][k, :]
    # Repeat the constant inputs once per time step.  The row count must be
    # nt to match xx[k]; the former hard-coded 365 only worked when
    # nt == 365.
    temp = np.concatenate([xTemp, np.tile(xcTemp, [nt, 1])], axis=-1)
    xx[k, :, :] = temp
xT = torch.from_numpy(xx).float().cuda()
# swap the first two axes of the targets so they are ordered like xx
yy = np.swapaxes(dataTup[2], 0, 1)
yT = torch.from_numpy(yy).float().cuda()
# xT = xT[0:1, :, :]
# yT = yT[0:1, :, :]

# train model
model = rnn.CudnnLstmModel(nx=nx + nxc, ny=ny + nyc, hiddenSize=128)