def testWRTDS(dataName, trainSet, testSet, codeLst): DF = dbBasin.DataFrameBasin(dataName) # Calculate WRTDS from train and test set varX = ['00060'] varY = codeLst d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varX=varX, varY=varY) d2 = dbBasin.DataModelBasin(DF, subset=testSet, varX=varX, varY=varY) tt1 = pd.to_datetime(d1.t) yr1 = tt1.year.values t1 = yr1 + tt1.dayofyear.values / 365 sinT1 = np.sin(2 * np.pi * t1) cosT1 = np.cos(2 * np.pi * t1) tt2 = pd.to_datetime(d2.t) yr2 = tt2.year.values t2 = yr2 + tt2.dayofyear.values / 365 sinT2 = np.sin(2 * np.pi * t2) cosT2 = np.cos(2 * np.pi * t2) ### yOut = np.full([len(d2.t), len(d2.siteNoLst), len(varY)], np.nan) t0 = time.time() for indS, siteNo in enumerate(d2.siteNoLst): for indC, code in enumerate(varY): print('{} {} {} {}'.format(indS, siteNo, code, time.time() - t0)) y1 = d1.Y[:, indS, indC].copy() q1 = d1.X[:, indS, 0].copy() q1[q1 < 0] = 0 logq1 = np.log(q1 + sn) x1 = np.stack([logq1, yr1, sinT1, cosT1]).T y2 = d2.Y[:, indS, indC].copy() q2 = d2.X[:, indS, 0].copy() q2[q2 < 0] = 0 logq2 = np.log(q2 + sn) x2 = np.stack([logq2, yr2, sinT2, cosT2]).T [xx1, yy1], ind1 = utils.rmNan([x1, y1]) if testSet == 'all': [xx2], ind2 = utils.rmNan([x2]) else: [xx2, yy2], ind2 = utils.rmNan([x2, y2]) if len(ind1) < 40: continue for k in ind2: dY = np.abs(t2[k] - t1[ind1]) dQ = np.abs(logq2[k] - logq1[ind1]) dS = np.min(np.stack( [abs(np.ceil(dY) - dY), abs(dY - np.floor(dY))]), axis=0) d = np.stack([dY, dQ, dS]) ww, ind = calWeight(d) model = sm.WLS(yy1[ind], xx1[ind], weights=ww).fit() yp = model.predict(x2[k, :])[0] yOut[k, indS, indC] = yp return yOut
def trainModel(outName):
    """Train the model described by the master dict stored under ``outName``.

    Loads the data set named in the master dict, normalizes it with the
    configured transforms, trains in chunks of ``saveEpoch`` epochs (saving
    a model checkpoint after each chunk), and writes the per-epoch loss
    history to ``loss.csv`` in the output folder.

    Parameters
    ----------
    outName : model output name; resolved to a folder via ``nameFolder``
        and to a config dict via ``loadMaster``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)
    # load data
    DF = dbBasin.DataFrameBasin(dictP['dataName'])
    dictVar = {k: dictP[k] for k in ('varX', 'varXC', 'varY', 'varYC')}
    DM = dbBasin.DataModelBasin(DF, subset=dictP['trainSet'], **dictVar)
    if dictP['borrowStat'] is not None:
        # reuse normalization statistics from another model run
        DM.loadStat(dictP['borrowStat'])
    DM.trans(mtdX=dictP['mtdX'], mtdXC=dictP['mtdXC'],
             mtdY=dictP['mtdY'], mtdYC=dictP['mtdYC'])
    # persist the stats so testModel can undo the normalization later
    DM.saveStat(outFolder)
    dataTup = DM.getData()
    dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
    # define loss — crit class looked up by name from the master dict
    lossFun = getattr(crit, dictP['crit'])()
    # define model
    model = defineModel(dataTup, dictP)
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        # only AdaDelta is currently supported
        raise RuntimeError('optimizor function not specified')
    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start from a fresh log each run
    if os.path.exists(logFile):
        os.remove(logFile)
    # train sEp epochs at a time, checkpointing after each chunk
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainBasin.trainModel(
            dataTup, model, lossFun, optim, batchSize=dictP['batchSize'],
            nEp=sEp, cEp=k, logFile=logFile,
            optBatch=dictP['optBatch'], nIterEp=dictP['nIterEp'])
        # save model
        saveModelState(outName, k+sEp, model, optim=optim)
        lossLst = lossLst+lossEp
    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
def testModel(outName, DF=None, testSet='all', ep=None, reTest=False,
              batchSize=20):
    """Evaluate a trained model on ``testSet``, caching results on disk.

    If a result file for this (testSet, epoch) pair already exists and
    ``reTest`` is False, the cached arrays are returned instead of
    re-running the model.

    Parameters
    ----------
    outName : model output name (resolves the output folder and config).
    DF : optional pre-loaded DataFrameBasin; loaded from the config's
        dataName when None.
    testSet : subset name to evaluate on.
    ep : checkpoint epoch; defaults to the configured final epoch.
    reTest : force re-evaluation even if a cached result exists.
    batchSize : site batch size passed to the point-by-point test loop.

    Returns
    -------
    (yP, ycP) : de-normalized time-series and constant predictions.
    """
    dictP = loadMaster(outName)
    if ep is None:
        ep = dictP['nEpoch']
    outFolder = nameFolder(outName)
    testFile = os.path.join(
        outFolder, 'testP-{}-Ep{}.npz'.format(testSet, ep))
    # cached-result fast path
    if reTest is False and os.path.exists(testFile):
        print('load saved test result')
        saved = np.load(testFile, allow_pickle=True)
        return saved['yP'], saved['ycP']
    # load test data
    if DF is None:
        DF = dbBasin.DataFrameBasin(dictP['dataName'])
    varKeys = ('varX', 'varXC', 'varY', 'varYC')
    DM = dbBasin.DataModelBasin(
        DF, subset=testSet, **{k: dictP[k] for k in varKeys})
    # normalization stats saved at training time
    DM.loadStat(outFolder)
    tup = trainBasin.dealNaN(DM.getData(), dictP['optNaN'])
    model = loadModelState(outName, ep, defineModel(tup, dictP))
    # test
    x, xc = tup[0], tup[1]
    ny = np.shape(tup[2])[2]
    # test model - point by point
    yOut, ycOut = trainBasin.testModel(
        model, x, xc, ny, batchSize=batchSize)
    # undo normalization and cache to disk
    yP = DM.transOutY(yOut)
    ycP = DM.transOutYC(ycOut)
    np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
# Build paired pick/remove subsets of the time axis three different ways
# (every 5th year, every 5th day, 20% random), then plot train vs test
# observations for one site.
# pick by year
yrIn = np.arange(1985, 2020, 5).tolist()
t1 = dbBasin.func.pickByYear(DF.t, yrIn)
t2 = dbBasin.func.pickByYear(DF.t, yrIn, pick=False)
DF.createSubset('pkYr5', dateLst=t1)
DF.createSubset('rmYr5', dateLst=t2)
# pick by day
t1 = dbBasin.func.pickByDay(DF.t, dBase=5, dSel=1)
t2 = dbBasin.func.pickByDay(DF.t, dBase=5, dSel=1, pick=False)
DF.createSubset('pkD5', dateLst=t1)
DF.createSubset('rmD5', dateLst=t2)
# pick by random
t1 = dbBasin.func.pickRandT(DF.t, 0.2)
t2 = dbBasin.func.pickRandT(DF.t, 0.2, pick=False)
DF.createSubset('pkRT20', dateLst=t1)
DF.createSubset('rmRT20', dateLst=t2)
# plot
codeSel = ['00915', '00925', '00930', '00935',
           '00940', '00945', '00955']
# FIX: subsets were created above as 'pkRT20'/'rmRT20' but previously
# referenced here as 'pkR20'/'rmR20', which do not exist in this script.
d1 = dbBasin.DataModelBasin(DF, subset='pkRT20', varY=codeSel)
d2 = dbBasin.DataModelBasin(DF, subset='rmRT20', varY=codeSel)
k = 0
fig, axes = figplot.multiTS(
    d2.t, [d2.Y[:, k, :], d1.Y[:, k, :]], cLst='br', styLst='..')
fig.show()
# De-normalize two sets of LSTM predictions (yP1, yP2) back to physical
# units using per-site mean/std stored in the basin attribute table, load
# precomputed WRTDS predictions, and compute per-site/per-code error stats
# for sites with enough observations.
yOut1 = np.ndarray(yP1.shape)
yOut2 = np.ndarray(yP2.shape)
for k, code in enumerate(codeLst):
    # per-site mean ('<code>-M') and std ('<code>-S') attribute columns
    m = DF.g[:, DF.varG.index(code + '-M')]
    s = DF.g[:, DF.varG.index(code + '-S')]
    yOut1[:, :, k] = yP1[:, :, k] * s + m
    yOut2[:, :, k] = yP2[:, :, k] * s + m
# WRTDS
# yW = WRTDS.testWRTDS(dataName, trainSet, testSet, codeLst)
# load WRTDS predictions cached by an earlier run instead of recomputing
dirRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-dbBasin')
fileName = '{}-{}-{}'.format(dataName, trainSet, testSet)
yW = np.load(os.path.join(dirRoot, fileName) + '.npz')['arr_0']
# correlation matrix
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varY=codeLst)
d2 = dbBasin.DataModelBasin(DF, subset=testSet, varY=codeLst)
siteNoLst = DF.siteNoLst
# [site, code, 4 stats] result holders; NaN where sample counts are low
mat1 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
mat2 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
mat3 = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
for indS, siteNo in enumerate(siteNoLst):
    for indC, code in enumerate(codeLst):
        # observation counts in train and test for this site/code
        n1 = np.sum(~np.isnan(d1.Y[:, indS, indC]), axis=0)
        n2 = np.sum(~np.isnan(d2.Y[:, indS, indC]), axis=0)
        if n1 >= 160 and n2 >= 40:
            stat1 = utils.stat.calStat(yOut1[:, indS, indC],
                                       d2.Y[:, indS, indC])
            stat2 = utils.stat.calStat(yOut2[:, indS, indC],
                                       d2.Y[:, indS, indC])
            # NOTE(review): stat1..stat3 are presumably written into
            # mat1..mat3 just past this chunk — the visible code ends here.
            stat3 = utils.stat.calStat(yW[:, indS, indC],
                                       d2.Y[:, indS, indC])
# One-off migration script: convert a pickled full-model checkpoint into a
# state_dict file, then rebuild the model from config and load that state.
outName = 'weathering-FPR2QC-t365-B10'
ep = 100
# save
outFolder = basinFull.nameFolder(outName)
modelFile = os.path.join(outFolder, 'model_ep{}'.format(ep))
# load the legacy full-model pickle and re-save only its state_dict
model = torch.load(modelFile)
modelStateFile = os.path.join(outFolder, 'modelState_ep{}'.format(ep))
torch.save(model.state_dict(), modelStateFile)
# load
dictP = basinFull.loadMaster(outName)
DF = dbBasin.DataFrameBasin(dictP['dataName'])
dictVar = {k: dictP[k] for k in ('varX', 'varXC', 'varY', 'varYC')}
DM = dbBasin.DataModelBasin(DF, subset='A10', **dictVar)
DM.loadStat(outFolder)
dataTup = DM.getData()
# sizes: inputs, constant inputs, outputs, constant outputs, time, sites
[nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
# rebuild the architecture named in the master dict so the state_dict
# can be loaded into a freshly constructed model
if dictP['modelName'] == 'CudnnLSTM':
    model = rnn.CudnnLstmModel(nx=nx + nxc, ny=ny + nyc,
                               hiddenSize=dictP['hiddenSize'])
elif dictP['modelName'] == 'LstmModel':
    model = rnn.LstmModel(nx=nx + nxc, ny=ny + nyc,
                          hiddenSize=dictP['hiddenSize'])
else:
    raise RuntimeError('Model not specified')
model.load_state_dict(torch.load(modelStateFile))
# Exploration script: apply site-local normalization to the weathering
# data set, then plot per-site histograms of the normalized concentrations
# for the full record, the training subset, and the test subset.
dictSiteName = 'dictWeathering.json'
with open(os.path.join(dirSel, dictSiteName)) as f:
    dictSite = json.load(f)
siteNoLst = dictSite['k12']
dataName = 'weathering'
DF = dbBasin.DataFrameBasin(dataName)
trainSet = 'rmYr5'
testSet = 'pkYr5'
# input
codeSel = ['00915', '00925', '00930', '00935',
           '00940', '00945', '00955']
subset = trainSet
# normalize per site using statistics from the training subset only
DF = dbBasin.localNorm(DF, subset=trainSet)
DM = dbBasin.DataModelBasin(DF)
# plot
code = '00915'
nfy, nfx = (4, 3)  # subplot grid: one panel per site
bins = 20
data0 = DF.c
data1 = DF.extractSubset(DF.c, subsetName=trainSet)
data2 = DF.extractSubset(DF.c, subsetName=testSet)
dataLst = [data0, data1, data2]
titleLst = ['all', 'train', 'test']
for data, title in zip(dataLst, titleLst):
    fig, axes = plt.subplots(nfy, nfx)
    for k, siteNo in enumerate(DF.siteNoLst):
        # map flat site index to the 2-d subplot grid
        j, i = utils.index2d(k, nfy, nfx)
        # NOTE(review): the visible chunk ends here — the histogram call
        # presumably follows past this point.
        ax = axes[j, i]
# Training setup script: choose variables and their normalization methods,
# build the training data model, and construct an LSTM + loss + optimizer.
# NOTE(review): varX is referenced below but defined before this chunk —
# confirm upstream.
mtdX = dbBasin.io.extractVarMtd(varX)
varXC = None
mtdXC = dbBasin.io.extractVarMtd(varXC)
# varY = ['runoff']+codeSel
varY = codeSel
# mtdY = ['QT']
mtdY = dbBasin.io.extractVarMtd(varY)
varYC = None
mtdYC = dbBasin.io.extractVarMtd(varYC)
trainSet = 'rmYr5'
testSet = 'pkYr5'
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varX=varX,
                            varY=varY, varXC=varXC, varYC=varYC)
d1.trans(mtdX=mtdX, mtdXC=mtdXC, mtdY=mtdY, mtdYC=mtdYC)
dataLst = d1.getData()
# fill NaN in inputs (flag 1) but leave targets untouched (flag 0)
dataLst = trainBasin.dealNaN(dataLst, [1, 1, 0, 0])
# train
importlib.reload(test)
sizeLst = trainBasin.getSize(dataLst)
[nx, nxc, ny, nyc, nt, ns] = sizeLst
# hidden size 256; inputs are time series + constants concatenated
model = test.LSTM(nx + nxc, ny + nyc, 256).cuda()
lossFun = crit.RmseLoss().cuda()
optim = torch.optim.Adadelta(model.parameters())
rho = 365  # training sequence length in days
# Scratch script: build year-based subsets of the weathering data set and
# exercise the transform module on a repeated copy of one input variable.
dataName = 'weathering'
freq = 'D'
# DM = dbBasin.DataFrameBasin.new(
#     dataName, siteNoLst, sdStr=sd, edStr=ed, freq=freq)
DF = dbBasin.DataFrameBasin(dataName)
# before/after-2010 split
DF.saveSubset('B10', ed='2009-12-31')
DF.saveSubset('A10', sd='2010-01-01')
# pick every 5th year out / keep the rest
yrIn = np.arange(1985, 2020, 5).tolist()
t1 = dbBasin.func.pickByYear(DF.t, yrIn)
t2 = dbBasin.func.pickByYear(DF.t, yrIn, pick=False)
DF.createSubset('pkYr5', dateLst=t1)
DF.createSubset('rmYr5', dateLst=t2)
codeSel = ['00915', '00925', '00930', '00935',
           '00940', '00945', '00955']
d1 = dbBasin.DataModelBasin(DF, subset='rmYr5', varY=codeSel)
d2 = dbBasin.DataModelBasin(DF, subset='pkYr5', varY=codeSel)
# debug type check — DF is a DataFrameBasin, so this prints False;
# presumably left over from interactive exploration
print(type(DF) is hydroDL.data.dbBasin.DataModelBasin)
tempFolder = os.path.join(kPath.dirCode, 'temp')
# NOTE(review): d1.v looks like the variable list attribute — confirm
mtdX = dbBasin.io.extractVarMtd(d1.v)
x = d1.X[:, :, 9]
# replicate one input variable 6 times to test 6 transform methods at once
dataIn = np.repeat(x[:, :, None], 6, axis=2)
mtdLst = ['norm', 'log-norm', 'stan', 'log-stan', 'QT', 'log-QT']
q = d1.X[:, :, -1]
# transIn
a = dataIn.copy()
b, dictTran = transform.transIn(a, mtdLst=mtdLst)
# Scratch script: verify that normalization stats saved from a training
# data model can be loaded into a test data model and that transOutY
# round-trips predictions back to observation space (visual check).
importlib.reload(figplot)
dm = dbBasin.DataFrameBasin('weathering')
# subset
dm.saveSubset('B10', ed='2009-12-31')
dm.saveSubset('A10', sd='2010-01-01')
yrIn = np.arange(1985, 2020, 5).tolist()
t1 = dbBasin.func.pickByYear(dm.t, yrIn, pick=False)
t2 = dbBasin.func.pickByYear(dm.t, yrIn)
dm.createSubset('rmYr5', dateLst=t1)
dm.createSubset('pkYr5', dateLst=t2)
codeSel = ['00915', '00925', '00930', '00935',
           '00940', '00945', '00955']
d1 = dbBasin.DataModelBasin(dm, varY=codeSel, subset='rmYr5')
d2 = dbBasin.DataModelBasin(dm, varY=codeSel, subset='pkYr5')
# quantile-transform every selected code using training-set stats
mtdY = ['QT' for var in codeSel]
d1.trans(mtdY=mtdY)
d1.saveStat('temp')
# d2.borrowStat(d1)
# load the stats saved from d1 instead of borrowing in memory
d2.loadStat('temp')
yy = d2.y        # normalized targets
yP = d2.transOutY(yy)  # de-normalized — should match the observations
yO = d2.Y        # raw observations
# TS
indS = 1
fig, axes = figplot.multiTS(d1.t, [yO[:, indS, :], yP[:, indS, :]])
fig.show()