def runModel(dfX, dfG):
    """Run the loaded model on one time series and return predictions.

    This function relies on names that are not parameters and must already
    be in scope where it is defined: ``wqData``, ``model``, ``varX``,
    ``varXC``, ``varY``, ``varYC``, ``statX``, ``statXC``, ``statY`` and
    ``statYC``.

    Args:
        dfX: pandas DataFrame of time-varying inputs; its index is converted
            to ``datetime64[D]`` and becomes the ``date`` column of the
            result.
        dfG: pandas object holding the constant inputs; ``dfG.values`` must
            be castable to float.

    Returns:
        pandas.DataFrame with a ``date`` column followed by one column per
        entry of ``varY`` and then ``varYC``.
    """
    # test model
    # A length-1 axis is inserted at position 1 of the series and position 0
    # of the constants (presumably the "site" axis -- TODO confirm).
    xA = np.expand_dims(dfX.values, axis=1)
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy (1.24+), so the builtin is used directly.
    xcA = np.expand_dims(dfG.values.astype(float), axis=0)
    mtdX = wqData.extractVarMtd(varX)
    x = transform.transInAll(xA, mtdX, statLst=statX)
    mtdXC = wqData.extractVarMtd(varXC)
    xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
    yOut = trainTS.testModel(model, x, xc)

    # transfer out
    nt = len(dfX)
    ny = len(varY) if varY is not None else 0
    nyc = len(varYC) if varYC is not None else 0
    yP = np.full([nt, ny + nyc], np.nan)
    # The first ``ny`` output channels belong to varY, the rest to varYC.
    yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
    yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)

    # save output
    t = dfX.index.values.astype('datetime64[D]')
    colY = [] if varY is None else varY
    colYC = [] if varYC is None else varYC
    dfOut = pd.DataFrame(data=yP, columns=colY + colYC, index=t)
    dfOut.index.name = 'date'
    dfOut = dfOut.reset_index()
    return dfOut
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Test a saved model on a named subset, caching the result on disk.

    Args:
        outName: name of the model output; passed to ``loadMaster``,
            ``nameFolder``, ``loadStat`` and ``loadModel``.
        testset: subset name handed to ``wqData.transIn`` and embedded in the
            cache file name.
        wqData: data model to test on; when ``None`` one is built with
            ``waterQuality.DataModelWQ(master['dataName'])``.
        ep: epoch of the model to load; defaults to ``master['nEpoch']``.
        reTest: unless this is exactly ``False``, an existing cache file is
            ignored and the test is run again.

    Returns:
        Tuple ``(yP, ycP)``: the two model outputs after
        ``wqData.transOut``, either freshly computed or read from the cache.
    """
    master = loadMaster(outName)
    epoch = master['nEpoch'] if ep is None else ep
    cachePath = os.path.join(
        nameFolder(outName), 'testP-{}-Ep{}.npz'.format(testset, epoch))

    # Cache hit: return the stored arrays without touching the model.
    if os.path.exists(cachePath) and reTest is False:
        print('load saved test result')
        cached = np.load(cachePath, allow_pickle=True)
        return cached['yP'], cached['ycP']

    stats = loadStat(outName)
    model = loadModel(outName, ep=epoch)

    # Build the test inputs for the requested subset.
    if wqData is None:
        wqData = waterQuality.DataModelWQ(master['dataName'])
    varNames = (master['varX'], master['varXC'],
                master['varY'], master['varYC'])
    dataLst = wqData.transIn(subset=testset, statTup=stats, varTup=varNames)
    sizes = trainTS.getSize(dataLst)
    dataLst = trainTS.dealNaN(dataLst, master['optNaN'])
    x, xc = dataLst[0], dataLst[1]

    # Run the model, convert both outputs and store them for next time.
    yOut, ycOut = trainTS.testModel(model, x, xc, sizes[2])
    yP = wqData.transOut(yOut, stats[2], master['varY'])
    ycP = wqData.transOut(ycOut, stats[3], master['varYC'])
    np.savez(cachePath, yP=yP, ycP=ycP)
    return yP, ycP
def testModelSeq(outName, siteNoLst, wqData=None, ep=None, returnOut=False,
                 retest=False, sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run the sequence test for a list of sites and save one CSV per site.

    Predictions are written to ``<model folder>/seq-<sd>-<ed>-ep<ep>/<siteNo>``.
    When the model was trained with ``crit == 'SigmaLoss'`` a second file
    ``<siteNo>_sigma`` is written with the sigma output.

    Args:
        outName: name of the model output; passed to ``loadMaster``,
            ``nameFolder``, ``loadStat`` and ``loadModel``.
        siteNoLst: a site number or a list of site numbers.
        wqData: data model; when ``None`` and at least one site has to be
            predicted, it is built from ``master['dataName']``.
        ep: epoch of the model to load; defaults to ``master['nEpoch']``.
        returnOut: when true, read the saved CSV files back and return them.
        retest: when exactly ``True``, every site is predicted again;
            otherwise sites that already have a file in the save folder are
            skipped.
        sd: start date of the tested period.
        ed: end date of the tested period.

    Returns:
        When ``returnOut`` is true, a dict mapping each site number (and
        ``siteNo + '_sigma'`` for sigma models) to the DataFrame read from
        its CSV file. Otherwise ``None``.
    """
    # run sequence test for all sites, default to be from first date to last date
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    if not os.path.exists(saveDir):
        os.mkdir(saveDir)

    # A set gives O(1) membership checks while filtering the site list.
    siteSaved = set(os.listdir(saveDir))
    if retest is True:
        sitePredLst = siteNoLst
    else:
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaved
        ]

    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)

        # Output layout is the same for every site, so compute it once.
        ny = len(varY) if varY is not None else 0
        nyc = len(varYC) if varYC is not None else 0
        colY = [] if varY is None else varY
        colYC = [] if varYC is None else varYC
        colAll = colY + colYC

        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            freq = wqData.freq
            dfX = waterQuality.readSiteTS(siteNo, varX, freq=freq, area=area,
                                          sd=sd, ed=ed)
            xA = np.expand_dims(dfX.values, axis=1)
            # ``np.float`` was only an alias of the builtin ``float`` and has
            # been removed from NumPy (1.24+).
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)

            # transfer out
            nt = len(dfX)
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # Sigma models interleave two channels per variable: even
                # channels are the prediction, odd channels go through
                # sqrt(exp(.)) (presumably log-variance -> std; TODO confirm).
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(
                    yOut[:, 0, :ny * 2:2], statY, varY)
                yP[:, ny:] = wqData.transOut(
                    yOut[:, 0, ny * 2::2], statYC, varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)

            # save output
            t = dfX.index.values.astype('datetime64[D]')
            # A flat list of names keeps the columns a plain Index; wrapping
            # it in another list would create MultiIndex columns.
            dfOut = pd.DataFrame(data=yP, columns=colAll, index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=colAll, index=t)
                dfOutS.index.name = 'date'
                # Reset the sigma frame itself; resetting ``dfOut`` here
                # would write the predictions into the sigma file.
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)

    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    """Test a saved model on a named subset, with optional sigma outputs.

    Results are cached in ``testP-<testset>-Ep<ep>.npz`` inside the model
    folder and reused unless ``reTest`` is something other than ``False``.

    Args:
        outName: name of the model output; passed to ``loadMaster``,
            ``nameFolder``, ``loadStat`` and ``loadModel``.
        testset: subset name handed to ``wqData.transIn`` and embedded in the
            cache file name.
        wqData: data model to test on; when ``None`` one is built with
            ``waterQuality.DataModelWQ(master['dataName'])``.
        ep: epoch of the model to load; defaults to ``master['nEpoch']``.
        reTest: unless this is exactly ``False``, an existing cache file is
            ignored and the test is run again.

    Returns:
        ``(yP, ycP)`` for regular models, or ``(yP, ycP, sP, scP)`` when
        ``master['crit'] == 'SigmaLoss'``.
    """
    master = loadMaster(outName)
    doSigma = bool(master['crit'] == 'SigmaLoss')
    epoch = master['nEpoch'] if ep is None else ep
    cachePath = os.path.join(
        nameFolder(outName), 'testP-{}-Ep{}.npz'.format(testset, epoch))

    # Cache hit: hand back the stored arrays without running the model.
    if os.path.exists(cachePath) and reTest is False:
        print('load saved test result')
        cached = np.load(cachePath, allow_pickle=True)
        yP, ycP = cached['yP'], cached['ycP']
        if doSigma:
            return yP, ycP, cached['sP'], cached['scP']
        return yP, ycP

    stats = loadStat(outName)
    model = loadModel(outName, ep=epoch)

    # Build the test inputs for the requested subset.
    if wqData is None:
        wqData = waterQuality.DataModelWQ(master['dataName'])
    varNames = (master['varX'], master['varXC'],
                master['varY'], master['varYC'])
    dataLst = wqData.transIn(subset=testset, statTup=stats, varTup=varNames)
    sizes = trainTS.getSize(dataLst)
    # The [2, 2, 0, 0] NaN option is replaced by [0, 0, 0, 0] for testing;
    # the override is written back into ``master`` as in the training setup.
    if master['optNaN'] == [2, 2, 0, 0]:
        master['optNaN'] = [0, 0, 0, 0]
    dataLst = trainTS.dealNaN(dataLst, master['optNaN'])
    x, xc = dataLst[0], dataLst[1]
    ny = sizes[2]

    if doSigma:
        print('sigma model')
        # Two output channels per variable: even ones are the prediction,
        # odd ones are passed through sqrt(exp(.)) before conversion.
        yOut, ycOut = trainTS.testModel(model, x, xc, ny * 2)
        yP = wqData.transOut(yOut[:, :, ::2], stats[2], master['varY'])
        sP = wqData.transOut(
            np.sqrt(np.exp(yOut[:, :, 1::2])), stats[2], master['varY'])
        ycP = wqData.transOut(ycOut[:, ::2], stats[3], master['varYC'])
        scP = wqData.transOut(
            np.sqrt(np.exp(ycOut[:, 1::2])), stats[3], master['varYC'])
        np.savez(cachePath, yP=yP, ycP=ycP, sP=sP, scP=scP)
        return yP, ycP, sP, scP

    # Regular model: test point by point and cache the two outputs.
    yOut, ycOut = trainTS.testModel(model, x, xc, ny)
    yP = wqData.transOut(yOut, stats[2], master['varY'])
    ycP = wqData.transOut(ycOut, stats[3], master['varYC'])
    np.savez(cachePath, yP=yP, ycP=ycP)
    return yP, ycP
dfY = dfY.join(dfQ) dfX = dfX.join(dfF) dfY = dfY.join(dfC) dfX = dfX[varX] dfY = dfY[varY + varYC] # normalize concat input data dfX = dfX.interpolate(limit=nFill, limit_direction='both') xA = np.expand_dims(dfX.values, axis=1) xcA = np.expand_dims(tabG.loc[siteNo].values.astype(np.float), axis=0) mtdX = wqData.extractVarMtd(varX) x = transform.transInAll(xA, mtdX, statLst=statX) mtdXC = wqData.extractVarMtd(varXC) xc = transform.transInAll(xcA, mtdXC, statLst=statXC) yP = trainTS.testModel(model, x, xc) # # test # nt = len(dfX) # x, xc = trainTS.dealNaN((x, xc), dictP['optNaN'][:2]) # xx = np.concatenate([x, np.tile(xc[0, :], [1, nt, 1])], axis=-1).swapaxes(0, 1) # xT = torch.from_numpy(xx).float() # if torch.cuda.is_available(): # xT = xT.cuda() # # if i == 0 and ind1 == 0: # # try: # # yT = model(xT) # # except: # # print('first iteration failed again') # yT = model(xT) # yP = yT.detach().cpu().numpy()[:, 0, :]