def funcPoint(iP, axP):
    """Plot streamflow ('00060') of two model runs and the observations.

    iP indexes the module-level ``siteNoLst``; axP is the axes passed to
    ``axplot.plotTS``.  The two runs are ``outLst[0]`` and ``outLst[1]``.
    """
    site = siteNoLst[iP]
    # loadSeq results are unpacked as (prediction, observation) frames.
    predA, _obsA = basins.loadSeq(outLst[0], site)
    predB, obsB = basins.loadSeq(outLst[1], site)
    dates = predA['date'].values.astype(np.datetime64)
    series = [predA['00060'], predB['00060'], obsB['00060']]
    axplot.plotTS(axP, dates, series,
                  tBar=np.datetime64('2000-01-01'),
                  legLst=['w/ C', 'w/o C', 'obs'],
                  styLst='---', cLst='bgr')
def funcPoint(iP, axP):
    """Fill six diagnostic panels for the site at position ``iP``.

    ``axP`` must unpack into six axes, in this order: time series, three
    histogram panels, and two periodogram panels (streamflow, then the
    selected ``code``).  The site list, model/data names, ``code``,
    ``shortName``, ``corrMat``, ``indS`` and ``iCode`` are read from module
    scope.
    """
    [axTS, axH1, axH2, axH3, axP1, axP2] = axP
    siteNo = siteNoLstCode[iP]
    outName1 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    outName2 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QT_C', trainSet)
    dfL1 = basins.loadSeq(outName1, siteNo)
    dfL2 = basins.loadSeq(outName2, siteNo)
    dfW = pd.read_csv(os.path.join(dirWrtds, 'output', siteNo),
                      index_col=None).set_index('date')
    dfO = waterQuality.readSiteTS(
        siteNo, codeLst + ['00060'], freq=wqData.freq)
    dfOD = waterQuality.readSiteTS(siteNo, codeLst + ['00060'], freq='D')
    t = dfO.index

    # ts
    tBar = np.datetime64('2010-01-01')
    sd = np.datetime64('1980-01-01')
    legLst = ['LSTM QTFP', 'LSTM QT', 'WRTDS', 'Obs']
    axplot.plotTS(axTS, t, [dfL1[code], dfL2[code], dfW[code], dfO[code]],
                  tBar=tBar, sd=sd, styLst='---*', cLst='mrbk',
                  legLst=legLst)
    corrL = corrMat[indS[iP], iCode, 0]
    corrW = corrMat[indS[iP], iCode, 1]
    axplot.titleInner(
        axTS, 'siteNo {} {:.2f} {:.2f}'.format(siteNo, corrL, corrW))
    axTS.legend()

    # hist
    axH1.hist(dfOD[code].values, density=True, bins=50)
    axplot.titleInner(axH1, 'histogram {}'.format(shortName))
    axH2.hist(dfOD['00060'].values, density=True, bins=50)
    axplot.titleInner(axH2, 'histogram {}'.format('Q'))
    axH3.hist(np.log(dfOD['00060'].values + 1), density=True, bins=50)
    axplot.titleInner(axH3, 'histogram {}'.format('log Q'))

    # periodogram
    freqQ, powerQ, pQ = calPower('00060', dfOD)
    freqC, powerC, pC = calPower(code, dfOD)
    # Fix: the streamflow panel previously plotted powerC (the
    # concentration spectrum) against the streamflow frequencies.
    axP1.plot(1 / freqQ, powerQ, '-*b', label='Periodograms')
    axP1.plot(1 / freqQ, pQ, '-*r', label='baluev probability')
    axplot.titleInner(axP1, 'streamflow')
    axP1.legend()
    axP2.plot(1 / freqC, powerC, '-*b', label='Periodograms')
    axP2.plot(1 / freqC, pC, '-*r', label='baluev probability')
    axplot.titleInner(axP2, shortName)
    axP2.legend()
def funcPoint(iP, axP):
    """Compare the LSTM and linear-regression sequences for one site.

    One panel per model: ``axP[0]`` gets the ``basins`` sequence and
    ``axP[1]`` the ``wqLinear`` 'LR' sequence, each plotted against the
    observations and titled with its rmse/corr pairs.
    """
    siteNo = siteNoLst[iP]
    lstm, dfObs = basins.loadSeq(outName, siteNo)
    lstmErr = waterQuality.calErrSeq(lstm[code], dfObs[code])
    linear = wqLinear.loadSeq(siteNo, code, 'LR', optT='Y8090')
    linearErr = waterQuality.calErrSeq(linear[code], dfObs[code])

    dates = dfObs.index.values
    split = np.datetime64('2000-01-01')
    runs = zip([lstm, linear], [lstmErr, linearErr])
    for k, (frame, (rmse, corr)) in enumerate(runs):
        axplot.plotTS(axP[k], dates, [frame[code], dfObs[code]],
                      tBar=split, legLst=[modLst[k], 'obs'],
                      styLst='-*', cLst='br')
        heading = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
            siteNo, rmse[0], rmse[1], corr[0], corr[1])
        axP[k].set_title(heading)
def funcPoint(iP, axP):
    """Plot predicted vs observed series (epoch 200) from 1980 onward.

    ``axP[0]`` shows streamflow ('00060'); ``axP[k + 1]`` shows the k-th
    entry of the module-level ``codeLst``.
    """
    siteNo = siteNoLst[iP]
    pred, obs = basins.loadSeq(outName, siteNo, ep=200)
    start = np.datetime64('1980-01-01')
    pred = pred[pred.index >= start]
    obs = obs[obs.index >= start]
    dates = pred.index.values.astype(np.datetime64)
    split = np.datetime64('2000-01-01')

    axplot.plotTS(axP[0], dates, [pred['00060'], obs['00060']],
                  tBar=split, legLst=['pred', 'obs'],
                  styLst='--', cLst='br')
    axP[0].set_title('streamflow')

    for pos, var in enumerate(codeLst, start=1):
        name = codePdf.loc[var]['shortName']
        axplot.plotTS(axP[pos], dates, [pred[var], obs[var]],
                      tBar=split, legLst=['pred', 'obs'],
                      styLst='-*', cLst='br')
        axP[pos].set_title(' {} {}'.format(name, var))
def funcPoint(iP, axP):
    """Plot LSTM, per-site linear regression and observations for one site.

    Two ``LinearRegression`` models are fitted on this site's training rows
    (taken from the module-level ``xL1``/``yL1``/``ycL1`` via ``infoTrain``),
    then evaluated on the forcing read by ``waterQuality.readSiteX``.
    ``axP[0]`` shows streamflow and ``axP[1]`` shows ``codeLst[0]``.
    """
    siteNo = siteNoLst[iP]
    dfPred, dfObs = basins.loadSeq(outName, siteNo)
    dates = dfPred['date'].values.astype(np.datetime64)
    split = np.datetime64('2000-01-01')

    # linear model: fit on the training rows belonging to this site
    rows = infoTrain[infoTrain['siteNo'] == siteNo].index
    [xFit, yFit, ycFit], _ = utils.rmNan(
        [xL1[rows, :], yL1[rows, :], ycL1[rows, :]])
    regY = LinearRegression().fit(xFit, yFit)
    regYC = LinearRegression().fit(xFit, ycFit)

    # predict over the full forcing record and map back with transOut
    dfX = waterQuality.readSiteX(
        siteNo, np.datetime64('1979-01-01'), np.datetime64('2020-01-01'),
        varX)
    xAll = transform.transInAll(dfX.values, mtdX, statLst=statX)
    yLin = wqData.transOut(regY.predict(xAll), statY, varY)
    ycLin = wqData.transOut(regYC.predict(xAll), statYC, varYC)

    firstCode = codeLst[0]
    labels = ['lstm', 'lr', 'obs']
    axplot.plotTS(axP[0], dates, [dfPred['00060'], yLin, dfObs['00060']],
                  tBar=split, legLst=labels, styLst='---', cLst='bgr')
    axplot.plotTS(axP[1], dates, [dfPred[firstCode], ycLin, dfObs[firstCode]],
                  tBar=split, legLst=['lstm', 'lr', 'obs'],
                  styLst='--*', cLst='bgr')
def loadModel(siteNoLst, outNameLSTM, codeLst):
    """Load LSTM, WRTDS and observed sequences for every site.

    Parameters
    ----------
    siteNoLst : iterable of site numbers (also used as WRTDS file names).
    outNameLSTM : model name handed to ``basins.loadSeq``.
    codeLst : list of variable codes; '00060' is prepended when reading
        the observations.

    Returns
    -------
    tuple ``(dictLSTM, dictWRTDS, dictObs)``, each keyed by site number.
    """
    total = len(siteNoLst)

    # LSTM
    lstmBySite = {}
    for pos, site in enumerate(siteNoLst):
        print('\t LSTM site {}/{}'.format(pos, total), end='\r')
        lstmBySite[site] = basins.loadSeq(outNameLSTM, site)

    # WRTDS: one csv per site, indexed by its 'date' column
    wrtdsBySite = {}
    folder = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
    for pos, site in enumerate(siteNoLst):
        print('\t WRTDS site {}/{}'.format(pos, total), end='\r')
        table = pd.read_csv(os.path.join(folder, site), index_col=None)
        wrtdsBySite[site] = table.set_index('date')

    # Observation
    obsBySite = {}
    for pos, site in enumerate(siteNoLst):
        print('\t USGS site {}/{}'.format(pos, total), end='\r')
        obsBySite[site] = waterQuality.readSiteTS(
            site, varLst=['00060'] + codeLst, freq='W', rmFlag=True)

    return lstmBySite, wrtdsBySite, obsBySite
def funcPoint(iP, axP):
    """Plot LSTM, WRTDS and observations for 1980-2016 and for after 2016.

    ``axP[0]`` covers years 1980..2016, ``axP[1]`` years after 2016; each
    title reports the WRTDS and LSTM correlation for that period.  The year
    masks are built from the observation index and applied positionally to
    the LSTM and WRTDS series as well.
    """
    siteNo = siteNoLst[iP]
    dfO = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    t = dfO.index
    yr = t.year.values
    masks = [(yr >= 1980) & (yr <= 2016), yr > 2016]

    # LSTM
    outName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet)
    dfP = basins.loadSeq(outName, siteNo)[code]
    # WRTDS
    dfW = pd.read_csv(os.path.join(dirWrtds, 'B16', siteNo),
                      index_col=None).set_index('date')[code]
    dfW.index = pd.to_datetime(dfW.index)

    for ax, mask in zip([axP[0], axP[1]], masks):
        obs = dfO[mask].values
        lstm = dfP[mask].values
        wrtds = dfW[mask].values
        axplot.plotTS(ax, t[mask], [lstm, wrtds, obs],
                      styLst='--*', cLst='bgr')
        # print corr
        _rmseW, corrW = utils.stat.calErr(wrtds, obs)
        _rmseL, corrL = utils.stat.calErr(lstm, obs)
        ax.set_title('site {} WRTDS {:.2f} LSTM {:.2f}'.format(
            siteNo, corrW, corrL))
def funcPoint(iP, axP):
    """Overlay the sequences of every label in ``labelLst`` on one axes.

    Observations are split by year parity: odd years are labelled
    'obs train' and even years 'obs test'.  The title lists, per label, the
    correlation between prediction and even-year observations.
    """
    siteNo = siteNoLst[iP]
    colors = 'cb'
    dfO = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(dfO.index).year
    obsOdd = dfO[years % 2 == 1]
    obsEven = dfO[years % 2 == 0]

    # comparison table on the dates where an even-year observation exists
    dfC = pd.DataFrame(index=obsEven.dropna().index)
    dfC['obs'] = obsEven
    for k, label in enumerate(labelLst):
        outName = '{}-{}-{}-{}'.format(dataName, code, label, trainSet)
        seq = basins.loadSeq(outName, siteNo)[code]
        dfC[label] = seq
        axplot.plotTS(axP, seq.index, seq.values,
                      styLst='-', cLst=colors[k])
    axplot.plotTS(axP, obsOdd.index, obsOdd.values, styLst='*', cLst='m')
    axplot.plotTS(axP, obsEven.index, obsEven.values, styLst='*', cLst='r')
    axP.legend(labelLst + ['obs train', 'obs test'])

    pieces = ['site {}'.format(siteNo)]
    for k, label in enumerate(labelLst):
        axplot.plotTS(axP, dfC[label].index, dfC[label].values,
                      styLst='*', cLst=colors[k])
        _rmse, corr = utils.stat.calErr(dfC[label].values, dfC['obs'].values)
        pieces.append(' corr{}={:.3f}'.format(k, corr))
    axP.set_title(''.join(pieces))
def funcPoint(iP, axP):
    """Plot two model runs against observations split by year parity and flag.

    Predictions and observations from 1980 onward are multiplied by the
    streamflow column '00060' before plotting.  For each variable in
    ``codeSel`` the observations are split into four series: rows whose
    ``<var>_cd`` value is 'x'/'X' (odd years, even years) and all other rows
    (odd years, even years).
    """
    siteNo = siteNoLstP[iP]
    dfPred1, _ = basins.loadSeq(outLst[0], siteNo)
    dfPred2, _ = basins.loadSeq(outLst[1], siteNo)
    sd = np.datetime64('1980-01-01')
    dfQ = waterQuality.readSiteY(siteNo, ['00060'], sd=sd)
    dfC = waterQuality.readSiteY(
        siteNo, codeSel + [code + '_cd' for code in codeSel], sd=sd)
    dfPred1 = dfPred1[dfPred1.index >= sd]
    dfPred2 = dfPred2[dfPred2.index >= sd]
    # scale every column by streamflow (row-wise)
    dfPred1 = dfPred1.multiply(dfPred1['00060'], axis='index')
    dfPred2 = dfPred2.multiply(dfPred2['00060'], axis='index')
    dfC[codeSel] = dfC[codeSel].multiply(dfQ['00060'], axis='index')
    t = dfPred1.index.values.astype(np.datetime64)

    styLst = ['-', '-', '*', '*', '*', '*']
    # Fix: the last two labels were swapped.  f1 keeps odd years and f2
    # keeps even years, matching the odd/even order of c1/c2.
    legLst = ['model odd', 'model even', 'obs odd', 'obs even',
              'flag odd', 'flag even']
    yr = dfC.index.year
    isEven = yr % 2 == 0
    isOdd = yr % 2 == 1
    for k, var in enumerate(codeSel):
        shortName = codePdf.loc[var]['shortName']
        title = '{} {} {}'.format(siteNo, shortName, var)
        vf = dfC[var + '_cd'].values
        isX = (vf == 'x') | (vf == 'X')
        c1 = dfC[var].values.copy()
        c2 = dfC[var].values.copy()
        f1 = dfC[var].values.copy()
        f2 = dfC[var].values.copy()
        c1[~isX | isEven] = np.nan   # 'x'/'X' rows, odd years
        c2[~isX | isOdd] = np.nan    # 'x'/'X' rows, even years
        f1[isX | isEven] = np.nan    # other rows, odd years
        f2[isX | isOdd] = np.nan     # other rows, even years
        data = [dfPred1[var].values, dfPred2[var].values, c1, c2, f1, f2]
        axplot.plotTS(axP[k], t, data, styLst=styLst, cLst='bgrmkk',
                      legLst=legLst)
        axP[k].set_title(title)
def funcPoint(iP, axP):
    """Plot sequence predictions, sample predictions and observations.

    One panel per entry of ``plotVar``.  Streamflow ('00060') panels show
    prediction vs observation; other panels additionally show the sample
    predictions ``ycP1``/``ycP2`` for this site's train and test rows.
    """
    siteNo = siteNoLst[iP]
    dfPred, dfObs = basins.loadSeq(outName, siteNo)
    dates = dfPred.index.values.astype(np.datetime64)
    split = np.datetime64('2000-01-01')

    # gather this site's sample dates and sample predictions
    infoTr = wqData.subsetInfo(trainset)
    infoTe = wqData.subsetInfo(testset)
    rowsTr = infoTr[infoTr['siteNo'] == siteNo].index
    rowsTe = infoTe[infoTe['siteNo'] == siteNo].index
    sampleT = np.concatenate([
        infoTr['date'][rowsTr].values.astype(np.datetime64),
        infoTe['date'][rowsTe].values.astype(np.datetime64)])
    sampleY = np.concatenate([ycP1[rowsTr], ycP2[rowsTe]])

    for k, var in enumerate(plotVar):
        rmse, corr = waterQuality.calErrSeq(dfPred[var], dfObs[var])
        tStr = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
            siteNo, rmse[0], rmse[1], corr[0], corr[1])
        ax = axP[k]
        if var != '00060':
            shortName = codePdf.loc[var]['shortName']
            axplot.plotTS(ax, dates, dfPred[var], tBar=split,
                          legLst=['LSTM-sequence'], styLst='-', cLst='b')
            axplot.plotTS(ax, sampleT, sampleY, legLst=['LSTM-sample'],
                          styLst='*', cLst='g')
            axplot.plotTS(ax, dates, dfObs[var], legLst=['observation'],
                          styLst='*', cLst='r')
            ax.set_title(shortName + ' ' + tStr)
        else:
            axplot.plotTS(ax, dates, [dfPred[var], dfObs[var]], tBar=split,
                          legLst=['LSTM', 'observation'],
                          styLst='--', cLst='br')
            ax.set_title('streamflow ' + tStr)
def funcPoint(iP, axP):
    """Plot the 'QTFP_C' LSTM sequence against weekly observations.

    The title shows the site number and ``matMap[iP]`` as the correlation.
    """
    siteNo = siteNoLstCode[iP]
    modelName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(modelName, siteNo)
    obs = waterQuality.readSiteTS(siteNo, [code], freq='W')

    # ts
    axplot.plotTS(axP, obs.index, [lstm[code], obs[code]],
                  tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='-*', cLst='rk', legLst=['LSTM', 'Obs'])
    axP.set_title('site {} corr={:.3f}'.format(siteNo, matMap[iP]))
    axP.legend()
def funcPoint(iP, axP):
    """Plot two model runs with raw and flag-filtered observations.

    ``axP[0]`` shows streamflow of the first run vs observed flow.  Each
    following panel shows one variable of ``codeSel``: both predictions, all
    observations, and the observations with rows whose ``<var>_cd`` is
    'x'/'X' blanked out.
    """
    siteNo = siteNoLstP[iP]
    split = np.datetime64('2000-01-01')
    predA, _ = basins.loadSeq(outLst[0], siteNo)
    predB, _ = basins.loadSeq(outLst[1], siteNo)
    start = np.datetime64('1980-01-01')
    flow = waterQuality.readSiteY(siteNo, ['00060'], sd=start)
    flagCols = [code + '_cd' for code in codeSel]
    chem = waterQuality.readSiteY(siteNo, codeSel + flagCols, sd=start)
    predA = predA[predA.index >= start]
    predB = predB[predB.index >= start]
    dates = predA.index.values.astype(np.datetime64)

    axplot.plotTS(axP[0], dates, [predA['00060'], flow['00060']],
                  tBar=split, legLst=['pred-opt1', 'obs'],
                  styLst='--', cLst='br')
    axP[0].set_title('{} streamflow'.format(siteNo))

    for pos, var in enumerate(codeSel, start=1):
        name = codePdf.loc[var]['shortName']
        flags = chem[var + '_cd'].values
        allObs = chem[var].values.copy()
        kept = chem[var].values.copy()
        kept[(flags == 'x') | (flags == 'X')] = np.nan
        axplot.plotTS(axP[pos], dates,
                      [predA[var].values, predB[var].values, allObs, kept],
                      tBar=split,
                      legLst=['pred', 'pred-rmFlag', 'obs', 'obs-flag'],
                      styLst=['-', '-', '*', '*'], cLst='bgrk')
        axP[pos].set_title(' {} {}'.format(name, var))
def funcPoint(iP, axP):
    """Scatter '00955' against log('00060') before and after ``tBar``.

    ``axP`` is indexed as a 2x2 grid: row 0 holds observations, row 1
    predictions; column 0 is the period between the module-level ``sd`` and
    ``tBar`` (both exclusive), column 1 is ``tBar`` onward.
    """
    siteNo = siteNoLst[iP]
    pred, obs = basins.loadSeq(outName, siteNo)
    predEarly = pred[(pred.index < tBar) & (pred.index > sd)]
    obsEarly = obs[(obs.index < tBar) & (obs.index > sd)]
    predLate = pred[pred.index >= tBar]
    obsLate = obs[obs.index >= tBar]

    panels = [
        (axP[0, 0], obsEarly, '{} B2000 observation'),
        (axP[0, 1], obsLate, '{} A2000 observation'),
        (axP[1, 0], predEarly, '{} B2000 prediction'),
        (axP[1, 1], predLate, '{} A2000 prediction'),
    ]
    for ax, frame, template in panels:
        ax.plot(np.log(frame['00060']), frame['00955'], '*')
        ax.set_title(template.format(siteNo))
def funcPoint(iP, axP):
    """Plot the LSTM sequence of ``code`` vs observations for one site.

    The title carries the site number and the two-element rmse and corr
    results of ``waterQuality.calErrSeq``.
    """
    siteNo = siteNoLst[iP]
    pred, obs = basins.loadSeq(outName, siteNo)
    rmse, corr = waterQuality.calErrSeq(pred[code], obs[code])
    axplot.plotTS(axP, obs.index.values, [pred[code], obs[code]],
                  tBar=np.datetime64('2000-01-01'),
                  legLst=['LSTM', 'obs'], styLst='-*', cLst='br')
    heading = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
        siteNo, rmse[0], rmse[1], corr[0], corr[1])
    axP.set_title(heading)
def funcPoint(iP, axP):
    """Plot the prediction with observations split by year parity.

    Odd-year observations are labelled 'obs train', even-year ones
    'obs test'; the title reports the correlation on even-year dates that
    have an observation.  The parity mask is derived from the prediction
    index and applied to the observation series.
    """
    siteNo = siteNoLst[iP]
    pred = basins.loadSeq(outName, siteNo)[code]
    obs = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(pred.index).year
    obsOdd = obs[years % 2 == 1]
    obsEven = obs[years % 2 == 0]

    for series, sty, color in [(pred, '-', 'b'),
                               (obsOdd, '*', 'm'),
                               (obsEven, '*', 'r')]:
        axplot.plotTS(axP, series.index, series.values,
                      styLst=sty, cLst=color)
    axP.legend(['pred', 'obs train', 'obs test'])

    # align prediction and observation on the even-year observation dates
    paired = pd.DataFrame(index=obsEven.dropna().index)
    paired['obs'] = obsEven
    paired['pred'] = pred
    _rmse, corr = utils.stat.calErr(paired['pred'].values,
                                    paired['obs'].values)
    axP.set_title('site {} corr = {:.3f}'.format(siteNo, corr))
def funcPoint(iP, axP):
    """Plot the silica time series and its Lomb-Scargle spectrum.

    ``axP[0]``: LSTM vs observed ``code`` with rmse/corr in the title.
    ``axP[1]``: Lomb-Scargle power of observed and predicted '00955' against
    period in days, evaluated on the positive ``fftfreq`` grid of length
    ``365 * 5``.

    Note: observed rows above the outlier limit are set to NaN in ``dfObs``
    in place, after the time-series panel has been drawn.
    """
    def _daysSince1979(index):
        # days elapsed since 1979-01-01, as floats
        days = index.values.astype('datetime64[D]')
        return (days - np.datetime64('1979-01-01')).astype(float)

    siteNo = siteNoLst[iP]
    dfP1, dfObs = basins.loadSeq(outName, siteNo)
    # Fix: the original read `dfPred.index`, a name never defined in this
    # function; the time axis must come from this site's own frame.
    t = dfP1.index.values.astype(np.datetime64)
    tBar = np.datetime64('2000-01-01')

    # Silica
    rmse, corr = waterQuality.calErrSeq(dfP1[code], dfObs[code])
    axplot.plotTS(axP[0], t, [dfP1[code], dfObs[code]], tBar=tBar,
                  legLst=['LSTM', 'obs'], styLst='-*', cLst='br')
    tStr = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
        siteNo, rmse[0], rmse[1], corr[0], corr[1])
    axP[0].set_title('Silica ' + tStr)

    # rm outlier: limit is mean + 5 std of the values strictly inside the
    # 1st..99th percentile range
    df = dfObs[dfObs['00955'].notna().values]
    y = df['00955'].values
    yV = y[y < np.percentile(y, 99)]
    yV = yV[yV > np.percentile(y, 1)]
    ul = np.mean(yV) + np.std(yV) * 5
    dfObs[dfObs['00955'] > ul] = np.nan

    # fourier
    df = dfObs[dfObs['00955'].notna().values]
    nt = 365 * 5
    freq = np.fft.fftfreq(nt)[1:]
    # Fix: `np.float` is no longer available in current NumPy; the builtin
    # float is used inside _daysSince1979 instead.
    power = LombScargle(_daysSince1979(df.index),
                        df['00955'].values).power(freq)
    df2 = dfP1['00955']
    power2 = LombScargle(_daysSince1979(df2.index),
                         df2.values).power(freq)

    indF = np.where(freq > 0)[0]
    axP[1].plot(1 / freq[indF], power2[indF], 'b', label='lstm')
    axP[1].plot(1 / freq[indF], power[indF], 'r', label='obs')
    axP[1].legend()
    # The earlier 'normalize spectrum' y-label was always overwritten by
    # this one, so only the effective label is kept.
    axP[1].set_ylabel('power')
    axP[1].set_xlabel('period (day)')
def funcPoint(iP, axP):
    """Plot every label's sequence vs observations up to and after 2016.

    ``axP[0]`` covers years <= 2016 and carries the site number as title;
    ``axP[1]`` covers years > 2016.  The year masks come from the
    observation index and are applied to each prediction series too.
    """
    siteNo = siteNoLst[iP]
    obs = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    t = obs.index
    years = pd.DatetimeIndex(t).year
    early = years <= 2016
    late = years > 2016

    preds = []
    for label in labelLst:
        outName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet)
        preds.append(basins.loadSeq(outName, siteNo)[code])

    for ax, mask in ((axP[0], early), (axP[1], late)):
        series = [p[mask].values for p in preds] + [obs[mask].values]
        axplot.plotTS(ax, t[mask], series, styLst='--*', cLst='bgr')
    axP[0].set_title(siteNo)
def funcPoint(iP, axP):
    """Plot prediction vs observation for each variable in ``varPred``.

    Streamflow ('00060') uses two line styles and the title 'streamflow';
    other variables use line + marker and are titled with their short name
    from ``codePdf`` followed by the code.
    """
    siteNo = siteNoLst[iP]
    pred, obs = basins.loadSeq(outName, siteNo)
    dates = pred['date'].values.astype(np.datetime64)
    split = np.datetime64('2000-01-01')
    for k, var in enumerate(varPred):
        if var != '00060':
            style = '-*'
            heading = ' {} {}'.format(codePdf.loc[var]['shortName'], var)
        else:
            style = '--'
            heading = 'streamflow'
        axplot.plotTS(axP[k], dates, [pred[var], obs[var]], tBar=split,
                      legLst=['pred', 'obs'], styLst=style, cLst='br')
        axP[k].set_title(heading)
def funcPoint(iP, axP):
    """Plot prediction vs observation up to 2016 and after 2016.

    Each panel title shows the site number, the correlation computed here
    for that period, and the matching column of ``corrMat[iP]``.  The year
    masks are derived from the prediction index and applied to both series.
    """
    siteNo = siteNoLst[iP]
    pred = basins.loadSeq(outName, siteNo)[code]
    obs = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(pred.index).year
    masks = [years <= 2016, years > 2016]

    for col, mask in enumerate(masks):
        predPart = pred[mask]
        obsPart = obs[mask]
        axplot.plotTS(axP[col], predPart.index,
                      [predPart.values, obsPart.values],
                      styLst='-*', cLst='br')
        _rmse, corr = utils.stat.calErr(predPart.values, obsPart.values)
        axP[col].set_title('site {} {:.2f} {:.2f}'.format(
            siteNo, corr, corrMat[iP, col]))
def funcPoint(iP, axP):
    """Plot LSTM and WRTDS against observations split by year parity and flag.

    The WRTDS csv (``modelStat/WRTDS/Yodd/<siteNo>``) is given a daily index
    from 1979-01-01 to 2020-01-01, so the file must contain exactly one row
    per day of that range.  For each variable in ``codeSel`` the
    observations are split into rows whose ``<var>_cd`` value is 'x'/'X'
    (odd years, even years) and all other rows (odd years, even years).
    """
    siteNo = siteNoLstP[iP]
    dfPred1, _ = basins.loadSeq(outName, siteNo, ep=ep)
    dfPred2 = pd.read_csv(
        os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS', 'Yodd', siteNo),
        index_col=None)
    # Fix: `pd.datetime` no longer exists in current pandas; pd.Timestamp
    # yields the same daily range.
    ctR = pd.date_range(pd.Timestamp(1979, 1, 1), pd.Timestamp(2020, 1, 1))
    dfPred2.index = ctR
    dfPred2.index.name = 'date'

    sd = np.datetime64('1980-01-01')
    # The streamflow read (dfQ) was unused here and has been dropped.
    dfC = waterQuality.readSiteY(
        siteNo, codeSel + [code + '_cd' for code in codeSel], sd=sd)
    dfPred1 = dfPred1[dfPred1.index >= sd]
    dfPred2 = dfPred2[dfPred2.index >= sd]
    t = dfPred1.index.values.astype(np.datetime64)

    styLst = ['-', '-', '*', '*', '*', '*']
    # Fix: the last two labels were swapped.  f1 keeps odd years and f2
    # keeps even years, matching the odd/even order of c1/c2.
    legLst = ['LSTM', 'WRTDS', 'obs odd', 'obs even',
              'flag odd', 'flag even']
    yr = dfC.index.year
    isEven = yr % 2 == 0
    isOdd = yr % 2 == 1
    for k, var in enumerate(codeSel):
        shortName = codePdf.loc[var]['shortName']
        title = '{} {} {}'.format(siteNo, shortName, var)
        vf = dfC[var + '_cd'].values
        isX = (vf == 'x') | (vf == 'X')
        c1 = dfC[var].values.copy()
        c2 = dfC[var].values.copy()
        f1 = dfC[var].values.copy()
        f2 = dfC[var].values.copy()
        c1[~isX | isEven] = np.nan   # 'x'/'X' rows, odd years
        c2[~isX | isOdd] = np.nan    # 'x'/'X' rows, even years
        f1[isX | isEven] = np.nan    # other rows, odd years
        f2[isX | isOdd] = np.nan     # other rows, even years
        data = [dfPred1[var].values, dfPred2[var].values, c1, c2, f1, f2]
        axplot.plotTS(axP[k], t, data, styLst=styLst, cLst='bgrmkk',
                      legLst=legLst)
        axP[k].set_title(title)
def funcPoint(iP, axP):
    """Plot LSTM ('QTFP_C'), WRTDS and observations for one site.

    The title reports the LSTM and WRTDS correlations taken from
    ``corrMat[indS[iP], iCode, 0]`` and ``corrMat[indS[iP], iCode, 1]``.
    """
    siteNo = siteNoLstCode[iP]
    modelName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(modelName, siteNo)
    wrtdsFile = os.path.join(dirWrtds, 'output', siteNo)
    wrtds = pd.read_csv(wrtdsFile, index_col=None).set_index('date')
    obs = waterQuality.readSiteTS(
        siteNo, codeLst + ['00060'], freq=wqData.freq)

    # ts
    axplot.plotTS(axP, obs.index, [lstm[code], wrtds[code], obs[code]],
                  tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='--*', cLst='rbk',
                  legLst=['LSTM', 'WRTDS', 'Obs'])
    row = indS[iP]
    axP.set_title('{} site {}; LSTM corr={:.2f} WRTDS corr={:.2f}'.format(
        shortName, siteNo, corrMat[row, iCode, 0], corrMat[row, iCode, 1]))
    axP.legend()
def funcPoint(iP, axP):
    """Plot prediction vs observation for each variable in ``plotVar``.

    Every panel is titled with the variable name ('streamflow' for '00060',
    otherwise the short name from ``codePdf``) followed by the site number
    and the rmse/corr pairs from ``waterQuality.calErrSeq``.
    """
    siteNo = siteNoLst[iP]
    dfPred, dfObs = basins.loadSeq(outName, siteNo)
    t = dfPred.index.values.astype(np.datetime64)
    tBar = np.datetime64('2000-01-01')
    for k, var in enumerate(plotVar):
        rmse, corr = waterQuality.calErrSeq(dfPred[var], dfObs[var])
        tStr = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
            siteNo, rmse[0], rmse[1], corr[0], corr[1])
        if var == '00060':
            styLst = '--'
            title = 'streamflow ' + tStr
        else:
            styLst = '-*'
            shortName = codePdf.loc[var]['shortName']
            title = shortName + ' ' + tStr
        axplot.plotTS(axP[k], t, [dfPred[var], dfObs[var]], tBar=tBar,
                      legLst=['pred', 'obs'], styLst=styLst, cLst='br')
        # Fix: the per-variable `title` was built but `tStr` was shown, so
        # panels could not be told apart by their heading.
        axP[k].set_title(title)
def funcPoint(iP, axP):
    """Plot the sequence of each epoch in ``epLst`` against observations.

    Panel ``axP[k]`` shows the prediction loaded with ``ep=epLst[k]``; the
    observation frame returned by the last load is used for every panel.
    """
    siteNo = siteNoLst[iP]
    results = []
    for ep in epLst:
        frame, dfObs = basins.loadSeq(outName, siteNo, ep=ep)
        rmse, corr = waterQuality.calErrSeq(frame[code], dfObs[code])
        results.append((frame, rmse, corr))

    dates = dfObs.index.values
    split = np.datetime64('2000-01-01')
    for k, (frame, rmse, corr) in enumerate(results):
        axplot.plotTS(axP[k], dates, [frame[code], dfObs[code]], tBar=split,
                      legLst=[epLst[k], 'obs'], styLst='-*', cLst='br')
        heading = '{}, rmse [{:.2f} {:.2f}], corr [{:.2f} {:.2f}]'.format(
            siteNo, rmse[0], rmse[1], corr[0], corr[1])
        axP[k].set_title(heading)
code = '00010' siteNoLst = dictSite[code] nSite = len(siteNoLst) dataName = 'rbWN5' # load all sequence dictLSTMLst = list() # LSTM label = 'QTFP_C' dictLSTM = dict() trainSet = '{}-B10'.format('comb') outName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet) for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') df = basins.loadSeq(outName, siteNo) dictLSTM[siteNo] = df # WRTDS dictWRTDS = dict() dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'Linear-W', 'B10Q', 'output') for k, siteNo in enumerate(siteNoLst): print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r') saveFile = os.path.join(dirWRTDS, siteNo) df = pd.read_csv(saveFile, index_col=None).set_index('date') # df = utils.time.datePdf(df) dictWRTDS[siteNo] = df # Observation dictObs = dict() for k, siteNo in enumerate(siteNoLst):
# test: model and data-set configuration
outName = 'Silica64-00955-Y8090-opt1'
wqData = waterQuality.DataModelWQ('Silica64')
code = '00955'
trainset = 'Y8090'
testset = 'Y0010'
master = basins.loadMaster(outName)

# seq test: run the sequence test for every unique site in the data set
siteNoLst = wqData.info['siteNo'].unique().tolist()
basins.testModelSeq(outName, siteNoLst, wqData=wqData)

# quick look at the first site: '00955' against log('00060'),
# predictions on top and observations below
siteNo = siteNoLst[0]
dfPred, dfObs = basins.loadSeq(outName, siteNo)
fig, axes = plt.subplots(2, 1)
axes[0].plot(np.log(dfPred['00060']), dfPred['00955'], '*')
axes[1].plot(np.log(dfObs['00060']), dfObs['00955'], '*')
fig.show()

# per-site result matrices (two columns each), NaN until filled
ceqMat1 = np.full([len(siteNoLst), 2], np.nan)
dwMat1 = ceqMat1.copy()
ceqMat2 = ceqMat1.copy()
dwMat2 = ceqMat1.copy()
sd = np.datetime64('1980-01-01')
tBar = np.datetime64('2000-01-01')
importlib.reload(wqRela)
for k, siteNo in enumerate(siteNoLst):
    print(k, siteNo)
reTest = False
dataName = 'rbWN5'
siteNoLst = dictSite['comb']
nSite = len(siteNoLst)

# load all sequence
dictLSTMLst = []
# LSTM: one {siteNo: sequence} dict per label
labelLst = ['QTFP_C']
for label in labelLst:
    dictLSTM = {}
    trainSet = 'comb-B10'
    outName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet)
    for k, siteNo in enumerate(siteNoLst):
        print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
        df = basins.loadSeq(outName, siteNo)
        dictLSTM[siteNo] = df
    dictLSTMLst.append(dictLSTM)

# WRTDS: one csv per site, indexed by its 'date' column
dictWRTDS = {}
dirWRTDS = os.path.join(
    kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output')
for k, siteNo in enumerate(siteNoLst):
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
    saveFile = os.path.join(dirWRTDS, siteNo)
    df = pd.read_csv(saveFile, index_col=None).set_index('date')
    dictWRTDS[siteNo] = df

# Observation
dictObs = {}
for k, siteNo in enumerate(siteNoLst):
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
# Fit a linear regression on the NaN-free samples and write its predictions
# into dfYP for every row of dfXN that has no missing input.
# NOTE(review): the original indentation was lost; the extent of the `if`
# body is inferred from data dependencies (lrModel / yp) - confirm.
[xx, yy], iv = utils.rmNan([x, y])
if len(yy) > 0:
    lrModel = LinearRegression()
    lrModel = lrModel.fit(xx, yy)
    b = dfXN.isna().any(axis=1)
    yp = lrModel.predict(dfXN[~b].values)
    # Fix: `.at` is a scalar accessor; assigning an array to many rows
    # needs `.loc`.
    dfYP.loc[dfYP[~b].index, code] = yp

# Compare the two LSTM variants with the weekly observations.
figP, axP = plt.subplots(1, 1, figsize=(8, 2.5))
outName1 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QF_C', trainSet)
outName2 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QFP_C', trainSet)
dfL1 = basins.loadSeq(outName1, siteNo)
dfL2 = basins.loadSeq(outName2, siteNo)
dfO = waterQuality.readSiteTS(siteNo, [code], freq='W')
t = dfO.index
# ts
tBar = np.datetime64('2016-01-01')
sd = np.datetime64('1980-01-01')
legLst = ['LSTM w/o rainfall chem', 'LSTM w rainfall chem', 'Observation']
axplot.plotTS(axP, t, [dfL1[code], dfL2[code], dfO[code]],
              tBar=tBar, sd=sd, styLst='--*', cLst='rbk', legLst=legLst)
axP.legend()
figP.show()

# start of the next figure
figP, axP = plt.subplots(1, 1, figsize=(8, 2.5))
outName1 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QF_C', trainSet)
from hydroDL.model import trainTS
from hydroDL.data import gageII, usgs
from hydroDL.post import axplot, figplot
import torch
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Single-site check: ARMA sequence (order (5, 0, 0)) vs observations.
siteNo = '01674500'
code = '00955'
outName = 'Silica64-Y8090-00955-opt1'

dfP1, dfObs = basins.loadSeq(outName, siteNo)
dfP2 = wqLinear.loadSeq(
    siteNo, code, 'ARMA', optT='Y8090', order=(5, 0, 0))
rmse2, corr2 = waterQuality.calErrSeq(dfP2[code], dfObs[code])

t = dfObs.index.values
tBar = np.datetime64('2000-01-01')
styLst = '-*'
figP, axP = plt.subplots(1, 1, figsize=(8, 6))
axplot.plotTS(axP, t, [dfP2[code], dfObs[code]],
              tBar=tBar, styLst=styLst, cLst='br')
figP.show()
# seq test: run the sequence test at each selected epoch
siteNoLst = wqData.info['siteNo'].unique().tolist()
epLst = [100, 300, 500]
for ep in epLst:
    basins.testModelSeq(outName, siteNoLst, wqData=wqData, ep=ep)
ns = len(siteNoLst)
nep = len(epLst)

# calculate error from sequence
# Fix: the matrices were allocated with uninitialised np.ndarray and a
# hard-coded second dimension of 5 while only `nep` epochs are filled.
# They are now sized by `nep` and start as NaN.
rmseMat = np.full([ns, nep, 2], np.nan)
corrMat = np.full([ns, nep, 2], np.nan)
for k, siteNo in enumerate(siteNoLst):
    print(k, siteNo)
    for i, ep in enumerate(epLst):
        dfPred, dfObs = basins.loadSeq(outName, siteNo, ep=ep)
        rmseLSTM, corrLSTM = waterQuality.calErrSeq(dfPred[code], dfObs[code])
        rmseMat[k, i, :] = rmseLSTM
        corrMat[k, i, :] = corrLSTM

# box: one group per period (last axis), one box per epoch
for (errMat, title) in zip([rmseMat, corrMat], ['RMSE', 'Correlation']):
    dataBox = list()
    for k in range(2):
        temp = [errMat[:, i, k] for i in range(nep)]
        dataBox.append(temp)
    label1 = ['B2000', 'A2000']
    label2 = epLst
    fig = figplot.boxPlot(dataBox, label1=label1, label2=label2, sharey=True)
    fig.suptitle(title)
    fig.show()
# Run the sequence test for the 'Silica64Seq' model.
wqData = waterQuality2.DataModelWQ('Silica64Seq')
outName = 'Silica64Seq-Y8090'
siteNoLst = wqData.siteNoLst
basins2.testModelSeq(outName, siteNoLst, wqData=wqData)

# Compare the two models in outLst on the 'Silica64' site list.
outLst = ['Silica64Seq-Y8090', 'Silica64-Y8090-00955-opt1']
code = '00955'
wqData = waterQuality.DataModelWQ('Silica64')
siteNoLst = wqData.siteNoLst
ns = len(siteNoLst)

# every entry is overwritten below, so the arrays start uninitialised
rmseMat = np.empty([ns, 2, 2])
corrMat = np.empty([ns, 2, 2])
for k, siteNo in enumerate(siteNoLst):
    for i, out in enumerate(outLst):
        print(k, siteNo)
        dfP, dfO = basins.loadSeq(out, siteNo)
        rmse, corr = waterQuality.calErrSeq(dfP[code], dfO[code])
        rmseMat[k, i, :] = rmse
        corrMat[k, i, :] = corr

# box: one group per period (last axis), one box per model
for errMat, title in ((rmseMat, 'RMSE'), (corrMat, 'Correlation')):
    dataBox = []
    for k in range(2):
        temp = [errMat[:, i, k] for i in range(2)]
        dataBox.append(temp)
    label1 = ['B2000', 'A2000']
    label2 = ['seq', 'point']
    fig = figplot.boxPlot(dataBox, label1=label1, label2=label2, sharey=True)
    fig.suptitle(title)
    fig.show()