Ejemplo n.º 1
0
def funcPoint(iP, axP):
    """Fill the six detail panels for the site at position ``iP``.

    Parameters
    ----------
    iP : int
        Index into the module-level ``siteNoLstCode`` (and ``indS``).
    axP : sequence of six matplotlib axes
        Unpacked, in order, as the time-series panel, three histogram panels
        (``code``, '00060', log of '00060') and two periodogram panels
        (streamflow, ``code``).

    The function reads ``code``, ``shortName``, ``iCode``, ``corrMat``,
    ``dataName``, ``trainSet``, ``dirWrtds``, ``codeLst`` and ``wqData`` from
    the enclosing module. It returns nothing; it only draws on the axes.
    """
    axTS, axH1, axH2, axH3, axP1, axP2 = axP
    siteNo = siteNoLstCode[iP]
    outName1 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    outName2 = '{}-{}-{}-{}'.format(dataName, 'comb', 'QT_C', trainSet)
    dfL1 = basins.loadSeq(outName1, siteNo)
    dfL2 = basins.loadSeq(outName2, siteNo)
    dfW = pd.read_csv(os.path.join(dirWrtds, 'output', siteNo),
                      index_col=None).set_index('date')
    # Observations at the model frequency (time series) and daily (statistics).
    dfO = waterQuality.readSiteTS(siteNo,
                                  codeLst + ['00060'],
                                  freq=wqData.freq)
    dfOD = waterQuality.readSiteTS(siteNo, codeLst + ['00060'], freq='D')
    t = dfO.index
    # ts
    tBar = np.datetime64('2010-01-01')
    sd = np.datetime64('1980-01-01')
    legLst = ['LSTM QTFP', 'LSTM QT', 'WRTDS', 'Obs']
    axplot.plotTS(axTS,
                  t, [dfL1[code], dfL2[code], dfW[code], dfO[code]],
                  tBar=tBar,
                  sd=sd,
                  styLst='---*',
                  cLst='mrbk',
                  legLst=legLst)
    corrL = corrMat[indS[iP], iCode, 0]
    corrW = corrMat[indS[iP], iCode, 1]
    axplot.titleInner(axTS,
                      'siteNo {} {:.2f} {:.2f}'.format(siteNo, corrL, corrW))
    axTS.legend()
    # hist
    axH1.hist(dfOD[code].values, density=True, bins=50)
    axplot.titleInner(axH1, 'histogram {}'.format(shortName))
    axH2.hist(dfOD['00060'].values, density=True, bins=50)
    axplot.titleInner(axH2, 'histogram {}'.format('Q'))
    axH3.hist(np.log(dfOD['00060'].values + 1), density=True, bins=50)
    axplot.titleInner(axH3, 'histogram {}'.format('log Q'))
    # periodogram
    freqQ, powerQ, pQ = calPower('00060', dfOD)
    freqC, powerC, pC = calPower(code, dfOD)
    # The streamflow panel must show the streamflow power. The previous code
    # plotted powerC here, which duplicated the constituent curve and left
    # powerQ unused.
    axP1.plot(1 / freqQ, powerQ, '-*b', label='Periodograms')
    axP1.plot(1 / freqQ, pQ, '-*r', label='baluev probability')
    axplot.titleInner(axP1, 'streamflow')
    axP1.legend()
    axP2.plot(1 / freqC, powerC, '-*b', label='Periodograms')
    axP2.plot(1 / freqC, pC, '-*r', label='baluev probability')
    axplot.titleInner(axP2, shortName)
    axP2.legend()
Ejemplo n.º 2
0
def funcPoint(iP, axP):
    """Plot model predictions against observations for one site on ``axP``.

    ``iP`` selects the site from ``siteNoLst``. Observations of ``code`` are
    split by calendar-year parity: odd years are labelled 'obs train' in the
    legend and even years 'obs test'. One prediction series is loaded and
    drawn per entry of ``labelLst``. The title lists each series' correlation
    with the even-year observations, as returned by ``utils.stat.calErr``.
    """
    palette = 'cb'
    siteNo = siteNoLst[iP]
    obs = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    parity = pd.DatetimeIndex(obs.index).year % 2
    obsOdd = obs[parity == 1]
    obsEven = obs[parity == 0]
    # Comparison table restricted to dates that carry an even-year observation.
    compare = pd.DataFrame(index=obsEven.dropna().index)
    compare['obs'] = obsEven
    for k, label in enumerate(labelLst):
        seqName = '{}-{}-{}-{}'.format(dataName, code, label, trainSet)
        pred = basins.loadSeq(seqName, siteNo)[code]
        compare[label] = pred
        axplot.plotTS(axP, pred.index, pred.values,
                      styLst='-', cLst=palette[k])
    for series, color in ((obsOdd, 'm'), (obsEven, 'r')):
        axplot.plotTS(axP, series.index, series.values,
                      styLst='*', cLst=color)
    axP.legend(labelLst + ['obs train', 'obs test'])
    pieces = ['site {}'.format(siteNo)]
    for k, label in enumerate(labelLst):
        column = compare[label]
        axplot.plotTS(axP, column.index, column.values,
                      styLst='*', cLst=palette[k])
        _, corr = utils.stat.calErr(column.values, compare['obs'].values)
        pieces.append(' corr{}={:.3f}'.format(k, corr))
    axP.set_title(''.join(pieces))
Ejemplo n.º 3
0
def funcPoint(iP, axP):
    """Draw the '00060' series and the ``code`` series of one site.

    ``iP`` indexes ``siteNoLst``. ``axP[0]`` receives '00060' (line with
    markers) and is titled with the site number; ``axP[1]`` receives ``code``
    (markers only).
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['00060', code], freq=freq)
    dates = obs.index.values
    panels = ((0, '00060', '-*'), (1, code, '*'))
    for k, column, style in panels:
        axplot.plotTS(axP[k], dates, obs[column].values,
                      styLst=style, cLst='bgr')
    axP[0].set_title(site)
Ejemplo n.º 4
0
def loadModel(siteNoLst, outNameLSTM, codeLst):
    """Load LSTM, WRTDS and observed series for every site.

    Parameters
    ----------
    siteNoLst : list of str
        Site identifiers; each is also the file name of the WRTDS CSV.
    outNameLSTM : str
        Model name forwarded to ``basins.loadSeq``.
    codeLst : list of str
        Variables read, together with '00060', as weekly observations.

    Returns
    -------
    tuple of three dicts keyed by site number: LSTM frames, WRTDS frames
    (indexed by their 'date' column) and observation frames.
    """
    # LSTM sequences
    lstmBySite = {}
    for i, site in enumerate(siteNoLst):
        print('\t LSTM site {}/{}'.format(i, len(siteNoLst)), end='\r')
        lstmBySite[site] = basins.loadSeq(outNameLSTM, site)

    # WRTDS sequences stored as one CSV per site
    wrtdsRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
    wrtdsBySite = {}
    for i, site in enumerate(siteNoLst):
        print('\t WRTDS site {}/{}'.format(i, len(siteNoLst)), end='\r')
        table = pd.read_csv(os.path.join(wrtdsRoot, site), index_col=None)
        wrtdsBySite[site] = table.set_index('date')

    # Observations
    obsBySite = {}
    for i, site in enumerate(siteNoLst):
        print('\t USGS site {}/{}'.format(i, len(siteNoLst)), end='\r')
        obsBySite[site] = waterQuality.readSiteTS(
            site, varLst=['00060'] + codeLst, freq='W', rmFlag=True)

    return (lstmBySite, wrtdsBySite, obsBySite)
Ejemplo n.º 5
0
def funcPoint(iP, axP):
    """Compare LSTM and WRTDS predictions with observations at one site.

    The site is ``siteNoLst[iP]``. Samples from 1980 through 2016 are drawn on
    ``axP[0]`` and samples after 2016 on ``axP[1]``. Each panel title reports
    the WRTDS and LSTM correlation returned by ``utils.stat.calErr`` for that
    period.
    """
    siteNo = siteNoLst[iP]
    obs = waterQuality.readSiteTS(siteNo, [code], freq=wqData.freq)[code]
    dates = obs.index
    years = dates.year.values
    masks = ((years <= 2016) & (years >= 1980), years > 2016)
    # LSTM prediction
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet)
    lstm = basins.loadSeq(seqName, siteNo)[code]
    # WRTDS prediction, with its 'date' index parsed to timestamps
    wrtdsPath = os.path.join(dirWrtds, 'B16', siteNo)
    wrtds = pd.read_csv(wrtdsPath, index_col=None).set_index('date')[code]
    wrtds.index = pd.to_datetime(wrtds.index)
    # One (dates, lstm, wrtds, obs) bundle per period
    periods = [(dates[m], lstm[m].values, wrtds[m].values, obs[m].values)
               for m in masks]
    for k, (tt, pLstm, pWrtds, o) in enumerate(periods):
        axplot.plotTS(axP[k], tt, [pLstm, pWrtds, o],
                      styLst='--*', cLst='bgr')
    # print corr
    for k, (tt, pLstm, pWrtds, o) in enumerate(periods):
        _, corrWrtds = utils.stat.calErr(pWrtds, o)
        _, corrLstm = utils.stat.calErr(pLstm, o)
        axP[k].set_title('site {} WRTDS {:.2f} LSTM {:.2f}'.format(
            siteNo, corrWrtds, corrLstm))
Ejemplo n.º 6
0
def funcPoint(iP, axP):
    """Plot, per code, the product of the code series and '00060' for a site.

    ``iP`` indexes ``siteNoLst``; the k-th entry of ``codeLst`` is drawn on
    ``axP[k]`` from daily data as black markers.
    """
    site = siteNoLst[iP]
    print(iP, site)
    daily = waterQuality.readSiteTS(site, codeLst + ['00060'], freq='D')
    dates = daily.index
    for k, code in enumerate(codeLst):
        product = daily[code] * daily['00060']
        axplot.plotTS(axP[k], dates, product, styLst='*', cLst='k')
Ejemplo n.º 7
0
def funcPoint(iP, axP):
    """Draw runoff, precipitation and ``code`` for one site on three axes.

    ``iP`` indexes ``siteNoLst``. The title of ``axP[0]`` shows the site
    number and the ratio mean('runoff') / mean('pr') * 365 / 100, with NaNs
    ignored in both means.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['runoff', 'pr', code], freq=freq)
    dates = obs.index.values
    runoff = obs['runoff'].values
    precip = obs['pr'].values
    axplot.plotTS(axP[0], dates, runoff, styLst='-*', cLst='bgr')
    axplot.plotTS(axP[1], dates, precip, styLst='-*', cLst='bgr')
    axplot.plotTS(axP[2], dates, obs[code].values, styLst='*', cLst='bgr')
    ratio = np.nanmean(runoff) / np.nanmean(precip) * 365 / 100
    axP[0].set_title('{} {:.3f}'.format(site, ratio))
Ejemplo n.º 8
0
def funcPoint(iP, axP):
    """Show '00060' on ``axP[0]`` and predicted vs. observed ``code`` below.

    ``iP`` indexes ``siteNoLst``. Predictions come from the per-site CSV under
    ``dirRoot1``/output and are drawn as a red line on ``axP[1]``; the
    observations are overlaid as blue markers.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, ['00060', code], freq=freq)
    pred = pd.read_csv(os.path.join(dirRoot1, 'output', site),
                       index_col='date')
    dates = obs.index.values
    topAx, bottomAx = axP[0], axP[1]
    axplot.plotTS(topAx, dates, obs['00060'].values, styLst='-*', cLst='bgr')
    for frame, style, color in ((pred, '-', 'r'), (obs, '*', 'b')):
        axplot.plotTS(bottomAx, dates, frame[code].values,
                      styLst=style, cLst=color)
    topAx.set_title(site)
Ejemplo n.º 9
0
def funcPoint(iP, axP):
    """Plot the LSTM 'QTFP_C' prediction and weekly observations of ``code``.

    ``iP`` indexes ``siteNoLstCode`` and ``matMap``; the title shows the site
    number and ``matMap[iP]`` formatted as the correlation.
    """
    site = siteNoLstCode[iP]
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(seqName, site)
    obs = waterQuality.readSiteTS(site, [code], freq='W')
    # ts
    plotKw = dict(tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='-*',
                  cLst='rk',
                  legLst=['LSTM', 'Obs'])
    axplot.plotTS(axP, obs.index, [lstm[code], obs[code]], **plotKw)
    axP.set_title('site {} corr={:.3f}'.format(site, matMap[iP]))
    axP.legend()
Ejemplo n.º 10
0
def funcPoint(iP, axP):
    """Plot one prediction series and the year-parity split of observations.

    ``iP`` indexes ``siteNoLst``. Odd-year observations are labelled
    'obs train' and even-year observations 'obs test' in the legend. The title
    shows the correlation between the prediction and the even-year
    observations. The year mask is taken from the prediction's index and
    applied to the observations.
    """
    site = siteNoLst[iP]
    pred = basins.loadSeq(outName, site)[code]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    parity = pd.DatetimeIndex(pred.index).year % 2
    obsOdd = obs[parity == 1]
    obsEven = obs[parity == 0]
    for series, style, color in ((pred, '-', 'b'),
                                 (obsOdd, '*', 'm'),
                                 (obsEven, '*', 'r')):
        axplot.plotTS(axP, series.index, series.values,
                      styLst=style, cLst=color)
    axP.legend(['pred', 'obs train', 'obs test'])
    # Align the prediction with the dates that have an even-year observation.
    paired = pd.DataFrame(index=obsEven.dropna().index)
    paired['obs'] = obsEven
    paired['pred'] = pred
    _, corr = utils.stat.calErr(paired['pred'].values, paired['obs'].values)
    axP.set_title('site {} corr = {:.3f}'.format(site, corr))
Ejemplo n.º 11
0
def funcPoint(iP, axP):
    """Plot two prediction series and weekly observations for one site.

    ``iP`` indexes ``siteNoLst``. The predictions are read from the per-site
    CSVs under ``dirRoot1``/output and ``dirRoot2``/output. Only the entries
    kept by ``utils.rmNan`` are drawn. The correlations in the title are
    computed by ``utils.stat.calErr`` on the unfiltered arrays, as before.
    """
    siteNo = siteNoLst[iP]
    dfO = waterQuality.readSiteTS(siteNo, [code], freq='W')[code]
    t = dfO.index
    file1 = os.path.join(dirRoot1, 'output', siteNo)
    file2 = os.path.join(dirRoot2, 'output', siteNo)
    dfP1 = pd.read_csv(file1, index_col='date')[code]
    dfP2 = pd.read_csv(file2, index_col='date')[code]
    # Build the value list once and reuse it for filtering and for the
    # statistics. The unused ``styLst`` local of the earlier version is gone.
    v = [dfP1.values, dfP2.values, dfO.values]
    [v1, v2, o], iv = utils.rmNan(v)
    tt = t[iv]
    axplot.plotTS(axP, tt.values, [v1, v2, o], cLst='rbk')
    # print corr
    _, corr1 = utils.stat.calErr(v[0], v[-1])
    _, corr2 = utils.stat.calErr(v[1], v[-1])
    axP.set_title('site {} WRTDS {:.2f} only T {:.2f}'.format(
        siteNo, corr1, corr2))
Ejemplo n.º 12
0
def funcPoint(iP, axP):
    """Plot every ``labelLst`` prediction with observations, split at 2016.

    ``iP`` indexes ``siteNoLst``. Years up to and including 2016 go to
    ``axP[0]`` (titled with the site number) and later years to ``axP[1]``.
    The observation series is drawn last in each panel.
    """
    site = siteNoLst[iP]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    dates = obs.index
    years = pd.DatetimeIndex(dates).year
    before = years <= 2016
    after = years > 2016
    preds = [
        basins.loadSeq(
            '{}-{}-{}-{}'.format(dataName, 'comb', label, trainSet),
            site)[code]
        for label in labelLst
    ]
    seriesBefore = [p[before].values for p in preds] + [obs[before].values]
    seriesAfter = [p[after].values for p in preds] + [obs[after].values]
    axplot.plotTS(axP[0], dates[before], seriesBefore,
                  styLst='--*', cLst='bgr')
    axplot.plotTS(axP[1], dates[after], seriesAfter,
                  styLst='--*', cLst='bgr')
    axP[0].set_title(site)
Ejemplo n.º 13
0
def funcPoint(iP, axP):
    """Plot prediction vs. observation for one site, split at year 2016.

    ``iP`` indexes ``siteNoLst`` and the rows of ``corrMat``. Panel k
    (0: years <= 2016, 1: years > 2016) is titled with the site number, the
    correlation computed here by ``utils.stat.calErr``, and ``corrMat[iP, k]``.
    """
    site = siteNoLst[iP]
    pred = basins.loadSeq(outName, site)[code]
    obs = waterQuality.readSiteTS(site, [code], freq=wqData.freq)[code]
    years = pd.DatetimeIndex(pred.index).year
    # (prediction, observation) pair for each period
    pairs = [(pred[mask], obs[mask])
             for mask in (years <= 2016, years > 2016)]
    for k, (p, o) in enumerate(pairs):
        axplot.plotTS(axP[k], p.index, [p.values, o.values],
                      styLst='-*', cLst='br')
    for k, (p, o) in enumerate(pairs):
        _, corr = utils.stat.calErr(p.values, o.values)
        axP[k].set_title('site {} {:.2f} {:.2f}'.format(
            site, corr, corrMat[iP, k]))
Ejemplo n.º 14
0
def funcPoint(iP, axP):
    """Fill one row of a four-column axes grid per entry of ``codeLst``.

    ``iP`` indexes ``siteNoLst``. In row k, column 0 shows observed and WRTDS
    values of the code and column 1 shows '00060'. Column 2 shows the code
    against log('00060'), coloured by month, with the colour bar drawn into
    column 3 through the module-level figure ``figP``.
    """
    site = siteNoLst[iP]
    print(iP, site)
    obs = waterQuality.readSiteTS(site, codeLst+['00060'], freq='D')
    wrtdsFile = os.path.join(dirWrtds, 'output', site)
    wrtds = pd.read_csv(wrtdsFile, index_col=None).set_index('date')
    dates = obs.index
    # time-series columns
    for k, code in enumerate(codeLst):
        tsAx, flowAx = axP[k, 0], axP[k, 1]
        axplot.plotTS(flowAx, dates, obs['00060'],
                      styLst='-', cLst='b', alpha=0.3)
        axplot.plotTS(tsAx, dates, obs[code], styLst='*', cLst='k')
        axplot.plotTS(tsAx, dates, wrtds[code],
                      styLst='-', cLst='r', alpha=0.3)
    # concentration-vs-log-flow column, using rows where both values exist
    for k, code in enumerate(codeLst):
        both = obs[[code, '00060']].dropna(how='any')
        cqAx = axP[k, 2]
        logFlow = np.log(both['00060'].values)
        conc = both[code].values
        month = both.index.month.values
        cqAx.plot(logFlow, conc, 'k-', alpha=0.3)
        points = cqAx.scatter(logFlow, conc, c=month)
        figP.colorbar(points, ax=cqAx, cax=axP[k, 3])
Ejemplo n.º 15
0
def funcPoint(iP, axP):
    """Plot LSTM, WRTDS and observed ``code`` for one site on a single axes.

    ``iP`` indexes ``siteNoLstCode`` and ``indS``. The title reports the two
    correlations stored in ``corrMat[indS[iP], iCode, 0]`` (LSTM) and
    ``corrMat[indS[iP], iCode, 1]`` (WRTDS).
    """
    site = siteNoLstCode[iP]
    seqName = '{}-{}-{}-{}'.format(dataName, 'comb', 'QTFP_C', trainSet)
    lstm = basins.loadSeq(seqName, site)
    wrtdsFile = os.path.join(dirWrtds, 'output', site)
    wrtds = pd.read_csv(wrtdsFile, index_col=None).set_index('date')
    obs = waterQuality.readSiteTS(site, codeLst+['00060'], freq=wqData.freq)
    # ts
    plotKw = dict(tBar=np.datetime64('2010-01-01'),
                  sd=np.datetime64('1980-01-01'),
                  styLst='--*',
                  cLst='rbk',
                  legLst=['LSTM', 'WRTDS', 'Obs'])
    axplot.plotTS(axP, obs.index, [lstm[code], wrtds[code], obs[code]],
                  **plotKw)
    row = indS[iP]
    corrLstm = corrMat[row, iCode, 0]
    corrWrtds = corrMat[row, iCode, 1]
    axP.set_title('{} site {}; LSTM corr={:.2f} WRTDS corr={:.2f}'.format(
        shortName, site, corrLstm, corrWrtds))
    axP.legend()
Ejemplo n.º 16
0
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    """Run a trained model over the full time series of each site.

    Predictions are written as one CSV per site into the folder
    ``seq-<sd>-<ed>-ep<ep>`` under the model folder. Sites that already have
    a file there are skipped unless ``retest`` is True. When the model was
    trained with ``crit == 'SigmaLoss'``, a second file ``<siteNo>_sigma`` is
    written for each site.

    Parameters
    ----------
    outName : str
        Model name, passed to ``loadMaster``, ``nameFolder``, ``loadStat``
        and ``loadModel``.
    siteNoLst : list or single site id
        Sites to test; a non-list value is wrapped into a one-element list.
    wqData : optional
        Data model supplying ``freq`` and ``transOut``. It is built from
        ``master['dataName']`` when omitted and at least one site must be
        predicted.
    ep : int, optional
        Epoch to load; defaults to ``master['nEpoch']``.
    returnOut : bool
        When True, read the saved CSVs back and return them.
    retest : bool
        When True, predict every site even if its output file exists.
    sd, ed : numpy.datetime64
        First and last date of the tested period.

    Returns
    -------
    dict or None
        With ``returnOut`` a dict mapping site number (and
        ``siteNo + '_sigma'`` for SigmaLoss models) to the loaded frame;
        otherwise None.
    """
    if not isinstance(siteNoLst, list):
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    doSigma = master['crit'] == 'SigmaLoss'
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    os.makedirs(saveDir, exist_ok=True)
    if retest is True:
        sitePredLst = siteNoLst
    else:
        # Set lookup keeps the filter linear in the number of sites.
        siteSaved = set(os.listdir(saveDir))
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaved
        ]
    if sitePredLst:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)
        freq = wqData.freq
        # Column names of the output tables; the same for every site.
        colY = [] if varY is None else varY
        colYC = [] if varYC is None else varYC
        colOut = colY + colYC
        ny = len(varY) if varY is not None else 0
        nyc = len(varYC) if varYC is not None else 0
        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            xA = np.expand_dims(dfX.values, axis=1)
            # ``np.float`` was removed from NumPy; the builtin is the same
            # dtype.
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)
            # transfer out
            nt = len(dfX)
            yP = np.full([nt, ny + nyc], np.nan)
            if doSigma:
                # Even output channels go to yP; odd channels go to sP after
                # sqrt(exp(.)).
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
            # save output
            t = dfX.index.values.astype('datetime64[D]')
            # A flat column list gives a plain header; the earlier nested
            # list built a one-level MultiIndex.
            dfOut = pd.DataFrame(data=yP, columns=colOut, index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=colOut, index=t)
                dfOutS.index.name = 'date'
                # Fix: reset the sigma frame itself. The earlier code reset
                # ``dfOut`` here, so the '_sigma' file held the predictions.
                dfOutS = dfOutS.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)
    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
Ejemplo n.º 17
0
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

# Single-site experiment setup: site, constituent code and the offset ``sn``
# added before taking logarithms.
siteNo = '08195000'
code = '00955'
# NOTE(review): ``freq`` is defined here but the read below hard-codes 'W'.
freq = 'W'
sn = 1

# load data: forcing (gridMET + NTN), concentration and flow variables
varF = gridMET.varLst+ntn.varLst
varC = usgs.varC
varQ = usgs.varQ
varLst = varF+varC+varQ
df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')

# training / testing: positions of rows up to 2016 and after 2016
yr = df.index.year.values
ind1 = np.where(yr <= 2016)[0]
ind2 = np.where(yr > 2016)[0]
# Empty table that will hold one prediction column per model.
dfYP = pd.DataFrame(index=df.index, columns=['WRTDS', 'LSTM'])

# WRTDS predictors: log(Q + sn) plus an annual sine/cosine pair
dfX = pd.DataFrame({'date': df.index}).set_index('date')
dfX = dfX.join(np.log(df['00060']+sn)).rename(
    columns={'00060': 'logQ'})
# Decimal year; the day-of-year fraction uses a fixed 365-day year.
t = yr+dfX.index.dayofyear.values/365
dfX['sinT'] = np.sin(2*np.pi*t)
dfX['cosT'] = np.cos(2*np.pi*t)
# Predictor matrix restricted to the rows up to 2016.
x = dfX.iloc[ind1].values
Ejemplo n.º 18
0
def func(siteNo, fitAll=True):
    """Fit a locally weighted regression per code for one site and save it.

    For every code in ``codeLst`` that lists this site in ``dictSite``, a
    weighted least-squares model (``sm.WLS``) of log(code + sn) on
    ``['yr', 'logQ', 'sinT', 'cosT']`` is fitted separately for each target
    date. Only observations before 2010 are used for fitting. Weights are the
    product of three tricube kernels over distance in time, log-flow and
    season. The back-transformed predictions are written to
    ``dirOut/<siteNo>`` as CSV.

    Parameters
    ----------
    siteNo : str
        Site to process; also the output file name.
    fitAll : bool
        True predicts every date with complete predictors. False predicts
        only the dates from 2010 on that also have an observation.

    Returns
    -------
    ``()`` when the output file already exists (nothing is recomputed),
    otherwise None.
    """
    # prep data
    print(siteNo)
    saveName = os.path.join(dirOut, siteNo)
    if os.path.exists(saveName):
        return ()
    t0 = time.time()
    varQ = '00060'
    varLst = codeLst + [varQ]
    df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')
    dfYP = pd.DataFrame(index=df.index, columns=codeLst)
    dfX = pd.DataFrame({'date': df.index}).set_index('date')
    dfX = dfX.join(np.log(df[varQ] + sn)).rename(columns={varQ: 'logQ'})
    yr = dfX.index.year.values
    # Decimal year; kept under its own name so the per-date loop variable
    # below cannot shadow it.
    tDec = yr + dfX.index.dayofyear.values / 365
    dfX['sinT'] = np.sin(2 * np.pi * tDec)
    dfX['cosT'] = np.cos(2 * np.pi * tDec)
    dfX['yr'] = yr
    dfX['t'] = tDec
    xVarLst = ['yr', 'logQ', 'sinT', 'cosT']
    # train / test split, identical for every code
    ind1 = np.where(yr < 2010)[0]
    ind2 = np.where(yr >= 2010)[0]
    h = np.array([7, 2, 0.5])  # window [Y Q S] from EGRET
    fitCodeLst = [code for code in codeLst if siteNo in dictSite[code]]
    for code in fitCodeLst:
        dfXY = dfX.join(np.log(df[code] + sn))
        df1 = dfXY.iloc[ind1].dropna()
        if fitAll:
            df2 = dfXY[xVarLst + ['t']].dropna()
        else:
            df2 = dfXY.iloc[ind2].dropna()  # only fit for observations now
        n = len(df1)
        if n == 0:
            # No training data for this code: skip it but keep going. The
            # earlier ``break`` silently dropped all remaining codes.
            continue
        # calculate weight
        for tp in df2.index.tolist():
            target = df2.loc[tp]
            dY = np.abs((target['t'] - df1['t']).values)
            dQ = np.abs((target['logQ'] - df1['logQ']).values)
            # Seasonal distance: distance of dY to the nearest whole year.
            dS = np.min(np.stack(
                [abs(np.ceil(dY) - dY),
                 abs(dY - np.floor(dY))]),
                        axis=0)
            d = np.stack([dY, dQ, dS])
            if n > 100:
                # Widen all windows by 10% until each dimension keeps more
                # than 100 samples inside its window.
                hh = np.repeat(h[:, None], n, axis=1)
                bW = False
                # ``not`` rather than ``~``: bitwise-not of a Python bool is
                # an int (-1 / -2) and always truthy.
                while not bW:
                    bW = np.min(np.sum((hh - d) > 0, axis=1)) > 100
                    hh = hh * 1.1 if not bW else hh
            else:
                htemp = np.max(d, axis=1) * 1.1
                hh = np.repeat(htemp[:, None], n, axis=1)
            # Tricube kernel, zero outside the window.
            w = (1 - (d / hh)**3)**3
            w[w < 0] = 0
            wAll = w[0] * w[1] * w[2]
            ind = np.where(wAll > 0)[0]
            ww = wAll[ind]
            # fit WLS
            Y = df1.iloc[ind][code].values
            X = df1.iloc[ind][xVarLst].values
            model = sm.WLS(Y, X, weights=ww).fit()
            xp = target[xVarLst].values
            yp = model.predict(xp)[0]
            # Single-step .loc so the write reaches dfYP itself; the chained
            # form ``dfYP.loc[t][code] = ...`` may assign to a temporary.
            dfYP.loc[tp, code] = np.exp(yp) - sn
        t1 = time.time()
        print(siteNoLst.index(siteNo), siteNo, code, t1 - t0)
    dfYP.to_csv(saveName)
    return
Ejemplo n.º 19
0
            p = yP[-1, :, master['varY'].index(code)]
            o = wqData.c[-1, ind, ic]
        elif len(wqData.c.shape) == 2:
            p = ycP[:, master['varYC'].index(code)]
            o = wqData.c[ind, ic]
        for siteNo in dictSite[code]:
            iS = siteNoLst.index(siteNo)
            indS = info[info['siteNo'] == siteNo].index.values
            rmse, corr = utils.stat.calErr(p[indS], o[indS])
            corrMat[iS, iCode, iT] = corr
            rmseMat[iS, iCode, iT] = rmse

# seq test: per-site, per-code error of the full predicted sequence.
# Slot 2 of the last axis holds 1980-2016 and slot 3 holds years after 2016.
for iS, siteNo in enumerate(siteNoLst):
    dfP = basins.loadSeq(outName, siteNo)
    dfO = waterQuality.readSiteTS(siteNo, codeLst, freq=wqData.freq)
    # Year of every row, taken from the prediction's index and applied to
    # both frames below.
    yr = pd.DatetimeIndex(dfP.index).year
    for iC, code in enumerate(codeLst):
        # Only sites listed for this code are evaluated; other cells keep
        # whatever value the matrices were created with.
        if siteNo in dictSite[code]:
            o1 = dfO[code].values[(yr <= 2016) & (yr >= 1980)]
            p1 = dfP[code].values[(yr <= 2016) & (yr >= 1980)]
            o2 = dfO[code].values[yr > 2016]
            p2 = dfP[code].values[yr > 2016]
            rmse1, corr1 = utils.stat.calErr(p1, o1)
            rmse2, corr2 = utils.stat.calErr(p2, o2)
            corrMat[iS, iC, 2] = corr1
            corrMat[iS, iC, 3] = corr2
            rmseMat[iS, iC, 2] = rmse1
            rmseMat[iS, iC, 3] = rmse2

# plot box
Ejemplo n.º 20
0
        dictLSTMLst.append(dictLSTM)
    # WRTDS
    dictWRTDS = dict()
    dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat',
                            'WRTDS-W', 'B10', 'output')
    for k, siteNo in enumerate(siteNoLst):
        print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
        saveFile = os.path.join(dirWRTDS, siteNo)
        df = pd.read_csv(saveFile, index_col=None).set_index('date')
        # df = utils.time.datePdf(df)
        dictWRTDS[siteNo] = df
    # Observation
    dictObs = dict()
    for k, siteNo in enumerate(siteNoLst):
        print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
        df = waterQuality.readSiteTS(
            siteNo, varLst=['00060']+codeLst, freq='W')
        dictObs[siteNo] = df

    # calculate correlation
    tt = np.datetime64('2010-01-01')
    t0 = np.datetime64('1980-01-01')
    indT1 = np.where((df.index.values < tt) & (df.index.values >= t0))[0]
    indT2 = np.where(df.index.values >= tt)[0]
    dictLSTM = dictLSTMLst[0]
    corrMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan)
    rmseMat = np.full([len(siteNoLst), len(codeLst), 3], np.nan)
    for ic, code in enumerate(codeLst):
        for siteNo in dictSite[code]:
            indS = siteNoLst.index(siteNo)
            v1 = dictLSTM[siteNo][code].iloc[indT2].values
            v2 = dictWRTDS[siteNo][code].iloc[indT2].values
Ejemplo n.º 21
0
import pandas as pd
import time
import matplotlib.pyplot as plt
from hydroDL.new.model import flowPath

# Single-site experiment setup.
siteNo = '07060710'
code = '00955'
# NOTE(review): ``freq`` is defined but not passed to readSiteTS below, so
# the read uses that function's default frequency.
freq = 'D'
sn = 1

# load data: forcing (gridMET + NTN), concentration and flow variables
varF = gridMET.varLst+ntn.varLst
varC = usgs.varC
varQ = usgs.varQ
varLst = varF+varC+varQ
df = waterQuality.readSiteTS(siteNo, varLst=varLst)

# plot data: runoff, precipitation and the constituent on stacked axes
fig, axes = plt.subplots(3, 1, figsize=(16, 6))
axplot.plotTS(axes[0], df.index, df['runoff'].values, styLst='-*', cLst='bgr')
axplot.plotTS(axes[1], df.index, df['pr'].values, styLst='-*', cLst='bgr')
axplot.plotTS(axes[2], df.index, df[code].values, styLst='*', cLst='bgr')
fig.show()

# training / testing: row positions with yrTrain[0] <= year < yrTrain[1]
yrTrain = [2000, 2005]
yr = df.index.year.values
indTrain = np.where((yr >= yrTrain[0]) & (yr < yrTrain[1]))[0]

# data
# varX = varF
Ejemplo n.º 22
0
# Evaluate one trained model on odd-year vs. even-year samples per site.
# outName = 'sbWT-00945-ntnS-00945-Y1'
outName = 'sbWT-00945-plain-00945-Y1'
dataName = 'sbWT'
wqData = waterQuality.DataModelWQ(dataName)
code = '00945'
siteNoLst = dictSite[code]
# NOTE(review): ``ep`` and ``retest`` are set here but are not passed to the
# testModelSeq call below, so its own defaults apply.
ep = None
retest = True
basins.testModelSeq(outName, siteNoLst, wqData=wqData)
# np.ndarray allocates without initialising; rows are filled in the loop.
rmseMat = np.ndarray([len(siteNoLst), 2])
corrMat = np.ndarray([len(siteNoLst), 2])
for k, siteNo in enumerate(siteNoLst):
    dfP = basins.loadSeq(outName, siteNo)
    # Observations for exactly the columns the model predicted.
    dfO = waterQuality.readSiteTS(siteNo,
                                  dfP.columns.tolist(),
                                  freq=wqData.freq)
    # The first assignment is immediately overridden by the fixed list.
    codeLst = dfP.columns.tolist()
    codeLst = ['00945']
    sd = np.datetime64('1980-01-01')
    # ``ed`` is not used in the visible part of the loop.
    ed = np.datetime64('2020-12-31')
    # Drop everything before ``sd``.
    dfP = dfP[dfP.index >= sd]
    dfO = dfO[dfO.index >= sd]
    # Year parity from the prediction index: suffix 1 = odd, 2 = even years.
    yr = pd.DatetimeIndex(dfP.index).year
    dfP1 = dfP[yr % 2 == 1]
    dfO1 = dfO[yr % 2 == 1]
    dfP2 = dfP[yr % 2 == 0]
    dfO2 = dfO[yr % 2 == 0]
    rmse1, corr1 = utils.stat.calErr(dfP1[code].values, dfO1[code].values)
    rmse2, corr2 = utils.stat.calErr(dfP2[code].values, dfO2[code].values)
    rmseMat[k, :] = [rmse1, rmse2]
Ejemplo n.º 23
0
# Per-site, per-code error statistics of saved WRTDS predictions.
# Suffix 1 = years before 2010, suffix 2 = 2010 and later.
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10')
dirOut = os.path.join(dirWRTDS, 'output')
dirPar = os.path.join(dirWRTDS, 'params')

# Result tables start as copies of ``df`` (defined earlier in the script) and
# are addressed below by site-number row and code column.
dfCorr1 = df.copy()
dfCorr2 = df.copy()
dfRmse1 = df.copy()
dfRmse2 = df.copy()
t0 = time.time()
for kk, siteNo in enumerate(siteNoLst):
    print('{}/{} {:.2f}'.format(kk, len(siteNoLst), time.time() - t0))
    saveFile = os.path.join(dirOut, siteNo)
    dfP = pd.read_csv(saveFile, index_col=None).set_index('date')
    dfP.index = pd.to_datetime(dfP.index)
    dfC = waterQuality.readSiteTS(siteNo, varLst=usgs.newC, freq='W')
    yr = dfC.index.year.values
    # The split depends only on the dates, so compute it once per site.
    ind1 = np.where(yr < 2010)[0]
    ind2 = np.where(yr >= 2010)[0]
    for code in usgs.newC:
        rmse1, corr1 = utils.stat.calErr(dfP.iloc[ind1][code].values,
                                         dfC.iloc[ind1][code].values)
        rmse2, corr2 = utils.stat.calErr(dfP.iloc[ind2][code].values,
                                         dfC.iloc[ind2][code].values)
        # Single-step .loc[row, col]: the chained form
        # ``frame.loc[siteNo][code] = v`` may write to a temporary copy and
        # leave the table unchanged.
        dfCorr1.loc[siteNo, code] = corr1
        dfRmse1.loc[siteNo, code] = rmse1
        dfCorr2.loc[siteNo, code] = corr2
        dfRmse2.loc[siteNo, code] = rmse2

dfCorr1.to_csv(os.path.join(dirWRTDS, '{}-{}-corr'.format(trainSet, trainSet)))
dfRmse1.to_csv(os.path.join(dirWRTDS, '{}-{}-rmse'.format(trainSet, trainSet)))
Ejemplo n.º 24
0
        dictLSTM[siteNo] = df
    dictLSTMLst.append(dictLSTM)
# WRTDS: one CSV per site, indexed by its 'date' column
dictWRTDS = dict()
dirWRTDS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W', 'B10', 'output')
for k, siteNo in enumerate(siteNoLst):
    # '\r' keeps the progress counter on a single console line.
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
    saveFile = os.path.join(dirWRTDS, siteNo)
    df = pd.read_csv(saveFile, index_col=None).set_index('date')
    # df = utils.time.datePdf(df)
    dictWRTDS[siteNo] = df
# Observation: weekly series of every code per site
dictObs = dict()
for k, siteNo in enumerate(siteNoLst):
    print('\t site {}/{}'.format(k, len(siteNoLst)), end='\r')
    df = waterQuality.readSiteTS(siteNo, varLst=codeLst, freq='W')
    dictObs[siteNo] = df

# calculate correlation
# ``df`` is the observation frame of the last site from the loop above. Its
# index defines the before/after-2010 row positions used for every site.
# NOTE(review): assumes all sites share the same date index - confirm.
tt = np.datetime64('2010-01-01')
ind1 = np.where(df.index.values < tt)[0]
ind2 = np.where(df.index.values >= tt)[0]
# The second loaded LSTM run is the primary one here, the first is secondary.
dictLSTM = dictLSTMLst[1]
dictLSTM2 = dictLSTMLst[0]
# NaN-filled result arrays: site x code x 4 slots.
corrMat = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
rmseMat = np.full([len(siteNoLst), len(codeLst), 4], np.nan)
for ic, code in enumerate(codeLst):
    for siteNo in dictSite[code]:
        indS = siteNoLst.index(siteNo)
        # Values from 2010 on for the LSTM and WRTDS series.
        v1 = dictLSTM[siteNo][code].iloc[ind2].values
        v2 = dictWRTDS[siteNo][code].iloc[ind2].values
Ejemplo n.º 25
0
# Fit, per site, a linear regression of log('00060' + sn) on an annual
# sine/cosine pair and predict it for every date.
dirOut = os.path.join(dirRoot, 'output')
dirPar = os.path.join(dirRoot, 'params')
for folder in [dirRoot, dirOut, dirPar]:
    os.makedirs(folder, exist_ok=True)

colLst = ['count', 'pSinT', 'pCosT', 'b']
dfPar = pd.DataFrame(index=siteNoLst, columns=colLst)

for kk, siteNo in enumerate(siteNoLst):
    print('{}/{} {:.2f}'.format(
        kk, len(siteNoLst), time.time()-t0))
    saveName = os.path.join(dirOut, siteNo)
    # if os.path.exists(saveName):
    #     continue
    df = waterQuality.readSiteTS(siteNo, varLst=['00060'], freq='D')
    dfX = pd.DataFrame({'date': df.index}).set_index('date')
    yr = dfX.index.year.values
    # Decimal year; the day-of-year fraction uses a fixed 365-day year.
    t = yr+dfX.index.dayofyear.values/365
    dfX['sinT'] = np.sin(2*np.pi*t)
    dfX['cosT'] = np.cos(2*np.pi*t)
    x = dfX.values
    y = np.log(df['00060'].values+sn)
    # Keep only the rows where predictors and target are all present.
    [xx, yy], iv = utils.rmNan([x, y])
    if len(xx) > 0:
        lrModel = LinearRegression()
        lrModel = lrModel.fit(xx, yy)
        yp = lrModel.predict(x)
        # Invert the log transform with the same offset it was built with.
        # The earlier hard-coded ``-1`` was only right while sn == 1.
        dfYP = pd.DataFrame(index=df.index, columns=[
                            '00060'], data=np.exp(yp)-sn)
Ejemplo n.º 26
0
# Per-site prediction tables for the 'WRTDS-DS' and 'WRTDS-DQ' runs.
# ``dirL`` and ``dictL`` are defined earlier in the script.
dictS = dict()
dirS = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-DS', 'All', 'output')
dictQ = dict()
dirQ = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-DQ', 'All', 'output')
# Fill each dictionary from its folder: one CSV per site, indexed by 'date'.
for dirTemp, dictTemp in zip([dirL, dirS, dirQ], [dictL, dictS, dictQ]):
    for k, siteNo in enumerate(siteNoLst):
        # '\r' keeps the progress counter on a single console line.
        print('\t WRTDS site {}/{}'.format(k, len(siteNoLst)), end='\r')
        saveFile = os.path.join(dirTemp, siteNo)
        df = pd.read_csv(saveFile, index_col=None).set_index('date')
        dictTemp[siteNo] = df

# Daily observations of '00060' and every code, read with rmFlag=True.
dictObs = dict()
for k, siteNo in enumerate(siteNoLst):
    print('\t USGS site {}/{}'.format(k, len(siteNoLst)), end='\r')
    df = waterQuality.readSiteTS(siteNo,
                                 varLst=['00060'] + codeLst,
                                 freq='D',
                                 rmFlag=True)
    dictObs[siteNo] = df

# calculate rsq: squared correlation per site and code, NaN where not set
rMat = np.full([len(siteNoLst), len(codeLst), 2], np.nan)
for ic, code in enumerate(codeLst):
    for siteNo in dictSite[code]:
        indS = siteNoLst.index(siteNo)
        v1 = dictL[siteNo][code].values
        v2 = dictS[siteNo][code].values
        v0 = dictObs[siteNo][code].values
        # Filter the three arrays together through utils.rmNan.
        (vv0, vv1, vv2), indV = utils.rmNan([v0, v1, v2])
        rmse1, corr1 = utils.stat.calErr(vv1, vv0)
        rmse2, corr2 = utils.stat.calErr(vv2, vv0)
        rMat[indS, ic, 0] = corr1**2
Ejemplo n.º 27
0
fig.show()

# prcp: synthetic daily series built from two cosine components
t = np.arange('2000-01-01', '2005-01-01', dtype='datetime64[D]')
# Days since 1990-01-01 as floats. The builtin ``float`` replaces the
# ``np.float`` alias, which was removed in NumPy 1.24.
x = (t - np.datetime64('1990-01-01')).astype(float)
p = 10 * np.cos(x*2*np.pi/365) +\
    10 * np.cos((x+120)*np.pi/365*4)
# Negative values are clipped to zero.
p[p < 0] = 0
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
ax.plot(t, p)
fig.show()

# # prcp - real world
siteNo = '401733105392404'
code = '00955'
dfO = waterQuality.readSiteTS(siteNo, ['runoff', 'pr', code])
t = dfO.index.values
p = dfO['pr'].values
q = dfO['runoff'].values
# Stacked panels: precipitation, runoff, constituent samples.
fig, axes = plt.subplots(3, 1, figsize=(12, 6))
axes[0].plot(t, p)
axes[1].plot(t, dfO['runoff'].values)
axes[2].plot(t, dfO[code].values, '*')
fig.show()
# Constituent against runoff.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.plot(dfO['runoff'].values, dfO[code].values, '*')
fig.show()

# calculate concentration curve
nf = len(kLst)
rho = 365
Ejemplo n.º 28
0
import pandas as pd
from sklearn.linear_model import LinearRegression

# Single-site experiment setup.
siteNo = '01545600'
code = '00955'
dataName = 'nbW'
labelLst = ['QF_C', 'QFP_C']
trainSet = '{}-B16'.format('comb')

# WRTDS-style predictors extended with forcing variables
varF = gridMET.varLst
# One-element slice: only the third NTN variable is kept.
varP = ntn.varLst[2:3]
varQ = '00060'
varLst = ['00060', '00955']+varF+varP
varX = varF+varP
df = waterQuality.readSiteTS(siteNo, varLst=varLst, freq='W')
dfX = pd.DataFrame({'date': df.index}).set_index('date')
# Offset added before the logarithm of '00060'.
sn = 1
dfX = dfX.join(np.log(df[varQ]+sn)).rename(
    columns={varQ: 'logQ'})
dfX = dfX.join(df[varP])
yr = dfX.index.year.values
# Decimal year; the day-of-year fraction uses a fixed 365-day year.
t = yr+dfX.index.dayofyear.values/365
dfX['sinT'] = np.sin(2*np.pi*t)
dfX['cosT'] = np.cos(2*np.pi*t)
# Row positions before 2010.
ind = np.where(yr < 2010)[0]
dfYP = pd.DataFrame(index=df.index, columns=[code])
dfYP.index.name = 'date'
# Min-max scale every predictor column using its full-period min and max.
# NOTE(review): a constant column makes the denominator zero - confirm that
# cannot happen for these predictors.
dfXN = (dfX-dfX.min())/(dfX.max()-dfX.min())
# dfXN = dfX
# Predictor matrix restricted to the rows before 2010.
x = dfXN.iloc[ind].values
Ejemplo n.º 29
0
from hydroDL.app import waterQuality
from hydroDL.data import usgs
import numpy as np
import pandas as pd
from hydroDL.post import axplot, figplot
import matplotlib.pyplot as plt

# Plot raw samples of two codes for one site and highlight flagged samples.
siteNo = '09163500'
varC = ['00660', '00618']

sd = np.datetime64('1979-01-01')
# ``ed`` is not used in the visible part of the script.
ed = np.datetime64('2019-12-31')

# ``df`` is loaded here but not used in the visible part of the script.
df = waterQuality.readSiteTS(siteNo, varLst=['00060'] + varC)

# Sample values (dfC) and their flag table (dfCF), read with flag=2.
dfC, dfCF = usgs.readSample(siteNo, codeLst=varC, startDate=sd, flag=2)

# One axes per code: all samples in red, samples whose flag equals 1 in blue.
fig, axes = plt.subplots(2, 1)
for k, code in enumerate(varC):
    v = dfC[code].values
    # Flag column of a code is named '<code>_cd'.
    f = dfCF[code + '_cd'].values
    t = dfC.index.values
    indF = np.where(f == 1)[0]
    axplot.plotTS(axes[k], t, v, cLst='r', styLst=['-*'])
    axplot.plotTS(axes[k], t[indF], v[indF], cLst='b', styLst='*')
fig.show()
Ejemplo n.º 30
0
from hydroDL import kPath, utils
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
from hydroDL.master import slurm
from hydroDL.post import axplot, figplot
import numpy as np
import matplotlib.pyplot as plt
# Weekly-aggregated samples of one code, coloured by whether any sample in
# the week carried a non-zero flag.
code = '00660'

siteNo = '01111500'
# ``df`` is loaded here but not used in the visible part of the script.
df = waterQuality.readSiteTS(siteNo, [code], freq='D').dropna()
# Sample values (dfC) and their flag table (dfCF), read with flag=2.
dfC, dfCF = usgs.readSample(siteNo, codeLst=[code], flag=2)
# Weekly means on weeks ending Tuesday.
dfC = dfC.resample('W-TUE').mean()
# Missing flags count as 0. After the weekly mean, any non-zero value is set
# to 1, so the result marks weeks that had at least one flagged sample.
dfCF = dfCF.fillna(0)
dfCFW = dfCF.resample('W-TUE').mean()
dfCFW = dfCFW.fillna(0)
dfCFW[dfCFW != 0] = 1
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
t = dfC.index
v = dfC[code].values
# Flag column of a code is named '<code>_cd'.
flag = dfCFW[code+'_cd'].values
# Unflagged weeks in red, flagged weeks in black.
ax.plot(t[flag == 0], v[flag == 0], 'r*')
ax.plot(t[flag != 0], v[flag != 0], 'k*')
fig.show()