Beispiel #1
0
def funcPoint(iP, axP):
    """Draw one panel per code in ``codeSel`` for the site at position ``iP``.

    Both model outputs and the observations are multiplied row-wise by a
    '00060' column (their own for the predictions, ``dfQ``'s for the
    observations) before plotting.  Each panel shows the two model series as
    lines and four marker series that split the observations by odd/even
    calendar year and by whether the matching ``<code>_cd`` entry is
    'x'/'X' (legend "obs") or anything else (legend "flag").

    Args:
        iP: position of the site in ``siteNoLstP``.
        axP: indexable collection of axes, one per entry of ``codeSel``.
    """
    siteNo = siteNoLstP[iP]
    dfPred1, _ = basins.loadSeq(outLst[0], siteNo)
    dfPred2, _ = basins.loadSeq(outLst[1], siteNo)
    sd = np.datetime64('1980-01-01')
    dfQ = waterQuality.readSiteY(siteNo, ['00060'], sd=sd)
    dfC = waterQuality.readSiteY(siteNo,
                                 codeSel + [code + '_cd' for code in codeSel],
                                 sd=sd)
    dfPred1 = dfPred1[dfPred1.index >= sd]
    dfPred2 = dfPred2[dfPred2.index >= sd]
    # Scale every column by the '00060' column of the same table / of dfQ.
    dfPred1 = dfPred1.multiply(dfPred1['00060'], axis='index')
    dfPred2 = dfPred2.multiply(dfPred2['00060'], axis='index')
    dfC[codeSel] = dfC[codeSel].multiply(dfQ['00060'], axis='index')

    t = dfPred1.index.values.astype(np.datetime64)
    # axplot.plotTS(axP[0], t, [dfPred1['00060'], dfQ['00060']], tBar=tBar,
    #               legLst=['pred-opt1', 'obs'], styLst='--', cLst='br')
    # axP[0].set_title('{} streamflow'.format(siteNo))

    # Nothing below depends on the code being plotted, so build it once.
    styLst = ['-', '-', '*', '*', '*', '*']
    # The order must follow `data` below: (c1, c2, f1, f2) hold
    # (odd, even, odd, even) years.  The last two labels used to be swapped.
    legLst = [
        'model odd', 'model even', 'obs odd', 'obs even', 'flag odd',
        'flag even'
    ]
    yr = dfC.index.year
    dropEven = np.asarray(yr % 2 == 0)
    dropOdd = np.asarray(yr % 2 == 1)

    for k, var in enumerate(codeSel):
        shortName = codePdf.loc[var]['shortName']
        title = '{} {} {}'.format(siteNo, shortName, var)
        obs = dfC[var].values
        vf = dfC[var + '_cd'].values
        isX = (vf == 'x') | (vf == 'X')
        # c*: rows coded 'x'/'X'; f*: every other row.
        # *1 blanks even years (keeps odd), *2 blanks odd years (keeps even).
        c1 = np.where(isX & ~dropEven, obs, np.nan)
        c2 = np.where(isX & ~dropOdd, obs, np.nan)
        f1 = np.where(~isX & ~dropEven, obs, np.nan)
        f2 = np.where(~isX & ~dropOdd, obs, np.nan)
        data = [dfPred1[var].values, dfPred2[var].values, c1, c2, f1, f2]
        axplot.plotTS(axP[k],
                      t,
                      data,
                      styLst=styLst,
                      cLst='bgrmkk',
                      legLst=legLst)
        axP[k].set_title(title)
Beispiel #2
0
def funcPoint(iP, axP):
    """Plot the ``code`` series of one site and report its sample counts.

    The title shows the non-null counts before and from 2000-01-01 onward;
    the same date is handed to ``axplot.plotTS`` as ``tBar``.

    Args:
        iP: position of the site in ``siteNoLst``.
        axP: the single axes object to draw on.
    """
    tBar = np.datetime64('2000-01-01')
    siteNo = siteNoLst[iP]
    dfC = waterQuality.readSiteY(siteNo, [code])

    # Per-column non-null counts on either side of the split date.
    dates = dfC[code].index
    nBefore = dfC[dates < tBar].count().values
    nAfter = dfC[dates >= tBar].count().values

    tAx = dfC.index.values.astype(np.datetime64)
    axplot.plotTS(axP, tAx, dfC[code], styLst='*', tBar=tBar)
    axP.set_title('{} #samples = {} {}'.format(siteNo, nBefore, nAfter))
Beispiel #3
0
def funcPoint(iP, axP):
    """Compare two prediction tables against observations for one site.

    The first table comes from ``basins.loadSeq(outName, siteNo, ep=ep)``; the
    second is read from the ``modelStat/WRTDS/Yodd/<siteNo>`` CSV under
    ``kPath.dirWQ`` and given a daily date index running from 1979-01-01 to
    2020-01-01.  One panel is drawn per code in ``codeSel`` with the two
    prediction lines (legend 'LSTM' and 'WRTDS') and four marker series that
    split the observations by odd/even calendar year and by whether the
    matching ``<code>_cd`` entry is 'x'/'X' (legend "obs") or anything else
    (legend "flag").

    Args:
        iP: position of the site in ``siteNoLstP``.
        axP: indexable collection of axes, one per entry of ``codeSel``.
    """
    siteNo = siteNoLstP[iP]
    dfPred1, _ = basins.loadSeq(outName, siteNo, ep=ep)
    dfPred2 = pd.read_csv(os.path.join(kPath.dirWQ, 'modelStat',
                                       'WRTDS', 'Yodd', siteNo), index_col=None)
    # `pd.datetime` no longer exists in current pandas; date strings give
    # the same daily range.  The CSV is assumed to hold exactly one row per
    # day of this range, otherwise the index assignment raises.
    ctR = pd.date_range('1979-01-01', '2020-01-01')
    dfPred2.index = ctR
    dfPred2.index.name = 'date'
    sd = np.datetime64('1980-01-01')
    # The streamflow table was read here before but never used, so the read
    # has been dropped.
    dfC = waterQuality.readSiteY(
        siteNo, codeSel+[code+'_cd' for code in codeSel], sd=sd)
    dfPred1 = dfPred1[dfPred1.index >= sd]
    dfPred2 = dfPred2[dfPred2.index >= sd]
    t = dfPred1.index.values.astype(np.datetime64)

    # Nothing below depends on the code being plotted, so build it once.
    styLst = ['-', '-', '*', '*', '*', '*']
    # The order must follow `data` below: (c1, c2, f1, f2) hold
    # (odd, even, odd, even) years.  The last two labels used to be swapped.
    legLst = ['LSTM', 'WRTDS', 'obs odd',
              'obs even', 'flag odd', 'flag even']
    yr = dfC.index.year
    dropEven = np.asarray(yr % 2 == 0)
    dropOdd = np.asarray(yr % 2 == 1)

    for k, var in enumerate(codeSel):
        shortName = codePdf.loc[var]['shortName']
        title = '{} {} {}'.format(siteNo, shortName, var)
        obs = dfC[var].values
        vf = dfC[var+'_cd'].values
        isX = (vf == 'x') | (vf == 'X')
        # c*: rows coded 'x'/'X'; f*: every other row.
        # *1 blanks even years (keeps odd), *2 blanks odd years (keeps even).
        c1 = np.where(isX & ~dropEven, obs, np.nan)
        c2 = np.where(isX & ~dropOdd, obs, np.nan)
        f1 = np.where(~isX & ~dropEven, obs, np.nan)
        f2 = np.where(~isX & ~dropOdd, obs, np.nan)
        data = [dfPred1[var].values, dfPred2[var].values, c1, c2, f1, f2]
        axplot.plotTS(axP[k], t, data, styLst=styLst, cLst='bgrmkk',
                      legLst=legLst)
        axP[k].set_title(title)
def funcPoint(iP, axP):
    """Plot a '00060' panel plus one panel per code in ``codeSel``.

    Panel 0 overlays the '00060' column of the first prediction table on the
    observed '00060' series.  Panel ``k + 1`` shows both prediction tables
    for ``codeSel[k]`` together with all observations and, on top of them,
    the observations whose ``<code>_cd`` entry is neither 'x' nor 'X'.

    Args:
        iP: position of the site in ``siteNoLstP``.
        axP: indexable collection of ``len(codeSel) + 1`` axes.
    """
    siteNo = siteNoLstP[iP]
    sd = np.datetime64('1980-01-01')
    tBar = np.datetime64('2000-01-01')

    # Load both prediction tables and the observations.
    predA, _ = basins.loadSeq(outLst[0], siteNo)
    predB, _ = basins.loadSeq(outLst[1], siteNo)
    flowObs = waterQuality.readSiteY(siteNo, ['00060'], sd=sd)
    flagCols = [code + '_cd' for code in codeSel]
    chemObs = waterQuality.readSiteY(siteNo, codeSel + flagCols, sd=sd)

    # Predictions are trimmed to the same start date as the observations.
    predA = predA[predA.index >= sd]
    predB = predB[predB.index >= sd]
    t = predA.index.values.astype(np.datetime64)

    flowAx = axP[0]
    axplot.plotTS(flowAx,
                  t, [predA['00060'], flowObs['00060']],
                  tBar=tBar,
                  legLst=['pred-opt1', 'obs'],
                  styLst='--',
                  cLst='br')
    flowAx.set_title('{} streamflow'.format(siteNo))

    lineStyles = ['-', '-', '*', '*']
    legend = ['pred', 'pred-rmFlag', 'obs', 'obs-flag']
    for k, var in enumerate(codeSel):
        ax = axP[k + 1]
        shortName = codePdf.loc[var]['shortName']
        allObs = chemObs[var].values.copy()
        cd = chemObs[var + '_cd'].values
        # Same observations with the 'x'/'X' rows blanked out.
        otherObs = chemObs[var].values.copy()
        otherObs[(cd == 'x') | (cd == 'X')] = np.nan
        series = [predA[var].values, predB[var].values, allObs, otherObs]
        axplot.plotTS(ax,
                      t,
                      series,
                      tBar=tBar,
                      legLst=legend,
                      styLst=lineStyles,
                      cLst='bgrk')
        ax.set_title(' {} {}'.format(shortName, var))
Beispiel #5
0
def funcPoint(iP, axP):
    """Plot forcing, observations and lagged correlation for one site.

    For every lag ``k`` in ``range(nt)`` (days) and every column ``i`` in
    ``range(nx)`` of the ``varX`` table, the Pearson correlation between the
    non-null '00955' observations and the column value ``k`` days earlier is
    stored in ``corrMat[k, i]``.  Axes 0 and 1 show the '00060' column of the
    forcing table and the observations; axes 2 shows the correlation of
    column 1 against lag.

    Args:
        iP: position of the site in ``siteNoLst``.
        axP: indexable collection of at least three axes.
    """
    siteNo = siteNoLst[iP]
    dfY = waterQuality.readSiteY(siteNo, ['00955'])
    dfY = dfY.dropna()
    dfX = waterQuality.readSiteX(siteNo, varX)
    t = dfY.index
    y = dfY['00955'].values
    # NaN marks lag/column pairs for which no correlation can be computed.
    corrMat = np.full([nt, nx], np.nan)
    for k in range(nt):
        # reindex (instead of .loc) turns lagged dates that are absent from
        # dfX into NaN rows rather than raising KeyError; the mask below
        # then drops them.
        x = dfX.reindex(t.values - np.timedelta64(k, 'D')).values
        for i in range(nx):
            # Mask per column: the previous row list came from a 2-D
            # np.where, so it repeated rows and could still include rows
            # where column i itself was NaN, poisoning the coefficient.
            ok = ~np.isnan(x[:, i])
            if np.count_nonzero(ok) >= 2:
                corrMat[k, i] = np.corrcoef(x[ok, i], y[ok])[0, 1]
    axP[0].plot(dfX['00060'], '-b', label='streamflow')
    axP[1].plot(dfY, '-*r', label='silica')
    axP[2].plot(np.arange(nt), corrMat[:, 1].T, '-*')
    axP[2].set_ylabel('correlation')
    axP[2].set_xlabel('lag day')
Beispiel #6
0
import importlib

import pandas as pd
import numpy as np
import os
import time

import scipy.signal as signal

# Per-site preparation for the 'Silica64' data model: blank out extreme
# '00955' values, then express sample dates as days since 1979-01-01.
wqData = waterQuality.DataModelWQ('Silica64')
siteNoLst = wqData.siteNoLst

for siteNo in siteNoLst:
    print(siteNo)
    dfObs = waterQuality.readSiteY(siteNo, ['00955'])
    # rm outlier: the upper limit is mean + 5 * std of the values lying
    # strictly between the 1st and 99th percentiles
    df = dfObs[dfObs['00955'].notna().values]
    y = df['00955'].values
    yV = y[y < np.percentile(y, 99)]
    yV = yV[yV > np.percentile(y, 1)]
    ul = np.mean(yV) + np.std(yV) * 5
    dfObs[dfObs['00955'] > ul] = np.nan
    # fourier
    # Use an explicit 1-D row mask (as in the outlier step above); a 2-D
    # boolean array is not a dependable row filter.
    df = dfObs[dfObs['00955'].notna().values]
    # tt/xx cover every row of dfObs, t/x only the rows left after filtering.
    # The `np.float` alias is gone from current NumPy; builtin float is the
    # same float64 dtype.
    tt = dfObs.index.values
    xx = (tt.astype('datetime64[D]') - np.datetime64('1979-01-01')).astype(
        float)
    t = df.index.values
    x = (t.astype('datetime64[D]') - np.datetime64('1979-01-01')).astype(
        float)
Beispiel #7
0
def funcPoint(iP, axP):
    """Plot the ``code`` series of one site with its sample count as title.

    Args:
        iP: position of the site in ``siteNoLst``.
        axP: the single axes object to draw on.
    """
    siteNo = siteNoLst[iP]
    dfC = waterQuality.readSiteY(siteNo, [code])
    # Non-null count per column, shown as an array in the title.
    nSample = dfC.count().values
    tAx = dfC.index.values.astype(np.datetime64)
    axplot.plotTS(axP, tAx, dfC[code], styLst='*')
    axP.set_title('{} #samples = {}'.format(siteNo, nSample))
Beispiel #8
0
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
from hydroDL.model import rnn, crit
import os

# Single-site setup: read forcing (X) and '00060' (Y), normalise both with
# `transform.transInAll`, then split the normalised tables at 2000-01-01.
siteNo = '01434025'
# siteNo = '01364959'
codeLst = ['00915', '00940', '00955']

varX = gridMET.varLst
varY = ['00060']
dfX = waterQuality.readSiteX(siteNo, varX)
dfY = waterQuality.readSiteY(siteNo, varY)

# Normalised copies keep the index and column labels of the raw tables.
mtdX = waterQuality.extractVarMtd(varX)
mtdY = waterQuality.extractVarMtd(varY)
normX, statX = transform.transInAll(dfX.values, mtdX)
normY, statY = transform.transInAll(dfY.values, mtdY)
dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

# Suffix 1: rows before the split date; suffix 2: rows on or after it;
# no suffix: every row.
_tSplit = np.datetime64('2000-01-01')
matX1 = dfXN[dfXN.index < _tSplit].values
matY1 = dfYN[dfYN.index < _tSplit].values
matX2 = dfXN[dfXN.index >= _tSplit].values
matY2 = dfYN[dfYN.index >= _tSplit].values
matX = dfXN.values
matY = dfYN.values
Beispiel #9
0
def loadSeq(siteNo, varY, model, optX='F', optT='Y8090', order=(5, 0, 5)):
    """Return the predicted series for one site, training on a cache miss.

    Predictions are cached as a CSV named ``siteNo`` inside
    ``<kPath.dirWQ>/modelStat/<model>/<optX>-<optT>-<varY>[-<order>]``.  If
    that file exists it is read and passed through ``utils.time.datePdf``.
    Otherwise the inputs are read and normalised, the model is fitted on
    the rows selected by ``optT`` and applied to all rows, and the
    de-normalised prediction is written to the cache together with a
    ``<siteNo>_stat.json`` file holding the normalisation statistics.

    Args:
        siteNo: site identifier, also used as the cache file name.
        varY: name of the target variable.
        model: 'ARMA' or 'LR'.
        optX: 'F' uses ``gridMET.varLst`` as inputs; 'QF' prepends '00060'.
        optT: 'Y8090' fits on rows before 2000-01-01, 'Y0010' on rows from
            2000-01-01 onward.
        order: order triple passed to ``trainARMA``; it is also part of the
            ARMA cache folder name.

    Returns:
        DataFrame of predictions.

    Raises:
        ValueError: if ``model`` is invalid, or if ``optX`` or ``optT`` is
            invalid and no cached prediction exists.
    """
    if model == 'ARMA':
        dirAR = os.path.join(kPath.dirWQ, 'modelStat', 'ARMA')
        strOrder = '-'.join([str(k) for k in order])
        saveFolderName = '{}-{}-{}-{}'.format(optX, optT, varY, strOrder)
        saveFolder = os.path.join(dirAR, saveFolderName)
    elif model == 'LR':
        dirLR = os.path.join(kPath.dirWQ, 'modelStat', 'LR')
        saveFolderName = '{}-{}-{}'.format(optX, optT, varY)
        saveFolder = os.path.join(dirLR, saveFolderName)
    else:
        raise ValueError('model {} invalid!'.format(model))
    predFile = os.path.join(saveFolder, siteNo)

    # Cache hit: nothing to compute.
    if os.path.exists(predFile):
        dfP = pd.read_csv(predFile, index_col=None)
        return utils.time.datePdf(dfP)

    if optX == 'F':
        varX = gridMET.varLst
    elif optX == 'QF':
        varX = ['00060'] + gridMET.varLst
    else:
        raise ValueError('optX {} invalid!'.format(optX))
    if optT not in ('Y8090', 'Y0010'):
        raise ValueError('optT {} invalid!'.format(optT))

    # Create the cache folder only after the options are known to be valid,
    # so bad arguments do not leave empty folders behind.  makedirs also
    # creates missing parents and tolerates the folder already existing.
    os.makedirs(saveFolder, exist_ok=True)

    dfX = waterQuality.readSiteX(siteNo, varX)
    dfY = waterQuality.readSiteY(siteNo, [varY])
    # normalize
    mtdX = waterQuality.extractVarMtd(varX)
    normX, statX = transform.transInAll(dfX.values, mtdX)
    dfXN = pd.DataFrame(data=normX, index=dfX.index, columns=dfX.columns)
    mtdY = waterQuality.extractVarMtd([varY])
    normY, statY = transform.transInAll(dfY.values, mtdY)
    dfYN = pd.DataFrame(data=normY, index=dfY.index, columns=dfY.columns)

    tSplit = np.datetime64('2000-01-01')
    if optT == 'Y8090':
        dfXT = dfXN[dfXN.index < tSplit]
        dfYT = dfYN[dfYN.index < tSplit]
    else:
        dfXT = dfXN[dfXN.index >= tSplit]
        dfYT = dfYN[dfYN.index >= tSplit]

    # train and test (`model` was validated at the top of the function)
    if model == 'ARMA':
        # The second return value of trainARMA is not kept.
        dfPN, _ = trainARMA(dfXT, dfYT, dfXN, dfYN, order)
    else:
        dfPN = trainLR(dfXT, dfYT, dfXN, dfYN)
    yP = transform.transOut(dfPN.values, mtdY[0], statY[0])
    dfP = pd.DataFrame(data=yP, index=dfYN.index, columns=dfYN.columns)

    # save result and normalisation stat
    dfP.reset_index().to_csv(predFile, index=False)
    statFile = os.path.join(saveFolder, siteNo + '_stat.json')
    with open(statFile, 'w') as fp:
        json.dump(dict(statX=statX, statY=statY), fp, indent=4)
    return dfP
Beispiel #10
0
# Build / load the 'Silica64' data model, save the subset of rows that have
# a value for `code`, and plot five example sites.
code = '00955'

# silica num > 100 in both training and testing (named silica64)
_enoughBoth = (df1[code] > 100) & (df2[code] > 100)
siteNoLst = df0[_enoughBoth].index.tolist()
if not waterQuality.exist('Silica64'):
    wqData = waterQuality.DataModelWQ.new('Silica64', siteNoLst)
wqData = waterQuality.DataModelWQ('Silica64')
# Year-range indices over all rows (the matching saveSubset calls are
# disabled).
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
# wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
# wqData.saveSubset('Y0010', indYr2)

# subset only have silica
ic = wqData.varC.index(code)
indC = np.where(~np.isnan(wqData.c[:, ic]))[0]
wqData.saveSubset(code, indC)
# Same year ranges, restricted to the rows kept in the `code` subset.
indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
# wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
# wqData.saveSubset('{}-Y0010'.format(code), indYr2)

# One panel per site, using entries 5..9 of `ind`.
figP, axP = plt.subplots(5, 1, figsize=(8, 6))
for k in range(5):
    kk = k + 5
    siteNo = siteNoLstAll[ind[kk]]
    dfC = waterQuality.readSiteY(siteNo, [code])
    t = dfC.index.values.astype(np.datetime64)
    axplot.plotTS(axP[k], t, dfC[code], styLst='*')
    axP[k].set_title('{} #samples = {}'.format(siteNo, dfC.count().values[0]))
figP.show()
Beispiel #11
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm
from scipy import stats
import scipy

# Build a lagged design matrix for one site: `rho` lagged copies of the 'pr'
# column, five further forcing columns, and two columns left at one.
siteNo = '401733105392404'
# siteNo = '01364959'
codeLst = ['00915', '00955']

varX = gridMET.varLst
varY = ['00060']
dfX = waterQuality.readSiteX(siteNo, varX)
dfY = waterQuality.readSiteY(siteNo, varY)
dfC = waterQuality.readSiteY(siteNo, codeLst)

x = dfX['pr'].values
xA = dfX.values
y = dfY['00060'].values
nt = len(x)
rho = 365

# Row j of matX corresponds to time step rho + j of the input tables.
matX = np.ones([nt - rho, rho + 7])
for k in range(rho):
    # column k holds x shifted so that row j sees x[j + k]
    matX[:, k] = x[k:k + nt - rho]
for k in range(5):
    # columns 2..6 of the forcing table, taken at the row's own time step
    matX[:, rho + k] = xA[rho:, 2 + k]

matY = y[rho:]

# positions of the targets that are not NaN
indV = np.where(~np.isnan(matY))[0]
Beispiel #12
0
# Site list and sorted code list shared by the optional steps below.
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
codeLst = sorted(usgs.codeLst)

# Steps to run; uncomment an append to enable the corresponding step.
doLst = list()
# doLst.append('calCount')
# doLst.append('calCountCorr')

if 'calCount' in doLst:
    # calculate number of samples (all, B2000, A2000)
    df0 = pd.DataFrame(index=siteNoLstAll, columns=codeLst)
    df1 = pd.DataFrame(index=siteNoLstAll, columns=codeLst)
    df2 = pd.DataFrame(index=siteNoLstAll, columns=codeLst)
    tBar = np.datetime64('2000-01-01')
    for k, siteNo in enumerate(siteNoLstAll):
        print(k)
        dfC = waterQuality.readSiteY(siteNo, codeLst)
        df0.loc[siteNo] = dfC.count()
        df1.loc[siteNo] = dfC[dfC.index < tBar].count()
        df2.loc[siteNo] = dfC[dfC.index >= tBar].count()
    # The index is unnamed, so without index_label its CSV column gets an
    # empty header.  These files are read back with index_col='siteNo',
    # which needs a column of that name.
    df0.to_csv(os.path.join(dirInv, 'codeCount.csv'), index_label='siteNo')
    df1.to_csv(os.path.join(dirInv, 'codeCount_B2000.csv'),
               index_label='siteNo')
    df2.to_csv(os.path.join(dirInv, 'codeCount_A2000.csv'),
               index_label='siteNo')

# NOTE(review): this condition is identical to the one guarding the counting
# step, although the comment below describes a different step and doLst has a
# separate (commented-out) 'calCountCorr' entry - confirm which key is meant.
if 'calCount' in doLst:
    # find out two variables (hopefully one rock one bio) that are most related
    # Load the three count tables (all samples, before 2000, from 2000 on).
    # Each CSV must contain a column named 'siteNo'; it is read as text and
    # used as the index.
    df0 = pd.read_csv(os.path.join(dirInv, 'codeCount.csv'),
                    dtype={'siteNo': str}, index_col='siteNo')
    df1 = pd.read_csv(os.path.join(dirInv, 'codeCount_B2000.csv'),
                    dtype={'siteNo': str}, index_col='siteNo')
    df2 = pd.read_csv(os.path.join(dirInv, 'codeCount_A2000.csv'),
                    dtype={'siteNo': str}, index_col='siteNo')