Example #1
0
from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins

import pandas as pd
import numpy as np
import os
import time

# Wrap two model configurations ('opt1' and 'opt2') for every subset of the
# 'Silica64Mess' data set and collect the returned case names in caseLst.
dataName = 'Silica64Mess'
subsetLst = ['Y8090', 'Y0010']
codeLst = ['00955']  # not referenced by the statements below
caseLst = []
for subset in subsetLst:
    # (name template, extra wrapMaster arguments) for each option:
    #   opt1 - only the arguments shared by both options
    #   opt2 - additionally varY=None and varX = usgs.varQ + gridMET.varLst
    optLst = [
        ('{}-{}-opt1', dict()),
        ('{}-{}-opt2', dict(varY=None, varX=usgs.varQ + gridMET.varLst)),
    ]
    for nameFmt, optArg in optLst:
        saveName = nameFmt.format(dataName, subset)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, batchSize=[None, 200],
            outName=saveName, **optArg)
        caseLst.append(caseName)
    # saveName = '{}-{}-opt3'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varY=None, outName=saveName)
    # caseLst.append(caseName)
    # saveName = '{}-{}-opt4'.format(dataName, subset)
Example #2
0
]

# ntn variables: wrap one case per code listed in varNtnUsgsLst
dataName = 'sbWT'
wqData = waterQuality.DataModelWQ(dataName)
label = 'ntnSq'
codeLst = varNtnUsgsLst
caseLst = []
for code in codeLst:
    # entry of varNtnLst found at the position of this code in varNtnUsgsLst
    ntnVar = varNtnLst[varNtnUsgsLst.index(code)]
    varX = ['00060'] + gridMET.varLst + [ntnVar, 'distNTN']
    varY = [code]
    varYC = None
    subsetLst = ['{}-Y{}'.format(code, x) for x in [1, 2]]
    # wrap up
    # for subset in subsetLst:
    # only the first entry of subsetLst ('<code>-Y1') is used as training set
    subset = subsetLst[0]
    saveName = '{}-{}-{}-{}'.format(dataName, code, label, subset)
    masterArg = dict(
        dataName=dataName, trainName=subset, batchSize=[None, 100],
        outName=saveName, varX=varX, varY=varY, varYC=varYC)
    caseName = basins.wrapMaster(**masterArg)
    caseLst.append(caseName)

# Command template; '{}' is filled with the case name passed after -M.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
# Submit one job per wrapped case through slurm.submitJobGPU.
for caseName in caseLst:
    cmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, cmd, nH=24)
Example #3
0
# wqData = waterQuality.DataModelWQ('Silica64')
# siteNoLst = wqData.siteNoLst
# if not waterQuality.exist('Silica64Seq'):
#     wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)
# importlib.reload(waterQuality2)
# wqData = waterQuality2.DataModelWQ('Silica64Seq')
# Load the 'Silica64' data model only to read its site list; in the visible
# code siteNoLst is referenced solely by the commented-out `new` call below.
temp = waterQuality.DataModelWQ('Silica64')
siteNoLst = temp.siteNoLst
# wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)

# Open the 'Silica64Seq' data model.
wqData = waterQuality2.DataModelWQ('Silica64Seq')

# subset that only contains samples with silica (code 00955)
code = '00955'
# position of the code within wqData.varQ, used to index the last axis of q
ic = wqData.varQ.index(code)
# indices whose value for this code is not NaN in the last slice along the
# first axis of wqData.q
indC = np.where(~np.isnan(wqData.q[-1,:, ic]))[0]
wqData.saveSubset(code, indC)
# NOTE(review): indYr receives only the info rows selected by indC; confirm
# that the indices it returns refer to the full wqData (not to positions
# within that selection) before they are handed to saveSubset.
indYr1 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[1979, 2000])[0]
wqData.saveSubset('{}-Y8090'.format(code), indYr1)
indYr2 = waterQuality.indYr(wqData.info.iloc[indC], yrLst=[2000, 2020])[0]
wqData.saveSubset('{}-Y0010'.format(code), indYr2)

# Wrap a case trained on the '00955-Y8090' subset saved above, with both
# '00060' and '00955' passed as varY and varYC disabled (None).
saveName = 'Silica64Seq-Y8090'
caseName = basins.wrapMaster(dataName='Silica64Seq', trainName='00955-Y8090',
                             batchSize=[None, 200], varY=['00060','00955'], varYC=None,
                             outName=saveName)


# Command template ('{}' is filled with the case name) and submission of the
# single wrapped case through slurm.submitJobGPU.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=6)
Example #4
0
# Store the two index sets returned by indYrOddEven as subsets 'Yodd' and
# 'Yeven' of wqData.
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
for subsetName, subsetInd in [('Yodd', indYrO), ('Yeven', indYrE)]:
    wqData.saveSubset(subsetName, subsetInd)

codeLst = ['00945', '00935']
# subsetLst = ['Yodd', 'Yeven']
subsetLst = ['Yodd']

# values passed to wrapMaster as varXC
varXC = [
    'DRAIN_SQKM', 'SNOW_PCT_PRECIP', 'STREAMS_KM_SQ_KM', 'PCT_1ST_ORDER',
    'BFI_AVE', 'CONTACT', 'FORESTNLCD06', 'HLR_BAS_DOM_100M',
    'ELEV_MEAN_M_BASIN', 'PERMAVE', 'WTDEPAVE', 'ROCKDEPAVE', 'SLOPE_PCT',
]
# varX2 is the base input list; varX1 extends it with four more variables
varX2 = ['00060', 'pr', 'sph', 'srad', 'tmmn', 'tmmx', 'pet', 'etr']
varX1 = varX2 + ['ph', 'Conduc', 'K', 'Cl']
nEp = 200

# For each subset wrap the '-ntn' case (varX1) first, then the plain case
# (varX2); everything else is shared between the two.
caseLst = []
for subset in subsetLst:
    for nameFmt, varXSel in [('{}-{}-ntn', varX1), ('{}-{}', varX2)]:
        saveName = nameFmt.format(dataName, subset)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, batchSize=[None, 50],
            nEpoch=nEp, outName=saveName, varXC=varXC, varX=varXSel,
            varYC=codeLst)
        caseLst.append(caseName)

# train every wrapped case in this process, in the order it was wrapped
for caseName in caseLst:
    basins.trainModelTS(caseName)
Example #5
0
from hydroDL.app import waterQuality
from hydroDL.master import slurm
from hydroDL.data import gageII, usgs, gridMET

# wqData = waterQuality.DataModelWQ('basinAll')
# ind1 = wqData.indByRatio(0.8)
# ind2 = wqData.indByRatio(0.8, first=False)
# wqData.saveSubset(['first80', 'last20'], [ind1, ind2])

# divide the 'basinAll' samples into the subsets 'Y8090' and 'Y0010'
wqData = waterQuality.DataModelWQ('basinAll')
indYr1 = waterQuality.indYr(wqData.info, yrLst=[1979, 2000])[0]
wqData.saveSubset('Y8090', indYr1)
indYr2 = waterQuality.indYr(wqData.info, yrLst=[2000, 2020])[0]
wqData.saveSubset('Y0010', indYr2)

# wrap one 'opt1' case per subset
subsetLst = ['Y8090', 'Y0010']
caseLst = []
for subset in subsetLst:
    saveName = 'basinAll-{}-opt1'.format(subset)
    masterArg = dict(
        dataName='basinAll', trainName=subset, saveEpoch=50,
        batchSize=[None, 2000], outName=saveName)
    caseName = basins.wrapMaster(**masterArg)
    caseLst.append(caseName)

# submit every wrapped case; '{}' in the template is the case name
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
for caseName in caseLst:
    cmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, cmd, nH=48, nM=64)
Example #6
0
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.master import basins
from hydroDL.data import usgs, gageII, gridMET, ntn, gageII
import numpy as np
from hydroDL.master import slurm
import importlib
from hydroDL.model import rnn, crit, trainTS

# Variable groups for the 'test' data set.
dataName = 'test'
varQ = ['00060']
varF = gridMET.varLst
varP = ntn.varLst
varT = ['sinT', 'cosT']  # defined here but not passed to wrapMaster below

# Model inputs / targets assembled from the groups above.
varX = varQ + varF + varP
varXC = gageII.varLst  # defined here but not passed to wrapMaster below
varY = varQ
varYC = usgs.newC

# Wrap the case with crit 'SigmaLoss' for 10 epochs (saveEpoch=10), then
# train it in this process.
trainSet = 'comb-A10'
saveName = 'test'
masterArg = dict(
    dataName=dataName, trainName=trainSet, batchSize=[None, 500],
    outName=saveName, varX=varX, varY=varY, varYC=varYC,
    crit='SigmaLoss', nEpoch=10, saveEpoch=10)
outName = basins.wrapMaster(**masterArg)

basins.trainModelTS(outName)
Example #7
0
# Load the data model and store the two index sets returned by indYrOddEven
# as subsets 'Yodd' and 'Yeven'.
wqData = waterQuality.DataModelWQ(dataName)
indYrO, indYrE = waterQuality.indYrOddEven(wqData.info)
wqData.saveSubset('Yodd', indYrO)
wqData.saveSubset('Yeven', indYrE)

codeLst = usgs.varC
subsetLst = ['Yodd', 'Yeven']
# varX1: gridMET variables only; varX2: gridMET + ntn variables + 'distNTN'
varX1 = gridMET.varLst
varX2 = gridMET.varLst + ntn.varLst + ['distNTN']
nEp = 500

# For each subset wrap the plain case (varX1) first, then the '-ntn' case
# (varX2); all other arguments are shared.
caseLst = []
for subset in subsetLst:
    for nameFmt, varXSel in [('{}-{}', varX1), ('{}-{}-ntn', varX2)]:
        saveName = nameFmt.format(dataName, subset)
        caseName = basins.wrapMaster(
            dataName=dataName, trainName=subset, batchSize=[None, 200],
            nEpoch=nEp, outName=saveName, varX=varXSel)
        caseLst.append(caseName)

# Command template; '{}' is filled with the case name passed after -M.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
# Submit one job per wrapped case through slurm.submitJobGPU.
for caseName in caseLst:
    cmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, cmd, nH=24)
Example #8
0
            elif label == 'P_C':
                varX = varP
                varY = None
            elif label == 'Q_C':
                varX = varQ
                varY = None
            elif label == 'QT_C':
                varX = varQ + varT
                varY = None
            elif label == 'QTFP_C':
                varX = varQ + varT + varF + varP
                varY = None
            trainSet = '{}-B10'.format(code)
            saveName = '{}-{}-{}-{}-hs{}'.format(dataName, code, label,
                                                 trainSet, hs)
            caseName = basins.wrapMaster(dataName=dataName,
                                         trainName=trainSet,
                                         batchSize=[None, 500],
                                         outName=saveName,
                                         varX=varX,
                                         varY=varY,
                                         varYC=varYC,
                                         hiddenSize=hs)
            caseLst.append(caseName)

# Command template; '{}' is filled with the case name passed after -M.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
# Submit one job per entry of caseLst through slurm.submitJobGPU.
for caseName in caseLst:
    cmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, cmd, nH=24)

# basins.trainModelTS(caseName)
Example #9
0
import pandas as pd
import numpy as np
import os
import time

# Wrap one 'opt1' case for every (subset, hidden size) combination of the
# 'Silica64' data set; case names are collected in caseLst.
dataName = 'Silica64'
subsetLst = ['00955-Y8090', '00955-Y0010']
codeLst = ['00955']  # not referenced by the statements below
hsLst = [256, 128, 64, 32]
caseLst = []
for subset in subsetLst:
    for hiddenSize in hsLst:
        saveName = '{}-{}-h{}-opt1'.format(dataName, subset, hiddenSize)
        masterArg = dict(
            dataName=dataName, trainName=subset, hiddenSize=hiddenSize,
            batchSize=[None, 200], outName=saveName)
        caseName = basins.wrapMaster(**masterArg)
        caseLst.append(caseName)
        # saveName = '{}-{}-opt2'.format(dataName, subset)
        # caseName = basins.wrapMaster(dataName=dataName, trainName=subset, hiddenSize=hiddenSize,
        #                              batchSize=[None, 200], varY=None,
        #                              varX=usgs.varQ+gridMET.varLst, outName=saveName)
        # caseLst.append(caseName)
    # saveName = '{}-{}-opt3'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varY=None, outName=saveName)
    # caseLst.append(caseName)
    # saveName = '{}-{}-opt4'.format(dataName, subset)
    # caseName = basins.wrapMaster(dataName=dataName, trainName=subset,
    #                              batchSize=[None, 200], varYC=None, outName=saveName)
Example #10
0
from hydroDL.master import slurm
from hydroDL import kPath
from hydroDL.app import waterQuality
from hydroDL.data import gageII, usgs, gridMET
from hydroDL.master import basins

# Wrap a single 'AgeLSTM' case (hiddenSize 512, crit 'RmseLoss2D') on the
# '00955-Y8090' subset of 'Silica64' and train it in this process.
dataName = 'Silica64'
subset = '00955-Y8090'
saveName = '{}-{}-AgeLSTM'.format(dataName, subset)
masterArg = dict(
    dataName=dataName, trainName=subset, hiddenSize=512,
    batchSize=[None, 200], outName=saveName,
    modelName='AgeLSTM', crit='RmseLoss2D')
caseName = basins.wrapMaster(**masterArg)
basins.trainModelTS(caseName)
Example #11
0
#             'eco0902-F50', 'nutr06-F50', 'nutr08-F50']
# for trainName in trainLst:
#     caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[
#                                  None, 1000], outName='basinRef-{}-opt1'.format(trainName))
#     slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4)
# for trainName in trainLst:
#     caseName = basins.wrapMaster(dataName='basinRef', trainName=trainName, batchSize=[
#                                  None, 1000], outName='basinRef-{}-opt2'.format(trainName),
#                                  varX=usgs.varQ+gridMET.varLst, varY=None)
#     slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=4)

trainLst = ['pQ-F50', 'pQ-rmY10', 'pQ-rmY80']
for train in trainLst:
    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
                                 outName='basinRef-rq-{}'.format(train),
                                 varX=usgs.varQ + ['runoff'] + gridMET.varLst,
                                 varY=None)
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)

    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
                                 outName='basinRef-r-{}'.format(train),
                                 varX=usgs.varQ + ['runoff'] + gridMET.varLst,
                                 varY=None)
    slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=24)

    caseName = basins.wrapMaster(dataName='basinRef',
                                 trainName=train,
                                 batchSize=[None, 1000],
Example #12
0
                                                                  2000])[0]
        wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y8090']), indYr1)
        indYr2 = waterQuality.indYr(wqData.info.iloc[ind], yrLst=[2000,
                                                                  2020])[0]
        wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y0010']), indYr2)

if 'training' in doLst:
    # Wrap and train one case on 'HBN5' with codes 00618 and 00955 passed as
    # varYC, using the '...-Y8090' subset as training set.
    dataName = 'HBN5'
    codeLst = ['00618', '00955']
    trainset = '00618-00955-all-Y8090'
    testset = '00618-00955-all-Y0010'  # not used inside this branch
    out = 'HBN5-00618-00955-all-Y8090'
    wqData = waterQuality.DataModelWQ(dataName)
    masterArg = dict(
        dataName=dataName, trainName=trainset, batchSize=[None, 100],
        outName=out, varYC=codeLst, nEpoch=100)
    masterName = basins.wrapMaster(**masterArg)
    basins.trainModelTS(masterName)

# sequence testing
# NOTE(review): this section uses data set 'HBN' and a case name ending in
# '-opt2', whereas the training branch above wraps 'HBN5' without that
# suffix - confirm the intended model is the one being tested.
dataName = 'HBN'
trainset = '00618-00955-all-Y8090'
testset = '00618-00955-all-Y0010'
outName = 'HBN-00618-00955-all-Y8090-opt2'

wqData = waterQuality.DataModelWQ(dataName)

# point testing: run the case on the test subset, reusing the loaded wqData
testOut = basins.testModel(outName, testset, wqData=wqData)
yP, ycP = testOut
Example #13
0
            varY = None
        elif label == 'P_C':
            varX = varP
            varY = None
        elif label == 'Q_C':
            varX = varQ
            varY = None
        elif label == 'QT_C':
            varX = varQ + varT
            varY = None
        elif label == 'QTFP_C':
            varX = varQ + varT + varF + varP
            varY = None
        trainSet = '{}-B10'.format(code)
        saveName = '{}-{}-{}-{}-neck'.format(dataName, code, label, trainSet)
        caseName = basins.wrapMaster(dataName=dataName,
                                     trainName=trainSet,
                                     batchSize=[None, 500],
                                     outName=saveName,
                                     varX=varX,
                                     varY=varY,
                                     varYC=varYC,
                                     modelName='LstmModel')
        caseLst.append(caseName)

# Command template; '{}' is filled with the case name passed after -M.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
# Submit one job per entry of caseLst through slurm.submitJobGPU.
for caseName in caseLst:
    cmd = cmdP.format(caseName)
    slurm.submitJobGPU(caseName, cmd, nH=24)

# basins.trainModelTS(caseName)