コード例 #1
0
ファイル: WRTDS.py プロジェクト: fkwai/geolearn
def testWRTDS(dataName, trainSet, testSet, codeLst):
    """Fit site-by-site WRTDS regressions on the train subset, predict the test subset.

    For each (site, code) pair a locally weighted linear regression
    (statsmodels WLS) is refit around every test-time observation, using
    log-discharge, year, and annual sin/cos terms as predictors.

    Returns an array of shape [nTime(test), nSite, nCode]; entries stay NaN
    where no model could be fit (fewer than 40 training samples, or NaN
    predictors). NOTE(review): relies on module-level `sn` (log offset) and
    `calWeight` — defined elsewhere in this module.
    """
    DF = dbBasin.DataFrameBasin(dataName)
    varX = ['00060']  # streamflow is the only time-varying predictor input
    varY = codeLst
    d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varX=varX, varY=varY)
    d2 = dbBasin.DataModelBasin(DF, subset=testSet, varX=varX, varY=varY)

    def timeFeature(tAxis):
        # year, decimal year, and annual sin/cos terms for one time axis
        tt = pd.to_datetime(tAxis)
        yr = tt.year.values
        tDec = yr + tt.dayofyear.values / 365
        return yr, tDec, np.sin(2 * np.pi * tDec), np.cos(2 * np.pi * tDec)

    yr1, t1, sinT1, cosT1 = timeFeature(d1.t)
    yr2, t2, sinT2, cosT2 = timeFeature(d2.t)
    ###
    yOut = np.full([len(d2.t), len(d2.siteNoLst), len(varY)], np.nan)
    t0 = time.time()
    for indS, siteNo in enumerate(d2.siteNoLst):
        for indC, code in enumerate(varY):
            print('{} {} {} {}'.format(indS, siteNo, code, time.time() - t0))

            def logQ(dm):
                # clamp negative flow to zero, then log with small offset sn
                q = dm.X[:, indS, 0].copy()
                q[q < 0] = 0
                return np.log(q + sn)

            y1 = d1.Y[:, indS, indC].copy()
            logq1 = logQ(d1)
            x1 = np.stack([logq1, yr1, sinT1, cosT1]).T
            y2 = d2.Y[:, indS, indC].copy()
            logq2 = logQ(d2)
            x2 = np.stack([logq2, yr2, sinT2, cosT2]).T
            [xx1, yy1], ind1 = utils.rmNan([x1, y1])
            if testSet == 'all':
                # predict at every test time with valid predictors,
                # regardless of whether an observation exists there
                [xx2], ind2 = utils.rmNan([x2])
            else:
                [xx2, yy2], ind2 = utils.rmNan([x2, y2])
            if len(ind1) < 40:
                # too few training observations for a stable local fit
                continue
            for k in ind2:
                # distances from this test point to all training samples:
                # decimal-year gap, log-flow gap, and within-year (seasonal) gap
                dY = np.abs(t2[k] - t1[ind1])
                dQ = np.abs(logq2[k] - logq1[ind1])
                dS = np.min(
                    np.stack([abs(np.ceil(dY) - dY),
                              abs(dY - np.floor(dY))]),
                    axis=0)
                ww, ind = calWeight(np.stack([dY, dQ, dS]))
                fitted = sm.WLS(yy1[ind], xx1[ind], weights=ww).fit()
                yOut[k, indS, indC] = fitted.predict(x2[k, :])[0]
    return yOut
コード例 #2
0
ファイル: basinFull.py プロジェクト: fkwai/geolearn
def trainModel(outName):
    """Train the basin model configured by the master record *outName*.

    Loads the dataset and normalization, builds the model/loss from the
    master config, trains in chunks of `saveEpoch` epochs (saving model
    state after each chunk), and writes the per-epoch losses to
    ``<outFolder>/loss.csv``.
    """
    outFolder = nameFolder(outName)
    cfg = loadMaster(outName)

    # data: build the train-subset data model and apply transforms
    DF = dbBasin.DataFrameBasin(cfg['dataName'])
    varKeys = ('varX', 'varXC', 'varY', 'varYC')
    DM = dbBasin.DataModelBasin(
        DF, subset=cfg['trainSet'], **{k: cfg[k] for k in varKeys})
    if cfg['borrowStat'] is not None:
        # reuse normalization statistics from another run
        DM.loadStat(cfg['borrowStat'])
    DM.trans(mtdX=cfg['mtdX'], mtdXC=cfg['mtdXC'],
             mtdY=cfg['mtdY'], mtdYC=cfg['mtdYC'])
    DM.saveStat(outFolder)
    dataTup = trainBasin.dealNaN(DM.getData(), cfg['optNaN'])

    # loss and model, moved to GPU when available
    lossFun = getattr(crit, cfg['crit'])()
    model = defineModel(dataTup, cfg)
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if cfg['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizor function not specified')

    # start a fresh training log
    logFile = os.path.join(outFolder, 'log')
    if os.path.exists(logFile):
        os.remove(logFile)

    lossLst = []
    nEp = cfg['nEpoch']
    sEp = cfg['saveEpoch']
    # train sEp epochs at a time so intermediate states get checkpointed
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainBasin.trainModel(
            dataTup, model, lossFun, optim, batchSize=cfg['batchSize'],
            nEp=sEp, cEp=k, logFile=logFile,
            optBatch=cfg['optBatch'], nIterEp=cfg['nIterEp'])
        # save model
        saveModelState(outName, k + sEp, model, optim=optim)
        lossLst += lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
コード例 #3
0
ファイル: basinFull.py プロジェクト: fkwai/geolearn
def testModel(outName,  DF=None, testSet='all', ep=None, reTest=False, batchSize=20):
    """Run (or reload) test predictions for the model *outName*.

    Results are cached as ``testP-<testSet>-Ep<ep>.npz`` in the run folder;
    the cache is reused unless *reTest* is True. Returns the de-normalized
    prediction arrays ``(yP, ycP)``.
    """
    dictP = loadMaster(outName)
    if ep is None:
        ep = dictP['nEpoch']  # default to the final training epoch
    outFolder = nameFolder(outName)
    testFile = os.path.join(
        outFolder, 'testP-{}-Ep{}.npz'.format(testSet, ep))

    if reTest is False and os.path.exists(testFile):
        print('load saved test result')
        npz = np.load(testFile, allow_pickle=True)
        yP, ycP = npz['yP'], npz['ycP']
    else:
        # build the test data model (reusing a caller-supplied DF if given)
        if DF is None:
            DF = dbBasin.DataFrameBasin(dictP['dataName'])
        varKeys = ('varX', 'varXC', 'varY', 'varYC')
        DM = dbBasin.DataModelBasin(
            DF, subset=testSet, **{k: dictP[k] for k in varKeys})
        DM.loadStat(outFolder)
        dataTup = trainBasin.dealNaN(DM.getData(), dictP['optNaN'])

        # rebuild the architecture and restore the saved weights
        model = loadModelState(outName, ep, defineModel(dataTup, dictP))

        # run the model batch by batch and undo normalization
        x, xc = dataTup[0], dataTup[1]
        ny = np.shape(dataTup[2])[2]
        yOut, ycOut = trainBasin.testModel(
            model, x, xc, ny, batchSize=batchSize)
        yP = DM.transOutY(yOut)
        ycP = DM.transOutYC(ycOut)
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
コード例 #4
0
ファイル: test.py プロジェクト: fkwai/geolearn
from hydroDL.data import usgs, gageII, gridMET, ntn, GLASS
from hydroDL.master import slurm
from hydroDL.data import dbBasin
from hydroDL.master import basinFull

if __name__ == '__main__':
    # Re-run the saved epoch-200 model of each dataset on the full period,
    # forcing recomputation of any cached test result.
    dataNameLst = ['G200Norm', 'G400Norm']
    for dataName in dataNameLst:
        outName = dataName
        DF = dbBasin.DataFrameBasin(dataName)
        testSet = 'all'
        try:
            yP, ycP = basinFull.testModel(outName,
                                          DF=DF,
                                          testSet=testSet,
                                          ep=200,
                                          reTest=True)
            print('tested {}'.format(outName), flush=True)
        except Exception as e:
            # fix: was a bare `except:` which also swallowed SystemExit /
            # KeyboardInterrupt and hid the failure reason entirely;
            # catch Exception and report what went wrong
            print('skiped {} ({})'.format(outName, e), flush=True)
コード例 #5
0
ファイル: tsMap.py プロジェクト: fkwai/geolearn
import numpy as np
import matplotlib.pyplot as plt
from hydroDL.post import axplot, figplot
from hydroDL import kPath, utils
import json
import os
import importlib
from hydroDL.master import basinFull
from hydroDL.app.waterQuality import WRTDS
# fix: dbBasin (used for DataFrameBasin below) and usgs (usgs.newC) were
# referenced without being imported in this script
from hydroDL.data import dbBasin, usgs

dataName = 'G400Norm'
outName = dataName
trainSet = 'rmRT20'
testSet = 'pkRT20'

DF = dbBasin.DataFrameBasin(outName)
yP, ycP = basinFull.testModel(outName, DF=DF, testSet=testSet, ep=500)

# deal with mean and std: undo per-code normalization, y = yNorm * S + M,
# where the '<code>-M'/'<code>-S' columns of DF.g hold the site-wise stats
codeLst = usgs.newC
# fix: np.ndarray(shape) is the low-level ctor; np.empty is the intended
# idiom for an uninitialized array (every slice is filled in the loop)
yOut = np.empty(yP.shape)
for k, code in enumerate(codeLst):
    m = DF.g[:, DF.varG.index(code + '-M')]
    s = DF.g[:, DF.varG.index(code + '-S')]
    data = yP[:, :, k]
    yOut[:, :, k] = data * s + m

# WRTDS
# yW = WRTDS.testWRTDS(dataName, trainSet, testSet, codeLst)
dirRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-dbBasin')
fileName = '{}-{}-{}'.format(dataName, trainSet, 'all')
コード例 #6
0
import matplotlib.pyplot as plt
from hydroDL.post import axplot, figplot
from hydroDL import kPath, utils
import json
import os
import importlib
from hydroDL.master import basinFull
# fix: dbBasin and gageII are used below but were never imported here
from hydroDL.data import dbBasin, gageII
importlib.reload(utils)

# pick the site list for this experiment from the site-selection inventory
dirSel = os.path.join(kPath.dirData, 'USGS', 'inventory', 'siteSel')
dictSiteName = 'dictWeathering.json'
with open(os.path.join(dirSel, dictSiteName)) as f:
    dictSite = json.load(f)
siteNoLst = dictSite['k12']

# normalize: build and save a locally-normalized copy of 'weathering'
DF = dbBasin.DataFrameBasin('weathering')
codeSel = ['00915', '00925', '00930', '00935', '00940', '00945', '00955']
DF = dbBasin.func.localNorm(DF, subset='rmD5')
DF.saveAs('weatheringNorm')
dataName = 'weatheringNorm'

# variable lists and their normalization methods for the model setup
label = 'test'
varX = DF.varF + DF.varQ
mtdX = dbBasin.io.extractVarMtd(varX)
varY = [c + '-N' for c in codeSel]  # '-N' = locally-normalized targets
mtdY = dbBasin.io.extractVarMtd(varY)
varXC = gageII.varLst + [c + '-M'
                         for c in codeSel] + [c + '-S' for c in codeSel]
mtdXC = dbBasin.io.extractVarMtd(varXC)
varYC = None
mtdYC = dbBasin.io.extractVarMtd(varYC)
コード例 #7
0
ファイル: modelSave.py プロジェクト: fkwai/geolearn
import torch
from hydroDL.model import rnn, crit, trainBasin

# Convert a fully-pickled model checkpoint into a state_dict checkpoint,
# then rebuild the network architecture from the run's master config so
# the saved state can be reloaded.
# NOTE(review): `basinFull`, `os` and `dbBasin` are used below but not
# imported in the visible lines — presumably imported earlier in the
# original file; confirm before running this snippet standalone.
outName = 'weathering-FPR2QC-t365-B10'
ep = 100

# save: load the pickled model object and re-save only its parameters
outFolder = basinFull.nameFolder(outName)
modelFile = os.path.join(outFolder, 'model_ep{}'.format(ep))
model = torch.load(modelFile)
modelStateFile = os.path.join(outFolder, 'modelState_ep{}'.format(ep))
torch.save(model.state_dict(), modelStateFile)

# load: rebuild the data tuple and an architecture matching the config
dictP = basinFull.loadMaster(outName)
DF = dbBasin.DataFrameBasin(dictP['dataName'])
dictVar = {k: dictP[k] for k in ('varX', 'varXC', 'varY', 'varYC')}
DM = dbBasin.DataModelBasin(DF, subset='A10', **dictVar)
DM.loadStat(outFolder)
dataTup = DM.getData()
[nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
# choose the architecture by the configured model name
if dictP['modelName'] == 'CudnnLSTM':
    model = rnn.CudnnLstmModel(nx=nx + nxc,
                               ny=ny + nyc,
                               hiddenSize=dictP['hiddenSize'])
elif dictP['modelName'] == 'LstmModel':
    model = rnn.LstmModel(nx=nx + nxc,
                          ny=ny + nyc,
                          hiddenSize=dictP['hiddenSize'])
# NOTE(review): source is truncated here — the `else` branch body is missing
else:
コード例 #8
0
import matplotlib.pyplot as plt
# fix: everything below also needs these modules/objects, but only pyplot
# was imported in this snippet
import os
import numpy as np
import pandas as pd
import scipy.stats
from hydroDL import kPath
from hydroDL.data import usgs, dbBasin, gageII

# load all site counts
dirInv = os.path.join(kPath.dirData, 'USGS', 'inventory')
fileSiteNo = os.path.join(dirInv, 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()
codeLst = sorted(usgs.codeLst)
countD = np.load(os.path.join(dirInv, 'matCountDaily.npy'))

# keep sites with more than 200 daily samples of parameter code '00915'
code = '00915'
indC = codeLst.index(code)
count = np.sum(countD[:, :, indC], axis=1)
indSLst = np.where(count > 200)[0]
siteNoLst = [siteNoLstAll[ind] for ind in indSLst]

# DF = dbBasin.DataFrameBasin.new('00915G200', siteNoLst)
DF = dbBasin.DataFrameBasin('00915G200')
q = DF.q[:, :, 1]
c = DF.c[:, :, DF.varC.index(code)]

# per site: KS statistic between log-flow on all days vs. on sampled days,
# plus the sample count
ns = len(DF.siteNoLst)
# fix: np.ndarray([ns, 2]) is the low-level ctor; np.empty is the intended
# idiom for an uninitialized array (both columns are filled in the loop)
out = np.empty([ns, 2])
for indS in range(ns):
    q1 = np.log(q[:, indS]+1)
    ind = np.where(~np.isnan(c[:, indS]))[0]
    q2 = np.log(q[ind, indS]+1)
    s, p = scipy.stats.ks_2samp(q1, q2)
    out[indS, 0] = s
    out[indS, 1] = len(ind)

dfCrd = gageII.readData(
    varLst=['LAT_GAGE', 'LNG_GAGE'], siteNoLst=siteNoLst)
コード例 #9
0
import scipy
import time
import matplotlib.pyplot as plt
from hydroDL.post import axplot, figplot
from hydroDL.master import basins
from hydroDL.data import gageII, usgs, gridMET
from hydroDL import kPath, utils
import os
import pandas as pd
import numpy as np
from hydroDL import kPath
from hydroDL.data import dbBasin, usgs

# Create a DataFrame containing all C and Q data.

fileSiteNo = os.path.join(kPath.dirData, 'USGS', 'inventory', 'siteNoLst-1979')
siteNoLstAll = pd.read_csv(fileSiteNo, header=None, dtype=str)[0].tolist()

# varG = ['LAT_GAGE', 'LNG_GAGE', 'CLASS', 'DRAIN_SQKM']
# DF = dbBasin.DataFrameBasin.new(
#     'allCQ', siteNoLstAll, varF=['pr'], varQ=['00060'], varG=varG)

# load the previously built 'allCQ' dataset (the commented lines above
# show how it was originally created)
DF = dbBasin.DataFrameBasin('allCQ')
コード例 #10
0
ファイル: wrtds.py プロジェクト: fkwai/geolearn
import pandas as pd
from hydroDL.data import usgs, gageII, gridMET, ntn, GLASS, transform, dbBasin
import numpy as np
import matplotlib.pyplot as plt
from hydroDL.post import axplot, figplot
from hydroDL import kPath, utils
import json
import os
import importlib
from hydroDL.master import basinFull
import statsmodels.api as sm
import time

DF = dbBasin.DataFrameBasin('G400Norm')
trainSet = 'rmRT20'
testSet = 'all'

# WRTDS hyper-parameters
# NOTE(review): h is presumably the window half-widths for the
# [time, log-flow, season] distances and `the` a minimum-sample
# threshold, both consumed by code below this view — TODO confirm
h = [7, 2, 0.5]
the = 100
# small offset added to discharge before taking its log (avoids log(0))
sn = 1e-5

# Calculate WRTDS from train and test set
varX = ['00060', 'sinT', 'cosT', 'datenum']
varY = usgs.newC
d1 = dbBasin.DataModelBasin(DF, subset=trainSet, varX=varX, varY=varY)
d2 = dbBasin.DataModelBasin(DF, subset=testSet, varX=varX, varY=varY)
# decimal year (year + fractional day-of-year) for every timestamp
tt = pd.to_datetime(DF.t)
yr = tt.year.values
t = yr+tt.dayofyear.values/365
###
# output container [time, site, code]; stays NaN until filled downstream
yOut = np.full([len(d2.t), len(d2.siteNoLst), len(varY)], np.nan)